Create a Cache for Gemini model (Internal server error)

Hi,
I’m trying to use the caching feature in my Go backend, but I keep getting internal server errors. I would appreciate a second pair of eyes on whether I’m using it correctly.

I am saving text files in Google Cloud Storage and then passing their URIs to create a cache. In my project I have created a service account with the following role bindings:

role: roles/aiplatform.serviceAgent
role: roles/aiplatform.user
role: roles/storage.objectAdmin

I have created the buckets and model in US-CENTRAL1.

Here is the code for my functions:

Golang:

// createCachedContent creates a cached-content entry for gemini-1.5-pro-001
// through the genai client. The cache holds a single content with one
// text/plain FileData part per GCS URI, in the order given.
//
// gcsURIs must be non-empty; an empty slice is rejected before any request is
// made. expirationTTL is passed through as the cache TTL.
//
// It returns the CachedContent reported by the client. On failure the client
// error is wrapped with %w, so callers can still inspect it with errors.Is /
// errors.As.
func (s *CacheService) createCachedContent(ctx context.Context, gcsURIs []string, expirationTTL time.Duration) (*genai.CachedContent, error) {
	log.Printf("Creating cached content with %d GCS URIs", len(gcsURIs))

	// A content with zero parts carries nothing to cache; fail early with a
	// clear message instead of sending a request that cannot succeed.
	if len(gcsURIs) == 0 {
		return nil, fmt.Errorf("creating cached content: no GCS URIs provided")
	}

	modelName := fmt.Sprintf("projects/%s/locations/%s/publishers/google/models/gemini-1.5-pro-001", s.projectID, s.location)

	parts := make([]genai.Part, 0, len(gcsURIs))
	for _, uri := range gcsURIs {
		log.Printf("Adding GCS URI to content: %s", uri)
		parts = append(parts, genai.FileData{
			MIMEType: "text/plain",
			FileURI:  uri,
		})
	}

	cc := &genai.CachedContent{
		Model: modelName,
		Expiration: genai.ExpireTimeOrTTL{
			TTL: expirationTTL,
		},
		// The role is set explicitly so this request matches the one built by
		// CreateCachedContentREST, which sends "role": "user".
		Contents: []*genai.Content{{Role: "user", Parts: parts}},
	}

	log.Println("Calling Vertex AI to create cached content")
	cachedContent, err := s.genaiClient.CreateCachedContent(ctx, cc)
	if err != nil {
		log.Printf("Error from Vertex AI while creating cached content: %v", err)
		return nil, fmt.Errorf("Vertex AI error: %w", err)
	}

	log.Printf("Cached content created successfully with name: %s", cachedContent.Name)
	return cachedContent, nil
}

And here is the code for the function that uses the REST API:

// CreateCachedContentREST creates a cached-content entry for
// gemini-1.5-pro-001 by POSTing to the v1beta1 cachedContents REST endpoint of
// the service's project and location.
//
// gcsURIs must be non-empty; each URI becomes one text/plain fileData part of
// a single "user" content. cacheExpirationTTL is truncated to whole seconds
// and sent as the cache TTL.
//
// It returns the cached content decoded from the response body. It returns an
// error if the access token cannot be obtained, the request cannot be built or
// sent, the response status is not 200, or the response body cannot be
// decoded. Underlying errors are wrapped with %w.
func (s *CacheService) CreateCachedContentREST(ctx context.Context, gcsURIs []string, cacheExpirationTTL time.Duration) (*genai.CachedContent, error) {
	log.Printf("Creating cached content using REST API with %d GCS URIs", len(gcsURIs))

	// A content with zero parts carries nothing to cache; fail early with a
	// clear message instead of sending a request that cannot succeed.
	if len(gcsURIs) == 0 {
		return nil, fmt.Errorf("creating cached content: no GCS URIs provided")
	}

	accessToken, err := s.getAccessToken(ctx)
	if err != nil {
		return nil, fmt.Errorf("failed to get access token: %w", err)
	}

	model := fmt.Sprintf("projects/%s/locations/%s/publishers/google/models/gemini-1.5-pro-001", s.projectID, s.location)
	jsonBody, err := json.Marshal(cachedContentRequestBody(model, gcsURIs, cacheExpirationTTL))
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request body: %w", err)
	}

	url := fmt.Sprintf("https://%s-aiplatform.googleapis.com/v1beta1/projects/%s/locations/%s/cachedContents", s.location, s.projectID, s.location)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}

	req.Header.Set("Authorization", "Bearer "+accessToken)
	req.Header.Set("Content-Type", "application/json")

	// Use the shared default client so connections are reused across calls;
	// cancellation and deadlines come from ctx via the request.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to send request: %w", err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read response body: %w", err)
	}

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
	}

	cachedContent, err := parseCachedContentResponse(body)
	if err != nil {
		return nil, err
	}

	log.Printf("Cached content created successfully using REST API with name: %s", cachedContent.Name)
	return cachedContent, nil
}

// cachedContentRequestBody builds the JSON-serializable request body for a
// cachedContents create call: the model name, one "user" content with a
// text/plain fileData part per GCS URI, and the TTL.
func cachedContentRequestBody(model string, gcsURIs []string, ttl time.Duration) map[string]interface{} {
	parts := make([]map[string]interface{}, 0, len(gcsURIs))
	for _, uri := range gcsURIs {
		parts = append(parts, map[string]interface{}{
			"fileData": map[string]string{
				"mimeType": "text/plain",
				"fileUri":  uri,
			},
		})
	}

	return map[string]interface{}{
		"model": model,
		"contents": []map[string]interface{}{
			{
				"role":  "user",
				"parts": parts,
			},
		},
		// The proto3 JSON encoding of google.protobuf.Duration is a string of
		// seconds with an "s" suffix (e.g. "3600s"), not a {seconds, nanos}
		// object.
		"ttl": fmt.Sprintf("%ds", int64(ttl/time.Second)),
	}
}

// parseCachedContentResponse decodes a cachedContents JSON response body into
// a genai.CachedContent. Missing optional fields are left at their zero
// values and fields of the wrong JSON type produce an error rather than a
// panic. Parts that carry neither fileData nor text are skipped.
func parseCachedContentResponse(body []byte) (*genai.CachedContent, error) {
	var result struct {
		Name       string `json:"name"`
		Model      string `json:"model"`
		ExpireTime string `json:"expireTime"`
		Contents   []struct {
			Role  string `json:"role"`
			Parts []struct {
				// Pointers distinguish "field absent" from "present but empty".
				Text     *string `json:"text"`
				FileData *struct {
					MIMEType string `json:"mimeType"`
					FileURI  string `json:"fileUri"`
				} `json:"fileData"`
			} `json:"parts"`
		} `json:"contents"`
	}
	if err := json.Unmarshal(body, &result); err != nil {
		return nil, fmt.Errorf("failed to unmarshal response: %w", err)
	}
	if result.Name == "" {
		return nil, fmt.Errorf("response is missing the cached content name: %s", string(body))
	}

	cachedContent := &genai.CachedContent{
		Name:  result.Name,
		Model: result.Model,
	}
	// parseExpireTime is defined elsewhere in the package; it is only called
	// when the response actually carries an expireTime value.
	if result.ExpireTime != "" {
		cachedContent.Expiration = genai.ExpireTimeOrTTL{
			ExpireTime: parseExpireTime(result.ExpireTime),
		}
	}

	for _, c := range result.Contents {
		content := &genai.Content{Role: c.Role}
		for _, p := range c.Parts {
			switch {
			case p.FileData != nil:
				content.Parts = append(content.Parts, genai.FileData{
					MIMEType: p.FileData.MIMEType,
					FileURI:  p.FileData.FileURI,
				})
			case p.Text != nil:
				content.Parts = append(content.Parts, genai.Text(*p.Text))
			}
		}
		cachedContent.Contents = append(cachedContent.Contents, content)
	}

	return cachedContent, nil
}

When I use the REST function I get this error:

2024/07/19 14:56:19 Error creating cached content: API request failed with status 500: {
  "error": {
    "code": 500,
    "message": "Internal error encountered.",
    "status": "INTERNAL"
  }
}

And when using the go SDK functions:
2024/07/19 15:05:56 Calling Vertex AI to create cached content
2024/07/19 15:05:57 Error from Vertex AI while creating cached content: rpc error: code = Internal desc = Internal error encountered.
2024/07/19 15:05:57 Error creating cached content: Vertex AI error: rpc error: code = Internal desc = Internal error encountered.

Welcome to the forums!

I’m trying to actually find a reference for this, but I’m not sure that “text/plain” is a supported file type. It certainly would explain why you’re getting a 500 error.

Is there a reason you don’t just include the text from these files directly?

In the Go SDK example test, they use a JSON file from Google Cloud Storage with the MIME type text/plain. google-cloud-go/vertexai/genai/caching_test.go at vertexai/v0.12.0 · googleapis/google-cloud-go · GitHub

I was initially mindful of the 10 MB limit for a single cached item, but I guess the text files extracted from the PDFs I’m processing (research papers on arXiv) would almost never hit that limit.

I’m gonna try to just save the texts directly. Thanks for the suggestion!

Update: I’m still getting a 500 error even just with text blobs.

To solve this issue I used the genai client from “github.com/google/generative-ai-go/genai”, not the genai client from the vertex AI package. Maybe I had not set up the vertex client properly.