Hi,
I am using the Batch API to generate embeddings, but I am getting 429 RESOURCE_EXHAUSTED even on a single request. My code is similar to the Batch API cookbook that is mentioned in the Gemini embedding documentation.
My file is not too big:
Size: 3.4 MB
Number of lines (jsonlines): 1937
Maximum token count: 706226 (I used the count_tokens API on the entire jsonl file, not just the text to be embedded)
Here are some logs:
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=xxxxxxxxx&upload_protocol=resumable "HTTP/1.1 200 OK"
/tmp/ipykernel_133/2660981119.py:56: ExperimentalWarning: batches.create_embeddings() is experimental and may change without notice.
INFO:httpx:HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:asyncBatchEmbedContent "HTTP/1.1 429 Too Many Requests"
---------------------------------------------------------------------------
ClientError Traceback (most recent call last)
Cell In[99], line 56
48 uploaded_file = client.files.upload(
49 file=buffer,
50 config=types.UploadFileConfig(
(...)
53 )
54 )
55 print(uploaded_file.name)
---> 56 batch_job = client.batches.create_embeddings(
57 model="gemini-embedding-001",
58 src=types.EmbeddingsBatchJobSource(file_name=uploaded_file.name)
59 )
60 print(batch_job)
61 batch_jobs.append(batch_job)
File /opt/conda/lib/python3.11/site-packages/google/genai/batches.py:2667, in Batches.create_embeddings(self, model, src, config)
2665 raise ValueError('Vertex AI does not support batches.create_embeddings.')
2666 elif src.inlined_requests is None:
-> 2667 return self._create_embeddings(model=model, src=src, config=config)
2669 path, request_dict = _create_inlined_embedding_request_dict(
2670 self._api_client, parameter_model
2671 )
2673 response = self._api_client.request(
2674 'post', path, request_dict, http_options
2675 )
File /opt/conda/lib/python3.11/site-packages/google/genai/batches.py:2235, in Batches._create_embeddings(self, model, src, config)
2232 request_dict = _common.convert_to_dict(request_dict)
2233 request_dict = _common.encode_unserializable_types(request_dict)
-> 2235 response = self._api_client.request(
2236 'post', path, request_dict, http_options
2237 )
2239 response_dict = '' if not response.body else json.loads(response.body)
2241 if not self._api_client.vertexai:
File /opt/conda/lib/python3.11/site-packages/google/genai/_api_client.py:1290, in BaseApiClient.request(self, http_method, path, request_dict, http_options)
1280 def request(
1281 self,
1282 http_method: str,
(...)
1285 http_options: Optional[HttpOptionsOrDict] = None,
1286 ) -> SdkHttpResponse:
1287 http_request = self._build_request(
1288 http_method, path, request_dict, http_options
1289 )
-> 1290 response = self._request(http_request, http_options, stream=False)
1291 response_body = (
1292 response.response_stream[0] if response.response_stream else ''
1293 )
1294 return SdkHttpResponse(headers=response.headers, body=response_body)
File /opt/conda/lib/python3.11/site-packages/google/genai/_api_client.py:1126, in BaseApiClient._request(self, http_request, http_options, stream)
1123 retry = tenacity.Retrying(**retry_kwargs)
1124 return retry(self._request_once, http_request, stream) # type: ignore[no-any-return]
-> 1126 return self._retry(self._request_once, http_request, stream)
File /opt/conda/lib/python3.11/site-packages/tenacity/__init__.py:477, in Retrying.__call__(self, fn, *args, **kwargs)
475 retry_state = RetryCallState(retry_object=self, fn=fn, args=args, kwargs=kwargs)
476 while True:
--> 477 do = self.iter(retry_state=retry_state)
478 if isinstance(do, DoAttempt):
479 try:
File /opt/conda/lib/python3.11/site-packages/tenacity/__init__.py:378, in BaseRetrying.iter(self, retry_state)
376 result = None
377 for action in self.iter_state.actions:
--> 378 result = action(retry_state)
379 return result
File /opt/conda/lib/python3.11/site-packages/tenacity/__init__.py:420, in BaseRetrying._post_stop_check_actions.<locals>.exc_check(rs)
418 retry_exc = self.retry_error_cls(fut)
419 if self.reraise:
--> 420 raise retry_exc.reraise()
421 raise retry_exc from fut.exception()
File /opt/conda/lib/python3.11/site-packages/tenacity/__init__.py:187, in RetryError.reraise(self)
185 def reraise(self) -> t.NoReturn:
186 if self.last_attempt.failed:
--> 187 raise self.last_attempt.result()
188 raise self
File /opt/conda/lib/python3.11/concurrent/futures/_base.py:449, in Future.result(self, timeout)
447 raise CancelledError()
448 elif self._state == FINISHED:
--> 449 return self.__get_result()
451 self._condition.wait(timeout)
453 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
File /opt/conda/lib/python3.11/concurrent/futures/_base.py:401, in Future.__get_result(self)
399 if self._exception:
400 try:
--> 401 raise self._exception
402 finally:
403 # Break a reference cycle with the exception in self._exception
404 self = None
File /opt/conda/lib/python3.11/site-packages/tenacity/__init__.py:480, in Retrying.__call__(self, fn, *args, **kwargs)
478 if isinstance(do, DoAttempt):
479 try:
--> 480 result = fn(*args, **kwargs)
481 except BaseException: # noqa: B902
482 retry_state.set_exception(sys.exc_info()) # type: ignore[arg-type]
File /opt/conda/lib/python3.11/site-packages/google/genai/_api_client.py:1103, in BaseApiClient._request_once(self, http_request, stream)
1095 else:
1096 response = self._httpx_client.request(
1097 method=http_request.method,
1098 url=http_request.url,
(...)
1101 timeout=http_request.timeout,
1102 )
-> 1103 errors.APIError.raise_for_response(response)
1104 return HttpResponse(
1105 response.headers, response if stream else [response.text]
1106 )
File /opt/conda/lib/python3.11/site-packages/google/genai/errors.py:108, in APIError.raise_for_response(cls, response)
106 status_code = response.status_code
107 if 400 <= status_code < 500:
--> 108 raise ClientError(status_code, response_json, response)
109 elif 500 <= status_code < 600:
110 raise ServerError(status_code, response_json, response)
ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}]}}
I have checked multiple times, and I am confident that I am using the right API key and that we are in Tier 1. I haven’t used the API for anything else, and the fact that it says 429 Too Many Requests is also weird, as it happens on the very first request.
Any help or pointers would be appreciated. We are trying to adopt the gemini-embedding-001 model in our pipeline, but being unable to use the Batch API, combined with the misleading rate-limit errors, is a deal breaker for us.