I have written the following code:
import os
import time
from dotenv import load_dotenv
from google import genai
from google.genai import types
from prompts import *
from schemas import GroupBenefit
def extract_parameters(
    client,
    model,
    contents,
    config,
    max_retries=3,
    base_delay=2.0,
):
    """Call the GenAI API, retrying with exponential backoff on overload errors.

    The free-tier Gemini endpoints frequently return transient
    ``503 UNAVAILABLE`` ("The model is overloaded") responses; retrying the
    same request after a short, growing delay usually succeeds.

    Args:
        client: A ``genai.Client`` instance.
        model: Name of the model to query (e.g. ``gemini-2.5-pro``).
        contents: Request contents (prompt text plus any uploaded files).
        config: A ``types.GenerateContentConfig`` for the request.
        max_retries: Maximum number of attempts before giving up.
        base_delay: Initial backoff delay in seconds; doubles each retry.

    Returns:
        The raw ``generate_content`` response object.

    Raises:
        Exception: Re-raises immediately for non-transient errors, or the
            last transient error once all attempts are exhausted.
    """
    last_exc = None
    for attempt in range(1, max_retries + 1):
        print(f"\nAttempting extraction using model: {model} "
              f"(attempt {attempt}/{max_retries})")
        try:
            return client.models.generate_content(
                model=model,
                contents=contents,
                config=config,
            )
        except Exception as e:
            msg = str(e).lower()
            # Only retry on transient overload/availability errors; anything
            # else (auth, bad request, quota) should surface immediately.
            if not any(sub in msg for sub in ("503", "unavailable", "overloaded")):
                raise
            last_exc = e
            if attempt < max_retries:
                delay = base_delay * (2 ** (attempt - 1))
                print(f"Model overloaded; retrying in {delay:.1f}s...")
                time.sleep(delay)
    raise last_exc
def print_result(model: str, response: str, start_time: float) -> None:
    """Print the extraction result and the total elapsed wall-clock time.

    Args:
        model: Name of the model that produced the response.
        response: Serialized extraction result (a JSON string — callers pass
            ``response.parsed.model_dump_json(...)``).
        start_time: ``time.time()`` timestamp captured at program start.
    """
    print(f"\nExtraction successful using model: {model}!")
    print(response)
    # Round to 2 decimals so the duration reads cleanly.
    print(f"\nTotal time taken: {round(time.time() - start_time, 2)} seconds")
if __name__ == "__main__":
    start_time = time.time()
    load_dotenv()

    # Model names and sampling temperature are supplied via the environment
    # (.env file), e.g. MAIN_MODEL=gemini-2.5-pro, MINI_MODEL=gemini-2.5-flash.
    MAIN_MODEL = os.getenv('MAIN_MODEL')
    MINI_MODEL = os.getenv('MINI_MODEL')
    TEMPERATURE = float(os.getenv('TEMPERATURE'))

    client = genai.Client()
    input_file_path = "OutputFiles/Renewal 2025_extracted.txt"

    try:
        # Upload the pre-extracted text so it can be referenced in the prompt.
        file = client.files.upload(
            file=input_file_path,
            config={"mime_type": "text/plain"}
        )
        contents = [
            get_gb_prompt_for_excel(naic="23"),
            file
        ]
        config = types.GenerateContentConfig(
            system_instruction=get_system_prompt(),
            temperature=TEMPERATURE,
            response_mime_type="application/json",
            response_schema=GroupBenefit
        )
        model = MAIN_MODEL
        response = extract_parameters(
            client=client,
            model=model,
            contents=contents,
            config=config
        )
        if hasattr(response, "parsed") and response.parsed:
            print_result(model, response.parsed.model_dump_json(indent=2), start_time)
    except Exception as e:
        err = str(e).lower()
        # Fall back to the smaller model only for transient overload errors;
        # any other failure is re-raised unchanged.
        if any(sub in err for sub in ["503", "unavailable", "overloaded"]):
            print(f"Model ({model}) overloaded. Retrying with another model...")
            try:
                model = MINI_MODEL
                response = extract_parameters(
                    client=client,
                    model=model,
                    contents=contents,
                    config=config
                )
                if hasattr(response, "parsed") and response.parsed:
                    print_result(model, response.parsed.model_dump_json(indent=2), start_time)
            except Exception as e:
                print("\nExtraction failed after all retries!")
                print(str(e))
                raise
        else:
            print(err)
            raise
====================================================================
The main and mini models are as follows:
MAIN_MODEL=gemini-2.5-pro
MINI_MODEL=gemini-2.5-flash
Whenever I run the code, I almost always get the following error for "gemini-2.5-pro", and sometimes for "gemini-2.5-flash":
503 UNAVAILABLE. {"error": {"code": 503, "message": "The model is overloaded. Please try again later.", "status": "UNAVAILABLE"}}
I am using the free tier of the API.
Can you please help me resolve this issue as soon as possible?
