In my use case, I need the Gemini File API to read three files three times, twice using Gemini 2.5-Flash and once using Gemini 2.5-Pro. Because I want to compare the results.
One of the files is an .mp4, which takes longer to become ACTIVE, so I wrote a function called wait_for_files_activate to ensure all files are active before running the model.
During main, the first run (at 01:50:25 — section 5, Flash, round 1) worked perfectly. However, the second run failed with an error saying the file was still processing, even though I hadn’t deleted any of them. After retrying, the third run succeeded.
It’s really strange. How could that happen? I couldn’t find any documentation explaining this situation.
async def wait_for_files_activate(client, file_objs, timeout: int = 300):
if not isinstance(file_objs, list):
file_objs = [file_objs]
start = time.perf_counter()
remaining = {f.name: f for f in file_objs}
while remaining:
for name in list(remaining.keys()):
try:
refreshed = client.files.get(name=name)
logging.info(f"{refreshed.name=}, {refreshed.state.name=}")
except Exception as e:
logging.warning(f"wait_for_files_activate: failed to get state for {name}: {e}")
continue
if refreshed.state.name == "ACTIVE":
remaining.pop(name)
logging.info(f"File {name} is now ACTIVE.")
if not remaining:
return
elapsed = time.perf_counter() - start
if elapsed > timeout:
raise TimeoutError(f"file does not ACTIVE in {timeout} seconds: {list(remaining.keys())}")
await asyncio.sleep(5)
my main function
async def process_one_chunk(i, keywords_file):
response = None
txt_file_path = os.path.join(SLICED_INPUT_DIR, f"{i:02d}_json.txt")
mp4_file_path = os.path.join(SLICED_INPUT_DIR, f"{i:02d}.mp4")
keywords_file = await asyncio.to_thread(client.files.upload, file="keywords.txt")
transcript_file = await asyncio.to_thread(client.files.upload, file=txt_file_path)
mp4_file = await asyncio.to_thread(client.files.upload, file=mp4_file_path)
await wait_for_files_activate(client, [transcript_file, mp4_file, keywords_file])
try:
for model_version, count in run_plan.items():
for j in range(1, count + 1):
for attempt in range(MAX_RETRY):
try:
response = await client.aio.models.generate_content(
model=f"gemini-2.5-{model_version}",
contents=[f"""
You have the following three files:
{mp4_file} (a conference video from the gaming industry, including audio and video)
{transcript_file} (a transcript in JSON format, where each entry contains "timestamp", "speaker", and "text")
{keywords_file} (a compiled list of industry-specific terminology for the gaming industry)
""", transcript_file, mp4_file, keywords_file]
)
text = response.text
if isinstance(text, str):
cleaned = re.sub(r"^```json\s*|\s*```$", "", text.strip(), flags=re.DOTALL)
output_text = re.sub(r"[《》【】「」]", "", cleaned)
output_text = json.loads(output_text)
else:
output_text = text
logging.info(f"finish {i} section of {model_version:5s} at the {j} part")
break
except Exception as e:
logging.warning(f" {i} section of {model_version:5s} at the {j} part {attempt+1} retry fail: {e}")
if response is not None:
logging.error(f"Response text: {response.text[:500]}...")
if attempt == MAX_RETRY - 1:
logging.error(f"Achive max retry. Skip {i} section of {model_version:5s} at the {j} part ")
break
else:
logging.info("sleep for 60 second")
await asyncio.sleep(60)
except Exception as e:
logging.error(f"{i} section of {model_version:5s} at the {j} part error: {e}")
return False
and my log show
2025-11-07 01:49:48,504 - INFO - refreshed.name='files/y165sefh039r', refreshed.state.name= 'ACTIVE'
2025-11-07 01:49:48,504 - INFO - File files/y165sefh039r is now ACTIVE.
2025-11-07 01:50:25,878 - INFO - finish 5 section of flash at the 1 part
2025-11-07 01:50:45,342 - WARNING - 5 section of flash at the 2 part 1 retry fail: Expecting value: line 1 column 1 (char 0)
2025-11-07 01:50:45,344 - ERROR - Response text: The video file `files/y165sefh039r` is currently in a `PROCESSING` state, which means its content (audio and video) is not yet accessible.
2025-11-07 01:50:45,345 - INFO - sleep for 60 second
2025-11-07 01:51:54,425 - WARNING - 5 section of flash at the 2 part 2 retry fail: Expecting value: line 1 column 1 (char 0)
2025-11-07 01:51:54,425 - ERROR - Response text: The video file `files/y165sefh039r` is currently in a `PROCESSING` state. I am unable to access its audio and video content at this time. Please try again once the file state changes to `ACTIVE`....
2025-11-07 01:51:54,425 - INFO - sleep for 60 second
2025-11-07 01:53:29,036 - INFO - finish 5 section of flash at the 2 part