500 Internal Error while extracting information from Excel file using Gemini 2.5 Pro

I had written the following code:

“”"

=== Excel Text Extraction Test ===

Tests Google Gemini 2.5 Pro’s ability to extract text from multi-sheet Excel files

“”"

import os

from dotenv import load_dotenv

from google import genai

from google.genai import types

load_dotenv()

model = os.getenv(‘MODEL_NAME’)

temperature = os.getenv(‘TEMPERATURE’)

# Initialize the client with API key

client = genai.Client()

def upload_excel_file(file_path: str) → str:

“”"

Uploads an Excel file to Gemini API using the Files API

Args:

file_path: Path to the Excel file

Returns:

file: Uploaded file

“”"

try:

# Upload the file

uploaded_file = client.files.upload(

file=file_path,

config=dict(

# mime_type=“application/vnd.openxmlformats-officedocument.spreadsheetml.sheet”

mime_type=“text/plain”

)

)

print(f" File uploaded successfully")

print(f" Name: {uploaded_file.name}")

print(f" MIME Type: {uploaded_file.mime_type}")

print(f" URI: {uploaded_file.uri}")

return uploaded_file

except Exception as e:

print(f"Error uploading file: {str(e)}")

raise

def extract_text_from_excel(file: str, system_instruction: str, prompt: str) → dict:

“”"

Extracts text from Excel file using Gemini 2.5 Pro

Args:

file: Uploaded file

system_instruction: System instruction for the prompt

prompt: Custom prompt for extraction

Returns:

dict: Extracted text and metadata

“”"

try:

# Create the request with the file

response = client.models.generate_content(

model=model,

contents=[

file,

prompt

],

config=types.GenerateContentConfig(

system_instruction=system_instruction,

temperature=temperature,

# max_output_tokens=8192,

safety_settings= [

types.SafetySetting(

category=‘HARM_CATEGORY_HATE_SPEECH’,

threshold=‘BLOCK_ONLY_HIGH’

),

]

)

)

extracted_text = response.text

return {

‘success’: True,

‘extracted_text’: extracted_text,

‘model_used’: model,

‘tokens_used’: {

‘prompt’: response.usage_metadata.prompt_token_count if hasattr(response, ‘usage_metadata’) else None,

‘completion’: response.usage_metadata.candidates_token_count if hasattr(response, ‘usage_metadata’) else None,

‘total’: response.usage_metadata.total_token_count if hasattr(response, ‘usage_metadata’) else None,

}

}

except Exception as e:

return {

‘success’: False,

‘error’: str(e),

‘extracted_text’: None

}

def test_excel_extraction(file_path: str, system_instruction: str, prompt: str):

“”"

Complete workflow to test Excel text extraction

Args:

file: Path to the Excel file

system_instruction: System instruction

prompt: Custom prompt

“”"

print(“\n” + “=”*80)

print(“EXCEL TEXT EXTRACTION TEST - GOOGLE GEMINI 2.5 PRO”)

print(“=”*80 + “\n”)

# Step 1: Upload file

print(“[Step 1] Uploading Excel file…”)

file = upload_excel_file(file_path)

# Step 2: Extract text

print(“\n[Step 2] Extracting text from Excel file…”)

result = extract_text_from_excel(file, system_instruction, prompt)

# Step 3: Display results

print(“\n[Step 3] Extraction Results:”)

print(“-” * 80)

if result[‘success’]:

print(f" Extraction successful!")

print(f"\nModel Used: {result[‘model_used’]}")

if result[‘tokens_used’][‘total’]:

print(f"\nToken Usage:")

print(f" - Prompt tokens: {result[‘tokens_used’][‘prompt’]}")

print(f" - Completion tokens: {result[‘tokens_used’][‘completion’]}")

print(f" - Total tokens: {result[‘tokens_used’][‘total’]}")

print(f"\n{‘=’*80}")

print(“EXTRACTED TEXT:”)

print(‘=’*80)

print(result[‘extracted_text’])

print(‘=’*80)

# Save to file

output_file = f"Output Files/{file_path.split(‘/’)[-1].split(‘.’)[0]}_extracted.txt"

with open(output_file, ‘w’, encoding=‘utf-8’) as f:

f.write(result[‘extracted_text’])

print(f"\nExtracted text saved to: {output_file}")

else:

print(f"Extraction failed!")

print(f"Error: {result[‘error’]}")

print(“\n” + “=”*80 + “\n”)

return result

# Example usage

if _name_ == “_main_”:

excel_file = “.\Input Files\Renewal 2025.xlsx”

system_instruction = “”"

|| ROLE ||

You are an expert in extracting information from insurance documents in Excel format with single or multiple sheets.

|| GOAL ||

Your job is to extract all information from the Excel file(s) provided. Preserve ALL original content. Do not make additions, modifications, or removals.

“”"

prompt = “”"

This is an insurance document in Excel format with the possibility of the presence of multiple sheets.

Please extract ALL information from ALL sheets including:

- Sheet names, if any

- All headers and column names

- All data rows

- Any special formatting or notes

- Table structures and relationships

Organize the output clearly by sheet name, if any, and maintain the table structure.

“”"

# Run the test

result = test_excel_extraction(excel_file, system_instruction, prompt)

# Additional analysis

if result[‘success’]:

print(“\n[Analysis]”)

text_length = len(result[‘extracted_text’])

print(f"Total characters extracted: {text_length:,}")

print(f"Total words extracted: {len(result[‘extracted_text’].split()):,}")

I am getting this error:

================================================================================
EXCEL TEXT EXTRACTION TEST - GOOGLE GEMINI 2.5 PRO

[Step 1] Uploading Excel file…
File uploaded successfully
Name: files/wmc0f4byonl3
MIME Type: text/plain
URI: https://generativelanguage.googleapis.com/v1beta/files/wmc0f4byonl3

[Step 2] Extracting text from Excel file…

[Step 3] Extraction Results:

Extraction failed!
Error: 500 INTERNAL. {‘error’: {‘code’: 500, ‘message’: ‘An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting’, ‘status’: ‘INTERNAL’}}

================================================================================

When I am changing the MIME type to:

mime_type=“application/vnd.openxmlformats-officedocument.spreadsheetml.sheet”

I am getting this error instead:

================================================================================
EXCEL TEXT EXTRACTION TEST - GOOGLE GEMINI 2.5 PRO

[Step 1] Uploading Excel file…
Error uploading file: ‘file’
Traceback (most recent call last):
File “C:\Users\USER\Documents\PiTangent Gitlab Repos\Policysmart\FirstTry\excel_read_claude.py”, line 191, in
result = test_excel_extraction(excel_file, system_instruction, prompt)
File “C:\Users\USER\Documents\PiTangent Gitlab Repos\Policysmart\FirstTry\excel_read_claude.py”, line 123, in test_excel_extraction
file = upload_excel_file(file_path)
File “C:\Users\USER\Documents\PiTangent Gitlab Repos\Policysmart\FirstTry\excel_read_claude.py”, line 33, in upload_excel_file
uploaded_file = client.files.upload(
File “C:\Users\USER\Documents\PiTangent Gitlab Repos\Policysmart\FirstTry\venv\lib\site-packages\google\genai\files.py”, line 494, in upload
response=return_file.json[‘file’],
KeyError: 'file