500 Internal Error while extracting information from Excel file using Gemini 2.5 Pro

I had written the following code:

“”"

=== Excel Text Extraction Test ===

Tests Google Gemini 2.5 Pro’s ability to extract text from multi-sheet Excel files

“”"

import os

from dotenv import load_dotenv

from google import genai

from google.genai import types

load_dotenv()

model = os.getenv(‘MODEL_NAME’)

temperature = os.getenv(‘TEMPERATURE’)

# Initialize the client with API key

client = genai.Client()

def upload_excel_file(file_path: str) → str:

“”"

Uploads an Excel file to Gemini API using the Files API

Args:

file_path: Path to the Excel file

Returns:

file: Uploaded file

“”"

try:

# Upload the file

uploaded_file = client.files.upload(

file=file_path,

config=dict(

# mime_type=“application/vnd.openxmlformats-officedocument.spreadsheetml.sheet”

mime_type=“text/plain”

)

)

print(f" File uploaded successfully")

print(f" Name: {uploaded_file.name}")

print(f" MIME Type: {uploaded_file.mime_type}")

print(f" URI: {uploaded_file.uri}")

return uploaded_file

except Exception as e:

print(f"Error uploading file: {str(e)}")

raise

def extract_text_from_excel(file: str, system_instruction: str, prompt: str) → dict:

“”"

Extracts text from Excel file using Gemini 2.5 Pro

Args:

file: Uploaded file

system_instruction: System instruction for the prompt

prompt: Custom prompt for extraction

Returns:

dict: Extracted text and metadata

“”"

try:

# Create the request with the file

response = client.models.generate_content(

model=model,

contents=[

file,

prompt

],

config=types.GenerateContentConfig(

system_instruction=system_instruction,

temperature=temperature,

# max_output_tokens=8192,

safety_settings= [

types.SafetySetting(

category=‘HARM_CATEGORY_HATE_SPEECH’,

threshold=‘BLOCK_ONLY_HIGH’

),

]

)

)

extracted_text = response.text

return {

‘success’: True,

‘extracted_text’: extracted_text,

‘model_used’: model,

‘tokens_used’: {

‘prompt’: response.usage_metadata.prompt_token_count if hasattr(response, ‘usage_metadata’) else None,

‘completion’: response.usage_metadata.candidates_token_count if hasattr(response, ‘usage_metadata’) else None,

‘total’: response.usage_metadata.total_token_count if hasattr(response, ‘usage_metadata’) else None,

}

}

except Exception as e:

return {

‘success’: False,

‘error’: str(e),

‘extracted_text’: None

}

def test_excel_extraction(file_path: str, system_instruction: str, prompt: str):

“”"

Complete workflow to test Excel text extraction

Args:

file: Path to the Excel file

system_instruction: System instruction

prompt: Custom prompt

“”"

print(“\n” + “=”*80)

print(“EXCEL TEXT EXTRACTION TEST - GOOGLE GEMINI 2.5 PRO”)

print(“=”*80 + “\n”)

# Step 1: Upload file

print(“[Step 1] Uploading Excel file…”)

file = upload_excel_file(file_path)

# Step 2: Extract text

print(“\n[Step 2] Extracting text from Excel file…”)

result = extract_text_from_excel(file, system_instruction, prompt)

# Step 3: Display results

print(“\n[Step 3] Extraction Results:”)

print(“-” * 80)

if result[‘success’]:

print(f" Extraction successful!")

print(f"\nModel Used: {result[‘model_used’]}")

if result[‘tokens_used’][‘total’]:

print(f"\nToken Usage:")

print(f" - Prompt tokens: {result[‘tokens_used’][‘prompt’]}")

print(f" - Completion tokens: {result[‘tokens_used’][‘completion’]}")

print(f" - Total tokens: {result[‘tokens_used’][‘total’]}")

print(f"\n{‘=’*80}")

print(“EXTRACTED TEXT:”)

print(‘=’*80)

print(result[‘extracted_text’])

print(‘=’*80)

# Save to file

output_file = f"Output Files/{file_path.split(‘/’)[-1].split(‘.’)[0]}_extracted.txt"

with open(output_file, ‘w’, encoding=‘utf-8’) as f:

f.write(result[‘extracted_text’])

print(f"\nExtracted text saved to: {output_file}")

else:

print(f"Extraction failed!")

print(f"Error: {result[‘error’]}")

print(“\n” + “=”*80 + “\n”)

return result

# Example usage

if _name_ == “_main_”:

excel_file = “.\Input Files\Renewal 2025.xlsx”

system_instruction = “”"

|| ROLE ||

You are an expert in extracting information from insurance documents in Excel format with single or multiple sheets.

|| GOAL ||

Your job is to extract all information from the Excel file(s) provided. Preserve ALL original content. Do not make additions, modifications, or removals.

“”"

prompt = “”"

This is an insurance document in Excel format with the possibility of the presence of multiple sheets.

Please extract ALL information from ALL sheets including:

- Sheet names, if any

- All headers and column names

- All data rows

- Any special formatting or notes

- Table structures and relationships

Organize the output clearly by sheet name, if any, and maintain the table structure.

“”"

# Run the test

result = test_excel_extraction(excel_file, system_instruction, prompt)

# Additional analysis

if result[‘success’]:

print(“\n[Analysis]”)

text_length = len(result[‘extracted_text’])

print(f"Total characters extracted: {text_length:,}")

print(f"Total words extracted: {len(result[‘extracted_text’].split()):,}")

I am getting this error:

================================================================================
EXCEL TEXT EXTRACTION TEST - GOOGLE GEMINI 2.5 PRO

[Step 1] Uploading Excel file…
File uploaded successfully
Name: files/wmc0f4byonl3
MIME Type: text/plain
URI: https://generativelanguage.googleapis.com/v1beta/files/wmc0f4byonl3

[Step 2] Extracting text from Excel file…

[Step 3] Extraction Results:

Extraction failed!
Error: 500 INTERNAL. {‘error’: {‘code’: 500, ‘message’: ‘An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting’, ‘status’: ‘INTERNAL’}}

================================================================================

When I am changing the MIME type to:

mime_type=“application/vnd.openxmlformats-officedocument.spreadsheetml.sheet”

I am getting this error instead:

================================================================================
EXCEL TEXT EXTRACTION TEST - GOOGLE GEMINI 2.5 PRO

[Step 1] Uploading Excel file…
Error uploading file: ‘file’
Traceback (most recent call last):
File “C:\Users\USER\Documents\PiTangent Gitlab Repos\Policysmart\FirstTry\excel_read_claude.py”, line 191, in
result = test_excel_extraction(excel_file, system_instruction, prompt)
File “C:\Users\USER\Documents\PiTangent Gitlab Repos\Policysmart\FirstTry\excel_read_claude.py”, line 123, in test_excel_extraction
file = upload_excel_file(file_path)
File “C:\Users\USER\Documents\PiTangent Gitlab Repos\Policysmart\FirstTry\excel_read_claude.py”, line 33, in upload_excel_file
uploaded_file = client.files.upload(
File “C:\Users\USER\Documents\PiTangent Gitlab Repos\Policysmart\FirstTry\venv\lib\site-packages\google\genai\files.py”, line 494, in upload
response=return_file.json[‘file’],
KeyError: 'file

Hi @Madhusree_Rana
Are you still facing this issue ?
The root cause of the 500 internal error is that when you set mime_type="text/plain", you are incorrectly telling the API that the file you are uploading is a plain text file, even though the actual file is an Excel file (.xlsx).
You can overcome both the issues by first, update your library to the latest version using pip install -U google-generativeai. Then, modify your upload function to use genai.upload_file(path=file_path), removing the manual mime_type setting entirely. This allows the library to automatically detect the correct file type and handle the API response correctly, fixing both errors.
Please let me know if this solution successfully resolves your problem.