Using the python-genai SDK.
When an MCP server is provided as a tool for the gemini-2.5-flash model (just that model!), the safety settings reset to their defaults. The behavior does not reproduce in AI Studio (which has no MCP support) or with any other Gemini 2.5 model, such as flash-lite or pro.
PoC script that should reproduce the result:
import asyncio
from datetime import datetime

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from google import genai
from google.genai import types

client = genai.Client(api_key="APIKEY")

# Any MCP server will do; this one exposes a single weather-forecast tool.
server_params = StdioServerParameters(
    command="npx",
    args=["-y", "@philschmid/weather-mcp"],
    env=None,
)

async def run():
    async with stdio_client(server_params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            prompt = f"What is the weather in London on {datetime.now().strftime('%Y-%m-%d')}?"
            response = await client.aio.models.generate_content(
                model="gemini-2.5-flash",
                contents=prompt,
                config=types.GenerateContentConfig(
                    # Passing the live MCP session as a tool is what
                    # triggers the reset.
                    tools=[session],
                    system_instruction="actual instruction here. Something harmful (anything that would trip the default thresholds) will do.",
                    safety_settings=[
                        types.SafetySetting(
                            category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
                        ),
                        types.SafetySetting(
                            category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
                        ),
                        types.SafetySetting(
                            category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
                        ),
                        types.SafetySetting(
                            category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
                            threshold=types.HarmBlockThreshold.BLOCK_NONE,
                        ),
                    ],
                ),
            )
            print(f"Response content: {response}")

asyncio.run(run())
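For quick triage when comparing runs, a small helper over the response fields already visible in the dumps below makes the difference explicit (the helper is my own sketch, not part of the SDK):

def summarize(response: types.GenerateContentResponse) -> str:
    # A prompt-level block carries prompt_feedback.block_reason and no
    # candidates; a normal completion carries a candidate with a finish_reason.
    feedback = response.prompt_feedback
    if feedback is not None and feedback.block_reason is not None:
        return f"blocked at prompt level: {feedback.block_reason}"
    return f"completed: finish_reason={response.candidates[0].finish_reason}"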
My results with 2.5 Flash:
Response content: GenerateContentResponse(
  automatic_function_calling_history=[],
  model_version='gemini-2.5-flash',
  prompt_feedback=GenerateContentResponsePromptFeedback(
    block_reason=<BlockedReason.PROHIBITED_CONTENT: 'PROHIBITED_CONTENT'>
  ),
  sdk_http_response=HttpResponse(
    headers=<dict len=11>
  ),
  usage_metadata=GenerateContentResponseUsageMetadata(
    prompt_token_count=1109,
    prompt_tokens_details=[
      ModalityTokenCount(
        modality=<MediaModality.TEXT: 'TEXT'>,
        token_count=1109
      ),
    ],
    total_token_count=1109
  )
)
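If the reset is really tied to the MCP tool path, a control run that drops only the MCP session from tools (everything else identical) should come back unblocked. A minimal sketch, assuming it is placed inside run() right after the first call:

# Control: same prompt and the same BLOCK_NONE settings, but no MCP
# tool. Per the behavior described above, this should return a normal
# candidate instead of block_reason=PROHIBITED_CONTENT.
safety_off = [
    types.SafetySetting(category=category, threshold=types.HarmBlockThreshold.BLOCK_NONE)
    for category in (
        types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        types.HarmCategory.HARM_CATEGORY_HARASSMENT,
        types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
]
control = await client.aio.models.generate_content(
    model="gemini-2.5-flash",
    contents=prompt,
    config=types.GenerateContentConfig(
        system_instruction="same instruction as in the PoC",
        safety_settings=safety_off,
    ),
)
print(f"Control response: {control}")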
Results using 2.5 Flash-Lite (2.5 Pro behaves the same, though I am not sure which is worse, since I hit about five 500 INTERNAL errors before getting a response from it):
Response content: GenerateContentResponse(
  automatic_function_calling_history=[
    UserContent(
      parts=[
        Part(
          text='prompt!'
        ),
      ],
      role='user'
    ),
    Content(
      parts=[
        Part(
          function_call=FunctionCall(
            args={
              <... Max depth ...>: <... Max depth ...>,
              <... Max depth ...>: <... Max depth ...>
            },
            name='get_weather_forecast'
          )
        ),
      ],
      role='model'
    ),
    Content(
      parts=[
        Part(
          function_response=FunctionResponse(
            name='get_weather_forecast',
            response={
              <... Max depth ...>: <... Max depth ...>
            }
          )
        ),
      ],
      role='user'
    ),
  ],
  candidates=[
    Candidate(
      content=Content(
        parts=[
          Part(
            text="content!"
          ),
        ],
        role='model'
      ),
      finish_reason=<FinishReason.STOP: 'STOP'>,
      index=0
    ),
  ],
  model_version='gemini-2.5-flash-lite',
  sdk_http_response=HttpResponse(
    headers=<dict len=11>
  ),
  usage_metadata=GenerateContentResponseUsageMetadata(
    candidates_token_count=78,
    prompt_token_count=1433,
    prompt_tokens_details=[
      ModalityTokenCount(
        modality=<MediaModality.TEXT: 'TEXT'>,
        token_count=1433
      ),
    ],
    total_token_count=1511
  )
)
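For completeness, the per-model comparison can be scripted instead of editing the model string by hand. A sketch, assuming the GenerateContentConfig from the PoC has been hoisted into a config variable and the summarize() helper from above is in scope:

# Hypothetical sweep over the models discussed above; only
# gemini-2.5-flash is expected to come back blocked.
for model in ("gemini-2.5-flash", "gemini-2.5-flash-lite", "gemini-2.5-pro"):
    response = await client.aio.models.generate_content(
        model=model,
        contents=prompt,
        config=config,  # the GenerateContentConfig from the PoC
    )
    print(f"{model}: {summarize(response)}")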