Hi everyone,
I am wanting to using the new realtime api on the Gemini API, but I am unable to receive text AND audio, similar to how it works on Google AI studio. Is this not possible at the moment? Currently, I am using this code:
from google import genai
import os
os.environ['GOOGLE_API_KEY'] = "..."
client = genai.Client(http_options= {'api_version': 'v1alpha'})
MODEL = "gemini-2.0-flash-exp"
import asyncio
import base64
import contextlib
import datetime
import os
import json
import wave
import itertools
from IPython.display import display, Audio
from google import genai
from google.genai import types
config={
"generation_config": {
"response_modalities": ["AUDIO", "TEXT"],
"temperature": 0.65
}
}
async with client.aio.live.connect(model=MODEL, config=config) as session:
message = "Hello? Gemini are you there?"
print("> ", message, "\n")
await session.send(message, end_of_turn=True)
# For text responses, When the model's turn is complete it breaks out of the loop.
turn = session.receive()
async for chunk in turn:
if chunk.text is not None:
print(f'{chunk.text}', end="")
else:
print(chunk.text)
print(chunk.server_content.model_turn.parts[0])
and get back:
ConnectionClosedError: received 1007 (invalid frame payload data) Request trace id: b7ac8bb69f1977ce, [ORIGINAL ERROR] generic::invalid_argument: Error in program Instantiation for language; then sent 1007 (invalid frame payload data) Request trace id: b7ac8bb69f1977ce, [ORIGINAL ERROR] generic::invalid_argument: Error in program Instantiation for language
I am unsure what this means, but assume that it means that this is not supported? If anyone from the Gemini team or anyone else can fill me in, it would be greatly appreciated.
Thanks for a great model!
Landon