Hi there,
I’m using Gemini Live API (gemini-2.5-flash-native-audio-preview-12-2025) in NodeJS and I receive audio responses fine via the modelTurn event. However, I am not getting the outputAudioTranscription event.
Here is the code to my init function.
const initSession = async () => {
const gemini = new GoogleGenAI({apiKey: process.env.GEMINI_API_KEY});
const model = 'gemini-2.5-flash-native-audio-preview-12-2025';
const config = {
responseModalities: [Modality.AUDIO],
outputAudioTranscription: {}
};
try {
let startSession = await gemini.live.connect({
model,
config,
callbacks: {
onopen: () => console.log('Connected to Gemini Live API'),
onmessage: (message) => {
if ('serverContent' in message) {
console.log(message)
if ('modelTurn' in message.serverContent) {
const response = message.serverContent.modelTurn.parts[0];
if (response) {
if ('inlineData' in response) {
const {data, mimeType} = response.inlineData;
setResponseAudioString(prev => prev + data);
setResponseMimeType(mimeType);
}
}
}
else if ('outputTranscription' in message.serverContent) {
console.log("OUTPUT TRANSCRIPTION", message.serverContent.outputTranscription);
setResponseLog(prevResponses => [...prevResponses, message.serverContent.outputTranscription.text]);
}
else if ('generationComplete' in message.serverContent) {
const el = document.getElementById("chat-window");
if (el) el.scrollTop = el.scrollHeight;
setAudioReady(true);
}
}
},
onerror: (e) => {
console.error('Error', e.message);
},
onclose: (e) => {
console.error('Close:' + e.reason);
},
},
config: {
responseModalities: [Modality.AUDIO],
systemInstruction: "You are a helpful and friendly AI assistant.",
speechConfig: {
voiceConfig: {prebuiltVoiceConfig: {voiceName: 'Orus'}},
},
},
});
setSession(()=>startSession);
setLoading(false);
} catch (e) {
console.error(e);
}
};
Any help would be appreciated!