Gemini Live API answering its own questions

I have configured the Gemini Live API to conduct interviews of military veterans. There are two major problems that I am facing.

  1. Repeating the question immediately, without even listening to the user's response.
  2. Answering its own questions.

I have tried everything, but it's not working.

// --- DEPENDENCIES ---
const {
  GoogleGenAI,
  Modality,
  Behavior,
  FunctionResponseScheduling,
} = require('@google/genai');
const jwt = require('jsonwebtoken');

// --- MOCK DATABASE & CONFIG ---
// Replace with your actual env variables when running locally.
const MODEL_NAME = 'gemini-2.0-flash-exp';
const API_KEY = process.env.GEMINI_API_KEY;

// --- TOOL DEFINITION (Generic) ---
// Declares a single "log_note" function the model may call to persist
// details it extracts from the conversation.
const logNoteDeclaration = {
  name: 'log_note',
  description: 'Log specific details extracted from the conversation.',
  behavior: Behavior.NON_BLOCKING, // model keeps talking while the tool runs
  parameters: {
    type: 'object',
    properties: {
      note: { type: 'string', description: 'The detail to record' },
    },
    required: ['note'],
  },
};

const TOOLS = [{ functionDeclarations: [logNoteDeclaration] }];

/**
 * Bridges a browser WebSocket client to a Gemini Live API session.
 *
 * Flow: authenticate the upgrade request, open the Live session, stream
 * model audio back to the client, answer tool calls, and forward client
 * audio/text into the session.
 *
 * @param {import('ws').WebSocket} ws - connected browser socket
 * @param {import('http').IncomingMessage} req - the upgrade request (carries ?token=)
 */
async function handleConnection(ws, req) {
  console.log(':electric_plug: Client Connected');

  // Token arrives as a query parameter on the WebSocket upgrade URL.
  // NOTE: the original was missing the backticks around the base URL,
  // which is a syntax error.
  const url = new URL(req.url, `http://${req.headers.host}`);
  const token = url.searchParams.get('token');

  // 1. Authentication Check (Simplified)
  let userId;
  try {
    if (!token) throw new Error('No token');
    // const decoded = jwt.verify(token, process.env.JWT_SECRET);
    userId = 'user_123'; // Mock ID
  } catch (err) {
    ws.close(1008, 'Auth Failed');
    return;
  }

  // 2. Session Resumption Logic (Generic)
  let previousSessionHandle = null;
  // Example: Fetch from DB if session is < 15 mins old
  // if (dbData.lastSessionHandle && isRecent) previousSessionHandle = dbData.lastSessionHandle;

  // 3. System Instruction.
  // Be explicit about turn-taking: without the "ask, then STOP" rule the
  // model tends to keep generating and answer its own interview questions.
  const systemInstruction = `You are a helpful AI assistant. Your goal is to interview the user and collect specific details. Ask exactly one question, then STOP and wait for the user to answer. Never answer your own questions and never repeat a question unless the user asks you to. When the user provides a concrete detail, use the "log_note" tool. If the user is silent, wait patiently.`;

  const ai = new GoogleGenAI({ apiKey: API_KEY });
  let session = null;

  try {
    session = await ai.live.connect({
      model: MODEL_NAME,
      config: {
        responseModalities: [Modality.AUDIO],
        systemInstruction,
        tools: TOOLS,
        toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
        sessionResumption: { handle: previousSessionHandle }, // pass handle if it exists
        speechConfig: {
          voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Puck' } },
        },
        realtimeInputConfig: {
          automaticActivityDetection: {
            disabled: false,
            prefixPaddingMs: 500,
            silenceDurationMs: 2000, // 2s of silence = end of user turn
          },
        },
      },
      callbacks: {
        onopen: () => console.log(':robot: AI Connected'),

        onmessage: async (msg) => {
          // 1. Forward model audio to the browser client.
          if (msg.serverContent?.modelTurn?.parts) {
            for (const part of msg.serverContent.modelTurn.parts) {
              // Guard mimeType with ?. too — parts without inlineData.mimeType
              // would otherwise throw inside the callback.
              if (part.inlineData?.mimeType?.startsWith('audio/pcm')) {
                if (ws.readyState === ws.OPEN) ws.send(JSON.stringify(msg));
              }
            }
          }

          // 2. Handle Tool Calls (Server side)
          if (msg.toolCall) {
            await handleToolCall(session, msg.toolCall);
          }
          // Handle embedded tool calls
          if (msg.serverContent?.modelTurn?.parts) {
            for (const part of msg.serverContent.modelTurn.parts) {
              if (part.toolCall) await handleToolCall(session, part.toolCall);
            }
          }

          // 3. Save Session Handle for Resumption
          if (msg.sessionResumptionUpdate?.resumable) {
            console.log('New Handle:', msg.sessionResumptionUpdate.newHandle);
            // TODO: Save newHandle to DB for this userId
          }
        },

        onclose: () => console.log('❌ AI Session Closed'),
        onerror: (e) => console.error('🔥 AI Error:', e.message),
      },
    });

    // Initial greeting.
    // IMPORTANT: use sendClientContent with turnComplete:true so the model
    // sees a *finished* user turn, responds once, and then waits for real
    // input. Injecting text via sendRealtimeInput provides no turn boundary,
    // which is a common cause of the model immediately continuing and
    // answering its own question.
    if (!previousSessionHandle) {
      await session.sendClientContent({
        turns: [{ role: 'user', parts: [{ text: 'Hello, I am ready to start.' }] }],
        turnComplete: true,
      });
    }
  } catch (err) {
    console.error('Connection Failed:', err);
    ws.close();
    return;
  }

  // 4. Handle Incoming Audio/Text from Client
  ws.on('message', (data) => {
    if (!session) return;
    try {
      const msg = JSON.parse(data.toString());
      if (msg.realtime_input) {
        if (msg.realtime_input.media_chunks) {
          // NOTE(review): only the first chunk is forwarded here, as in the
          // original — confirm the client sends one chunk per message.
          session.sendRealtimeInput({
            audio: {
              data: msg.realtime_input.media_chunks[0].data,
              mimeType: 'audio/pcm;rate=16000',
            },
          });
        }
        if (msg.realtime_input.text) {
          session.sendRealtimeInput({ text: msg.realtime_input.text });
        }
      }
    } catch (e) {
      console.error('Parse Error', e);
    }
  });

  ws.on('close', () => {
    console.log(':electric_plug: Client Disconnected');
    if (session) session.close();
  });
}

// --- GENERIC TOOL HANDLER ---
/**
 * Executes function calls requested by the model and returns their results.
 *
 * @param {object} session - live session; must expose sendToolResponse().
 * @param {object} toolCall - the toolCall message ({ functionCalls: [...] }).
 */
async function handleToolCall(session, toolCall) {
  // BUG FIX: the original read `const functionResponses = ;` — the array
  // literal was missing, which is a syntax error.
  const functionResponses = [];

  for (const call of toolCall.functionCalls) {
    if (call.name === 'log_note') {
      const { note } = call.args;
      console.log(`📝 Captured Note: "${note}"`);

      // Return success to Gemini.
      functionResponses.push({
        id: call.id,
        name: call.name,
        response: {
          result: { status: 'success' },
          // SILENT: deliver the result without triggering a new model turn.
          // WHEN_IDLE let the model resume speaking after each logged note,
          // which is one way it ends up "answering its own question".
          scheduling: FunctionResponseScheduling.SILENT,
        },
      });
    }
  }

  if (functionResponses.length > 0 && session) {
    await session.sendToolResponse({ functionResponses });
  }
}

module.exports = { handleConnection };

Is the `gemini-2.0-flash-exp` model still working for you? It's not working for me anymore… I loved using the quirky model.

1 Like