I’m working with the Gemini API and noticed that when streaming responses with both text and image inputs, the chunks sometimes arrive out of order. My code works fine with text-only inputs, but multimodal requests seem to trigger this behavior.
// Consume the streamed response one chunk at a time. `for await` handles
// chunks strictly in the order the async iterator yields them, so any
// reordering would have to originate upstream of this loop.
// `response`, `textChunks` and `imageChunks` are declared outside this
// snippet (presumably `let textChunks = ""` and `let imageChunks = []`;
// verify against the surrounding code).
for await (const chunk of response.stream) {
  // Route the chunk into the text or binary (image) accumulator.
  // NOTE(review): `Text` / `Binary` are used exactly as in the original
  // snippet; confirm these accessors exist on the chunk type your SDK
  // version returns.
  if (chunk.Text) {
    textChunks += chunk.Text(); // Collect text data
  } else if (chunk.Binary) {
    imageChunks.push(chunk.Binary()); // Collect binary image data
  }

  // Process both text and image once at least one of each has arrived.
  if (textChunks && imageChunks.length > 0) {
    // Here you can handle the complete text and image data
    console.log("Text:", textChunks);
    console.log("Image Data:", imageChunks);

    // Reset both accumulators so the next text/image pair starts clean.
    textChunks = "";
    imageChunks = [];
  }
}