Hi,
I’m calling the Gemma 4 models (both gemma-4-31b-it and gemma-4-26b-a4b-it) via the REST API and I can’t turn off thinking.
This is my URL: https://generativelanguage.googleapis.com/v1beta/models/gemma-4-31b-it:streamGenerateContent?key=33333333
And here is the request body:
{
"contents": [
{
"role": "user",
"parts": [
{
"text": "Who are you"
}
]
}
],
"generationConfig": {
"maxOutputTokens": 4048,
"responseMimeType": "application/json",
"frequencyPenalty": 0.0,
"presencePenalty": 0.0,
"temperature": 0.0,
"topP": 1
},
"systemInstruction": {
"parts": [
{
"text": "You are a AI assistant."
}
]
}
}
Here are the results:
[
{
"candidates": [
{
"content": {
"parts": [
{
"text": "* User asks: \"Who are you\"\n * ",
"thought": true
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"totalTokenCount": 24,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 14
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": "Constraint: Generate only a valid, parseable JSON.\n * Constraint: No comments inside JSON.\n ",
"thought": true
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"totalTokenCount": 48,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 38
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": "* Constraint: No extra text outside JSON.\n * Constraint: If multiple, return a single array.",
"thought": true
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"totalTokenCount": 72,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 62
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": "\n\n * Identity: AI assistant.\n * Role: Helpful, informative, and versatile.\n\n ",
"thought": true
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"totalTokenCount": 96,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 86
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": "* Option 1: `{\"answer\": \"I am an AI assistant.\"}`\n * Option ",
"thought": true
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"totalTokenCount": 120,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 110
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": "2: `{\"identity\": \"AI Assistant\", \"description\": \"A large language model trained by Google.\"}`\n\n",
"thought": true
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"totalTokenCount": 144,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 134
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": " * Valid JSON? Yes.\n * Double-quoted strings? Yes.\n * No",
"thought": true
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"totalTokenCount": 168,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 158
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": " comments? Yes.\n * No extra text? Yes.",
"thought": true
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"totalTokenCount": 182,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 172
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": "{\n \"identity\": \"AI assistant"
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"candidatesTokenCount": 9,
"totalTokenCount": 191,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 172
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": "\",\n \"description\": \"A large language model trained by Google.\"\n}"
}
],
"role": "model"
},
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"candidatesTokenCount": 26,
"totalTokenCount": 208,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 172
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
},
{
"candidates": [
{
"content": {
"parts": [
{
"text": ""
}
],
"role": "model"
},
"finishReason": "STOP",
"index": 0
}
],
"usageMetadata": {
"promptTokenCount": 10,
"candidatesTokenCount": 26,
"totalTokenCount": 208,
"promptTokensDetails": [
{
"modality": "TEXT",
"tokenCount": 10
}
],
"thoughtsTokenCount": 172
},
"modelVersion": "gemma-4-31b-it",
"responseId": "gKflaeq2J6Pw4-EP9--piAU"
}
]
As you can see, thinking is enabled. The documentation says it is disabled by default.
Does anyone know how to disable it?