Get logprobs at output token level

Here is a complete (but untested) script:

from google import genai
from google.genai import types

"""
A simple example using Google's Vertex client, which allows one to generate logprobs once per day.
"""
PROJECT_NAME="my_project"
def generate():
  # Vertex AI client; authenticates via Application Default Credentials.
  client = genai.Client(
      vertexai=True,
      project=PROJECT_NAME,
      location="us-central1",
  )


  model = "gemini-2.0-flash"
  contents = [
    types.Content(
      role="user",
      parts=[
        types.Part.from_text(text="""hello""")
      ]
    ),
  ]
  generate_content_config = types.GenerateContentConfig(
    temperature = 1,
    top_p = 0.95,
    # Request per-token logprobs; `logprobs` sets how many top alternative
    # tokens are returned at each position.
    response_logprobs=True,
    logprobs=1,
    max_output_tokens = 8192,
    response_modalities = ["TEXT"],
    # Disable the safety filters so the demo response is not blocked.
    safety_settings = [types.SafetySetting(
      category="HARM_CATEGORY_HATE_SPEECH",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_DANGEROUS_CONTENT",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_SEXUALLY_EXPLICIT",
      threshold="OFF"
    ),types.SafetySetting(
      category="HARM_CATEGORY_HARASSMENT",
      threshold="OFF"
    )],
  )

  # Stream the reply; each chunk carries the logprobs for the tokens in that
  # chunk (a short reply like this one usually arrives as a single chunk, as
  # in the output below). Keep the last chunk so the caller can inspect it.
  response = None
  for chunk in client.models.generate_content_stream(
      model = model,
      contents = contents,
      config = generate_content_config,
      ):
      print(chunk.text, end="")
      response = chunk
  return response

x = generate()
print(x)
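If you don't need streaming, the same client also exposes a blocking call, client.models.generate_content, which returns a single response object carrying the full logprobs_result. A minimal sketch of a drop-in replacement for the streaming loop inside generate(), reusing the same model, contents, and generate_content_config:

  # Blocking variant: one response object instead of a stream of chunks.
  response = client.models.generate_content(
      model = model,
      contents = contents,
      config = generate_content_config,
  )
  print(response.text)
  return response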

Printing the returned object gives something like:

candidates=[Candidate(content=Content(parts=[Part(video_metadata=None, thought=None, code_execution_result=None, executable_code=None, file_data=None, function_call=None, function_response=None, inline_data=None, text='Hello! How can I help you today?')], role='model'), citation_metadata=None, finish_message=None, token_count=None, finish_reason=<FinishReason.STOP: 'STOP'>, avg_logprobs=-7.311527676052517, grounding_metadata=None, index=None, logprobs_result=LogprobsResult(chosen_candidates=[LogprobsResultCandidate(log_probability=-0.00019188585, token='Hello', token_id=None), LogprobsResultCandidate(log_probability=-0.0030728758, token='!', token_id=None), LogprobsResultCandidate(log_probability=-0.01058189, token=' How', token_id=None), LogprobsResultCandidate(log_probability=-7.783962e-05, token=' can', token_id=None), LogprobsResultCandidate(log_probability=-2.0266912e-06, token=' I', token_id=None), LogprobsResultCandidate(log_probability=-0.00032465608, token=' help', token_id=None), LogprobsResultCandidate(log_probability=-1.0729074e-06, token=' you', token_id=None), LogprobsResultCandidate(log_probability=-5.2448504e-06, token=' today', token_id=None), LogprobsResultCandidate(log_probability=-2.3844768e-07, token='?', token_id=None)], top_candidates=[LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-0.00019188585, token='Hello', token_id=None)]), LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-0.0030728758, token='!', token_id=None)]), LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-0.01058189, token=' How', token_id=None)]), LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-7.783962e-05, token=' can', token_id=None)]), LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-2.0266912e-06, token=' I', token_id=None)]), LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-0.00032465608, token=' help', token_id=None)]), LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-1.0729074e-06, token=' you', token_id=None)]), LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-5.2448504e-06, token=' today', token_id=None)]), LogprobsResultTopCandidates(candidates=[LogprobsResultCandidate(log_probability=-2.3844768e-07, token='?', token_id=None)])]), safety_ratings=None)] create_time=datetime.datetime(2025, 5, 7, 7, 47, 37, 19485, tzinfo=TzInfo(UTC)) response_id='GRAbaJ2YAZPw-O4Poo_T-Ac' model_version='gemini-2.5-flash-preview-04-17' prompt_feedback=None usage_metadata=GenerateContentResponseUsageMetadata(cache_tokens_details=None, cached_content_token_count=None, candidates_token_count=9, candidates_tokens_details=[ModalityTokenCount(modality=<MediaModality.TEXT: 'TEXT'>, token_count=9)], prompt_token_count=1, prompt_tokens_details=[ModalityTokenCount(modality=<MediaModality.TEXT: 'TEXT'>, token_count=1)], thoughts_token_count=245, tool_use_prompt_token_count=None, tool_use_prompt_tokens_details=None, total_token_count=255, traffic_type=<TrafficType.ON_DEMAND: 'ON_DEMAND'>) automatic_function_calling_history= parsed=None
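To use the logprobs programmatically rather than reading the repr, walk candidates[0].logprobs_result on the returned object. A minimal sketch, assuming x is the object returned by generate() above (the helper name print_token_logprobs is just for illustration):

import math

def print_token_logprobs(response):
  # Logprobs for the tokens the model actually emitted.
  result = response.candidates[0].logprobs_result
  for chosen in result.chosen_candidates:
    prob = math.exp(chosen.log_probability)  # back to a plain probability
    print(f"{chosen.token!r}: logprob={chosen.log_probability:.6f} (p={prob:.4f})")
  # Top alternatives considered at each position (count set by `logprobs` in the config).
  for position, top in enumerate(result.top_candidates):
    alternatives = ", ".join(f"{c.token!r} ({c.log_probability:.4f})" for c in top.candidates)
    print(f"position {position}: {alternatives}")

print_token_logprobs(x)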