Language model

A language model instance satisfies the LanguageModel interface, which includes the following:

  • provider: The LLM provider name.
  • model_id: The model identifier.
  • metadata: Metadata about the model, such as pricing information or capabilities.
  • generate(LanguageModelInput) -> ModelResponse: Generate a non-streaming response from the model.
  • stream(LanguageModelInput) -> AsyncIterable<PartialModelResponse>: Generate a streaming response from the model.

All models in the library implement the LanguageModel interface and can be used interchangeably.
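Because every model satisfies the same interface, calling code only needs to depend on LanguageModel. As an illustration (the types below are minimal local re-declarations of the shapes documented on this page, not imports from the SDK), a stub model can stand in anywhere a real provider model would:

```typescript
// Minimal local re-declarations of the documented shapes, for illustration only.
interface StubPart {
  type: "text";
  text: string;
}
interface StubModelResponse {
  content: StubPart[];
}
interface StubLanguageModelInput {
  messages: { role: string; content: StubPart[] }[];
}
interface StubLanguageModel {
  provider: string;
  modelId: string;
  generate(input: StubLanguageModelInput): Promise<StubModelResponse>;
}

// A stub that satisfies the interface; any real provider model slots into
// the same call sites.
const stubModel: StubLanguageModel = {
  provider: "stub",
  modelId: "stub-1",
  async generate(input) {
    const last = input.messages[input.messages.length - 1];
    return { content: [{ type: "text", text: `echo: ${last.content[0].text}` }] };
  },
};

// A helper that works with any model implementing the interface.
async function ask(model: StubLanguageModel, text: string): Promise<string> {
  const res = await model.generate({
    messages: [{ role: "user", content: [{ type: "text", text }] }],
  });
  return res.content[0].text;
}
```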

get-model.ts
import type { LanguageModel } from "@hoangvvo/llm-sdk";
import { AnthropicModel } from "@hoangvvo/llm-sdk/anthropic";
import { CohereModel } from "@hoangvvo/llm-sdk/cohere";
import { GoogleModel } from "@hoangvvo/llm-sdk/google";
import { MistralModel } from "@hoangvvo/llm-sdk/mistral";
import { OpenAIChatModel, OpenAIModel } from "@hoangvvo/llm-sdk/openai";
import assert from "node:assert";

export function getModel(provider: string, modelId: string): LanguageModel {
  switch (provider) {
    case "openai":
      assert(process.env["OPENAI_API_KEY"]);
      return new OpenAIModel({
        apiKey: process.env["OPENAI_API_KEY"],
        modelId,
      });
    case "openai-chat-completion":
      assert(process.env["OPENAI_API_KEY"]);
      return new OpenAIChatModel({
        apiKey: process.env["OPENAI_API_KEY"],
        modelId,
      });
    case "anthropic":
      assert(process.env["ANTHROPIC_API_KEY"]);
      return new AnthropicModel({
        apiKey: process.env["ANTHROPIC_API_KEY"],
        modelId,
      });
    case "google":
      assert(process.env["GOOGLE_API_KEY"]);
      return new GoogleModel({
        apiKey: process.env["GOOGLE_API_KEY"],
        modelId,
      });
    case "cohere":
      assert(process.env["CO_API_KEY"]);
      return new CohereModel({ apiKey: process.env["CO_API_KEY"], modelId });
    case "mistral":
      assert(process.env["MISTRAL_API_KEY"]);
      return new MistralModel({
        apiKey: process.env["MISTRAL_API_KEY"],
        modelId,
      });
    default:
      throw new Error(`Unsupported provider: ${provider}`);
  }
}

LanguageModelInput is a unified format to represent the input for generating responses from the language model, applicable to both non-streaming and streaming requests. The library converts these inputs into corresponding properties for each LLM provider, if applicable. This allows specifying:

  • The conversation history, which includes UserMessage, AssistantMessage, and ToolMessage.
  • Sampling parameters: max_tokens, temperature, top_p, top_k, presence_penalty, frequency_penalty, and seed.
  • Tool definitions and tool selection.
  • The response format, used to require the model to return structured objects instead of plain text.
  • The modalities the model should generate, such as text, images, or audio.
  • Part-specific output options, such as audio and reasoning options.
types.ts
interface LanguageModelInput {
  /**
   * A system prompt is a way of providing context and instructions to the model.
   */
  system_prompt?: string;
  /**
   * A list of messages comprising the conversation so far.
   */
  messages: Message[];
  /**
   * Definitions of tools that the model may use.
   */
  tools?: Tool[];
  tool_choice?: ToolChoiceOption;
  response_format?: ResponseFormatOption;
  /**
   * The maximum number of tokens that can be generated in the chat completion.
   */
  max_tokens?: number;
  /**
   * Amount of randomness injected into the response. Ranges from 0.0 to 1.0.
   */
  temperature?: number;
  /**
   * An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. Ranges from 0.0 to 1.0.
   */
  top_p?: number;
  /**
   * Only sample from the top K options for each subsequent token. Used to remove "long tail" low-probability responses. Must be a non-negative integer.
   */
  top_k?: number;
  /**
   * Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
   */
  presence_penalty?: number;
  /**
   * Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
   */
  frequency_penalty?: number;
  /**
   * The seed (integer), if set and supported by the model, to enable deterministic results.
   */
  seed?: number;
  /**
   * The modalities that the model should generate.
   */
  modalities?: Modality[];
  /**
   * Options for audio generation.
   */
  audio?: AudioOptions;
  /**
   * Options for reasoning generation.
   */
  reasoning?: ReasoningOptions;
  /**
   * A set of key/value pairs that store additional information about the request. This is forwarded to the model provider if supported.
   */
  metadata?: Record<string, string>;
  /**
   * Extra options that the model may support.
   */
  extra?: Record<string, unknown>;
}
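A request combining several of these fields can be written as a plain object. The field names follow the interface above; the values are illustrative:

```typescript
// An illustrative LanguageModelInput literal; only messages is required,
// all other fields are optional tuning knobs.
const input = {
  system_prompt: "You are a concise assistant.",
  messages: [
    { role: "user", content: [{ type: "text", text: "What is 2 + 2?" }] },
  ],
  max_tokens: 256,
  temperature: 0.2,
  metadata: { request_source: "docs-example" },
};
```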

Messages are primitives that make up the conversation history, and Parts are the building blocks of each message. The library converts them into a format suitable for the underlying LLM provider and maps those from different providers to the unified format.

Three message types are defined in the SDK: UserMessage, AssistantMessage, and ToolMessage.

types.ts
type Message = UserMessage | AssistantMessage | ToolMessage;

interface UserMessage {
  role: "user";
  content: Part[];
}

interface AssistantMessage {
  role: "assistant";
  content: Part[];
}

interface ToolMessage {
  role: "tool";
  content: Part[];
}
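A tool-use round trip, for example, is three messages: the user asks, the assistant emits a ToolCallPart, and a tool message carries the matching ToolResultPart. The tool name and arguments here are made up for illustration:

```typescript
// Hypothetical weather-lookup round trip; tool_call_id ties the result
// in the tool message back to the call in the assistant message.
const history = [
  {
    role: "user",
    content: [{ type: "text", text: "What's the weather in Hanoi?" }],
  },
  {
    role: "assistant",
    content: [
      {
        type: "tool-call",
        tool_call_id: "call_1",
        tool_name: "get_weather",
        args: { city: "Hanoi" },
      },
    ],
  },
  {
    role: "tool",
    content: [
      {
        type: "tool-result",
        tool_call_id: "call_1",
        tool_name: "get_weather",
        content: [{ type: "text", text: "31°C, sunny" }],
      },
    ],
  },
];
```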

The following Part types are implemented in the SDK: TextPart, ImagePart, AudioPart, SourcePart (for citations), ToolCallPart, ToolResultPart, and ReasoningPart.

types.ts
type Part =
  | TextPart
  | ImagePart
  | AudioPart
  | SourcePart
  | ToolCallPart
  | ToolResultPart
  | ReasoningPart;
types.ts
interface TextPart {
  type: "text";
  text: string;
  citations?: Citation[];
}
types.ts
interface ImagePart {
  type: "image";
  /**
   * The MIME type of the image. E.g. "image/jpeg", "image/png".
   */
  mime_type: string;
  /**
   * The base64-encoded image data.
   */
  image_data: string;
  /**
   * The width of the image in pixels.
   */
  width?: number;
  /**
   * The height of the image in pixels.
   */
  height?: number;
  /**
   * ID of the image part, if applicable.
   */
  id?: string;
}
types.ts
interface AudioPart {
  type: "audio";
  /**
   * The base64-encoded audio data.
   */
  audio_data: string;
  format: AudioFormat;
  /**
   * The sample rate of the audio. E.g. 44100, 48000.
   */
  sample_rate?: number;
  /**
   * The number of channels of the audio. E.g. 1, 2.
   */
  channels?: number;
  /**
   * The transcript of the audio.
   */
  transcript?: string;
  /**
   * ID of the audio part, if applicable.
   */
  id?: string;
}

type AudioFormat =
  | "wav"
  | "mp3"
  | "linear16"
  | "flac"
  | "mulaw"
  | "alaw"
  | "aac"
  | "opus";
types.ts
interface SourcePart {
  type: "source";
  /**
   * The source URL or identifier of the document.
   */
  source: string;
  /**
   * The title of the document.
   */
  title: string;
  /**
   * The content of the document.
   */
  content: Part[];
}
types.ts
interface ToolCallPart {
  type: "tool-call";
  /**
   * The ID of the tool call, used to match the tool result with the tool call.
   */
  tool_call_id: string;
  /**
   * The name of the tool to call.
   */
  tool_name: string;
  /**
   * The arguments to pass to the tool.
   */
  args: Record<string, unknown>;
  /**
   * The ID of the tool call part, if applicable.
   * This is different from tool_call_id, which is used to match tool results.
   */
  id?: string;
}
types.ts
interface ToolResultPart {
  type: "tool-result";
  /**
   * The ID of the tool call from the previous assistant message.
   */
  tool_call_id: string;
  /**
   * The name of the tool that was called.
   */
  tool_name: string;
  /**
   * The content of the tool result.
   */
  content: Part[];
  /**
   * Marks the tool result as an error.
   */
  is_error?: boolean;
}
types.ts
interface ReasoningPart {
  type: "reasoning";
  /**
   * The reasoning text content.
   */
  text: string;
  /**
   * The reasoning internal signature.
   */
  signature?: string;
  /**
   * The ID of the reasoning part, if applicable.
   */
  id?: string;
}

The response from the language model is represented as a ModelResponse that includes:

  • content: An array of Part that represents the generated content, which usually comes from the AssistantMessage.
  • usage: Token usage information, if available.
  • cost: The estimated cost of the request, if the model’s pricing information is provided.
types.ts
interface ModelResponse {
  content: Part[];
  usage?: ModelUsage;
  /**
   * The cost of the response.
   */
  cost?: number;
}

interface ModelUsage {
  input_tokens: number;
  output_tokens: number;
  input_tokens_details?: ModelTokensDetails;
  output_tokens_details?: ModelTokensDetails;
}

interface ModelTokensDetails {
  text_tokens?: number;
  cached_text_tokens?: number;
  audio_tokens?: number;
  cached_audio_tokens?: number;
  image_tokens?: number;
  cached_image_tokens?: number;
}
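How cost can be derived from usage is straightforward to sketch, assuming per-million-token prices are available from the model's pricing metadata. The pricing field names below are assumptions for the example, not the SDK's actual metadata layout:

```typescript
// Hypothetical pricing shape; the real metadata layout may differ.
interface Pricing {
  input_cost_per_million_tokens: number;
  output_cost_per_million_tokens: number;
}
interface Usage {
  input_tokens: number;
  output_tokens: number;
}

// Estimate request cost from token counts and per-million-token prices.
function estimateCost(usage: Usage, pricing: Pricing): number {
  return (
    (usage.input_tokens / 1e6) * pricing.input_cost_per_million_tokens +
    (usage.output_tokens / 1e6) * pricing.output_cost_per_million_tokens
  );
}
```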

For streaming calls, the response is represented as a series of PartialModelResponse objects that include:

  • delta: A PartDelta and its index in the eventual content array.
  • usage: Token usage information, if available.
types.ts
interface ContentDelta {
  index: number;
  part: PartDelta;
}

interface PartialModelResponse {
  delta?: ContentDelta;
  usage?: ModelUsage;
  cost?: number;
}

All SDKs provide the StreamAccumulator utility to help build the final ModelResponse from a stream of PartialModelResponse.
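The accumulation it performs can be sketched by hand: collect each delta at its index, concatenating deltas that target the same part. This simplified sketch handles text deltas only; the real utility also merges tool calls, audio, reasoning, usage, and cost:

```typescript
// Local simplified shapes for the sketch (text deltas only).
interface TextDelta {
  type: "text";
  text: string;
}
interface Delta {
  index: number;
  part: TextDelta;
}
interface Partial {
  delta?: Delta;
}

// Merge a stream of partial responses into the final content array by
// appending each delta's text to the part at its index.
function accumulate(partials: Partial[]): TextDelta[] {
  const content: TextDelta[] = [];
  for (const partial of partials) {
    if (!partial.delta) continue;
    const { index, part } = partial.delta;
    const existing = content[index];
    if (existing) {
      existing.text += part.text;
    } else {
      content[index] = { ...part };
    }
  }
  return content;
}
```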