# Language model
A language model instance satisfies the `LanguageModel` interface, which includes the following:
- `provider`: The LLM provider name.
- `model_id`: The model identifier.
- `metadata`: Metadata about the model, such as pricing information or capabilities.
- `generate(LanguageModelInput) -> ModelResponse`: Generate a non-streaming response from the model.
- `stream(LanguageModelInput) -> AsyncIterable<PartialModelResponse>`: Generate a streaming response from the model.
All models in the library implement the `LanguageModel` interface and can be used interchangeably.
```ts
import type { LanguageModel } from "@hoangvvo/llm-sdk";
import { AnthropicModel } from "@hoangvvo/llm-sdk/anthropic";
import { CohereModel } from "@hoangvvo/llm-sdk/cohere";
import { GoogleModel } from "@hoangvvo/llm-sdk/google";
import { MistralModel } from "@hoangvvo/llm-sdk/mistral";
import { OpenAIChatModel, OpenAIModel } from "@hoangvvo/llm-sdk/openai";

function assert(
  condition: unknown,
  msg = "Assertion failed",
): asserts condition {
  if (!condition) {
    throw new Error(msg);
  }
}

try {
  const dotenv = await import("dotenv");
  const path = await import("path");
  dotenv.config({ path: path.join(import.meta.dirname, "../../.env") });
} catch {
  // Do nothing
}

export function getModel(provider: string, modelId: string): LanguageModel {
  switch (provider) {
    case "openai":
      assert(process.env["OPENAI_API_KEY"]);
      return new OpenAIModel({
        apiKey: process.env["OPENAI_API_KEY"],
        modelId,
      });
    case "openai-chat-completion":
      assert(process.env["OPENAI_API_KEY"]);
      return new OpenAIChatModel({
        apiKey: process.env["OPENAI_API_KEY"],
        modelId,
      });
    case "anthropic":
      assert(process.env["ANTHROPIC_API_KEY"]);
      return new AnthropicModel({
        apiKey: process.env["ANTHROPIC_API_KEY"],
        modelId,
      });
    case "google":
      assert(process.env["GOOGLE_API_KEY"]);
      return new GoogleModel({
        apiKey: process.env["GOOGLE_API_KEY"],
        modelId,
      });
    case "cohere":
      assert(process.env["CO_API_KEY"]);
      return new CohereModel({ apiKey: process.env["CO_API_KEY"], modelId });
    case "mistral":
      assert(process.env["MISTRAL_API_KEY"]);
      return new MistralModel({
        apiKey: process.env["MISTRAL_API_KEY"],
        modelId,
      });
    default:
      throw new Error(`Unsupported provider: ${provider}`);
  }
}
```

```rust
use llm_sdk::{
    anthropic::{AnthropicModel, AnthropicModelOptions},
    google::{GoogleModel, GoogleModelOptions},
    openai::{OpenAIChatModel, OpenAIChatModelOptions, OpenAIModel, OpenAIModelOptions},
    LanguageModel,
};

pub fn get_model(provider: &str, model_id: &str) -> Box<dyn LanguageModel> {
    match provider {
        "openai" => Box::new(OpenAIModel::new(
            model_id.to_string(),
            OpenAIModelOptions {
                api_key: std::env::var("OPENAI_API_KEY")
                    .expect("OPENAI_API_KEY environment variable must be set"),
                ..Default::default()
            },
        )),
        "openai-chat-completion" => Box::new(OpenAIChatModel::new(
            model_id.to_string(),
            OpenAIChatModelOptions {
                api_key: std::env::var("OPENAI_API_KEY")
                    .expect("OPENAI_API_KEY environment variable must be set"),
                ..Default::default()
            },
        )),
        "anthropic" => Box::new(AnthropicModel::new(
            model_id.to_string(),
            AnthropicModelOptions {
                api_key: std::env::var("ANTHROPIC_API_KEY")
                    .expect("ANTHROPIC_API_KEY environment variable must be set"),
                ..Default::default()
            },
        )),
        "google" => Box::new(GoogleModel::new(
            model_id.to_string(),
            GoogleModelOptions {
                api_key: std::env::var("GOOGLE_API_KEY")
                    .expect("GOOGLE_API_KEY environment variable must be set"),
                ..Default::default()
            },
        )),
        _ => panic!("Unsupported provider: {provider}"),
    }
}
```

```go
package examples

import (
  "fmt"
  "os"

  llmsdk "github.com/hoangvvo/llm-sdk/sdk-go"
  "github.com/hoangvvo/llm-sdk/sdk-go/anthropic"
  "github.com/hoangvvo/llm-sdk/sdk-go/google"
  "github.com/hoangvvo/llm-sdk/sdk-go/openai"
  "github.com/joho/godotenv"
)

func init() {
  godotenv.Load("../.env")
}

// GetModel creates and returns a language model based on provider and model ID
func GetModel(provider, modelID string) llmsdk.LanguageModel {
  switch provider {
  case "openai":
    apiKey := os.Getenv("OPENAI_API_KEY")
    if apiKey == "" {
      panic("OPENAI_API_KEY environment variable is required")
    }
    return openai.NewOpenAIModel(modelID, openai.OpenAIModelOptions{
      APIKey: apiKey,
    })
  case "openai-chat-completion":
    apiKey := os.Getenv("OPENAI_API_KEY")
    if apiKey == "" {
      panic("OPENAI_API_KEY environment variable is required")
    }
    return openai.NewOpenAIChatModel(modelID, openai.OpenAIChatModelOptions{
      APIKey: apiKey,
    })
  case "anthropic":
    apiKey := os.Getenv("ANTHROPIC_API_KEY")
    if apiKey == "" {
      panic("ANTHROPIC_API_KEY environment variable is required")
    }
    return anthropic.NewAnthropicModel(modelID, anthropic.AnthropicModelOptions{
      APIKey: apiKey,
    })
  case "google":
    apiKey := os.Getenv("GOOGLE_API_KEY")
    if apiKey == "" {
      panic("GOOGLE_API_KEY environment variable is required")
    }
    return google.NewGoogleModel(modelID, google.GoogleModelOptions{
      APIKey: apiKey,
    })
  default:
    panic(fmt.Sprintf("Unsupported provider: %s", provider))
  }
}
```
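Because every model satisfies the same interface, the call sites stay identical across providers. A minimal sketch using the `getModel` helper above; the provider and model ID (`"openai"` / `"gpt-4o"`) and the import path are only examples:

```ts
import { getModel } from "./get-model"; // the helper defined above; adjust the path for your setup

const model = getModel("openai", "gpt-4o"); // any supported provider/model pair works here

// Non-streaming: resolves to a single ModelResponse.
const response = await model.generate({
  messages: [{ role: "user", content: [{ type: "text", text: "Hello!" }] }],
});
console.log(response.content);

// Streaming: an AsyncIterable of PartialModelResponse values.
for await (const partial of model.stream({
  messages: [{ role: "user", content: [{ type: "text", text: "Hello!" }] }],
})) {
  console.log(partial.delta);
}
```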
## Language Model Input

`LanguageModelInput` is a unified format representing the input for generating responses from the language model, applicable to both non-streaming and streaming requests. The library converts these inputs into the corresponding properties for each LLM provider, where applicable. This allows specifying:
- The conversation history, which includes `UserMessage`, `AssistantMessage`, and `ToolMessage`.
- Sampling parameters: `max_tokens`, `temperature`, `top_p`, `top_k`, `presence_penalty`, `frequency_penalty`, and `seed`.
- Tool definitions and tool selection.
- The response format, to enforce that the model returns structured objects instead of plain text.
- `modalities` for the model to generate, such as text, images, or audio.
- Specific part output options such as `audio` and `reasoning`.
```ts
interface LanguageModelInput {
  /**
   * A system prompt is a way of providing context and instructions to the model
   */
  system_prompt?: string;
  /**
   * A list of messages comprising the conversation so far.
   */
  messages: Message[];
  /**
   * Definitions of tools that the model may use.
   */
  tools?: Tool[];
  tool_choice?: ToolChoiceOption;
  response_format?: ResponseFormatOption;
  /**
   * The maximum number of tokens that can be generated in the chat completion.
   */
  max_tokens?: number;
  /**
   * Amount of randomness injected into the response. Ranges from 0.0 to 1.0
   */
  temperature?: number;
  /**
   * An alternative to sampling with temperature, called nucleus sampling, where the model
   * considers the results of the tokens with top_p probability mass. Ranges from 0.0 to 1.0
   */
  top_p?: number;
  /**
   * Only sample from the top K options for each subsequent token. Used to remove
   * 'long tail' low probability responses. Must be a non-negative integer.
   */
  top_k?: number;
  /**
   * Positive values penalize new tokens based on whether they appear in the text so far,
   * increasing the model's likelihood to talk about new topics.
   */
  presence_penalty?: number;
  /**
   * Positive values penalize new tokens based on their existing frequency in the text so far,
   * decreasing the model's likelihood to repeat the same line verbatim.
   */
  frequency_penalty?: number;
  /**
   * The seed (integer), if set and supported by the model, to enable deterministic results.
   */
  seed?: number;
  /**
   * The modalities that the model should support.
   */
  modalities?: Modality[];
  /**
   * Options for audio generation.
   */
  audio?: AudioOptions;
  /**
   * Options for reasoning generation.
   */
  reasoning?: ReasoningOptions;
  /**
   * A set of key/value pairs that store additional information about the request.
   * This is forwarded to the model provider if supported.
   */
  metadata?: Record<string, string>;
  /**
   * Extra options that the model may support.
   */
  extra?: Record<string, unknown>;
}
```

```rust
pub struct LanguageModelInput {
    /// A system prompt is a way of providing context and instructions to the
    /// model
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_prompt: Option<String>,
    /// A list of messages comprising the conversation so far.
    pub messages: Vec<Message>,
    /// Definitions of tools that the model may use.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoiceOption>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormatOption>,
    /// The maximum number of tokens that can be generated in the chat
    /// completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,
    /// Amount of randomness injected into the response. Ranges from 0.0 to 1.0
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    /// An alternative to sampling with temperature, called nucleus sampling,
    /// where the model considers the results of the tokens with `top_p`
    /// probability mass. Ranges from 0.0 to 1.0
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    /// Only sample from the top K options for each subsequent token. Used to
    /// remove 'long tail' low probability responses. Must be a non-negative
    /// integer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<i32>,
    /// Positive values penalize new tokens based on whether they appear in the
    /// text so far, increasing the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f64>,
    /// Positive values penalize new tokens based on their existing frequency in
    /// the text so far, decreasing the model's likelihood to repeat the same
    /// line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f64>,
    /// The seed (integer), if set and supported by the model, to enable
    /// deterministic results.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i64>,
    /// The modalities that the model should support.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modalities: Option<Vec<Modality>>,
    /// A set of key/value pairs that store additional information about the
    /// request. This is forwarded to the model provider if supported.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, String>>,
    /// Options for audio generation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio: Option<AudioOptions>,
    /// Options for reasoning generation.
    pub reasoning: Option<ReasoningOptions>,
    /// Extra options that the model may support.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra: Option<LanguageModelInputExtra>,
}
```

```go
type LanguageModelInput struct {
  // A system prompt is a way of providing context and instructions to the model
  SystemPrompt *string `json:"system_prompt,omitempty"`
  // A list of messages comprising the conversation so far.
  Messages []Message `json:"messages"`
  // Definitions of tools that the model may use.
  Tools []Tool `json:"tools,omitempty"`
  ToolChoice *ToolChoiceOption `json:"tool_choice,omitempty"`
  ResponseFormat *ResponseFormatOption `json:"response_format,omitempty"`
  // The maximum number of tokens that can be generated in the chat completion.
  MaxTokens *uint32 `json:"max_tokens,omitempty"`
  // Amount of randomness injected into the response. Ranges from 0.0 to 1.0
  Temperature *float64 `json:"temperature,omitempty"`
  // An alternative to sampling with temperature, called nucleus sampling, where the model
  // considers the results of the tokens with top_p probability mass. Ranges from 0.0 to 1.0
  TopP *float64 `json:"top_p,omitempty"`
  // Only sample from the top K options for each subsequent token. Used to remove
  // 'long tail' low probability responses. Must be a non-negative integer.
  TopK *int32 `json:"top_k,omitempty"`
  // Positive values penalize new tokens based on whether they appear in the text so far,
  // increasing the model's likelihood to talk about new topics.
  PresencePenalty *float64 `json:"presence_penalty,omitempty"`
  // Positive values penalize new tokens based on their existing frequency in the text so far,
  // decreasing the model's likelihood to repeat the same line verbatim.
  FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
  // The seed (integer), if set and supported by the model, to enable deterministic results.
  Seed *int64 `json:"seed,omitempty"`
  // The modalities that the model should support.
  Modalities []Modality `json:"modalities,omitempty"`
  // A set of key/value pairs that store additional information about the request.
  // This is forwarded to the model provider if supported.
  Metadata map[string]string `json:"metadata,omitempty"`
  // Options for audio generation.
  Audio *AudioOptions `json:"audio,omitempty"`
  // Options for reasoning generation.
  Reasoning *ReasoningOptions `json:"reasoning,omitempty"`
  // Extra options that the model may support.
  Extra map[string]any `json:"extra,omitempty"`
}
```
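For instance, a request that pins down sampling behavior and attaches request metadata could look like the following sketch; all field values, and the `getModel` import path, are illustrative:

```ts
import type { LanguageModelInput } from "@hoangvvo/llm-sdk";
import { getModel } from "./get-model"; // helper from the previous section; adjust the path

const input: LanguageModelInput = {
  system_prompt: "You are a concise assistant.",
  messages: [
    { role: "user", content: [{ type: "text", text: "Summarize the plan." }] },
  ],
  temperature: 0.3, // low randomness
  max_tokens: 512,
  seed: 42, // deterministic results where the provider supports it
  metadata: { request_id: "example-123" },
};

const response = await getModel("openai", "gpt-4o").generate(input);
```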
## Message

Messages are the primitives that make up the conversation history, and `Part`s are the building blocks of each message. The library converts them into a format suitable for the underlying LLM provider, and maps responses from different providers back to this unified format.
Three message types are defined in the SDK: `UserMessage`, `AssistantMessage`, and `ToolMessage`.
```ts
type Message = UserMessage | AssistantMessage | ToolMessage;

interface UserMessage {
  role: "user";
  content: Part[];
}

interface AssistantMessage {
  role: "assistant";
  content: Part[];
}

interface ToolMessage {
  role: "tool";
  content: Part[];
}
```

```rust
pub enum Message {
    User(UserMessage),
    Assistant(AssistantMessage),
    Tool(ToolMessage),
}

pub struct UserMessage {
    pub content: Vec<Part>,
}

pub struct AssistantMessage {
    pub content: Vec<Part>,
}

pub struct ToolMessage {
    pub content: Vec<Part>,
}
```

```go
type Message struct {
  UserMessage      *UserMessage      `json:"-"`
  AssistantMessage *AssistantMessage `json:"-"`
  ToolMessage      *ToolMessage      `json:"-"`
}

type UserMessage struct {
  Content []Part `json:"content"`
}

type AssistantMessage struct {
  Content []Part `json:"content"`
}

type ToolMessage struct {
  Content []Part `json:"content"`
}
```
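For example, a short text-only conversation history assembled from these types; the contents are illustrative:

```ts
import type { Message } from "@hoangvvo/llm-sdk";

const messages: Message[] = [
  { role: "user", content: [{ type: "text", text: "What is the capital of France?" }] },
  { role: "assistant", content: [{ type: "text", text: "The capital of France is Paris." }] },
  { role: "user", content: [{ type: "text", text: "What is its population?" }] },
];
```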
The following `Part` types are implemented in the SDK: `TextPart`, `ImagePart`, `AudioPart`, `SourcePart` (for citations), `ToolCallPart`, `ToolResultPart`, and `ReasoningPart`.

```ts
type Part =
  | TextPart
  | ImagePart
  | AudioPart
  | SourcePart
  | ToolCallPart
  | ToolResultPart
  | ReasoningPart;
```

```rust
pub enum Part {
    Text(TextPart),
    Image(ImagePart),
    Audio(AudioPart),
    Source(SourcePart),
    ToolCall(ToolCallPart),
    ToolResult(ToolResultPart),
    Reasoning(ReasoningPart),
}
```

```go
type Part struct {
  TextPart       *TextPart       `json:"-"`
  ImagePart      *ImagePart      `json:"-"`
  AudioPart      *AudioPart      `json:"-"`
  SourcePart     *SourcePart     `json:"-"`
  ToolCallPart   *ToolCallPart   `json:"-"`
  ToolResultPart *ToolResultPart `json:"-"`
  ReasoningPart  *ReasoningPart  `json:"-"`
}
```
### Text Part

```ts
interface TextPart {
  type: "text";
  text: string;
  citations?: Citation[];
}
```

```rust
pub struct TextPart {
    pub text: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub citations: Option<Vec<Citation>>,
}
```

```go
type TextPart struct {
  Text      string     `json:"text"`
  Citations []Citation `json:"citations,omitempty"`
}
```
### Image Part

```ts
interface ImagePart {
  type: "image";
  /**
   * The MIME type of the image. E.g. "image/jpeg", "image/png".
   */
  mime_type: string;
  /**
   * The base64-encoded image data.
   */
  data: string;
  /**
   * The width of the image in pixels.
   */
  width?: number;
  /**
   * The height of the image in pixels.
   */
  height?: number;
  /**
   * ID of the image part, if applicable
   */
  id?: string;
}
```

```rust
pub struct ImagePart {
    /// The MIME type of the image. E.g. "image/jpeg", "image/png".
    pub mime_type: String,
    /// The base64-encoded image data.
    pub data: String,
    /// The width of the image in pixels.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// The height of the image in pixels.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// The ID of the image part, if applicable
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}
```

```go
type ImagePart struct {
  // The MIME type of the image. E.g. "image/jpeg", "image/png".
  MimeType string `json:"mime_type"`
  // The base64-encoded image data.
  Data string `json:"data"`
  // The width of the image in pixels.
  Width *int `json:"width,omitempty"`
  // The height of the image in pixels.
  Height *int `json:"height,omitempty"`
  // ID of the image part, if applicable
  ID *string `json:"id,omitempty"`
}
```
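Because `data` is base64-encoded, sending a local image means reading and encoding the bytes first. A sketch using Node's fs module; the file path and MIME type are illustrative:

```ts
import { readFile } from "node:fs/promises";

const bytes = await readFile("./photo.jpg");

const imagePart = {
  type: "image" as const,
  mime_type: "image/jpeg",
  data: bytes.toString("base64"), // base64-encode the raw bytes
};

// The part can then be mixed with text in a user message:
const message = {
  role: "user" as const,
  content: [{ type: "text" as const, text: "Describe this photo." }, imagePart],
};
```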
### Audio Part

```ts
interface AudioPart {
  type: "audio";
  /**
   * The base64-encoded audio data.
   */
  data: string;
  format: AudioFormat;
  /**
   * The sample rate of the audio. E.g. 44100, 48000.
   */
  sample_rate?: number;
  /**
   * The number of channels of the audio. E.g. 1, 2.
   */
  channels?: number;
  /**
   * The transcript of the audio.
   */
  transcript?: string;
  /**
   * ID of the audio part, if applicable
   */
  id?: string;
}

type AudioFormat =
  | "wav"
  | "mp3"
  | "linear16"
  | "flac"
  | "mulaw"
  | "alaw"
  | "aac"
  | "opus";
```

```rust
pub struct AudioPart {
    /// The base64-encoded audio data.
    pub data: String,
    /// The format of the audio.
    pub format: AudioFormat,
    /// The sample rate of the audio. E.g. 44100, 48000.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sample_rate: Option<u32>,
    /// The number of channels of the audio. E.g. 1, 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub channels: Option<u32>,
    /// The transcript of the audio.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub transcript: Option<String>,
    /// The ID of the audio part, if applicable
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}

pub enum AudioFormat {
    Wav,
    Mp3,
    Linear16,
    Flac,
    Mulaw,
    Alaw,
    Aac,
    Opus,
}
```

```go
type AudioPart struct {
  // The base64-encoded audio data.
  Data string `json:"data"`
  Format AudioFormat `json:"format"`
  // The sample rate of the audio. E.g. 44100, 48000.
  SampleRate *int `json:"sample_rate,omitempty"`
  // The number of channels of the audio. E.g. 1, 2.
  Channels *int `json:"channels,omitempty"`
  // The transcript of the audio.
  Transcript *string `json:"transcript,omitempty"`
  // The ID of the part, if applicable.
  ID *string `json:"id,omitempty"`
}

type AudioFormat string
```
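For raw formats such as "linear16", the consumer needs `sample_rate` and `channels` to interpret the samples, whereas container formats like "wav" or "mp3" carry that information themselves. A sketch; the file path and audio parameters are illustrative:

```ts
import { readFile } from "node:fs/promises";

const pcm = await readFile("./speech.raw"); // raw 16-bit PCM samples

const audioPart = {
  type: "audio" as const,
  format: "linear16" as const,
  data: pcm.toString("base64"),
  sample_rate: 16000, // required to interpret raw PCM
  channels: 1,
};
```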
### Source Part

```ts
interface SourcePart {
  type: "source";
  /**
   * The URL or identifier of the document.
   */
  source: string;
  /**
   * The title of the document.
   */
  title: string;
  /**
   * The content of the document.
   */
  content: Part[];
}
```

```rust
pub struct SourcePart {
    /// The URL or identifier of the document.
    pub source: String,
    /// The title of the document.
    pub title: String,
    /// The content of the document.
    pub content: Vec<Part>,
}
```

```go
type SourcePart struct {
  // The URL or identifier of the document.
  Source string `json:"source"`
  // The title of the document.
  Title string `json:"title"`
  // The content of the document.
  Content []Part `json:"content"`
}
```
### Tool Call Part

```ts
interface ToolCallPart {
  type: "tool-call";
  /**
   * The ID of the tool call, used to match the tool result with the tool call.
   */
  tool_call_id: string;
  /**
   * The name of the tool to call.
   */
  tool_name: string;
  /**
   * The arguments to pass to the tool.
   */
  args: Record<string, unknown>;
  /**
   * The ID of the tool call part, if applicable.
   * This is different from tool_call_id which is used to match tool results.
   */
  id?: string;
}
```

```rust
pub struct ToolCallPart {
    /// The ID of the tool call, used to match the tool result with the tool
    /// call.
    pub tool_call_id: String,
    /// The name of the tool to call.
    pub tool_name: String,
    /// The arguments to pass to the tool.
    pub args: Value,
    /// The ID of the tool call, if applicable
    /// This is different from `tool_call_id`, which is the ID used to match the
    /// tool result with the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}
```

```go
type ToolCallPart struct {
  // The ID of the tool call, used to match the tool result with the tool call.
  ToolCallID string `json:"tool_call_id"`
  // The name of the tool to call.
  ToolName string `json:"tool_name"`
  // The arguments to pass to the tool.
  Args json.RawMessage `json:"args"`
  // The ID of the part, if applicable.
  // This is different from ToolCallID which is used to match tool results.
  ID *string `json:"id,omitempty"`
}
```
### Tool Result Part

```ts
interface ToolResultPart {
  type: "tool-result";
  /**
   * The ID of the tool call from previous assistant message.
   */
  tool_call_id: string;
  /**
   * The name of the tool that was called.
   */
  tool_name: string;
  /**
   * The content of the tool result.
   */
  content: Part[];
  /**
   * Marks the tool result as an error.
   */
  is_error?: boolean;
}
```

```rust
pub struct ToolResultPart {
    /// The ID of the tool call from previous assistant message.
    pub tool_call_id: String,
    /// The name of the tool that was called.
    pub tool_name: String,
    /// The content of the tool result.
    pub content: Vec<Part>,
    /// Marks the tool result as an error.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_error: Option<bool>,
}
```

```go
type ToolResultPart struct {
  // The ID of the tool call from previous assistant message.
  ToolCallID string `json:"tool_call_id"`
  // The name of the tool that was called.
  ToolName string `json:"tool_name"`
  // The content of the tool result.
  Content []Part `json:"content"`
  // Marks the tool result as an error.
  IsError bool `json:"is_error,omitempty"`
}
```
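A tool result answers an earlier tool call by echoing its `tool_call_id`. The tool name, IDs, and payload in this sketch are illustrative:

```ts
import type { Message } from "@hoangvvo/llm-sdk";

const toolRoundTrip: Message[] = [
  {
    role: "assistant",
    content: [
      {
        type: "tool-call",
        tool_call_id: "call_123",
        tool_name: "get_weather",
        args: { city: "Hanoi" },
      },
    ],
  },
  {
    role: "tool",
    content: [
      {
        type: "tool-result",
        tool_call_id: "call_123", // must match the tool call above
        tool_name: "get_weather",
        content: [{ type: "text", text: '{"temperature_c": 31}' }],
        // is_error: true, // set instead when the tool failed
      },
    ],
  },
];
```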
### Reasoning Part

```ts
interface ReasoningPart {
  type: "reasoning";
  /**
   * The reasoning text content
   */
  text: string;
  /**
   * The reasoning internal signature
   */
  signature?: string;
  /**
   * The ID of the reasoning part, if applicable
   */
  id?: string;
}
```

```rust
pub struct ReasoningPart {
    /// The reasoning text content.
    pub text: String,
    /// The reasoning internal signature
    #[serde(skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,
    /// The ID of the reasoning part, if applicable
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}
```

```go
type ReasoningPart struct {
  // The reasoning text content
  Text string `json:"text"`
  // The reasoning internal signature
  Signature *string `json:"signature,omitempty"`
  // The ID of the reasoning part, if applicable.
  ID *string `json:"id,omitempty"`
}
```
### Citation

Certain parts include `citations`, which language models can use to cite a `SourcePart` in their responses.
```ts
interface Citation {
  /**
   * The URL or identifier of the document being cited.
   */
  source: string;
  /**
   * The title of the document being cited.
   */
  title?: string;
  /**
   * The text snippet from the document being cited.
   */
  cited_text?: string;
  /**
   * The start index of the document content part being cited.
   */
  start_index: number;
  /**
   * The end index of the document content part being cited.
   */
  end_index: number;
}
```

```rust
pub struct Citation {
    /// The URL or identifier of the document being cited.
    pub source: String,
    /// The title of the document being cited.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    /// The text snippet from the document being cited.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cited_text: Option<String>,
    /// The start index of the document content part being cited.
    pub start_index: usize,
    /// The end index of the document content part being cited.
    pub end_index: usize,
}
```

```go
type Citation struct {
  // The URL or identifier of the document being cited.
  Source string `json:"source"`
  // The title of the document being cited.
  Title *string `json:"title,omitempty"`
  // The text snippet from the document being cited.
  CitedText *string `json:"cited_text,omitempty"`
  // The start index of the document content part being cited.
  StartIndex int `json:"start_index"`
  // The end index of the document content part being cited.
  EndIndex int `json:"end_index"`
}
```
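Putting the pieces together: a document supplied as a `SourcePart` can be cited back by a `TextPart` in the response. The document, indices, and cited text in this sketch are illustrative:

```ts
// A user message that carries a document for the model to cite:
const userMessage = {
  role: "user" as const,
  content: [
    {
      type: "source" as const,
      source: "doc://handbook",
      title: "Employee Handbook",
      content: [
        { type: "text" as const, text: "PTO accrues at 1.5 days per month." },
      ],
    },
    { type: "text" as const, text: "How fast does PTO accrue?" },
  ],
};

// A text part the model might return, citing that source:
const citedTextPart = {
  type: "text" as const,
  text: "PTO accrues at 1.5 days per month.",
  citations: [
    {
      source: "doc://handbook",
      title: "Employee Handbook",
      cited_text: "PTO accrues at 1.5 days per month.",
      start_index: 0,
      end_index: 34,
    },
  ],
};
```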
## Model Response

The response from the language model is represented as a `ModelResponse`, which includes:
- `content`: An array of `Part` that represents the generated content, which usually comes from the `AssistantMessage`.
- `usage`: Token usage information, if available.
- `cost`: The estimated cost of the request, if the model's pricing information is provided.
```ts
interface ModelResponse {
  content: Part[];
  usage?: ModelUsage;
  /**
   * The cost of the response.
   */
  cost?: number;
}

interface ModelUsage {
  input_tokens: number;
  output_tokens: number;
  input_tokens_details?: ModelTokensDetails;
  output_tokens_details?: ModelTokensDetails;
}

interface ModelTokensDetails {
  text_tokens?: number;
  cached_text_tokens?: number;
  audio_tokens?: number;
  cached_audio_tokens?: number;
  image_tokens?: number;
  cached_image_tokens?: number;
}
```

```rust
pub struct ModelResponse {
    pub content: Vec<Part>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<ModelUsage>,
    /// The cost of the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost: Option<f64>,
}

pub struct ModelUsage {
    pub input_tokens: u32,
    pub output_tokens: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_tokens_details: Option<ModelTokensDetails>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_tokens_details: Option<ModelTokensDetails>,
}

pub struct ModelTokensDetails {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_text_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_audio_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_image_tokens: Option<u32>,
}
```

```go
type ModelResponse struct {
  Content []Part `json:"content"`
  Usage *ModelUsage `json:"usage,omitempty"`
  // The cost of the response.
  Cost *float64 `json:"cost,omitempty"`
}

type ModelUsage struct {
  InputTokens  int `json:"input_tokens"`
  OutputTokens int `json:"output_tokens"`
  InputTokensDetails  *ModelTokensDetails `json:"input_tokens_details,omitempty"`
  OutputTokensDetails *ModelTokensDetails `json:"output_tokens_details,omitempty"`
}

type ModelTokensDetails struct {
  TextTokens        *int `json:"text_tokens,omitempty"`
  CachedTextTokens  *int `json:"cached_text_tokens,omitempty"`
  AudioTokens       *int `json:"audio_tokens,omitempty"`
  CachedAudioTokens *int `json:"cached_audio_tokens,omitempty"`
  ImageTokens       *int `json:"image_tokens,omitempty"`
  CachedImageTokens *int `json:"cached_image_tokens,omitempty"`
}
```
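Reading usage and cost off a response is then straightforward; a sketch where the provider, model ID, and `getModel` import path are illustrative:

```ts
import { getModel } from "./get-model"; // helper from the first section; adjust the path

const model = getModel("openai", "gpt-4o");
const response = await model.generate({
  messages: [{ role: "user", content: [{ type: "text", text: "Hi!" }] }],
});

if (response.usage) {
  const { input_tokens, output_tokens } = response.usage;
  console.log(`tokens: ${input_tokens} in, ${output_tokens} out`);
}
if (response.cost !== undefined) {
  console.log(`estimated cost: $${response.cost}`); // present only when pricing info is provided
}
```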
For streaming calls, the response is delivered as a series of `PartialModelResponse` objects, each of which may include:

- `delta`: A `PartDelta` together with its index in the eventual `content` array.
- `usage`: Token usage information, if available.
- `cost`: The estimated cost, if available.
```ts
interface ContentDelta {
  index: number;
  part: PartDelta;
}

interface PartialModelResponse {
  delta?: ContentDelta;
  usage?: ModelUsage;
  cost?: number;
}
```

```rust
pub struct ContentDelta {
    pub index: usize,
    pub part: PartDelta,
}

pub struct PartialModelResponse {
    pub delta: Option<ContentDelta>,
    pub usage: Option<ModelUsage>,
    pub cost: Option<f64>,
}
```

```go
type ContentDelta struct {
  Index int `json:"index"`
  Part PartDelta `json:"part"`
}

type PartialModelResponse struct {
  Delta *ContentDelta `json:"delta,omitempty"`
  Usage *ModelUsage `json:"usage,omitempty"`
  Cost *float64 `json:"cost,omitempty"`
}
```

All SDKs provide the `StreamAccumulator` utility to help build the final `ModelResponse` from a stream of `PartialModelResponse`.
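A sketch of accumulating a stream in TypeScript. The export location and the `addPartial`/`computeResponse` method names are assumptions here; check the SDK's exports for the exact API:

```ts
import { StreamAccumulator } from "@hoangvvo/llm-sdk"; // assumed export location
import { getModel } from "./get-model"; // helper from the first section; adjust the path

const model = getModel("openai", "gpt-4o"); // illustrative provider/model
const accumulator = new StreamAccumulator();

for await (const partial of model.stream({
  messages: [{ role: "user", content: [{ type: "text", text: "Hello!" }] }],
})) {
  accumulator.addPartial(partial); // assumed method: feed each PartialModelResponse
}

const final = accumulator.computeResponse(); // assumed method: build the final ModelResponse
console.log(final.content, final.usage, final.cost);
```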