Language model
A language model instance satisfies the LanguageModel interface, which includes the following:

- provider: The LLM provider name.
- model_id: The model identifier.
- metadata: Metadata about the model, such as pricing information or capabilities.
- generate(LanguageModelInput) -> ModelResponse: Generate a non-streaming response from the model.
- stream(LanguageModelInput) -> AsyncIterable<PartialModelResponse>: Generate a streaming response from the model.
All models in the library implement the LanguageModel interface and can be used interchangeably.
TypeScript:

import type { LanguageModel } from "@hoangvvo/llm-sdk";
import { AnthropicModel } from "@hoangvvo/llm-sdk/anthropic";
import { CohereModel } from "@hoangvvo/llm-sdk/cohere";
import { GoogleModel } from "@hoangvvo/llm-sdk/google";
import { MistralModel } from "@hoangvvo/llm-sdk/mistral";
import { OpenAIChatModel, OpenAIModel } from "@hoangvvo/llm-sdk/openai";
import assert from "node:assert";

export function getModel(provider: string, modelId: string): LanguageModel {
  switch (provider) {
    case "openai":
      assert(process.env["OPENAI_API_KEY"]);
      return new OpenAIModel({
        apiKey: process.env["OPENAI_API_KEY"],
        modelId,
      });
    case "openai-chat-completion":
      assert(process.env["OPENAI_API_KEY"]);
      return new OpenAIChatModel({
        apiKey: process.env["OPENAI_API_KEY"],
        modelId,
      });
    case "anthropic":
      assert(process.env["ANTHROPIC_API_KEY"]);
      return new AnthropicModel({
        apiKey: process.env["ANTHROPIC_API_KEY"],
        modelId,
      });
    case "google":
      assert(process.env["GOOGLE_API_KEY"]);
      return new GoogleModel({
        apiKey: process.env["GOOGLE_API_KEY"],
        modelId,
      });
    case "cohere":
      assert(process.env["CO_API_KEY"]);
      return new CohereModel({ apiKey: process.env["CO_API_KEY"], modelId });
    case "mistral":
      assert(process.env["MISTRAL_API_KEY"]);
      return new MistralModel({
        apiKey: process.env["MISTRAL_API_KEY"],
        modelId,
      });
    default:
      throw new Error(`Unsupported provider: ${provider}`);
  }
}
Rust:

use llm_sdk::{
    google::{GoogleModel, GoogleModelOptions},
    openai::{OpenAIChatModel, OpenAIChatModelOptions, OpenAIModel, OpenAIModelOptions},
    LanguageModel,
};

pub fn get_model(provider: &str, model_id: &str) -> Box<dyn LanguageModel> {
    match provider {
        "openai" => Box::new(OpenAIModel::new(
            model_id.to_string(),
            OpenAIModelOptions {
                api_key: std::env::var("OPENAI_API_KEY")
                    .expect("OPENAI_API_KEY environment variable must be set"),
                ..Default::default()
            },
        )),
        "openai-chat-completion" => Box::new(OpenAIChatModel::new(
            model_id.to_string(),
            OpenAIChatModelOptions {
                api_key: std::env::var("OPENAI_API_KEY")
                    .expect("OPENAI_API_KEY environment variable must be set"),
                ..Default::default()
            },
        )),
        "google" => Box::new(GoogleModel::new(
            model_id.to_string(),
            GoogleModelOptions {
                api_key: std::env::var("GOOGLE_API_KEY")
                    .expect("GOOGLE_API_KEY environment variable must be set"),
                ..Default::default()
            },
        )),
        _ => panic!("Unsupported provider: {provider}"),
    }
}
Go:

package examples

import (
    "fmt"
    "os"

    llmsdk "github.com/hoangvvo/llm-sdk/sdk-go"
    "github.com/hoangvvo/llm-sdk/sdk-go/google"
    "github.com/hoangvvo/llm-sdk/sdk-go/openai"
    "github.com/joho/godotenv"
)

func init() {
    godotenv.Load("../.env")
}

// GetModel creates and returns a language model based on provider and model ID
func GetModel(provider, modelID string) llmsdk.LanguageModel {
    switch provider {
    case "openai":
        apiKey := os.Getenv("OPENAI_API_KEY")
        if apiKey == "" {
            panic("OPENAI_API_KEY environment variable is required")
        }
        return openai.NewOpenAIModel(modelID, openai.OpenAIModelOptions{
            APIKey: apiKey,
        })
    case "openai-chat-completion":
        apiKey := os.Getenv("OPENAI_API_KEY")
        if apiKey == "" {
            panic("OPENAI_API_KEY environment variable is required")
        }
        return openai.NewOpenAIChatModel(modelID, openai.OpenAIChatModelOptions{
            APIKey: apiKey,
        })
    case "google":
        apiKey := os.Getenv("GOOGLE_API_KEY")
        if apiKey == "" {
            panic("GOOGLE_API_KEY environment variable is required")
        }
        return google.NewGoogleModel(modelID, google.GoogleModelOptions{
            APIKey: apiKey,
        })
    default:
        panic(fmt.Sprintf("Unsupported provider: %s", provider))
    }
}
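Because every model implements the same interface, the model returned by a helper like getModel can be swapped between providers without changing the calling code. Below is a minimal sketch of a non-streaming call in TypeScript; the model ID is a placeholder, and the request and response shapes follow the LanguageModelInput and ModelResponse types described later on this page.

// getModel is the helper defined above; "gpt-4o" is a placeholder model ID.
const model = getModel("openai", "gpt-4o");

// Generate a single, non-streaming response.
const response = await model.generate({
  messages: [
    {
      role: "user",
      content: [{ type: "text", text: "Tell me a joke." }],
    },
  ],
});

// response.content is an array of Parts produced by the model.
console.log(response.content);

Switching "openai" to another supported provider is enough to change backends, as long as the corresponding API key is set.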
Language Model Input
LanguageModelInput is a unified format to represent the input for generating responses from the language model, applicable to both non-streaming and streaming requests. The library converts these inputs into the corresponding properties for each LLM provider, where applicable. This allows specifying:
- The conversation history, which includes UserMessage, AssistantMessage, and ToolMessage.
- Sampling parameters: max_tokens, temperature, top_p, top_k, presence_penalty, frequency_penalty, and seed.
- Tool definitions and tool selection.
- The response format, which constrains the model to return structured objects instead of plain text.
- modalities for the model to generate, such as text, images, or audio.
- Specific part output options, such as audio and reasoning.
TypeScript:

interface LanguageModelInput {
  /**
   * A system prompt is a way of providing context and instructions to the model
   */
  system_prompt?: string;
  /**
   * A list of messages comprising the conversation so far.
   */
  messages: Message[];
  /**
   * Definitions of tools that the model may use.
   */
  tools?: Tool[];
  tool_choice?: ToolChoiceOption;
  response_format?: ResponseFormatOption;
  /**
   * The maximum number of tokens that can be generated in the chat completion.
   */
  max_tokens?: number;
  /**
   * Amount of randomness injected into the response. Ranges from 0.0 to 1.0
   */
  temperature?: number;
  /**
   * An alternative to sampling with temperature, called nucleus sampling, where the model
   * considers the results of the tokens with top_p probability mass. Ranges from 0.0 to 1.0
   */
  top_p?: number;
  /**
   * Only sample from the top K options for each subsequent token. Used to remove
   * 'long tail' low probability responses. Must be a non-negative integer.
   */
  top_k?: number;
  /**
   * Positive values penalize new tokens based on whether they appear in the text so far,
   * increasing the model's likelihood to talk about new topics.
   */
  presence_penalty?: number;
  /**
   * Positive values penalize new tokens based on their existing frequency in the text so far,
   * decreasing the model's likelihood to repeat the same line verbatim.
   */
  frequency_penalty?: number;
  /**
   * The seed (integer), if set and supported by the model, to enable deterministic results.
   */
  seed?: number;
  /**
   * The modalities that the model should support.
   */
  modalities?: Modality[];
  /**
   * Options for audio generation.
   */
  audio?: AudioOptions;
  /**
   * Options for reasoning generation.
   */
  reasoning?: ReasoningOptions;
  /**
   * A set of key/value pairs that store additional information about the request.
   * This is forwarded to the model provider if supported.
   */
  metadata?: Record<string, string>;
  /**
   * Extra options that the model may support.
   */
  extra?: Record<string, unknown>;
}
Rust:

pub struct LanguageModelInput {
    /// A system prompt is a way of providing context and instructions to the
    /// model
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_prompt: Option<String>,
    /// A list of messages comprising the conversation so far.
    pub messages: Vec<Message>,
    /// Definitions of tools that the model may use.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<Tool>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoiceOption>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormatOption>,
    /// The maximum number of tokens that can be generated in the chat
    /// completion.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,
    /// Amount of randomness injected into the response. Ranges from 0.0 to 1.0
    #[serde(skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f64>,
    /// An alternative to sampling with temperature, called nucleus sampling,
    /// where the model considers the results of the tokens with `top_p`
    /// probability mass. Ranges from 0.0 to 1.0
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f64>,
    /// Only sample from the top K options for each subsequent token. Used to
    /// remove 'long tail' low probability responses. Must be a non-negative
    /// integer.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_k: Option<i32>,
    /// Positive values penalize new tokens based on whether they appear in the
    /// text so far, increasing the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub presence_penalty: Option<f64>,
    /// Positive values penalize new tokens based on their existing frequency in
    /// the text so far, decreasing the model's likelihood to repeat the same
    /// line verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub frequency_penalty: Option<f64>,
    /// The seed (integer), if set and supported by the model, to enable
    /// deterministic results.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i64>,
    /// The modalities that the model should support.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub modalities: Option<Vec<Modality>>,
    /// A set of key/value pairs that store additional information about the
    /// request. This is forwarded to the model provider if supported.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub metadata: Option<HashMap<String, String>>,
    /// Options for audio generation.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio: Option<AudioOptions>,
    /// Options for reasoning generation.
    pub reasoning: Option<ReasoningOptions>,
    /// Extra options that the model may support.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra: Option<LanguageModelInputExtra>,
}
Go:

type LanguageModelInput struct {
    // A system prompt is a way of providing context and instructions to the model
    SystemPrompt *string `json:"system_prompt,omitempty"`
    // A list of messages comprising the conversation so far.
    Messages []Message `json:"messages"`
    // Definitions of tools that the model may use.
    Tools      []Tool            `json:"tools,omitempty"`
    ToolChoice *ToolChoiceOption `json:"tool_choice,omitempty"`
    ResponseFormat *ResponseFormatOption `json:"response_format,omitempty"`
    // The maximum number of tokens that can be generated in the chat completion.
    MaxTokens *uint32 `json:"max_tokens,omitempty"`
    // Amount of randomness injected into the response. Ranges from 0.0 to 1.0
    Temperature *float64 `json:"temperature,omitempty"`
    // An alternative to sampling with temperature, called nucleus sampling, where the model
    // considers the results of the tokens with top_p probability mass. Ranges from 0.0 to 1.0
    TopP *float64 `json:"top_p,omitempty"`
    // Only sample from the top K options for each subsequent token. Used to remove
    // 'long tail' low probability responses. Must be a non-negative integer.
    TopK *int32 `json:"top_k,omitempty"`
    // Positive values penalize new tokens based on whether they appear in the text so far,
    // increasing the model's likelihood to talk about new topics.
    PresencePenalty *float64 `json:"presence_penalty,omitempty"`
    // Positive values penalize new tokens based on their existing frequency in the text so far,
    // decreasing the model's likelihood to repeat the same line verbatim.
    FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
    // The seed (integer), if set and supported by the model, to enable deterministic results.
    Seed *int64 `json:"seed,omitempty"`
    // The modalities that the model should support.
    Modalities []Modality `json:"modalities,omitempty"`
    // A set of key/value pairs that store additional information about the request.
    // This is forwarded to the model provider if supported.
    Metadata map[string]string `json:"metadata,omitempty"`
    // Options for audio generation.
    Audio *AudioOptions `json:"audio,omitempty"`
    // Options for reasoning generation.
    Reasoning *ReasoningOptions `json:"reasoning,omitempty"`
    // Extra options that the model may support.
    Extra map[string]any `json:"extra,omitempty"`
}
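To illustrate how these fields combine, here is a sketch of a request that sets a system prompt, conversation history, and a few sampling parameters. The prompt text and parameter values are placeholders, and model is a LanguageModel instance obtained as in the earlier example.

const response = await model.generate({
  system_prompt: "You are a concise assistant.",
  messages: [
    {
      role: "user",
      content: [{ type: "text", text: "Summarize the plot of Hamlet." }],
    },
  ],
  // Sampling parameters are optional and forwarded to the provider if supported.
  temperature: 0.2,
  top_p: 0.9,
  max_tokens: 256,
  seed: 42,
  // Arbitrary request metadata, forwarded to the provider if supported.
  metadata: { request_source: "docs-example" },
});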
Message
Messages are the primitives that make up the conversation history, and Parts are the building blocks of each message. The library converts them into a format suitable for the underlying LLM provider and maps the provider-specific equivalents back to this unified format.
Three message types are defined in the SDK: UserMessage, AssistantMessage, and ToolMessage.
TypeScript:

type Message = UserMessage | AssistantMessage | ToolMessage;

interface UserMessage {
  role: "user";
  content: Part[];
}

interface AssistantMessage {
  role: "assistant";
  content: Part[];
}

interface ToolMessage {
  role: "tool";
  content: Part[];
}
Rust:

pub enum Message {
    User(UserMessage),
    Assistant(AssistantMessage),
    Tool(ToolMessage),
}

pub struct UserMessage {
    pub content: Vec<Part>,
}

pub struct AssistantMessage {
    pub content: Vec<Part>,
}

pub struct ToolMessage {
    pub content: Vec<Part>,
}
Go:

type Message struct {
    UserMessage      *UserMessage      `json:"-"`
    AssistantMessage *AssistantMessage `json:"-"`
    ToolMessage      *ToolMessage      `json:"-"`
}

type UserMessage struct {
    Content []Part `json:"content"`
}

type AssistantMessage struct {
    Content []Part `json:"content"`
}

type ToolMessage struct {
    Content []Part `json:"content"`
}
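For example, a multi-turn conversation is simply an array of these messages, each carrying its own content parts (a TypeScript sketch with placeholder text):

const messages: Message[] = [
  {
    role: "user",
    content: [{ type: "text", text: "What is the capital of France?" }],
  },
  {
    role: "assistant",
    content: [{ type: "text", text: "The capital of France is Paris." }],
  },
  {
    role: "user",
    content: [{ type: "text", text: "And what is its population?" }],
  },
];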
The following Part types are implemented in the SDK: TextPart, ImagePart, AudioPart, SourcePart (for citations), ToolCallPart, ToolResultPart, and ReasoningPart.
TypeScript:

type Part =
  | TextPart
  | ImagePart
  | AudioPart
  | SourcePart
  | ToolCallPart
  | ToolResultPart
  | ReasoningPart;
Rust:

pub enum Part {
    Text(TextPart),
    Image(ImagePart),
    Audio(AudioPart),
    Source(SourcePart),
    ToolCall(ToolCallPart),
    ToolResult(ToolResultPart),
    Reasoning(ReasoningPart),
}
Go:

type Part struct {
    TextPart       *TextPart       `json:"-"`
    ImagePart      *ImagePart      `json:"-"`
    AudioPart      *AudioPart      `json:"-"`
    SourcePart     *SourcePart     `json:"-"`
    ToolCallPart   *ToolCallPart   `json:"-"`
    ToolResultPart *ToolResultPart `json:"-"`
    ReasoningPart  *ReasoningPart  `json:"-"`
}
Text Part
TypeScript:

interface TextPart {
  type: "text";
  text: string;
  citations?: Citation[];
}

Rust:

pub struct TextPart {
    pub text: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub citations: Option<Vec<Citation>>,
}

Go:

type TextPart struct {
    Text      string     `json:"text"`
    Citations []Citation `json:"citations,omitempty"`
}
Image Part
TypeScript:

interface ImagePart {
  type: "image";
  /**
   * The MIME type of the image. E.g. "image/jpeg", "image/png".
   */
  mime_type: string;
  /**
   * The base64-encoded image data.
   */
  image_data: string;
  /**
   * The width of the image in pixels.
   */
  width?: number;
  /**
   * The height of the image in pixels.
   */
  height?: number;
  /**
   * ID of the image part, if applicable
   */
  id?: string;
}

Rust:

pub struct ImagePart {
    /// The MIME type of the image. E.g. "image/jpeg", "image/png".
    pub mime_type: String,
    /// The base64-encoded image data.
    pub image_data: String,
    /// The width of the image in pixels.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// The height of the image in pixels.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// The ID of the image part, if applicable
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}

Go:

type ImagePart struct {
    // The MIME type of the image. E.g. "image/jpeg", "image/png".
    MimeType string `json:"mime_type"`
    // The base64-encoded image data.
    ImageData string `json:"image_data"`
    // The width of the image in pixels.
    Width *int `json:"width,omitempty"`
    // The height of the image in pixels.
    Height *int `json:"height,omitempty"`
    // ID of the image part, if applicable
    ID *string `json:"id,omitempty"`
}
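Because image_data is base64-encoded, an image from disk can be attached to a user message as in this TypeScript sketch (the file path and question are placeholders):

import { readFile } from "node:fs/promises";

// Read the image and base64-encode it for the image_data field.
const imageData = (await readFile("./photo.png")).toString("base64");

const message: UserMessage = {
  role: "user",
  content: [
    { type: "text", text: "What is in this picture?" },
    { type: "image", mime_type: "image/png", image_data: imageData },
  ],
};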
Audio Part
TypeScript:

interface AudioPart {
  type: "audio";
  /**
   * The base64-encoded audio data.
   */
  audio_data: string;
  format: AudioFormat;
  /**
   * The sample rate of the audio. E.g. 44100, 48000.
   */
  sample_rate?: number;
  /**
   * The number of channels of the audio. E.g. 1, 2.
   */
  channels?: number;
  /**
   * The transcript of the audio.
   */
  transcript?: string;
  /**
   * ID of the audio part, if applicable
   */
  id?: string;
}

type AudioFormat =
  | "wav"
  | "mp3"
  | "linear16"
  | "flac"
  | "mulaw"
  | "alaw"
  | "aac"
  | "opus";

Rust:

pub struct AudioPart {
    /// The base64-encoded audio data.
    pub audio_data: String,
    /// The format of the audio.
    pub format: AudioFormat,
    /// The sample rate of the audio. E.g. 44100, 48000.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sample_rate: Option<u32>,
    /// The number of channels of the audio. E.g. 1, 2.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub channels: Option<u32>,
    /// The transcript of the audio.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub transcript: Option<String>,
    /// The ID of the audio part, if applicable
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}

pub enum AudioFormat {
    Wav,
    Mp3,
    Linear16,
    Flac,
    Mulaw,
    Alaw,
    Aac,
    Opus,
}

Go:

type AudioPart struct {
    // The base64-encoded audio data.
    AudioData string `json:"audio_data"`
    Format AudioFormat `json:"format"`
    // The sample rate of the audio. E.g. 44100, 48000.
    SampleRate *int `json:"sample_rate,omitempty"`
    // The number of channels of the audio. E.g. 1, 2.
    Channels *int `json:"channels,omitempty"`
    // The transcript of the audio.
    Transcript *string `json:"transcript,omitempty"`
    // The ID of the part, if applicable.
    ID *string `json:"id,omitempty"`
}

type AudioFormat string
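Audio input is built the same way: the raw bytes are base64-encoded and described by format, with optional sample_rate and channels hints. A TypeScript sketch, where the file path and parameter values are placeholders and should match the actual audio:

import { readFile } from "node:fs/promises";

const audioData = (await readFile("./question.wav")).toString("base64");

const audioPart: AudioPart = {
  type: "audio",
  audio_data: audioData,
  format: "wav",
  sample_rate: 16000,
  channels: 1,
};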
Source Part
TypeScript:

interface SourcePart {
  type: "source";
  /**
   * The source URL or identifier of the document.
   */
  source: string;
  /**
   * The title of the document.
   */
  title: string;
  /**
   * The content of the document.
   */
  content: Part[];
}

Rust:

pub struct SourcePart {
    /// The source URL or identifier of the document.
    pub source: String,
    /// The title of the document.
    pub title: String,
    /// The content of the document.
    pub content: Vec<Part>,
}

Go:

type SourcePart struct {
    // The source URL or identifier of the document.
    Source string `json:"source"`
    // The title of the document.
    Title string `json:"title"`
    // The content of the document.
    Content []Part `json:"content"`
}
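A source part bundles a document (itself a list of parts) with its source URL and title so that it can be cited. A sketch of attaching a document to a user message, with a placeholder URL and placeholder text:

const documentPart: SourcePart = {
  type: "source",
  source: "https://example.com/handbook",
  title: "Employee Handbook",
  content: [
    { type: "text", text: "Employees accrue 20 days of paid leave per year." },
  ],
};

const question: UserMessage = {
  role: "user",
  content: [
    documentPart,
    { type: "text", text: "How many days of paid leave do I get?" },
  ],
};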
Tool Call Part
TypeScript:

interface ToolCallPart {
  type: "tool-call";
  /**
   * The ID of the tool call, used to match the tool result with the tool call.
   */
  tool_call_id: string;
  /**
   * The name of the tool to call.
   */
  tool_name: string;
  /**
   * The arguments to pass to the tool.
   */
  args: Record<string, unknown>;
  /**
   * The ID of the tool call part, if applicable.
   * This is different from tool_call_id which is used to match tool results.
   */
  id?: string;
}

Rust:

pub struct ToolCallPart {
    /// The ID of the tool call, used to match the tool result with the tool
    /// call.
    pub tool_call_id: String,
    /// The name of the tool to call.
    pub tool_name: String,
    /// The arguments to pass to the tool.
    pub args: Value,
    /// The ID of the tool call, if applicable
    /// This is different from `tool_call_id`, which is the ID used to match the
    /// tool result with the tool call.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}

Go:

type ToolCallPart struct {
    // The ID of the tool call, used to match the tool result with the tool call.
    ToolCallID string `json:"tool_call_id"`
    // The name of the tool to call.
    ToolName string `json:"tool_name"`
    // The arguments to pass to the tool.
    Args json.RawMessage `json:"args"`
    // The ID of the part, if applicable.
    // This is different from ToolCallID which is used to match tool results.
    ID *string `json:"id,omitempty"`
}
Tool Result Part
TypeScript:

interface ToolResultPart {
  type: "tool-result";
  /**
   * The ID of the tool call from previous assistant message.
   */
  tool_call_id: string;
  /**
   * The name of the tool that was called.
   */
  tool_name: string;
  /**
   * The content of the tool result.
   */
  content: Part[];
  /**
   * Marks the tool result as an error.
   */
  is_error?: boolean;
}

Rust:

pub struct ToolResultPart {
    /// The ID of the tool call from previous assistant message.
    pub tool_call_id: String,
    /// The name of the tool that was called.
    pub tool_name: String,
    /// The content of the tool result.
    pub content: Vec<Part>,
    /// Marks the tool result as an error.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_error: Option<bool>,
}

Go:

type ToolResultPart struct {
    // The ID of the tool call from previous assistant message.
    ToolCallID string `json:"tool_call_id"`
    // The name of the tool that was called.
    ToolName string `json:"tool_name"`
    // The content of the tool result.
    Content []Part `json:"content"`
    // Marks the tool result as an error.
    IsError bool `json:"is_error,omitempty"`
}
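A tool call produced by the model (a ToolCallPart in an assistant message) is matched to its result (a ToolResultPart in a tool message) by tool_call_id. A TypeScript sketch of how such an exchange appears in the conversation history, with placeholder tool name, arguments, and IDs:

const history: Message[] = [
  {
    role: "user",
    content: [{ type: "text", text: "What's the weather in Paris?" }],
  },
  {
    // The model asked to call a tool; this part came back in response.content.
    role: "assistant",
    content: [
      {
        type: "tool-call",
        tool_call_id: "call_123",
        tool_name: "get_weather",
        args: { city: "Paris" },
      },
    ],
  },
  {
    // The application executes the tool and reports the result back.
    role: "tool",
    content: [
      {
        type: "tool-result",
        tool_call_id: "call_123",
        tool_name: "get_weather",
        content: [{ type: "text", text: '{"temperature_c": 18}' }],
      },
    ],
  },
];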
Reasoning Part
TypeScript:

interface ReasoningPart {
  type: "reasoning";
  /**
   * The reasoning text content
   */
  text: string;
  /**
   * The reasoning internal signature
   */
  signature?: string;
  /**
   * The ID of the reasoning part, if applicable
   */
  id?: string;
}

Rust:

pub struct ReasoningPart {
    /// The reasoning text content.
    pub text: String,
    /// The reasoning internal signature
    #[serde(skip_serializing_if = "Option::is_none")]
    pub signature: Option<String>,
    /// The ID of the reasoning part, if applicable
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}

Go:

type ReasoningPart struct {
    // The reasoning text content
    Text string `json:"text"`
    // The reasoning internal signature
    Signature *string `json:"signature,omitempty"`
    // The ID of the reasoning part, if applicable.
    ID *string `json:"id,omitempty"`
}
Model Response
The response from the language model is represented as a ModelResponse that includes:

- content: An array of Part representing the generated content, which typically forms the content of an AssistantMessage.
- usage: Token usage information, if available.
- cost: The estimated cost of the request, if the model’s pricing information is provided.
TypeScript:

interface ModelResponse {
  content: Part[];
  usage?: ModelUsage;
  /**
   * The cost of the response.
   */
  cost?: number;
}

interface ModelUsage {
  input_tokens: number;
  output_tokens: number;
  input_tokens_details?: ModelTokensDetails;
  output_tokens_details?: ModelTokensDetails;
}

interface ModelTokensDetails {
  text_tokens?: number;
  cached_text_tokens?: number;
  audio_tokens?: number;
  cached_audio_tokens?: number;
  image_tokens?: number;
  cached_image_tokens?: number;
}
Rust:

pub struct ModelResponse {
    pub content: Vec<Part>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<ModelUsage>,
    /// The cost of the response.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cost: Option<f64>,
}

pub struct ModelUsage {
    pub input_tokens: u32,
    pub output_tokens: u32,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub input_tokens_details: Option<ModelTokensDetails>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub output_tokens_details: Option<ModelTokensDetails>,
}

pub struct ModelTokensDetails {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub text_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_text_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_audio_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub image_tokens: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cached_image_tokens: Option<u32>,
}
Go:

type ModelResponse struct {
    Content []Part `json:"content"`
    Usage *ModelUsage `json:"usage,omitempty"`
    // The cost of the response.
    Cost *float64 `json:"cost,omitempty"`
}

type ModelUsage struct {
    InputTokens int `json:"input_tokens"`
    OutputTokens int `json:"output_tokens"`
    InputTokensDetails *ModelTokensDetails `json:"input_tokens_details,omitempty"`
    OutputTokensDetails *ModelTokensDetails `json:"output_tokens_details,omitempty"`
}

type ModelTokensDetails struct {
    TextTokens *int `json:"text_tokens,omitempty"`
    CachedTextTokens *int `json:"cached_text_tokens,omitempty"`
    AudioTokens *int `json:"audio_tokens,omitempty"`
    CachedAudioTokens *int `json:"cached_audio_tokens,omitempty"`
    ImageTokens *int `json:"image_tokens,omitempty"`
    CachedImageTokens *int `json:"cached_image_tokens,omitempty"`
}
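For example, the generated text can be extracted by filtering the content parts, and usage and cost can be inspected when the provider reports them (a TypeScript sketch; the prompt is a placeholder):

const response = await model.generate({
  messages: [
    { role: "user", content: [{ type: "text", text: "Name three fruits." }] },
  ],
});

// Collect only the text parts of the generated content.
const text = response.content
  .filter((part): part is TextPart => part.type === "text")
  .map((part) => part.text)
  .join("");

console.log(text);
console.log("input tokens:", response.usage?.input_tokens);
console.log("output tokens:", response.usage?.output_tokens);
console.log("estimated cost:", response.cost);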
For streaming calls, the response is represented as a series of PartialModelResponse objects that include:

- delta: A PartDelta and its index in the eventual content array.
- usage: Token usage information, if available.
- cost: The estimated cost, if available.
TypeScript:

interface ContentDelta {
  index: number;
  part: PartDelta;
}

interface PartialModelResponse {
  delta?: ContentDelta;
  usage?: ModelUsage;
  cost?: number;
}
Rust:

pub struct ContentDelta {
    pub index: usize,
    pub part: PartDelta,
}

pub struct PartialModelResponse {
    pub delta: Option<ContentDelta>,
    pub usage: Option<ModelUsage>,
    pub cost: Option<f64>,
}
Go:

type ContentDelta struct {
    Index int `json:"index"`
    Part PartDelta `json:"part"`
}

type PartialModelResponse struct {
    Delta *ContentDelta `json:"delta,omitempty"`
    Usage *ModelUsage `json:"usage,omitempty"`
    Cost *float64 `json:"cost,omitempty"`
}
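A TypeScript sketch of consuming a stream; the delta parts are logged as-is here, since the exact PartDelta fields are not covered in this section:

const stream = model.stream({
  messages: [
    { role: "user", content: [{ type: "text", text: "Write a haiku." }] },
  ],
});

for await (const partial of stream) {
  if (partial.delta) {
    // index identifies which entry of the final content array this delta belongs to.
    console.log(partial.delta.index, partial.delta.part);
  }
  if (partial.usage) {
    console.log("usage:", partial.usage);
  }
}

Assembling these deltas into a final response by hand is what the StreamAccumulator utility mentioned below takes care of.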
All SDKs provide the StreamAccumulator utility to help build the final ModelResponse from a stream of PartialModelResponse objects.