Image generation
The image modality is represented by `ImagePart`s.
// TypeScript
interface ImagePart {
  type: "image";
  /**
   * The MIME type of the image. E.g. "image/jpeg", "image/png".
   */
  mime_type: string;
  /**
   * The base64-encoded image data.
   */
  data: string;
  /**
   * The width of the image in pixels.
   */
  width?: number;
  /**
   * The height of the image in pixels.
   */
  height?: number;
  /**
   * The ID of the image part, if applicable.
   */
  id?: string;
}

// Rust
pub struct ImagePart {
    /// The MIME type of the image. E.g. "image/jpeg", "image/png".
    pub mime_type: String,
    /// The base64-encoded image data.
    pub data: String,
    /// The width of the image in pixels.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub width: Option<u32>,
    /// The height of the image in pixels.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub height: Option<u32>,
    /// The ID of the image part, if applicable.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
}

// Go
type ImagePart struct {
	// The MIME type of the image. E.g. "image/jpeg", "image/png".
	MimeType string `json:"mime_type"`
	// The base64-encoded image data.
	Data string `json:"data"`
	// The width of the image in pixels.
	Width *int `json:"width,omitempty"`
	// The height of the image in pixels.
	Height *int `json:"height,omitempty"`
	// The ID of the image part, if applicable.
	ID *string `json:"id,omitempty"`
}

Generate images
Section titled “Generate images”
To generate images, include `image` in the `modalities` of the input.

import { getModel } from "./get-model.ts";import { spawn } from "node:child_process";import { unlink, writeFile } from "node:fs/promises";
// Example script: request an image from the model, save it to disk, open it
// with the platform's default viewer, then remove the file again.
const model = getModel("google", "gemini-2.0-flash-exp-image-generation");

console.log("Requesting image generation...");
const response = await model.generate({
  // Ask for both text and image parts in the response.
  modalities: ["text", "image"],
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text:
            "A bright, sunlit green hill with a single large, leafy tree, " +
            "fluffy clouds drifting across a deep blue sky, painted in the warm, " +
            "detailed, hand-painted style of a Studio Ghibli landscape, soft colors, " +
            "gentle light, and a sense of quiet wonder.",
        },
      ],
    },
  ],
});

// The response content is a list of parts; pick the first image part.
const imagePart = response.content.find((part) => part.type === "image");

if (!imagePart) {
  throw new Error("Image part not found in response");
}

// Derive the file extension from the MIME subtype. `||` (not `??`) so that a
// missing *or empty* subtype (e.g. "image/") falls back to "png".
const fileName = `image.${imagePart.mime_type.split("/")[1] || "png"}`;

// `data` is base64 text; the "base64" encoding makes writeFile decode it.
await writeFile(fileName, imagePart.data, { encoding: "base64" });
console.log(`Saved image to ${fileName}`);

launchFile(fileName);

// Give the viewer a few seconds to load the file before it is removed.
await new Promise((resolve) => setTimeout(resolve, 5000));

// Cleanup is best-effort: await it so "Done." is printed after the attempt,
// and report a failure instead of leaving an unhandled promise rejection.
try {
  await unlink(fileName);
} catch (error) {
  console.warn(`Failed to remove ${fileName}:`, error);
}

console.log("Done.");
/**
 * Opens `path` with the platform's default application.
 *
 * Uses `open` on macOS, `xdg-open` on Linux and `cmd /C start` on Windows.
 * On any other platform a warning is printed and nothing is launched.
 * Failures are reported with `console.warn`; this function never throws.
 */
function launchFile(path: string) {
  const platform = process.platform;
  let command: [string, string[]] | undefined;

  if (platform === "darwin") {
    command = ["open", [path]];
  } else if (platform === "linux") {
    command = ["xdg-open", [path]];
  } else if (platform === "win32") {
    // The empty string is the window-title argument of `start`, so that
    // `path` is not mistaken for the title.
    command = ["cmd", ["/C", "start", "", path]];
  }

  if (!command) {
    console.warn(`Open ${path} manually; unsupported platform: ${platform}`);
    return;
  }

  try {
    const child = spawn(command[0], command[1], {
      stdio: "ignore",
      detached: true,
    });
    // A process that cannot be spawned (e.g. the opener binary is missing)
    // is reported asynchronously through the "error" event, not by a throw.
    // Without a listener that event would surface as an uncaught exception.
    child.on("error", (error) => {
      console.warn(`Failed to open ${path}:`, error);
    });
    // Do not keep this script alive waiting for the viewer to exit.
    child.unref();
  } catch (error) {
    // Synchronous failures (e.g. invalid arguments) still land here.
    console.warn(`Failed to open ${path}:`, error);
  }
}

use base64::{engine::general_purpose::STANDARD as BASE64_STANDARD, Engine};
use dotenvy::dotenv;
use llm_sdk::{LanguageModelInput, Message, Modality, Part};
use std::{fs, process::Command, time::Duration};
use tokio::time::sleep;
mod common;
/// Example entry point: requests an image from the model, writes it to disk,
/// opens it with the platform's default viewer, then removes the file again.
///
/// # Panics
///
/// Panics if the generation request fails, if the returned image data is not
/// valid base64, or if the image file cannot be written.
#[tokio::main]
async fn main() {
    dotenv().ok();

    let model = common::get_model("google", "gemini-2.0-flash-exp-image-generation");

    println!("Requesting image generation...");
    let response = model
        .generate(LanguageModelInput {
            // Ask for both text and image parts in the response.
            modalities: Some(vec![Modality::Text, Modality::Image]),
            messages: vec![Message::user(vec![Part::text(
                "A bright, sunlit green hill with a single large, leafy tree, fluffy clouds \
                 drifting across a deep blue sky, painted in the warm, detailed, hand-painted \
                 style of a Studio Ghibli landscape, soft colors, gentle light, and a sense of \
                 quiet wonder.",
            )])],
            ..Default::default()
        })
        .await
        .expect("model.generate failed");
    // Generation response is intentionally not printed to keep output concise

    // Pick the first image part out of the response content.
    let found = response.content.iter().find_map(|part| match part {
        Part::Image(image) => Some(image),
        _ => None,
    });

    if let Some(image_part) = found {
        // Derive the extension from the MIME subtype; fall back to "png" when
        // the subtype is missing *or empty* (e.g. "image/").
        let ext = image_part
            .mime_type
            .split('/')
            .nth(1)
            .filter(|subtype| !subtype.is_empty())
            .unwrap_or("png");
        let file_name = format!("image.{ext}");

        let image_bytes = BASE64_STANDARD
            .decode(&image_part.data)
            .expect("invalid base64 image data");

        fs::write(&file_name, image_bytes).expect("failed to write image file");
        println!("Saved image to {file_name}");

        // Opening the viewer is best-effort, but tell the user when it fails
        // so they know to open the file themselves.
        if let Err(err) = open_file(&file_name) {
            eprintln!("Failed to open {file_name}: {err}");
        }

        // Give the viewer a few seconds to load the file before removing it.
        sleep(Duration::from_secs(5)).await;
        // Cleanup is best-effort; a failure to remove the file is ignored.
        let _ = fs::remove_file(&file_name);
        println!("Done.");
    } else {
        eprintln!("Image part not found in response");
    }
}
/// Opens `path` with the platform's default application.
///
/// Uses `open` on macOS, `xdg-open` on Linux and `cmd /C start` on Windows.
/// The opener is spawned and not waited for, so this returns as soon as the
/// process has been started instead of blocking until it exits.
///
/// # Errors
///
/// Returns the I/O error from spawning the opener, or an
/// [`std::io::ErrorKind::Unsupported`] error on any other operating system.
fn open_file(path: &str) -> std::io::Result<()> {
    let mut command = if cfg!(target_os = "macos") {
        let mut cmd = Command::new("open");
        cmd.arg(path);
        cmd
    } else if cfg!(target_os = "linux") {
        let mut cmd = Command::new("xdg-open");
        cmd.arg(path);
        cmd
    } else if cfg!(target_os = "windows") {
        let mut cmd = Command::new("cmd");
        // The empty string is the window-title argument of `start`, so that
        // `path` is not mistaken for the title.
        cmd.args(["/C", "start", "", path]);
        cmd
    } else {
        return Err(std::io::Error::new(
            std::io::ErrorKind::Unsupported,
            format!("unsupported OS: {}", std::env::consts::OS),
        ));
    };

    // `spawn` (rather than `status`) starts the opener without waiting for it.
    command.spawn()?;
    Ok(())
}

package main
import ( "context" "encoding/base64" "fmt" "log" "os" "os/exec" "runtime" "strings" "time"
llmsdk "github.com/hoangvvo/llm-sdk/sdk-go" "github.com/hoangvvo/llm-sdk/sdk-go/examples")
func main() { model := examples.GetModel("google", "gemini-2.0-flash-exp-image-generation")
log.Println("Requesting image generation...") response, err := model.Generate(context.Background(), &llmsdk.LanguageModelInput{ Modalities: []llmsdk.Modality{llmsdk.ModalityText, llmsdk.ModalityImage}, Messages: []llmsdk.Message{ llmsdk.NewUserMessage( llmsdk.NewTextPart( "A bright, sunlit green hill with a single large, leafy tree, " + "fluffy clouds drifting across a deep blue sky, painted in the warm, " + "detailed, hand-painted style of a Studio Ghibli landscape, soft colors, " + "gentle light, and a sense of quiet wonder.", ), ), }, }) if err != nil { log.Fatalf("Generation failed: %v", err) }
// Generation response is intentionally not printed to keep output concise
for _, part := range response.Content { if part.ImagePart != nil { mime := part.ImagePart.MimeType ext := "png" if mime != "" { if sp := strings.SplitN(mime, "/", 2); len(sp) == 2 && sp[1] != "" { ext = sp[1] } } fileName := fmt.Sprintf("image.%s", ext)
data, err := base64.StdEncoding.DecodeString(part.ImagePart.Data) if err != nil { log.Fatalf("Failed to decode image data: %v", err) }
if err := os.WriteFile(fileName, data, 0o644); err != nil { log.Fatalf("Failed to write image file: %v", err) } log.Println("Saved image to", fileName)
_ = openFile(fileName)
time.Sleep(5 * time.Second) _ = os.Remove(fileName) log.Println("Done.") return } }
log.Println("Image part not found in response")}
// openFile starts the platform's default opener for path without waiting for
// it to finish: "open" on darwin, "xdg-open" on linux, "cmd /C start" on
// windows. Any other GOOS yields an "unsupported OS" error.
func openFile(path string) error {
	var (
		program string
		argv    []string
	)

	switch runtime.GOOS {
	case "darwin":
		program, argv = "open", []string{path}
	case "linux":
		program, argv = "xdg-open", []string{path}
	case "windows":
		// The empty string is the window-title argument of "start".
		program, argv = "cmd", []string{"/C", "start", "", path}
	default:
		return fmt.Errorf("unsupported OS: %s", runtime.GOOS)
	}

	return exec.Command(program, argv...).Start()
}