anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
  15pub mod batches;
  16
  17pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  18
  19pub const FINE_GRAINED_TOOL_STREAMING_BETA_HEADER: &str = "fine-grained-tool-streaming-2025-05-14";
  20pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
  21
  22#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  23#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  24pub struct AnthropicModelCacheConfiguration {
  25    pub min_total_token: u64,
  26    pub should_speculate: bool,
  27    pub max_cache_anchors: usize,
  28}
  29
  30#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  31#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  32pub enum AnthropicModelMode {
  33    #[default]
  34    Default,
  35    Thinking {
  36        budget_tokens: Option<u32>,
  37    },
  38}
  39
  40impl From<ModelMode> for AnthropicModelMode {
  41    fn from(value: ModelMode) -> Self {
  42        match value {
  43            ModelMode::Default => AnthropicModelMode::Default,
  44            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  45        }
  46    }
  47}
  48
  49impl From<AnthropicModelMode> for ModelMode {
  50    fn from(value: AnthropicModelMode) -> Self {
  51        match value {
  52            AnthropicModelMode::Default => ModelMode::Default,
  53            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  54        }
  55    }
  56}
  57
  58#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  59#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
  60pub enum Model {
  61    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
  62    ClaudeOpus4,
  63    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
  64    ClaudeOpus4_1,
  65    #[serde(
  66        rename = "claude-opus-4-thinking",
  67        alias = "claude-opus-4-thinking-latest"
  68    )]
  69    ClaudeOpus4Thinking,
  70    #[serde(
  71        rename = "claude-opus-4-1-thinking",
  72        alias = "claude-opus-4-1-thinking-latest"
  73    )]
  74    ClaudeOpus4_1Thinking,
  75    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
  76    ClaudeOpus4_5,
  77    #[serde(
  78        rename = "claude-opus-4-5-thinking",
  79        alias = "claude-opus-4-5-thinking-latest"
  80    )]
  81    ClaudeOpus4_5Thinking,
  82    #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
  83    ClaudeOpus4_6,
  84    #[serde(
  85        rename = "claude-opus-4-6-thinking",
  86        alias = "claude-opus-4-6-thinking-latest"
  87    )]
  88    ClaudeOpus4_6Thinking,
  89    #[serde(
  90        rename = "claude-opus-4-6-1m-context",
  91        alias = "claude-opus-4-6-1m-context-latest"
  92    )]
  93    ClaudeOpus4_6_1mContext,
  94    #[serde(
  95        rename = "claude-opus-4-6-1m-context-thinking",
  96        alias = "claude-opus-4-6-1m-context-thinking-latest"
  97    )]
  98    ClaudeOpus4_6_1mContextThinking,
  99    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
 100    ClaudeSonnet4,
 101    #[serde(
 102        rename = "claude-sonnet-4-thinking",
 103        alias = "claude-sonnet-4-thinking-latest"
 104    )]
 105    ClaudeSonnet4Thinking,
 106    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
 107    ClaudeSonnet4_5,
 108    #[serde(
 109        rename = "claude-sonnet-4-5-thinking",
 110        alias = "claude-sonnet-4-5-thinking-latest"
 111    )]
 112    ClaudeSonnet4_5Thinking,
 113    #[serde(
 114        rename = "claude-sonnet-4-5-1m-context",
 115        alias = "claude-sonnet-4-5-1m-context-latest"
 116    )]
 117    ClaudeSonnet4_5_1mContext,
 118    #[serde(
 119        rename = "claude-sonnet-4-5-1m-context-thinking",
 120        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
 121    )]
 122    ClaudeSonnet4_5_1mContextThinking,
 123    #[default]
 124    #[serde(rename = "claude-sonnet-4-6", alias = "claude-sonnet-4-6-latest")]
 125    ClaudeSonnet4_6,
 126    #[serde(
 127        rename = "claude-sonnet-4-6-thinking",
 128        alias = "claude-sonnet-4-6-thinking-latest"
 129    )]
 130    ClaudeSonnet4_6Thinking,
 131    #[serde(
 132        rename = "claude-sonnet-4-6-1m-context",
 133        alias = "claude-sonnet-4-6-1m-context-latest"
 134    )]
 135    ClaudeSonnet4_6_1mContext,
 136    #[serde(
 137        rename = "claude-sonnet-4-6-1m-context-thinking",
 138        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
 139    )]
 140    ClaudeSonnet4_6_1mContextThinking,
 141    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
 142    Claude3_7Sonnet,
 143    #[serde(
 144        rename = "claude-3-7-sonnet-thinking",
 145        alias = "claude-3-7-sonnet-thinking-latest"
 146    )]
 147    Claude3_7SonnetThinking,
 148    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
 149    Claude3_5Sonnet,
 150    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
 151    ClaudeHaiku4_5,
 152    #[serde(
 153        rename = "claude-haiku-4-5-thinking",
 154        alias = "claude-haiku-4-5-thinking-latest"
 155    )]
 156    ClaudeHaiku4_5Thinking,
 157    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
 158    Claude3_5Haiku,
 159    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
 160    Claude3Opus,
 161    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
 162    Claude3Sonnet,
 163    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
 164    Claude3Haiku,
 165    #[serde(rename = "custom")]
 166    Custom {
 167        name: String,
 168        max_tokens: u64,
 169        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
 170        display_name: Option<String>,
 171        /// Override this model with a different Anthropic model for tool calls.
 172        tool_override: Option<String>,
 173        /// Indicates whether this custom model supports caching.
 174        cache_configuration: Option<AnthropicModelCacheConfiguration>,
 175        max_output_tokens: Option<u64>,
 176        default_temperature: Option<f32>,
 177        #[serde(default)]
 178        extra_beta_headers: Vec<String>,
 179        #[serde(default)]
 180        mode: AnthropicModelMode,
 181    },
 182}
 183
 184impl Model {
 185    pub fn default_fast() -> Self {
 186        Self::Claude3_5Haiku
 187    }
 188
 189    pub fn from_id(id: &str) -> Result<Self> {
 190        if id.starts_with("claude-opus-4-6-1m-context-thinking") {
 191            return Ok(Self::ClaudeOpus4_6_1mContextThinking);
 192        }
 193
 194        if id.starts_with("claude-opus-4-6-1m-context") {
 195            return Ok(Self::ClaudeOpus4_6_1mContext);
 196        }
 197
 198        if id.starts_with("claude-opus-4-6-thinking") {
 199            return Ok(Self::ClaudeOpus4_6Thinking);
 200        }
 201
 202        if id.starts_with("claude-opus-4-6") {
 203            return Ok(Self::ClaudeOpus4_6);
 204        }
 205
 206        if id.starts_with("claude-opus-4-5-thinking") {
 207            return Ok(Self::ClaudeOpus4_5Thinking);
 208        }
 209
 210        if id.starts_with("claude-opus-4-5") {
 211            return Ok(Self::ClaudeOpus4_5);
 212        }
 213
 214        if id.starts_with("claude-opus-4-1-thinking") {
 215            return Ok(Self::ClaudeOpus4_1Thinking);
 216        }
 217
 218        if id.starts_with("claude-opus-4-thinking") {
 219            return Ok(Self::ClaudeOpus4Thinking);
 220        }
 221
 222        if id.starts_with("claude-opus-4-1") {
 223            return Ok(Self::ClaudeOpus4_1);
 224        }
 225
 226        if id.starts_with("claude-opus-4") {
 227            return Ok(Self::ClaudeOpus4);
 228        }
 229
 230        if id.starts_with("claude-sonnet-4-6-1m-context-thinking") {
 231            return Ok(Self::ClaudeSonnet4_6_1mContextThinking);
 232        }
 233
 234        if id.starts_with("claude-sonnet-4-6-1m-context") {
 235            return Ok(Self::ClaudeSonnet4_6_1mContext);
 236        }
 237
 238        if id.starts_with("claude-sonnet-4-6-thinking") {
 239            return Ok(Self::ClaudeSonnet4_6Thinking);
 240        }
 241
 242        if id.starts_with("claude-sonnet-4-6") {
 243            return Ok(Self::ClaudeSonnet4_6);
 244        }
 245
 246        if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
 247            return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
 248        }
 249
 250        if id.starts_with("claude-sonnet-4-5-1m-context") {
 251            return Ok(Self::ClaudeSonnet4_5_1mContext);
 252        }
 253
 254        if id.starts_with("claude-sonnet-4-5-thinking") {
 255            return Ok(Self::ClaudeSonnet4_5Thinking);
 256        }
 257
 258        if id.starts_with("claude-sonnet-4-5") {
 259            return Ok(Self::ClaudeSonnet4_5);
 260        }
 261
 262        if id.starts_with("claude-sonnet-4-thinking") {
 263            return Ok(Self::ClaudeSonnet4Thinking);
 264        }
 265
 266        if id.starts_with("claude-sonnet-4") {
 267            return Ok(Self::ClaudeSonnet4);
 268        }
 269
 270        if id.starts_with("claude-3-7-sonnet-thinking") {
 271            return Ok(Self::Claude3_7SonnetThinking);
 272        }
 273
 274        if id.starts_with("claude-3-7-sonnet") {
 275            return Ok(Self::Claude3_7Sonnet);
 276        }
 277
 278        if id.starts_with("claude-3-5-sonnet") {
 279            return Ok(Self::Claude3_5Sonnet);
 280        }
 281
 282        if id.starts_with("claude-haiku-4-5-thinking") {
 283            return Ok(Self::ClaudeHaiku4_5Thinking);
 284        }
 285
 286        if id.starts_with("claude-haiku-4-5") {
 287            return Ok(Self::ClaudeHaiku4_5);
 288        }
 289
 290        if id.starts_with("claude-3-5-haiku") {
 291            return Ok(Self::Claude3_5Haiku);
 292        }
 293
 294        if id.starts_with("claude-3-opus") {
 295            return Ok(Self::Claude3Opus);
 296        }
 297
 298        if id.starts_with("claude-3-sonnet") {
 299            return Ok(Self::Claude3Sonnet);
 300        }
 301
 302        if id.starts_with("claude-3-haiku") {
 303            return Ok(Self::Claude3Haiku);
 304        }
 305
 306        Err(anyhow!("invalid model ID: {id}"))
 307    }
 308
 309    pub fn id(&self) -> &str {
 310        match self {
 311            Self::ClaudeOpus4 => "claude-opus-4-latest",
 312            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 313            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 314            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 315            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 316            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
 317            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
 318            Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
 319            Self::ClaudeOpus4_6_1mContext => "claude-opus-4-6-1m-context-latest",
 320            Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6-1m-context-thinking-latest",
 321            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 322            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 323            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 324            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 325            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
 326            Self::ClaudeSonnet4_5_1mContextThinking => {
 327                "claude-sonnet-4-5-1m-context-thinking-latest"
 328            }
 329            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
 330            Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking-latest",
 331            Self::ClaudeSonnet4_6_1mContext => "claude-sonnet-4-6-1m-context-latest",
 332            Self::ClaudeSonnet4_6_1mContextThinking => {
 333                "claude-sonnet-4-6-1m-context-thinking-latest"
 334            }
 335            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 336            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
 337            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
 338            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 339            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 340            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 341            Self::Claude3Opus => "claude-3-opus-latest",
 342            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 343            Self::Claude3Haiku => "claude-3-haiku-20240307",
 344            Self::Custom { name, .. } => name,
 345        }
 346    }
 347
 348    /// The id of the model that should be used for making API requests
 349    pub fn request_id(&self) -> &str {
 350        match self {
 351            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 352            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 353            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
 354            Self::ClaudeOpus4_6
 355            | Self::ClaudeOpus4_6Thinking
 356            | Self::ClaudeOpus4_6_1mContext
 357            | Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6",
 358            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 359            Self::ClaudeSonnet4_5
 360            | Self::ClaudeSonnet4_5Thinking
 361            | Self::ClaudeSonnet4_5_1mContext
 362            | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
 363            Self::ClaudeSonnet4_6
 364            | Self::ClaudeSonnet4_6Thinking
 365            | Self::ClaudeSonnet4_6_1mContext
 366            | Self::ClaudeSonnet4_6_1mContextThinking => "claude-sonnet-4-6",
 367            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 368            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
 369            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 370            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 371            Self::Claude3Opus => "claude-3-opus-latest",
 372            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 373            Self::Claude3Haiku => "claude-3-haiku-20240307",
 374            Self::Custom { name, .. } => name,
 375        }
 376    }
 377
 378    pub fn display_name(&self) -> &str {
 379        match self {
 380            Self::ClaudeOpus4 => "Claude Opus 4",
 381            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 382            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 383            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 384            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 385            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
 386            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
 387            Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
 388            Self::ClaudeOpus4_6_1mContext => "Claude Opus 4.6 (1M context)",
 389            Self::ClaudeOpus4_6_1mContextThinking => "Claude Opus 4.6 Thinking (1M context)",
 390            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 391            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 392            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 393            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 394            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
 395            Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
 396            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
 397            Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
 398            Self::ClaudeSonnet4_6_1mContext => "Claude Sonnet 4.6 (1M context)",
 399            Self::ClaudeSonnet4_6_1mContextThinking => "Claude Sonnet 4.6 Thinking (1M context)",
 400            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
 401            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
 402            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
 403            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 404            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 405            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
 406            Self::Claude3Opus => "Claude 3 Opus",
 407            Self::Claude3Sonnet => "Claude 3 Sonnet",
 408            Self::Claude3Haiku => "Claude 3 Haiku",
 409            Self::Custom {
 410                name, display_name, ..
 411            } => display_name.as_ref().unwrap_or(name),
 412        }
 413    }
 414
 415    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 416        match self {
 417            Self::ClaudeOpus4
 418            | Self::ClaudeOpus4_1
 419            | Self::ClaudeOpus4Thinking
 420            | Self::ClaudeOpus4_1Thinking
 421            | Self::ClaudeOpus4_5
 422            | Self::ClaudeOpus4_5Thinking
 423            | Self::ClaudeOpus4_6
 424            | Self::ClaudeOpus4_6Thinking
 425            | Self::ClaudeOpus4_6_1mContext
 426            | Self::ClaudeOpus4_6_1mContextThinking
 427            | Self::ClaudeSonnet4
 428            | Self::ClaudeSonnet4Thinking
 429            | Self::ClaudeSonnet4_5
 430            | Self::ClaudeSonnet4_5Thinking
 431            | Self::ClaudeSonnet4_5_1mContext
 432            | Self::ClaudeSonnet4_5_1mContextThinking
 433            | Self::ClaudeSonnet4_6
 434            | Self::ClaudeSonnet4_6Thinking
 435            | Self::ClaudeSonnet4_6_1mContext
 436            | Self::ClaudeSonnet4_6_1mContextThinking
 437            | Self::Claude3_5Sonnet
 438            | Self::ClaudeHaiku4_5
 439            | Self::ClaudeHaiku4_5Thinking
 440            | Self::Claude3_5Haiku
 441            | Self::Claude3_7Sonnet
 442            | Self::Claude3_7SonnetThinking
 443            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 444                min_total_token: 2_048,
 445                should_speculate: true,
 446                max_cache_anchors: 4,
 447            }),
 448            Self::Custom {
 449                cache_configuration,
 450                ..
 451            } => cache_configuration.clone(),
 452            _ => None,
 453        }
 454    }
 455
 456    pub fn max_token_count(&self) -> u64 {
 457        match self {
 458            Self::ClaudeOpus4
 459            | Self::ClaudeOpus4_1
 460            | Self::ClaudeOpus4Thinking
 461            | Self::ClaudeOpus4_1Thinking
 462            | Self::ClaudeOpus4_5
 463            | Self::ClaudeOpus4_5Thinking
 464            | Self::ClaudeOpus4_6
 465            | Self::ClaudeOpus4_6Thinking
 466            | Self::ClaudeSonnet4
 467            | Self::ClaudeSonnet4Thinking
 468            | Self::ClaudeSonnet4_5
 469            | Self::ClaudeSonnet4_5Thinking
 470            | Self::ClaudeSonnet4_6
 471            | Self::ClaudeSonnet4_6Thinking
 472            | Self::Claude3_5Sonnet
 473            | Self::ClaudeHaiku4_5
 474            | Self::ClaudeHaiku4_5Thinking
 475            | Self::Claude3_5Haiku
 476            | Self::Claude3_7Sonnet
 477            | Self::Claude3_7SonnetThinking
 478            | Self::Claude3Opus
 479            | Self::Claude3Sonnet
 480            | Self::Claude3Haiku => 200_000,
 481            Self::ClaudeOpus4_6_1mContext
 482            | Self::ClaudeOpus4_6_1mContextThinking
 483            | Self::ClaudeSonnet4_5_1mContext
 484            | Self::ClaudeSonnet4_5_1mContextThinking
 485            | Self::ClaudeSonnet4_6_1mContext
 486            | Self::ClaudeSonnet4_6_1mContextThinking => 1_000_000,
 487            Self::Custom { max_tokens, .. } => *max_tokens,
 488        }
 489    }
 490
 491    pub fn max_output_tokens(&self) -> u64 {
 492        match self {
 493            Self::Claude3_5Sonnet | Self::Claude3_5Haiku => 8_192,
 494            Self::ClaudeOpus4
 495            | Self::ClaudeOpus4Thinking
 496            | Self::ClaudeOpus4_1
 497            | Self::ClaudeOpus4_1Thinking => 32_000,
 498            Self::ClaudeOpus4_5
 499            | Self::ClaudeOpus4_5Thinking
 500            | Self::ClaudeSonnet4
 501            | Self::ClaudeSonnet4Thinking
 502            | Self::ClaudeSonnet4_5
 503            | Self::ClaudeSonnet4_5Thinking
 504            | Self::ClaudeSonnet4_5_1mContext
 505            | Self::ClaudeSonnet4_5_1mContextThinking
 506            | Self::ClaudeSonnet4_6
 507            | Self::ClaudeSonnet4_6Thinking
 508            | Self::ClaudeSonnet4_6_1mContext
 509            | Self::ClaudeSonnet4_6_1mContextThinking
 510            | Self::Claude3_7Sonnet
 511            | Self::Claude3_7SonnetThinking
 512            | Self::ClaudeHaiku4_5
 513            | Self::ClaudeHaiku4_5Thinking => 64_000,
 514            Self::ClaudeOpus4_6
 515            | Self::ClaudeOpus4_6Thinking
 516            | Self::ClaudeOpus4_6_1mContext
 517            | Self::ClaudeOpus4_6_1mContextThinking => 128_000,
 518            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
 519            Self::Custom {
 520                max_output_tokens, ..
 521            } => max_output_tokens.unwrap_or(4_096),
 522        }
 523    }
 524
 525    pub fn default_temperature(&self) -> f32 {
 526        match self {
 527            Self::ClaudeOpus4
 528            | Self::ClaudeOpus4_1
 529            | Self::ClaudeOpus4Thinking
 530            | Self::ClaudeOpus4_1Thinking
 531            | Self::ClaudeOpus4_5
 532            | Self::ClaudeOpus4_5Thinking
 533            | Self::ClaudeOpus4_6
 534            | Self::ClaudeOpus4_6Thinking
 535            | Self::ClaudeOpus4_6_1mContext
 536            | Self::ClaudeOpus4_6_1mContextThinking
 537            | Self::ClaudeSonnet4
 538            | Self::ClaudeSonnet4Thinking
 539            | Self::ClaudeSonnet4_5
 540            | Self::ClaudeSonnet4_5Thinking
 541            | Self::ClaudeSonnet4_5_1mContext
 542            | Self::ClaudeSonnet4_5_1mContextThinking
 543            | Self::ClaudeSonnet4_6
 544            | Self::ClaudeSonnet4_6Thinking
 545            | Self::ClaudeSonnet4_6_1mContext
 546            | Self::ClaudeSonnet4_6_1mContextThinking
 547            | Self::Claude3_5Sonnet
 548            | Self::Claude3_7Sonnet
 549            | Self::Claude3_7SonnetThinking
 550            | Self::ClaudeHaiku4_5
 551            | Self::ClaudeHaiku4_5Thinking
 552            | Self::Claude3_5Haiku
 553            | Self::Claude3Opus
 554            | Self::Claude3Sonnet
 555            | Self::Claude3Haiku => 1.0,
 556            Self::Custom {
 557                default_temperature,
 558                ..
 559            } => default_temperature.unwrap_or(1.0),
 560        }
 561    }
 562
 563    pub fn mode(&self) -> AnthropicModelMode {
 564        match self {
 565            Self::ClaudeOpus4
 566            | Self::ClaudeOpus4_1
 567            | Self::ClaudeOpus4_5
 568            | Self::ClaudeOpus4_6
 569            | Self::ClaudeOpus4_6_1mContext
 570            | Self::ClaudeSonnet4
 571            | Self::ClaudeSonnet4_5
 572            | Self::ClaudeSonnet4_5_1mContext
 573            | Self::ClaudeSonnet4_6
 574            | Self::ClaudeSonnet4_6_1mContext
 575            | Self::Claude3_5Sonnet
 576            | Self::Claude3_7Sonnet
 577            | Self::ClaudeHaiku4_5
 578            | Self::Claude3_5Haiku
 579            | Self::Claude3Opus
 580            | Self::Claude3Sonnet
 581            | Self::Claude3Haiku => AnthropicModelMode::Default,
 582            Self::ClaudeOpus4Thinking
 583            | Self::ClaudeOpus4_1Thinking
 584            | Self::ClaudeOpus4_5Thinking
 585            | Self::ClaudeOpus4_6Thinking
 586            | Self::ClaudeOpus4_6_1mContextThinking
 587            | Self::ClaudeSonnet4Thinking
 588            | Self::ClaudeSonnet4_5Thinking
 589            | Self::ClaudeSonnet4_5_1mContextThinking
 590            | Self::ClaudeSonnet4_6Thinking
 591            | Self::ClaudeSonnet4_6_1mContextThinking
 592            | Self::ClaudeHaiku4_5Thinking
 593            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
 594                budget_tokens: Some(4_096),
 595            },
 596            Self::Custom { mode, .. } => mode.clone(),
 597        }
 598    }
 599
 600    pub fn beta_headers(&self) -> Option<String> {
 601        let mut headers = vec![];
 602
 603        match self {
 604            Self::ClaudeOpus4
 605            | Self::ClaudeOpus4_1
 606            | Self::ClaudeOpus4_5
 607            | Self::ClaudeOpus4_6
 608            | Self::ClaudeSonnet4
 609            | Self::ClaudeSonnet4_5
 610            | Self::ClaudeOpus4Thinking
 611            | Self::ClaudeOpus4_1Thinking
 612            | Self::ClaudeOpus4_5Thinking
 613            | Self::ClaudeOpus4_6Thinking
 614            | Self::ClaudeSonnet4Thinking
 615            | Self::ClaudeSonnet4_5Thinking => {
 616                headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
 617            }
 618            Self::ClaudeOpus4_6_1mContext
 619            | Self::ClaudeOpus4_6_1mContextThinking
 620            | Self::ClaudeSonnet4_5_1mContext
 621            | Self::ClaudeSonnet4_5_1mContextThinking
 622            | Self::ClaudeSonnet4_6_1mContext
 623            | Self::ClaudeSonnet4_6_1mContextThinking => {
 624                headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
 625                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
 626            }
 627            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
 628                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
 629                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
 630                headers.push("token-efficient-tools-2025-02-19".to_string());
 631                headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
 632            }
 633            Self::Custom {
 634                extra_beta_headers, ..
 635            } => {
 636                headers.extend(
 637                    extra_beta_headers
 638                        .iter()
 639                        .filter(|header| !header.trim().is_empty())
 640                        .cloned(),
 641                );
 642            }
 643            _ => {}
 644        }
 645
 646        if headers.is_empty() {
 647            None
 648        } else {
 649            Some(headers.join(","))
 650        }
 651    }
 652
 653    pub fn tool_model_id(&self) -> &str {
 654        if let Self::Custom {
 655            tool_override: Some(tool_override),
 656            ..
 657        } = self
 658        {
 659            tool_override
 660        } else {
 661            self.request_id()
 662        }
 663    }
 664}
 665
 666/// Generate completion with streaming.
 667pub async fn stream_completion(
 668    client: &dyn HttpClient,
 669    api_url: &str,
 670    api_key: &str,
 671    request: Request,
 672    beta_headers: Option<String>,
 673) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 674    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 675        .await
 676        .map(|output| output.0)
 677}
 678
 679/// Generate completion without streaming.
 680pub async fn non_streaming_completion(
 681    client: &dyn HttpClient,
 682    api_url: &str,
 683    api_key: &str,
 684    request: Request,
 685    beta_headers: Option<String>,
 686) -> Result<Response, AnthropicError> {
 687    let (mut response, rate_limits) =
 688        send_request(client, api_url, api_key, &request, beta_headers).await?;
 689
 690    if response.status().is_success() {
 691        let mut body = String::new();
 692        response
 693            .body_mut()
 694            .read_to_string(&mut body)
 695            .await
 696            .map_err(AnthropicError::ReadResponse)?;
 697
 698        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 699    } else {
 700        Err(handle_error_response(response, rate_limits).await)
 701    }
 702}
 703
 704async fn send_request(
 705    client: &dyn HttpClient,
 706    api_url: &str,
 707    api_key: &str,
 708    request: impl Serialize,
 709    beta_headers: Option<String>,
 710) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 711    let uri = format!("{api_url}/v1/messages");
 712
 713    let mut request_builder = HttpRequest::builder()
 714        .method(Method::POST)
 715        .uri(uri)
 716        .header("Anthropic-Version", "2023-06-01")
 717        .header("X-Api-Key", api_key.trim())
 718        .header("Content-Type", "application/json");
 719
 720    if let Some(beta_headers) = beta_headers {
 721        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 722    }
 723
 724    let serialized_request =
 725        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 726    let request = request_builder
 727        .body(AsyncBody::from(serialized_request))
 728        .map_err(AnthropicError::BuildRequestBody)?;
 729
 730    let response = client
 731        .send(request)
 732        .await
 733        .map_err(AnthropicError::HttpSend)?;
 734
 735    let rate_limits = RateLimitInfo::from_headers(response.headers());
 736
 737    Ok((response, rate_limits))
 738}
 739
 740async fn handle_error_response(
 741    mut response: http::Response<AsyncBody>,
 742    rate_limits: RateLimitInfo,
 743) -> AnthropicError {
 744    if response.status().as_u16() == 529 {
 745        return AnthropicError::ServerOverloaded {
 746            retry_after: rate_limits.retry_after,
 747        };
 748    }
 749
 750    if let Some(retry_after) = rate_limits.retry_after {
 751        return AnthropicError::RateLimit { retry_after };
 752    }
 753
 754    let mut body = String::new();
 755    let read_result = response
 756        .body_mut()
 757        .read_to_string(&mut body)
 758        .await
 759        .map_err(AnthropicError::ReadResponse);
 760
 761    if let Err(err) = read_result {
 762        return err;
 763    }
 764
 765    match serde_json::from_str::<Event>(&body) {
 766        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 767        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 768            status_code: response.status(),
 769            message: body,
 770        },
 771    }
 772}
 773
 774/// An individual rate limit.
 775#[derive(Debug)]
 776pub struct RateLimit {
 777    pub limit: usize,
 778    pub remaining: usize,
 779    pub reset: DateTime<Utc>,
 780}
 781
 782impl RateLimit {
 783    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 784        let limit =
 785            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 786        let remaining = get_header(
 787            &format!("anthropic-ratelimit-{resource}-remaining"),
 788            headers,
 789        )?
 790        .parse()?;
 791        let reset = DateTime::parse_from_rfc3339(get_header(
 792            &format!("anthropic-ratelimit-{resource}-reset"),
 793            headers,
 794        )?)?
 795        .to_utc();
 796
 797        Ok(Self {
 798            limit,
 799            remaining,
 800            reset,
 801        })
 802    }
 803}
 804
 805/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
 806#[derive(Debug)]
 807pub struct RateLimitInfo {
 808    pub retry_after: Option<Duration>,
 809    pub requests: Option<RateLimit>,
 810    pub tokens: Option<RateLimit>,
 811    pub input_tokens: Option<RateLimit>,
 812    pub output_tokens: Option<RateLimit>,
 813}
 814
 815impl RateLimitInfo {
 816    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 817        // Check if any rate limit headers exist
 818        let has_rate_limit_headers = headers
 819            .keys()
 820            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 821
 822        if !has_rate_limit_headers {
 823            return Self {
 824                retry_after: None,
 825                requests: None,
 826                tokens: None,
 827                input_tokens: None,
 828                output_tokens: None,
 829            };
 830        }
 831
 832        Self {
 833            retry_after: parse_retry_after(headers),
 834            requests: RateLimit::from_headers("requests", headers).ok(),
 835            tokens: RateLimit::from_headers("tokens", headers).ok(),
 836            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 837            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 838        }
 839    }
 840}
 841
 842/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 843/// seconds). Note that other services might specify an HTTP date or some other format for this
 844/// header. Returns `None` if the header is not present or cannot be parsed.
 845pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 846    headers
 847        .get("retry-after")
 848        .and_then(|v| v.to_str().ok())
 849        .and_then(|v| v.parse::<u64>().ok())
 850        .map(Duration::from_secs)
 851}
 852
 853fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 854    Ok(headers
 855        .get(key)
 856        .with_context(|| format!("missing header `{key}`"))?
 857        .to_str()?)
 858}
 859
 860pub async fn stream_completion_with_rate_limit_info(
 861    client: &dyn HttpClient,
 862    api_url: &str,
 863    api_key: &str,
 864    request: Request,
 865    beta_headers: Option<String>,
 866) -> Result<
 867    (
 868        BoxStream<'static, Result<Event, AnthropicError>>,
 869        Option<RateLimitInfo>,
 870    ),
 871    AnthropicError,
 872> {
 873    let request = StreamingRequest {
 874        base: request,
 875        stream: true,
 876    };
 877
 878    let (response, rate_limits) =
 879        send_request(client, api_url, api_key, &request, beta_headers).await?;
 880
 881    if response.status().is_success() {
 882        let reader = BufReader::new(response.into_body());
 883        let stream = reader
 884            .lines()
 885            .filter_map(|line| async move {
 886                match line {
 887                    Ok(line) => {
 888                        let line = line.strip_prefix("data: ")?;
 889                        match serde_json::from_str(line) {
 890                            Ok(response) => Some(Ok(response)),
 891                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 892                        }
 893                    }
 894                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 895                }
 896            })
 897            .boxed();
 898        Ok((stream, Some(rate_limits)))
 899    } else {
 900        Err(handle_error_response(response, rate_limits).await)
 901    }
 902}
 903
 904#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 905#[serde(rename_all = "lowercase")]
 906pub enum CacheControlType {
 907    Ephemeral,
 908}
 909
 910#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 911pub struct CacheControl {
 912    #[serde(rename = "type")]
 913    pub cache_type: CacheControlType,
 914}
 915
 916#[derive(Debug, Serialize, Deserialize)]
 917pub struct Message {
 918    pub role: Role,
 919    pub content: Vec<RequestContent>,
 920}
 921
 922#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 923#[serde(rename_all = "lowercase")]
 924pub enum Role {
 925    User,
 926    Assistant,
 927}
 928
 929#[derive(Debug, Serialize, Deserialize)]
 930#[serde(tag = "type")]
 931pub enum RequestContent {
 932    #[serde(rename = "text")]
 933    Text {
 934        text: String,
 935        #[serde(skip_serializing_if = "Option::is_none")]
 936        cache_control: Option<CacheControl>,
 937    },
 938    #[serde(rename = "thinking")]
 939    Thinking {
 940        thinking: String,
 941        signature: String,
 942        #[serde(skip_serializing_if = "Option::is_none")]
 943        cache_control: Option<CacheControl>,
 944    },
 945    #[serde(rename = "redacted_thinking")]
 946    RedactedThinking { data: String },
 947    #[serde(rename = "image")]
 948    Image {
 949        source: ImageSource,
 950        #[serde(skip_serializing_if = "Option::is_none")]
 951        cache_control: Option<CacheControl>,
 952    },
 953    #[serde(rename = "tool_use")]
 954    ToolUse {
 955        id: String,
 956        name: String,
 957        input: serde_json::Value,
 958        #[serde(skip_serializing_if = "Option::is_none")]
 959        cache_control: Option<CacheControl>,
 960    },
 961    #[serde(rename = "tool_result")]
 962    ToolResult {
 963        tool_use_id: String,
 964        is_error: bool,
 965        content: ToolResultContent,
 966        #[serde(skip_serializing_if = "Option::is_none")]
 967        cache_control: Option<CacheControl>,
 968    },
 969}
 970
 971#[derive(Debug, Serialize, Deserialize)]
 972#[serde(untagged)]
 973pub enum ToolResultContent {
 974    Plain(String),
 975    Multipart(Vec<ToolResultPart>),
 976}
 977
 978#[derive(Debug, Serialize, Deserialize)]
 979#[serde(tag = "type", rename_all = "lowercase")]
 980pub enum ToolResultPart {
 981    Text { text: String },
 982    Image { source: ImageSource },
 983}
 984
 985#[derive(Debug, Serialize, Deserialize)]
 986#[serde(tag = "type")]
 987pub enum ResponseContent {
 988    #[serde(rename = "text")]
 989    Text { text: String },
 990    #[serde(rename = "thinking")]
 991    Thinking { thinking: String },
 992    #[serde(rename = "redacted_thinking")]
 993    RedactedThinking { data: String },
 994    #[serde(rename = "tool_use")]
 995    ToolUse {
 996        id: String,
 997        name: String,
 998        input: serde_json::Value,
 999    },
1000}
1001
1002#[derive(Debug, Serialize, Deserialize)]
1003pub struct ImageSource {
1004    #[serde(rename = "type")]
1005    pub source_type: String,
1006    pub media_type: String,
1007    pub data: String,
1008}
1009
1010#[derive(Debug, Serialize, Deserialize)]
1011pub struct Tool {
1012    pub name: String,
1013    pub description: String,
1014    pub input_schema: serde_json::Value,
1015}
1016
1017#[derive(Debug, Serialize, Deserialize)]
1018#[serde(tag = "type", rename_all = "lowercase")]
1019pub enum ToolChoice {
1020    Auto,
1021    Any,
1022    Tool { name: String },
1023    None,
1024}
1025
1026#[derive(Debug, Serialize, Deserialize)]
1027#[serde(tag = "type", rename_all = "lowercase")]
1028pub enum Thinking {
1029    Enabled { budget_tokens: Option<u32> },
1030}
1031
1032#[derive(Debug, Serialize, Deserialize)]
1033#[serde(untagged)]
1034pub enum StringOrContents {
1035    String(String),
1036    Content(Vec<RequestContent>),
1037}
1038
1039#[derive(Debug, Serialize, Deserialize)]
1040pub struct Request {
1041    pub model: String,
1042    pub max_tokens: u64,
1043    pub messages: Vec<Message>,
1044    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1045    pub tools: Vec<Tool>,
1046    #[serde(default, skip_serializing_if = "Option::is_none")]
1047    pub thinking: Option<Thinking>,
1048    #[serde(default, skip_serializing_if = "Option::is_none")]
1049    pub tool_choice: Option<ToolChoice>,
1050    #[serde(default, skip_serializing_if = "Option::is_none")]
1051    pub system: Option<StringOrContents>,
1052    #[serde(default, skip_serializing_if = "Option::is_none")]
1053    pub metadata: Option<Metadata>,
1054    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1055    pub stop_sequences: Vec<String>,
1056    #[serde(default, skip_serializing_if = "Option::is_none")]
1057    pub temperature: Option<f32>,
1058    #[serde(default, skip_serializing_if = "Option::is_none")]
1059    pub top_k: Option<u32>,
1060    #[serde(default, skip_serializing_if = "Option::is_none")]
1061    pub top_p: Option<f32>,
1062}
1063
1064#[derive(Debug, Serialize, Deserialize)]
1065struct StreamingRequest {
1066    #[serde(flatten)]
1067    pub base: Request,
1068    pub stream: bool,
1069}
1070
1071#[derive(Debug, Serialize, Deserialize)]
1072pub struct Metadata {
1073    pub user_id: Option<String>,
1074}
1075
1076#[derive(Debug, Serialize, Deserialize, Default)]
1077pub struct Usage {
1078    #[serde(default, skip_serializing_if = "Option::is_none")]
1079    pub input_tokens: Option<u64>,
1080    #[serde(default, skip_serializing_if = "Option::is_none")]
1081    pub output_tokens: Option<u64>,
1082    #[serde(default, skip_serializing_if = "Option::is_none")]
1083    pub cache_creation_input_tokens: Option<u64>,
1084    #[serde(default, skip_serializing_if = "Option::is_none")]
1085    pub cache_read_input_tokens: Option<u64>,
1086}
1087
1088#[derive(Debug, Serialize, Deserialize)]
1089pub struct Response {
1090    pub id: String,
1091    #[serde(rename = "type")]
1092    pub response_type: String,
1093    pub role: Role,
1094    pub content: Vec<ResponseContent>,
1095    pub model: String,
1096    #[serde(default, skip_serializing_if = "Option::is_none")]
1097    pub stop_reason: Option<String>,
1098    #[serde(default, skip_serializing_if = "Option::is_none")]
1099    pub stop_sequence: Option<String>,
1100    pub usage: Usage,
1101}
1102
1103#[derive(Debug, Serialize, Deserialize)]
1104#[serde(tag = "type")]
1105pub enum Event {
1106    #[serde(rename = "message_start")]
1107    MessageStart { message: Response },
1108    #[serde(rename = "content_block_start")]
1109    ContentBlockStart {
1110        index: usize,
1111        content_block: ResponseContent,
1112    },
1113    #[serde(rename = "content_block_delta")]
1114    ContentBlockDelta { index: usize, delta: ContentDelta },
1115    #[serde(rename = "content_block_stop")]
1116    ContentBlockStop { index: usize },
1117    #[serde(rename = "message_delta")]
1118    MessageDelta { delta: MessageDelta, usage: Usage },
1119    #[serde(rename = "message_stop")]
1120    MessageStop,
1121    #[serde(rename = "ping")]
1122    Ping,
1123    #[serde(rename = "error")]
1124    Error { error: ApiError },
1125}
1126
1127#[derive(Debug, Serialize, Deserialize)]
1128#[serde(tag = "type")]
1129pub enum ContentDelta {
1130    #[serde(rename = "text_delta")]
1131    TextDelta { text: String },
1132    #[serde(rename = "thinking_delta")]
1133    ThinkingDelta { thinking: String },
1134    #[serde(rename = "signature_delta")]
1135    SignatureDelta { signature: String },
1136    #[serde(rename = "input_json_delta")]
1137    InputJsonDelta { partial_json: String },
1138}
1139
1140#[derive(Debug, Serialize, Deserialize)]
1141pub struct MessageDelta {
1142    pub stop_reason: Option<String>,
1143    pub stop_sequence: Option<String>,
1144}
1145
/// Errors produced while building, sending, or interpreting a request to the API.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
1178
1179#[derive(Debug, Serialize, Deserialize, Error)]
1180#[error("Anthropic API Error: {error_type}: {message}")]
1181pub struct ApiError {
1182    #[serde(rename = "type")]
1183    pub error_type: String,
1184    pub message: String,
1185}
1186
1187/// An Anthropic API error code.
1188/// <https://docs.anthropic.com/en/api/errors#http-errors>
1189#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
1190#[strum(serialize_all = "snake_case")]
1191pub enum ApiErrorCode {
1192    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
1193    InvalidRequestError,
1194    /// 401 - `authentication_error`: There's an issue with your API key.
1195    AuthenticationError,
1196    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
1197    PermissionError,
1198    /// 404 - `not_found_error`: The requested resource was not found.
1199    NotFoundError,
1200    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
1201    RequestTooLarge,
1202    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
1203    RateLimitError,
1204    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
1205    ApiError,
1206    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
1207    OverloadedError,
1208}
1209
1210impl ApiError {
1211    pub fn code(&self) -> Option<ApiErrorCode> {
1212        ApiErrorCode::from_str(&self.error_type).ok()
1213    }
1214
1215    pub fn is_rate_limit_error(&self) -> bool {
1216        matches!(self.error_type.as_str(), "rate_limit_error")
1217    }
1218
1219    pub fn match_window_exceeded(&self) -> Option<u64> {
1220        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1221            return None;
1222        };
1223
1224        parse_prompt_too_long(&self.message)
1225    }
1226}
1227
/// Extracts the token count from a message of the form `prompt is too long: <N> tokens...`.
///
/// Returns `None` when the message lacks that prefix, lacks the ` tokens` marker, or when the
/// text between them is not an unsigned integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    let (count, _) = rest.split_once(" tokens")?;
    count.parse().ok()
}
1236
1237/// Request body for the token counting API.
1238/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
1239#[derive(Debug, Serialize)]
1240pub struct CountTokensRequest {
1241    pub model: String,
1242    pub messages: Vec<Message>,
1243    #[serde(default, skip_serializing_if = "Option::is_none")]
1244    pub system: Option<StringOrContents>,
1245    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1246    pub tools: Vec<Tool>,
1247    #[serde(default, skip_serializing_if = "Option::is_none")]
1248    pub thinking: Option<Thinking>,
1249    #[serde(default, skip_serializing_if = "Option::is_none")]
1250    pub tool_choice: Option<ToolChoice>,
1251}
1252
1253/// Response from the token counting API.
1254#[derive(Debug, Deserialize)]
1255pub struct CountTokensResponse {
1256    pub input_tokens: u64,
1257}
1258
1259/// Count the number of tokens in a message without creating it.
1260pub async fn count_tokens(
1261    client: &dyn HttpClient,
1262    api_url: &str,
1263    api_key: &str,
1264    request: CountTokensRequest,
1265) -> Result<CountTokensResponse, AnthropicError> {
1266    let uri = format!("{api_url}/v1/messages/count_tokens");
1267
1268    let request_builder = HttpRequest::builder()
1269        .method(Method::POST)
1270        .uri(uri)
1271        .header("Anthropic-Version", "2023-06-01")
1272        .header("X-Api-Key", api_key.trim())
1273        .header("Content-Type", "application/json");
1274
1275    let serialized_request =
1276        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1277    let http_request = request_builder
1278        .body(AsyncBody::from(serialized_request))
1279        .map_err(AnthropicError::BuildRequestBody)?;
1280
1281    let mut response = client
1282        .send(http_request)
1283        .await
1284        .map_err(AnthropicError::HttpSend)?;
1285
1286    let rate_limits = RateLimitInfo::from_headers(response.headers());
1287
1288    if response.status().is_success() {
1289        let mut body = String::new();
1290        response
1291            .body_mut()
1292            .read_to_string(&mut body)
1293            .await
1294            .map_err(AnthropicError::ReadResponse)?;
1295
1296        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1297    } else {
1298        Err(handle_error_response(response, rate_limits).await)
1299    }
1300}
1301
/// Checks `ApiError::match_window_exceeded` against a table of
/// (error type, message, expected result) cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        // The message matches, but the error type is not `invalid_request_error`.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}