anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
  15pub mod batches;
  16
/// Base URL of the Anthropic API (no trailing slash).
  17pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  18
/// Beta feature identifier that `Model::beta_headers` returns for the
/// `*_1mContext*` model variants.
  19pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
  20
  21#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  22#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  23pub struct AnthropicModelCacheConfiguration {
  24    pub min_total_token: u64,
  25    pub should_speculate: bool,
  26    pub max_cache_anchors: usize,
  27}
  28
  29#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  30#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  31pub enum AnthropicModelMode {
  32    #[default]
  33    Default,
  34    Thinking {
  35        budget_tokens: Option<u32>,
  36    },
  37}
  38
  39impl From<ModelMode> for AnthropicModelMode {
  40    fn from(value: ModelMode) -> Self {
  41        match value {
  42            ModelMode::Default => AnthropicModelMode::Default,
  43            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  44        }
  45    }
  46}
  47
  48impl From<AnthropicModelMode> for ModelMode {
  49    fn from(value: AnthropicModelMode) -> Self {
  50        match value {
  51            AnthropicModelMode::Default => ModelMode::Default,
  52            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  53        }
  54    }
  55}
  56
  57#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  58#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
  59pub enum Model {
  60    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
  61    ClaudeOpus4,
  62    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
  63    ClaudeOpus4_1,
  64    #[serde(
  65        rename = "claude-opus-4-thinking",
  66        alias = "claude-opus-4-thinking-latest"
  67    )]
  68    ClaudeOpus4Thinking,
  69    #[serde(
  70        rename = "claude-opus-4-1-thinking",
  71        alias = "claude-opus-4-1-thinking-latest"
  72    )]
  73    ClaudeOpus4_1Thinking,
  74    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
  75    ClaudeOpus4_5,
  76    #[serde(
  77        rename = "claude-opus-4-5-thinking",
  78        alias = "claude-opus-4-5-thinking-latest"
  79    )]
  80    ClaudeOpus4_5Thinking,
  81    #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
  82    ClaudeOpus4_6,
  83    #[serde(
  84        rename = "claude-opus-4-6-thinking",
  85        alias = "claude-opus-4-6-thinking-latest"
  86    )]
  87    ClaudeOpus4_6Thinking,
  88    #[serde(
  89        rename = "claude-opus-4-6-1m-context",
  90        alias = "claude-opus-4-6-1m-context-latest"
  91    )]
  92    ClaudeOpus4_6_1mContext,
  93    #[serde(
  94        rename = "claude-opus-4-6-1m-context-thinking",
  95        alias = "claude-opus-4-6-1m-context-thinking-latest"
  96    )]
  97    ClaudeOpus4_6_1mContextThinking,
  98    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
  99    ClaudeSonnet4,
 100    #[serde(
 101        rename = "claude-sonnet-4-thinking",
 102        alias = "claude-sonnet-4-thinking-latest"
 103    )]
 104    ClaudeSonnet4Thinking,
 105    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
 106    ClaudeSonnet4_5,
 107    #[serde(
 108        rename = "claude-sonnet-4-5-thinking",
 109        alias = "claude-sonnet-4-5-thinking-latest"
 110    )]
 111    ClaudeSonnet4_5Thinking,
 112    #[serde(
 113        rename = "claude-sonnet-4-5-1m-context",
 114        alias = "claude-sonnet-4-5-1m-context-latest"
 115    )]
 116    ClaudeSonnet4_5_1mContext,
 117    #[serde(
 118        rename = "claude-sonnet-4-5-1m-context-thinking",
 119        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
 120    )]
 121    ClaudeSonnet4_5_1mContextThinking,
 122    #[default]
 123    #[serde(rename = "claude-sonnet-4-6", alias = "claude-sonnet-4-6-latest")]
 124    ClaudeSonnet4_6,
 125    #[serde(
 126        rename = "claude-sonnet-4-6-thinking",
 127        alias = "claude-sonnet-4-6-thinking-latest"
 128    )]
 129    ClaudeSonnet4_6Thinking,
 130    #[serde(
 131        rename = "claude-sonnet-4-6-1m-context",
 132        alias = "claude-sonnet-4-6-1m-context-latest"
 133    )]
 134    ClaudeSonnet4_6_1mContext,
 135    #[serde(
 136        rename = "claude-sonnet-4-6-1m-context-thinking",
 137        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
 138    )]
 139    ClaudeSonnet4_6_1mContextThinking,
 140    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
 141    Claude3_7Sonnet,
 142    #[serde(
 143        rename = "claude-3-7-sonnet-thinking",
 144        alias = "claude-3-7-sonnet-thinking-latest"
 145    )]
 146    Claude3_7SonnetThinking,
 147    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
 148    Claude3_5Sonnet,
 149    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
 150    ClaudeHaiku4_5,
 151    #[serde(
 152        rename = "claude-haiku-4-5-thinking",
 153        alias = "claude-haiku-4-5-thinking-latest"
 154    )]
 155    ClaudeHaiku4_5Thinking,
 156    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
 157    Claude3_5Haiku,
 158    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
 159    Claude3Opus,
 160    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
 161    Claude3Sonnet,
 162    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
 163    Claude3Haiku,
 164    #[serde(rename = "custom")]
 165    Custom {
 166        name: String,
 167        max_tokens: u64,
 168        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
 169        display_name: Option<String>,
 170        /// Override this model with a different Anthropic model for tool calls.
 171        tool_override: Option<String>,
 172        /// Indicates whether this custom model supports caching.
 173        cache_configuration: Option<AnthropicModelCacheConfiguration>,
 174        max_output_tokens: Option<u64>,
 175        default_temperature: Option<f32>,
 176        #[serde(default)]
 177        extra_beta_headers: Vec<String>,
 178        #[serde(default)]
 179        mode: AnthropicModelMode,
 180    },
 181}
 182
 183impl Model {
 184    pub fn default_fast() -> Self {
 185        Self::Claude3_5Haiku
 186    }
 187
 188    pub fn from_id(id: &str) -> Result<Self> {
 189        if id.starts_with("claude-opus-4-6-1m-context-thinking") {
 190            return Ok(Self::ClaudeOpus4_6_1mContextThinking);
 191        }
 192
 193        if id.starts_with("claude-opus-4-6-1m-context") {
 194            return Ok(Self::ClaudeOpus4_6_1mContext);
 195        }
 196
 197        if id.starts_with("claude-opus-4-6-thinking") {
 198            return Ok(Self::ClaudeOpus4_6Thinking);
 199        }
 200
 201        if id.starts_with("claude-opus-4-6") {
 202            return Ok(Self::ClaudeOpus4_6);
 203        }
 204
 205        if id.starts_with("claude-opus-4-5-thinking") {
 206            return Ok(Self::ClaudeOpus4_5Thinking);
 207        }
 208
 209        if id.starts_with("claude-opus-4-5") {
 210            return Ok(Self::ClaudeOpus4_5);
 211        }
 212
 213        if id.starts_with("claude-opus-4-1-thinking") {
 214            return Ok(Self::ClaudeOpus4_1Thinking);
 215        }
 216
 217        if id.starts_with("claude-opus-4-thinking") {
 218            return Ok(Self::ClaudeOpus4Thinking);
 219        }
 220
 221        if id.starts_with("claude-opus-4-1") {
 222            return Ok(Self::ClaudeOpus4_1);
 223        }
 224
 225        if id.starts_with("claude-opus-4") {
 226            return Ok(Self::ClaudeOpus4);
 227        }
 228
 229        if id.starts_with("claude-sonnet-4-6-1m-context-thinking") {
 230            return Ok(Self::ClaudeSonnet4_6_1mContextThinking);
 231        }
 232
 233        if id.starts_with("claude-sonnet-4-6-1m-context") {
 234            return Ok(Self::ClaudeSonnet4_6_1mContext);
 235        }
 236
 237        if id.starts_with("claude-sonnet-4-6-thinking") {
 238            return Ok(Self::ClaudeSonnet4_6Thinking);
 239        }
 240
 241        if id.starts_with("claude-sonnet-4-6") {
 242            return Ok(Self::ClaudeSonnet4_6);
 243        }
 244
 245        if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
 246            return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
 247        }
 248
 249        if id.starts_with("claude-sonnet-4-5-1m-context") {
 250            return Ok(Self::ClaudeSonnet4_5_1mContext);
 251        }
 252
 253        if id.starts_with("claude-sonnet-4-5-thinking") {
 254            return Ok(Self::ClaudeSonnet4_5Thinking);
 255        }
 256
 257        if id.starts_with("claude-sonnet-4-5") {
 258            return Ok(Self::ClaudeSonnet4_5);
 259        }
 260
 261        if id.starts_with("claude-sonnet-4-thinking") {
 262            return Ok(Self::ClaudeSonnet4Thinking);
 263        }
 264
 265        if id.starts_with("claude-sonnet-4") {
 266            return Ok(Self::ClaudeSonnet4);
 267        }
 268
 269        if id.starts_with("claude-3-7-sonnet-thinking") {
 270            return Ok(Self::Claude3_7SonnetThinking);
 271        }
 272
 273        if id.starts_with("claude-3-7-sonnet") {
 274            return Ok(Self::Claude3_7Sonnet);
 275        }
 276
 277        if id.starts_with("claude-3-5-sonnet") {
 278            return Ok(Self::Claude3_5Sonnet);
 279        }
 280
 281        if id.starts_with("claude-haiku-4-5-thinking") {
 282            return Ok(Self::ClaudeHaiku4_5Thinking);
 283        }
 284
 285        if id.starts_with("claude-haiku-4-5") {
 286            return Ok(Self::ClaudeHaiku4_5);
 287        }
 288
 289        if id.starts_with("claude-3-5-haiku") {
 290            return Ok(Self::Claude3_5Haiku);
 291        }
 292
 293        if id.starts_with("claude-3-opus") {
 294            return Ok(Self::Claude3Opus);
 295        }
 296
 297        if id.starts_with("claude-3-sonnet") {
 298            return Ok(Self::Claude3Sonnet);
 299        }
 300
 301        if id.starts_with("claude-3-haiku") {
 302            return Ok(Self::Claude3Haiku);
 303        }
 304
 305        Err(anyhow!("invalid model ID: {id}"))
 306    }
 307
 308    pub fn id(&self) -> &str {
 309        match self {
 310            Self::ClaudeOpus4 => "claude-opus-4-latest",
 311            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 312            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 313            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 314            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 315            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
 316            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
 317            Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
 318            Self::ClaudeOpus4_6_1mContext => "claude-opus-4-6-1m-context-latest",
 319            Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6-1m-context-thinking-latest",
 320            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 321            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 322            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 323            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 324            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
 325            Self::ClaudeSonnet4_5_1mContextThinking => {
 326                "claude-sonnet-4-5-1m-context-thinking-latest"
 327            }
 328            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
 329            Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking-latest",
 330            Self::ClaudeSonnet4_6_1mContext => "claude-sonnet-4-6-1m-context-latest",
 331            Self::ClaudeSonnet4_6_1mContextThinking => {
 332                "claude-sonnet-4-6-1m-context-thinking-latest"
 333            }
 334            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 335            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
 336            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
 337            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 338            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 339            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 340            Self::Claude3Opus => "claude-3-opus-latest",
 341            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 342            Self::Claude3Haiku => "claude-3-haiku-20240307",
 343            Self::Custom { name, .. } => name,
 344        }
 345    }
 346
 347    /// The id of the model that should be used for making API requests
 348    pub fn request_id(&self) -> &str {
 349        match self {
 350            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 351            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 352            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
 353            Self::ClaudeOpus4_6
 354            | Self::ClaudeOpus4_6Thinking
 355            | Self::ClaudeOpus4_6_1mContext
 356            | Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6",
 357            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 358            Self::ClaudeSonnet4_5
 359            | Self::ClaudeSonnet4_5Thinking
 360            | Self::ClaudeSonnet4_5_1mContext
 361            | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
 362            Self::ClaudeSonnet4_6
 363            | Self::ClaudeSonnet4_6Thinking
 364            | Self::ClaudeSonnet4_6_1mContext
 365            | Self::ClaudeSonnet4_6_1mContextThinking => "claude-sonnet-4-6",
 366            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 367            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
 368            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 369            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 370            Self::Claude3Opus => "claude-3-opus-latest",
 371            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 372            Self::Claude3Haiku => "claude-3-haiku-20240307",
 373            Self::Custom { name, .. } => name,
 374        }
 375    }
 376
 377    pub fn display_name(&self) -> &str {
 378        match self {
 379            Self::ClaudeOpus4 => "Claude Opus 4",
 380            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 381            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 382            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 383            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 384            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
 385            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
 386            Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
 387            Self::ClaudeOpus4_6_1mContext => "Claude Opus 4.6 (1M context)",
 388            Self::ClaudeOpus4_6_1mContextThinking => "Claude Opus 4.6 Thinking (1M context)",
 389            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 390            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 391            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 392            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 393            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
 394            Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
 395            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
 396            Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
 397            Self::ClaudeSonnet4_6_1mContext => "Claude Sonnet 4.6 (1M context)",
 398            Self::ClaudeSonnet4_6_1mContextThinking => "Claude Sonnet 4.6 Thinking (1M context)",
 399            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
 400            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
 401            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
 402            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 403            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 404            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
 405            Self::Claude3Opus => "Claude 3 Opus",
 406            Self::Claude3Sonnet => "Claude 3 Sonnet",
 407            Self::Claude3Haiku => "Claude 3 Haiku",
 408            Self::Custom {
 409                name, display_name, ..
 410            } => display_name.as_ref().unwrap_or(name),
 411        }
 412    }
 413
 414    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 415        match self {
 416            Self::ClaudeOpus4
 417            | Self::ClaudeOpus4_1
 418            | Self::ClaudeOpus4Thinking
 419            | Self::ClaudeOpus4_1Thinking
 420            | Self::ClaudeOpus4_5
 421            | Self::ClaudeOpus4_5Thinking
 422            | Self::ClaudeOpus4_6
 423            | Self::ClaudeOpus4_6Thinking
 424            | Self::ClaudeOpus4_6_1mContext
 425            | Self::ClaudeOpus4_6_1mContextThinking
 426            | Self::ClaudeSonnet4
 427            | Self::ClaudeSonnet4Thinking
 428            | Self::ClaudeSonnet4_5
 429            | Self::ClaudeSonnet4_5Thinking
 430            | Self::ClaudeSonnet4_5_1mContext
 431            | Self::ClaudeSonnet4_5_1mContextThinking
 432            | Self::ClaudeSonnet4_6
 433            | Self::ClaudeSonnet4_6Thinking
 434            | Self::ClaudeSonnet4_6_1mContext
 435            | Self::ClaudeSonnet4_6_1mContextThinking
 436            | Self::Claude3_5Sonnet
 437            | Self::ClaudeHaiku4_5
 438            | Self::ClaudeHaiku4_5Thinking
 439            | Self::Claude3_5Haiku
 440            | Self::Claude3_7Sonnet
 441            | Self::Claude3_7SonnetThinking
 442            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 443                min_total_token: 2_048,
 444                should_speculate: true,
 445                max_cache_anchors: 4,
 446            }),
 447            Self::Custom {
 448                cache_configuration,
 449                ..
 450            } => cache_configuration.clone(),
 451            _ => None,
 452        }
 453    }
 454
 455    pub fn max_token_count(&self) -> u64 {
 456        match self {
 457            Self::ClaudeOpus4
 458            | Self::ClaudeOpus4_1
 459            | Self::ClaudeOpus4Thinking
 460            | Self::ClaudeOpus4_1Thinking
 461            | Self::ClaudeOpus4_5
 462            | Self::ClaudeOpus4_5Thinking
 463            | Self::ClaudeOpus4_6
 464            | Self::ClaudeOpus4_6Thinking
 465            | Self::ClaudeSonnet4
 466            | Self::ClaudeSonnet4Thinking
 467            | Self::ClaudeSonnet4_5
 468            | Self::ClaudeSonnet4_5Thinking
 469            | Self::ClaudeSonnet4_6
 470            | Self::ClaudeSonnet4_6Thinking
 471            | Self::Claude3_5Sonnet
 472            | Self::ClaudeHaiku4_5
 473            | Self::ClaudeHaiku4_5Thinking
 474            | Self::Claude3_5Haiku
 475            | Self::Claude3_7Sonnet
 476            | Self::Claude3_7SonnetThinking
 477            | Self::Claude3Opus
 478            | Self::Claude3Sonnet
 479            | Self::Claude3Haiku => 200_000,
 480            Self::ClaudeOpus4_6_1mContext
 481            | Self::ClaudeOpus4_6_1mContextThinking
 482            | Self::ClaudeSonnet4_5_1mContext
 483            | Self::ClaudeSonnet4_5_1mContextThinking
 484            | Self::ClaudeSonnet4_6_1mContext
 485            | Self::ClaudeSonnet4_6_1mContextThinking => 1_000_000,
 486            Self::Custom { max_tokens, .. } => *max_tokens,
 487        }
 488    }
 489
 490    pub fn max_output_tokens(&self) -> u64 {
 491        match self {
 492            Self::Claude3_5Sonnet | Self::Claude3_5Haiku => 8_192,
 493            Self::ClaudeOpus4
 494            | Self::ClaudeOpus4Thinking
 495            | Self::ClaudeOpus4_1
 496            | Self::ClaudeOpus4_1Thinking => 32_000,
 497            Self::ClaudeOpus4_5
 498            | Self::ClaudeOpus4_5Thinking
 499            | Self::ClaudeSonnet4
 500            | Self::ClaudeSonnet4Thinking
 501            | Self::ClaudeSonnet4_5
 502            | Self::ClaudeSonnet4_5Thinking
 503            | Self::ClaudeSonnet4_5_1mContext
 504            | Self::ClaudeSonnet4_5_1mContextThinking
 505            | Self::ClaudeSonnet4_6
 506            | Self::ClaudeSonnet4_6Thinking
 507            | Self::ClaudeSonnet4_6_1mContext
 508            | Self::ClaudeSonnet4_6_1mContextThinking
 509            | Self::Claude3_7Sonnet
 510            | Self::Claude3_7SonnetThinking
 511            | Self::ClaudeHaiku4_5
 512            | Self::ClaudeHaiku4_5Thinking => 64_000,
 513            Self::ClaudeOpus4_6
 514            | Self::ClaudeOpus4_6Thinking
 515            | Self::ClaudeOpus4_6_1mContext
 516            | Self::ClaudeOpus4_6_1mContextThinking => 128_000,
 517            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
 518            Self::Custom {
 519                max_output_tokens, ..
 520            } => max_output_tokens.unwrap_or(4_096),
 521        }
 522    }
 523
 524    pub fn default_temperature(&self) -> f32 {
 525        match self {
 526            Self::ClaudeOpus4
 527            | Self::ClaudeOpus4_1
 528            | Self::ClaudeOpus4Thinking
 529            | Self::ClaudeOpus4_1Thinking
 530            | Self::ClaudeOpus4_5
 531            | Self::ClaudeOpus4_5Thinking
 532            | Self::ClaudeOpus4_6
 533            | Self::ClaudeOpus4_6Thinking
 534            | Self::ClaudeOpus4_6_1mContext
 535            | Self::ClaudeOpus4_6_1mContextThinking
 536            | Self::ClaudeSonnet4
 537            | Self::ClaudeSonnet4Thinking
 538            | Self::ClaudeSonnet4_5
 539            | Self::ClaudeSonnet4_5Thinking
 540            | Self::ClaudeSonnet4_5_1mContext
 541            | Self::ClaudeSonnet4_5_1mContextThinking
 542            | Self::ClaudeSonnet4_6
 543            | Self::ClaudeSonnet4_6Thinking
 544            | Self::ClaudeSonnet4_6_1mContext
 545            | Self::ClaudeSonnet4_6_1mContextThinking
 546            | Self::Claude3_5Sonnet
 547            | Self::Claude3_7Sonnet
 548            | Self::Claude3_7SonnetThinking
 549            | Self::ClaudeHaiku4_5
 550            | Self::ClaudeHaiku4_5Thinking
 551            | Self::Claude3_5Haiku
 552            | Self::Claude3Opus
 553            | Self::Claude3Sonnet
 554            | Self::Claude3Haiku => 1.0,
 555            Self::Custom {
 556                default_temperature,
 557                ..
 558            } => default_temperature.unwrap_or(1.0),
 559        }
 560    }
 561
 562    pub fn mode(&self) -> AnthropicModelMode {
 563        match self {
 564            Self::ClaudeOpus4
 565            | Self::ClaudeOpus4_1
 566            | Self::ClaudeOpus4_5
 567            | Self::ClaudeOpus4_6
 568            | Self::ClaudeOpus4_6_1mContext
 569            | Self::ClaudeSonnet4
 570            | Self::ClaudeSonnet4_5
 571            | Self::ClaudeSonnet4_5_1mContext
 572            | Self::ClaudeSonnet4_6
 573            | Self::ClaudeSonnet4_6_1mContext
 574            | Self::Claude3_5Sonnet
 575            | Self::Claude3_7Sonnet
 576            | Self::ClaudeHaiku4_5
 577            | Self::Claude3_5Haiku
 578            | Self::Claude3Opus
 579            | Self::Claude3Sonnet
 580            | Self::Claude3Haiku => AnthropicModelMode::Default,
 581            Self::ClaudeOpus4Thinking
 582            | Self::ClaudeOpus4_1Thinking
 583            | Self::ClaudeOpus4_5Thinking
 584            | Self::ClaudeOpus4_6Thinking
 585            | Self::ClaudeOpus4_6_1mContextThinking
 586            | Self::ClaudeSonnet4Thinking
 587            | Self::ClaudeSonnet4_5Thinking
 588            | Self::ClaudeSonnet4_5_1mContextThinking
 589            | Self::ClaudeSonnet4_6Thinking
 590            | Self::ClaudeSonnet4_6_1mContextThinking
 591            | Self::ClaudeHaiku4_5Thinking
 592            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
 593                budget_tokens: Some(4_096),
 594            },
 595            Self::Custom { mode, .. } => mode.clone(),
 596        }
 597    }
 598
 599    pub fn beta_headers(&self) -> Option<String> {
 600        let mut headers = vec![];
 601
 602        match self {
 603            Self::ClaudeOpus4_6_1mContext
 604            | Self::ClaudeOpus4_6_1mContextThinking
 605            | Self::ClaudeSonnet4_5_1mContext
 606            | Self::ClaudeSonnet4_5_1mContextThinking
 607            | Self::ClaudeSonnet4_6_1mContext
 608            | Self::ClaudeSonnet4_6_1mContextThinking => {
 609                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
 610            }
 611            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
 612                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
 613                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
 614                headers.push("token-efficient-tools-2025-02-19".to_string());
 615            }
 616            Self::Custom {
 617                extra_beta_headers, ..
 618            } => {
 619                headers.extend(
 620                    extra_beta_headers
 621                        .iter()
 622                        .filter(|header| !header.trim().is_empty())
 623                        .cloned(),
 624                );
 625            }
 626            _ => {}
 627        }
 628
 629        if headers.is_empty() {
 630            None
 631        } else {
 632            Some(headers.join(","))
 633        }
 634    }
 635
 636    pub fn tool_model_id(&self) -> &str {
 637        if let Self::Custom {
 638            tool_override: Some(tool_override),
 639            ..
 640        } = self
 641        {
 642            tool_override
 643        } else {
 644            self.request_id()
 645        }
 646    }
 647}
 648
 649/// Generate completion with streaming.
 650pub async fn stream_completion(
 651    client: &dyn HttpClient,
 652    api_url: &str,
 653    api_key: &str,
 654    request: Request,
 655    beta_headers: Option<String>,
 656) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 657    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 658        .await
 659        .map(|output| output.0)
 660}
 661
 662/// Generate completion without streaming.
 663pub async fn non_streaming_completion(
 664    client: &dyn HttpClient,
 665    api_url: &str,
 666    api_key: &str,
 667    request: Request,
 668    beta_headers: Option<String>,
 669) -> Result<Response, AnthropicError> {
 670    let (mut response, rate_limits) =
 671        send_request(client, api_url, api_key, &request, beta_headers).await?;
 672
 673    if response.status().is_success() {
 674        let mut body = String::new();
 675        response
 676            .body_mut()
 677            .read_to_string(&mut body)
 678            .await
 679            .map_err(AnthropicError::ReadResponse)?;
 680
 681        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 682    } else {
 683        Err(handle_error_response(response, rate_limits).await)
 684    }
 685}
 686
 687async fn send_request(
 688    client: &dyn HttpClient,
 689    api_url: &str,
 690    api_key: &str,
 691    request: impl Serialize,
 692    beta_headers: Option<String>,
 693) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 694    let uri = format!("{api_url}/v1/messages");
 695
 696    let mut request_builder = HttpRequest::builder()
 697        .method(Method::POST)
 698        .uri(uri)
 699        .header("Anthropic-Version", "2023-06-01")
 700        .header("X-Api-Key", api_key.trim())
 701        .header("Content-Type", "application/json");
 702
 703    if let Some(beta_headers) = beta_headers {
 704        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 705    }
 706
 707    let serialized_request =
 708        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 709    let request = request_builder
 710        .body(AsyncBody::from(serialized_request))
 711        .map_err(AnthropicError::BuildRequestBody)?;
 712
 713    let response = client
 714        .send(request)
 715        .await
 716        .map_err(AnthropicError::HttpSend)?;
 717
 718    let rate_limits = RateLimitInfo::from_headers(response.headers());
 719
 720    Ok((response, rate_limits))
 721}
 722
 723async fn handle_error_response(
 724    mut response: http::Response<AsyncBody>,
 725    rate_limits: RateLimitInfo,
 726) -> AnthropicError {
 727    if response.status().as_u16() == 529 {
 728        return AnthropicError::ServerOverloaded {
 729            retry_after: rate_limits.retry_after,
 730        };
 731    }
 732
 733    if let Some(retry_after) = rate_limits.retry_after {
 734        return AnthropicError::RateLimit { retry_after };
 735    }
 736
 737    let mut body = String::new();
 738    let read_result = response
 739        .body_mut()
 740        .read_to_string(&mut body)
 741        .await
 742        .map_err(AnthropicError::ReadResponse);
 743
 744    if let Err(err) = read_result {
 745        return err;
 746    }
 747
 748    match serde_json::from_str::<Event>(&body) {
 749        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 750        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 751            status_code: response.status(),
 752            message: body,
 753        },
 754    }
 755}
 756
 757/// An individual rate limit.
 758#[derive(Debug)]
 759pub struct RateLimit {
 760    pub limit: usize,
 761    pub remaining: usize,
 762    pub reset: DateTime<Utc>,
 763}
 764
 765impl RateLimit {
 766    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 767        let limit =
 768            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 769        let remaining = get_header(
 770            &format!("anthropic-ratelimit-{resource}-remaining"),
 771            headers,
 772        )?
 773        .parse()?;
 774        let reset = DateTime::parse_from_rfc3339(get_header(
 775            &format!("anthropic-ratelimit-{resource}-reset"),
 776            headers,
 777        )?)?
 778        .to_utc();
 779
 780        Ok(Self {
 781            limit,
 782            remaining,
 783            reset,
 784        })
 785    }
 786}
 787
    /// Rate-limit details read from the headers of an API response.
    ///
 788/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
 789#[derive(Debug)]
 790pub struct RateLimitInfo {
        /// The `retry-after` header as a whole number of seconds, when present and parseable.
 791    pub retry_after: Option<Duration>,
        /// Limit built from the `anthropic-ratelimit-requests-*` headers, if all three parsed.
 792    pub requests: Option<RateLimit>,
        /// Limit built from the `anthropic-ratelimit-tokens-*` headers, if all three parsed.
 793    pub tokens: Option<RateLimit>,
        /// Limit built from the `anthropic-ratelimit-input-tokens-*` headers, if all three parsed.
 794    pub input_tokens: Option<RateLimit>,
        /// Limit built from the `anthropic-ratelimit-output-tokens-*` headers, if all three parsed.
 795    pub output_tokens: Option<RateLimit>,
 796}
 797
 798impl RateLimitInfo {
 799    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 800        // Check if any rate limit headers exist
 801        let has_rate_limit_headers = headers
 802            .keys()
 803            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 804
 805        if !has_rate_limit_headers {
 806            return Self {
 807                retry_after: None,
 808                requests: None,
 809                tokens: None,
 810                input_tokens: None,
 811                output_tokens: None,
 812            };
 813        }
 814
 815        Self {
 816            retry_after: parse_retry_after(headers),
 817            requests: RateLimit::from_headers("requests", headers).ok(),
 818            tokens: RateLimit::from_headers("tokens", headers).ok(),
 819            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 820            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 821        }
 822    }
 823}
 824
 825/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 826/// seconds). Note that other services might specify an HTTP date or some other format for this
 827/// header. Returns `None` if the header is not present or cannot be parsed.
 828pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 829    headers
 830        .get("retry-after")
 831        .and_then(|v| v.to_str().ok())
 832        .and_then(|v| v.parse::<u64>().ok())
 833        .map(Duration::from_secs)
 834}
 835
 836fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 837    Ok(headers
 838        .get(key)
 839        .with_context(|| format!("missing header `{key}`"))?
 840        .to_str()?)
 841}
 842
 843pub async fn stream_completion_with_rate_limit_info(
 844    client: &dyn HttpClient,
 845    api_url: &str,
 846    api_key: &str,
 847    request: Request,
 848    beta_headers: Option<String>,
 849) -> Result<
 850    (
 851        BoxStream<'static, Result<Event, AnthropicError>>,
 852        Option<RateLimitInfo>,
 853    ),
 854    AnthropicError,
 855> {
 856    let request = StreamingRequest {
 857        base: request,
 858        stream: true,
 859    };
 860
 861    let (response, rate_limits) =
 862        send_request(client, api_url, api_key, &request, beta_headers).await?;
 863
 864    if response.status().is_success() {
 865        let reader = BufReader::new(response.into_body());
 866        let stream = reader
 867            .lines()
 868            .filter_map(|line| async move {
 869                match line {
 870                    Ok(line) => {
 871                        let line = line.strip_prefix("data: ")?;
 872                        match serde_json::from_str(line) {
 873                            Ok(response) => Some(Ok(response)),
 874                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 875                        }
 876                    }
 877                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 878                }
 879            })
 880            .boxed();
 881        Ok((stream, Some(rate_limits)))
 882    } else {
 883        Err(handle_error_response(response, rate_limits).await)
 884    }
 885}
 886
    /// Kind of cache control that can be attached to request content.
    ///
    /// Variant names are serialized in lowercase.
 887#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 888#[serde(rename_all = "lowercase")]
 889pub enum CacheControlType {
        /// Serialized as `"ephemeral"`.
 890    Ephemeral,
 891}
 892
    /// Cache-control marker carried by the `cache_control` field of request content blocks.
 893#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 894pub struct CacheControl {
        /// Serialized under the JSON key `type` (`type` is a Rust keyword, hence the rename).
 895    #[serde(rename = "type")]
 896    pub cache_type: CacheControlType,
 897}
 898
    /// A single conversation message: who produced it and its content blocks.
 899#[derive(Debug, Serialize, Deserialize)]
 900pub struct Message {
        /// Author of the message.
 901    pub role: Role,
        /// Content blocks making up the message, in order.
 902    pub content: Vec<RequestContent>,
 903}
 904
    /// Author of a message. Serialized in lowercase (`"user"` / `"assistant"`).
 905#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 906#[serde(rename_all = "lowercase")]
 907pub enum Role {
        /// Serialized as `"user"`.
 908    User,
        /// Serialized as `"assistant"`.
 909    Assistant,
 910}
 911
 912#[derive(Debug, Serialize, Deserialize)]
 913#[serde(tag = "type")]
 914pub enum RequestContent {
 915    #[serde(rename = "text")]
 916    Text {
 917        text: String,
 918        #[serde(skip_serializing_if = "Option::is_none")]
 919        cache_control: Option<CacheControl>,
 920    },
 921    #[serde(rename = "thinking")]
 922    Thinking {
 923        thinking: String,
 924        signature: String,
 925        #[serde(skip_serializing_if = "Option::is_none")]
 926        cache_control: Option<CacheControl>,
 927    },
 928    #[serde(rename = "redacted_thinking")]
 929    RedactedThinking { data: String },
 930    #[serde(rename = "image")]
 931    Image {
 932        source: ImageSource,
 933        #[serde(skip_serializing_if = "Option::is_none")]
 934        cache_control: Option<CacheControl>,
 935    },
 936    #[serde(rename = "tool_use")]
 937    ToolUse {
 938        id: String,
 939        name: String,
 940        input: serde_json::Value,
 941        #[serde(skip_serializing_if = "Option::is_none")]
 942        cache_control: Option<CacheControl>,
 943    },
 944    #[serde(rename = "tool_result")]
 945    ToolResult {
 946        tool_use_id: String,
 947        is_error: bool,
 948        content: ToolResultContent,
 949        #[serde(skip_serializing_if = "Option::is_none")]
 950        cache_control: Option<CacheControl>,
 951    },
 952}
 953
    /// Content of a tool result: either a bare string or a list of parts.
    ///
    /// Untagged: serialized as the inner value itself. When deserializing, variants are tried
    /// in declaration order, so the order below is significant.
 954#[derive(Debug, Serialize, Deserialize)]
 955#[serde(untagged)]
 956pub enum ToolResultContent {
        /// A JSON string.
 957    Plain(String),
        /// A JSON array of [`ToolResultPart`]s.
 958    Multipart(Vec<ToolResultPart>),
 959}
 960
    /// One part of a multipart tool result.
    ///
    /// Serialized with an internal `type` tag holding the lowercase variant name.
 961#[derive(Debug, Serialize, Deserialize)]
 962#[serde(tag = "type", rename_all = "lowercase")]
 963pub enum ToolResultPart {
        /// Tagged `"text"`.
 964    Text { text: String },
        /// Tagged `"image"`.
 965    Image { source: ImageSource },
 966}
 967
 968#[derive(Debug, Serialize, Deserialize)]
 969#[serde(tag = "type")]
 970pub enum ResponseContent {
 971    #[serde(rename = "text")]
 972    Text { text: String },
 973    #[serde(rename = "thinking")]
 974    Thinking { thinking: String },
 975    #[serde(rename = "redacted_thinking")]
 976    RedactedThinking { data: String },
 977    #[serde(rename = "tool_use")]
 978    ToolUse {
 979        id: String,
 980        name: String,
 981        input: serde_json::Value,
 982    },
 983}
 984
    /// Source of an image content block.
 985#[derive(Debug, Serialize, Deserialize)]
 986pub struct ImageSource {
        /// Serialized under the JSON key `type`.
        // NOTE(review): presumably the encoding kind (e.g. "base64") — confirm against callers.
 987    #[serde(rename = "type")]
 988    pub source_type: String,
        // NOTE(review): presumably a MIME type such as "image/png" — confirm against callers.
 989    pub media_type: String,
        /// The image payload as a string.
 990    pub data: String,
 991}
 992
    /// A tool definition sent in the `tools` list of a request.
 993#[derive(Debug, Serialize, Deserialize)]
 994pub struct Tool {
        /// Name of the tool; [`ToolChoice::Tool`] also carries a `name`.
 995    pub name: String,
        /// Free-form description of the tool.
 996    pub description: String,
        /// Arbitrary JSON value describing the tool's input.
        // NOTE(review): presumably a JSON Schema object — confirm against callers.
 997    pub input_schema: serde_json::Value,
 998}
 999
    /// Value of the request's `tool_choice` field.
    ///
    /// Serialized with an internal `type` tag holding the lowercase variant name
    /// (`auto`, `any`, `tool`, `none`).
1000#[derive(Debug, Serialize, Deserialize)]
1001#[serde(tag = "type", rename_all = "lowercase")]
1002pub enum ToolChoice {
        /// Tagged `"auto"`.
1003    Auto,
        /// Tagged `"any"`.
1004    Any,
        /// Tagged `"tool"`, with the tool's `name` alongside the tag.
1005    Tool { name: String },
        /// Tagged `"none"`. This is `ToolChoice::None`, not `Option::None`.
1006    None,
1007}
1008
    /// Value of the request's `thinking` field.
    ///
    /// Serialized with an internal `type` tag holding the lowercase variant name.
1009#[derive(Debug, Serialize, Deserialize)]
1010#[serde(tag = "type", rename_all = "lowercase")]
1011pub enum Thinking {
        /// Tagged `"enabled"`, with an optional `budget_tokens` value.
1012    Enabled { budget_tokens: Option<u32> },
        /// Tagged `"adaptive"`; carries no further fields.
1013    Adaptive,
1014}
1015
    /// Effort level carried by [`OutputConfig`].
    ///
    /// Uses the snake_case variant name both for serde (de)serialization and for parsing from
    /// a string through strum's `EnumString` (`"low"`, `"medium"`, `"high"`, `"max"`).
1016#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
1017#[serde(rename_all = "snake_case")]
1018#[strum(serialize_all = "snake_case")]
1019pub enum Effort {
1020    Low,
1021    Medium,
1022    High,
1023    Max,
1024}
1025
    /// Value of the request's `output_config` field.
1026#[derive(Debug, Clone, Serialize, Deserialize)]
1027pub struct OutputConfig {
        /// Optional effort level. There is no `skip_serializing_if` here, so `None` is written
        /// out as JSON `null` rather than being omitted.
1028    pub effort: Option<Effort>,
1029}
1030
    /// Either a plain string or a list of content blocks; used for the request's `system` field.
    ///
    /// Untagged: serialized as the inner value itself. When deserializing, variants are tried
    /// in declaration order, so the order below is significant.
1031#[derive(Debug, Serialize, Deserialize)]
1032#[serde(untagged)]
1033pub enum StringOrContents {
        /// A JSON string.
1034    String(String),
        /// A JSON array of [`RequestContent`] blocks.
1035    Content(Vec<RequestContent>),
1036}
1037
    /// Body of a completion request.
    ///
    /// `model`, `max_tokens` and `messages` are always serialized. Every other field is
    /// optional: it is left out of the serialized JSON when it is `None` or an empty `Vec`, and
    /// falls back to its default when missing during deserialization.
1038#[derive(Debug, Serialize, Deserialize)]
1039pub struct Request {
        /// Identifier of the model to use.
1040    pub model: String,
        /// Value of the `max_tokens` field.
1041    pub max_tokens: u64,
        /// Conversation messages, in order.
1042    pub messages: Vec<Message>,
        /// Tool definitions; omitted when empty.
1043    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1044    pub tools: Vec<Tool>,
1045    #[serde(default, skip_serializing_if = "Option::is_none")]
1046    pub thinking: Option<Thinking>,
1047    #[serde(default, skip_serializing_if = "Option::is_none")]
1048    pub tool_choice: Option<ToolChoice>,
        /// System prompt, either as a plain string or as content blocks.
1049    #[serde(default, skip_serializing_if = "Option::is_none")]
1050    pub system: Option<StringOrContents>,
1051    #[serde(default, skip_serializing_if = "Option::is_none")]
1052    pub metadata: Option<Metadata>,
1053    #[serde(default, skip_serializing_if = "Option::is_none")]
1054    pub output_config: Option<OutputConfig>,
        /// Stop sequences; omitted when empty.
1055    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1056    pub stop_sequences: Vec<String>,
1057    #[serde(default, skip_serializing_if = "Option::is_none")]
1058    pub temperature: Option<f32>,
1059    #[serde(default, skip_serializing_if = "Option::is_none")]
1060    pub top_k: Option<u32>,
1061    #[serde(default, skip_serializing_if = "Option::is_none")]
1062    pub top_p: Option<f32>,
1063}
1064
    /// A [`Request`] with a `stream` flag added next to its fields.
1065#[derive(Debug, Serialize, Deserialize)]
1066struct StreamingRequest {
        /// Flattened, so the request's fields sit at the top level of the JSON object rather
        /// than under a `base` key.
1067    #[serde(flatten)]
1068    pub base: Request,
        /// Serialized as the top-level `stream` key.
1069    pub stream: bool,
1070}
1071
    /// Value of the request's `metadata` field.
1072#[derive(Debug, Serialize, Deserialize)]
1073pub struct Metadata {
        /// Optional user identifier. There is no `skip_serializing_if` here, so `None` is
        /// written out as JSON `null` rather than being omitted.
1074    pub user_id: Option<String>,
1075}
1076
    /// Token usage counters reported in a [`Response`] and in `message_delta` events.
    ///
    /// Every counter is optional: absent keys deserialize to `None`, and `None` values are
    /// omitted when serializing. `Default` yields all-`None`.
1077#[derive(Debug, Serialize, Deserialize, Default)]
1078pub struct Usage {
1079    #[serde(default, skip_serializing_if = "Option::is_none")]
1080    pub input_tokens: Option<u64>,
1081    #[serde(default, skip_serializing_if = "Option::is_none")]
1082    pub output_tokens: Option<u64>,
1083    #[serde(default, skip_serializing_if = "Option::is_none")]
1084    pub cache_creation_input_tokens: Option<u64>,
1085    #[serde(default, skip_serializing_if = "Option::is_none")]
1086    pub cache_read_input_tokens: Option<u64>,
1087}
1088
    /// A message object returned by the API; also the payload of the `message_start` event.
1089#[derive(Debug, Serialize, Deserialize)]
1090pub struct Response {
        /// Identifier of the message.
1091    pub id: String,
        /// Serialized under the JSON key `type`.
1092    #[serde(rename = "type")]
1093    pub response_type: String,
        /// Author of the message.
1094    pub role: Role,
        /// Content blocks of the message, in order.
1095    pub content: Vec<ResponseContent>,
        /// Identifier of the model named in the response.
1096    pub model: String,
        /// Optional; omitted from the serialized form when `None`.
1097    #[serde(default, skip_serializing_if = "Option::is_none")]
1098    pub stop_reason: Option<String>,
        /// Optional; omitted from the serialized form when `None`.
1099    #[serde(default, skip_serializing_if = "Option::is_none")]
1100    pub stop_sequence: Option<String>,
        /// Token usage counters attached to the message.
1101    pub usage: Usage,
1102}
1103
1104#[derive(Debug, Serialize, Deserialize)]
1105#[serde(tag = "type")]
1106pub enum Event {
1107    #[serde(rename = "message_start")]
1108    MessageStart { message: Response },
1109    #[serde(rename = "content_block_start")]
1110    ContentBlockStart {
1111        index: usize,
1112        content_block: ResponseContent,
1113    },
1114    #[serde(rename = "content_block_delta")]
1115    ContentBlockDelta { index: usize, delta: ContentDelta },
1116    #[serde(rename = "content_block_stop")]
1117    ContentBlockStop { index: usize },
1118    #[serde(rename = "message_delta")]
1119    MessageDelta { delta: MessageDelta, usage: Usage },
1120    #[serde(rename = "message_stop")]
1121    MessageStop,
1122    #[serde(rename = "ping")]
1123    Ping,
1124    #[serde(rename = "error")]
1125    Error { error: ApiError },
1126}
1127
1128#[derive(Debug, Serialize, Deserialize)]
1129#[serde(tag = "type")]
1130pub enum ContentDelta {
1131    #[serde(rename = "text_delta")]
1132    TextDelta { text: String },
1133    #[serde(rename = "thinking_delta")]
1134    ThinkingDelta { thinking: String },
1135    #[serde(rename = "signature_delta")]
1136    SignatureDelta { signature: String },
1137    #[serde(rename = "input_json_delta")]
1138    InputJsonDelta { partial_json: String },
1139}
1140
    /// Payload of the `delta` field of a `message_delta` event.
1141#[derive(Debug, Serialize, Deserialize)]
1142pub struct MessageDelta {
        /// Stop reason, if one was reported.
1143    pub stop_reason: Option<String>,
        /// Stop sequence, if one was reported.
1144    pub stop_sequence: Option<String>,
1145}
1146
    /// Everything that can go wrong while talking to the API: building and sending the request,
    /// reading and decoding the response, and errors reported by the server itself.
1147#[derive(Debug)]
1148pub enum AnthropicError {
1149    /// Failed to serialize the HTTP request body to JSON
1150    SerializeRequest(serde_json::Error),
1151
1152    /// Failed to construct the HTTP request body
1153    BuildRequestBody(http::Error),
1154
1155    /// Failed to send the HTTP request
1156    HttpSend(anyhow::Error),
1157
1158    /// Failed to deserialize the response from JSON
1159    DeserializeResponse(serde_json::Error),
1160
1161    /// Failed to read from response stream
1162    ReadResponse(io::Error),
1163
1164    /// HTTP error response from the API
        ///
        /// `message` holds the raw response body.
1165    HttpResponseError {
1166        status_code: StatusCode,
1167        message: String,
1168    },
1169
1170    /// Rate limit exceeded
        ///
        /// `retry_after` is the delay taken from the response's `retry-after` header.
1171    RateLimit { retry_after: Duration },
1172
1173    /// Server overloaded
        ///
        /// `retry_after` is the delay from the `retry-after` header, when one was sent.
1174    ServerOverloaded { retry_after: Option<Duration> },
1175
1176    /// API returned an error response
1177    ApiError(ApiError),
1178}
1179
    /// An error object reported by the API.
    ///
    /// Displays as `Anthropic API Error: {error_type}: {message}`.
1180#[derive(Debug, Serialize, Deserialize, Error)]
1181#[error("Anthropic API Error: {error_type}: {message}")]
1182pub struct ApiError {
        /// Error kind as sent by the API, under the JSON key `type` (e.g. `rate_limit_error`).
1183    #[serde(rename = "type")]
1184    pub error_type: String,
        /// Error description as sent by the API.
1185    pub message: String,
1186}
1187
1188/// An Anthropic API error code.
    ///
    /// Parsed from the snake_case `type` string of an [`ApiError`] through strum's `EnumString`
    /// (for example `"rate_limit_error"` becomes [`ApiErrorCode::RateLimitError`]).
    ///
1189/// <https://docs.anthropic.com/en/api/errors#http-errors>
1190#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
1191#[strum(serialize_all = "snake_case")]
1192pub enum ApiErrorCode {
1193    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
1194    InvalidRequestError,
1195    /// 401 - `authentication_error`: There's an issue with your API key.
1196    AuthenticationError,
1197    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
1198    PermissionError,
1199    /// 404 - `not_found_error`: The requested resource was not found.
1200    NotFoundError,
1201    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
1202    RequestTooLarge,
1203    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
1204    RateLimitError,
1205    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
1206    ApiError,
1207    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
1208    OverloadedError,
1209}
1210
1211impl ApiError {
1212    pub fn code(&self) -> Option<ApiErrorCode> {
1213        ApiErrorCode::from_str(&self.error_type).ok()
1214    }
1215
1216    pub fn is_rate_limit_error(&self) -> bool {
1217        matches!(self.error_type.as_str(), "rate_limit_error")
1218    }
1219
1220    pub fn match_window_exceeded(&self) -> Option<u64> {
1221        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1222            return None;
1223        };
1224
1225        parse_prompt_too_long(&self.message)
1226    }
1227}
1228
/// Extracts the token count from a message of the form
/// `prompt is too long: <count> tokens…`.
///
/// Returns `None` when the message does not start with `prompt is too long: `, does not
/// contain ` tokens` after the prefix, or when the text in between is not a valid `u64`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    // Everything before the first " tokens" is the count; whatever follows is ignored.
    let (count, _) = rest.split_once(" tokens")?;
    count.parse().ok()
}
1237
1238/// Request body for the token counting API.
1239/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
    ///
    /// Optional fields are left out of the serialized JSON when `None` or empty. This type
    /// derives `Serialize` only, so the `default` part of the serde attributes below has no
    /// effect (it applies to deserialization).
1240#[derive(Debug, Serialize)]
1241pub struct CountTokensRequest {
        /// Identifier of the model to count tokens for.
1242    pub model: String,
        /// Conversation messages, in order.
1243    pub messages: Vec<Message>,
1244    #[serde(default, skip_serializing_if = "Option::is_none")]
1245    pub system: Option<StringOrContents>,
1246    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1247    pub tools: Vec<Tool>,
1248    #[serde(default, skip_serializing_if = "Option::is_none")]
1249    pub thinking: Option<Thinking>,
1250    #[serde(default, skip_serializing_if = "Option::is_none")]
1251    pub tool_choice: Option<ToolChoice>,
1252}
1253
1254/// Response from the token counting API.
1255#[derive(Debug, Deserialize)]
1256pub struct CountTokensResponse {
        /// Value of the `input_tokens` key in the response body.
1257    pub input_tokens: u64,
1258}
1259
1260/// Count the number of tokens in a message without creating it.
1261pub async fn count_tokens(
1262    client: &dyn HttpClient,
1263    api_url: &str,
1264    api_key: &str,
1265    request: CountTokensRequest,
1266) -> Result<CountTokensResponse, AnthropicError> {
1267    let uri = format!("{api_url}/v1/messages/count_tokens");
1268
1269    let request_builder = HttpRequest::builder()
1270        .method(Method::POST)
1271        .uri(uri)
1272        .header("Anthropic-Version", "2023-06-01")
1273        .header("X-Api-Key", api_key.trim())
1274        .header("Content-Type", "application/json");
1275
1276    let serialized_request =
1277        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1278    let http_request = request_builder
1279        .body(AsyncBody::from(serialized_request))
1280        .map_err(AnthropicError::BuildRequestBody)?;
1281
1282    let mut response = client
1283        .send(http_request)
1284        .await
1285        .map_err(AnthropicError::HttpSend)?;
1286
1287    let rate_limits = RateLimitInfo::from_headers(response.headers());
1288
1289    if response.status().is_success() {
1290        let mut body = String::new();
1291        response
1292            .body_mut()
1293            .read_to_string(&mut body)
1294            .await
1295            .map_err(AnthropicError::ReadResponse)?;
1296
1297        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1298    } else {
1299        Err(handle_error_response(response, rate_limits).await)
1300    }
1301}
1302
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Count followed by the limit.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Count with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Non-numeric count.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}