//! anthropic.rs — client types and request helpers for the Anthropic Messages API.

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
  15pub mod batches;
  16
/// Base URL of the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// `Anthropic-Beta` header value that opts a request into the 1M-token context window.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
  20
/// Prompt-caching parameters for a model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Minimum total token count before caching is worthwhile.
    pub min_total_token: u64,
    // Whether to speculatively create cache anchors.
    pub should_speculate: bool,
    // Maximum number of cache-control anchors to place in a request.
    pub max_cache_anchors: usize,
}
  28
/// Whether a model runs in normal mode or with extended thinking enabled.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    #[default]
    Default,
    /// Extended thinking, optionally capped by a thinking-token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
}
  38
  39impl From<ModelMode> for AnthropicModelMode {
  40    fn from(value: ModelMode) -> Self {
  41        match value {
  42            ModelMode::Default => AnthropicModelMode::Default,
  43            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  44        }
  45    }
  46}
  47
  48impl From<AnthropicModelMode> for ModelMode {
  49    fn from(value: AnthropicModelMode) -> Self {
  50        match value {
  51            AnthropicModelMode::Default => ModelMode::Default,
  52            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  53        }
  54    }
  55}
  56
/// The Anthropic models this client knows about.
///
/// Naming convention: `…Thinking` variants enable extended thinking (see
/// [`Model::mode`]); `…1mContext` variants opt into the 1M-token context
/// window beta (see [`Model::beta_headers`]). Each variant serializes to a
/// stable ID and also accepts a `-latest` alias when deserializing.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest"
    )]
    ClaudeOpus4_6Thinking,
    #[serde(
        rename = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest"
    )]
    ClaudeOpus4_6_1mContext,
    #[serde(
        rename = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6_1mContextThinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContextThinking,
    /// The default model.
    #[default]
    #[serde(rename = "claude-sonnet-4-6", alias = "claude-sonnet-4-6-latest")]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest"
    )]
    ClaudeSonnet4_6Thinking,
    #[serde(
        rename = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest"
    )]
    ClaudeSonnet4_6_1mContext,
    #[serde(
        rename = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6_1mContextThinking,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        /// Extra values to send in the `Anthropic-Beta` header.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
 167
impl Model {
    /// The default fast (low-latency) model.
    pub fn default_fast() -> Self {
        Self::ClaudeHaiku4_5
    }

    /// Resolves a model variant from an ID string by prefix matching.
    ///
    /// Longer, more specific prefixes are checked before their shorter
    /// ancestors (e.g. `claude-opus-4-6-1m-context-thinking` before
    /// `claude-opus-4-6-1m-context` before `claude-opus-4-6`), so dated or
    /// suffixed IDs resolve to the most specific family.
    ///
    /// Returns an error if the ID matches no known model family.
    pub fn from_id(id: &str) -> Result<Self> {
        if id.starts_with("claude-opus-4-6-1m-context-thinking") {
            return Ok(Self::ClaudeOpus4_6_1mContextThinking);
        }

        if id.starts_with("claude-opus-4-6-1m-context") {
            return Ok(Self::ClaudeOpus4_6_1mContext);
        }

        if id.starts_with("claude-opus-4-6-thinking") {
            return Ok(Self::ClaudeOpus4_6Thinking);
        }

        if id.starts_with("claude-opus-4-6") {
            return Ok(Self::ClaudeOpus4_6);
        }

        if id.starts_with("claude-opus-4-5-thinking") {
            return Ok(Self::ClaudeOpus4_5Thinking);
        }

        if id.starts_with("claude-opus-4-5") {
            return Ok(Self::ClaudeOpus4_5);
        }

        if id.starts_with("claude-opus-4-1-thinking") {
            return Ok(Self::ClaudeOpus4_1Thinking);
        }

        if id.starts_with("claude-opus-4-thinking") {
            return Ok(Self::ClaudeOpus4Thinking);
        }

        if id.starts_with("claude-opus-4-1") {
            return Ok(Self::ClaudeOpus4_1);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-6-1m-context-thinking") {
            return Ok(Self::ClaudeSonnet4_6_1mContextThinking);
        }

        if id.starts_with("claude-sonnet-4-6-1m-context") {
            return Ok(Self::ClaudeSonnet4_6_1mContext);
        }

        if id.starts_with("claude-sonnet-4-6-thinking") {
            return Ok(Self::ClaudeSonnet4_6Thinking);
        }

        if id.starts_with("claude-sonnet-4-6") {
            return Ok(Self::ClaudeSonnet4_6);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
            return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context") {
            return Ok(Self::ClaudeSonnet4_5_1mContext);
        }

        if id.starts_with("claude-sonnet-4-5-thinking") {
            return Ok(Self::ClaudeSonnet4_5Thinking);
        }

        if id.starts_with("claude-sonnet-4-5") {
            return Ok(Self::ClaudeSonnet4_5);
        }

        if id.starts_with("claude-sonnet-4-thinking") {
            return Ok(Self::ClaudeSonnet4Thinking);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-haiku-4-5-thinking") {
            return Ok(Self::ClaudeHaiku4_5Thinking);
        }

        if id.starts_with("claude-haiku-4-5") {
            return Ok(Self::ClaudeHaiku4_5);
        }

        if id.starts_with("claude-3-haiku") {
            return Ok(Self::Claude3Haiku);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }

    /// The model's stable identifier (the `-latest` alias for most built-in
    /// variants; `Claude3Haiku` uses its dated ID, and `Custom` returns its
    /// configured name).
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
            Self::ClaudeOpus4_6_1mContext => "claude-opus-4-6-1m-context-latest",
            Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6-1m-context-thinking-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
            Self::ClaudeSonnet4_5_1mContextThinking => {
                "claude-sonnet-4-5-1m-context-thinking-latest"
            }
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
            Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking-latest",
            Self::ClaudeSonnet4_6_1mContext => "claude-sonnet-4-6-1m-context-latest",
            Self::ClaudeSonnet4_6_1mContextThinking => {
                "claude-sonnet-4-6-1m-context-thinking-latest"
            }
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests.
    ///
    /// Thinking and 1M-context variants of the same family collapse to a
    /// single API model id — those capabilities are expressed via
    /// [`Model::mode`] and [`Model::beta_headers`] instead of the id.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6",
            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
            Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
            Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeSonnet4_6_1mContext
            | Self::ClaudeSonnet4_6_1mContextThinking => "claude-sonnet-4-6",
            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// Human-readable name shown in the UI. For `Custom`, falls back to the
    /// model name when no display name is configured.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
            Self::ClaudeOpus4_6_1mContext => "Claude Opus 4.6 (1M context)",
            Self::ClaudeOpus4_6_1mContextThinking => "Claude Opus 4.6 Thinking (1M context)",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
            Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
            Self::ClaudeSonnet4_6_1mContext => "Claude Sonnet 4.6 (1M context)",
            Self::ClaudeSonnet4_6_1mContextThinking => "Claude Sonnet 4.6 Thinking (1M context)",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    /// Prompt-caching parameters. All built-in models share one
    /// configuration; `Custom` models use their configured value (if any).
    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeSonnet4_6_1mContext
            | Self::ClaudeSonnet4_6_1mContextThinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
        }
    }

    /// Maximum context window in tokens: 1M for the `1m-context` variants,
    /// 200k for the other built-in models.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3Haiku => 200_000,
            Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6_1mContext
            | Self::ClaudeSonnet4_6_1mContextThinking => 1_000_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    /// Maximum number of output tokens per response. `Custom` models without
    /// an explicit limit default to 4,096.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_1Thinking => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeSonnet4_6_1mContext
            | Self::ClaudeSonnet4_6_1mContextThinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking => 64_000,
            Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking => 128_000,
            Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    /// Default sampling temperature (1.0 unless overridden by a `Custom`
    /// model configuration).
    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeSonnet4_6_1mContext
            | Self::ClaudeSonnet4_6_1mContextThinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3Haiku => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    /// The thinking mode for this variant. Built-in `…Thinking` variants use
    /// a fixed 4,096-token thinking budget.
    pub fn mode(&self) -> AnthropicModelMode {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6_1mContext
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => AnthropicModelMode::Default,
            Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeSonnet4_6_1mContextThinking
            | Self::ClaudeHaiku4_5Thinking => AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            Self::Custom { mode, .. } => mode.clone(),
        }
    }

    /// Comma-joined value for the `Anthropic-Beta` request header, or `None`
    /// when this model needs no beta features.
    ///
    /// `1m-context` variants contribute [`CONTEXT_1M_BETA_HEADER`]; `Custom`
    /// models contribute their non-blank `extra_beta_headers` entries.
    pub fn beta_headers(&self) -> Option<String> {
        let mut headers = vec![];

        match self {
            Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6_1mContext
            | Self::ClaudeSonnet4_6_1mContextThinking => {
                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
            }
            Self::Custom {
                extra_beta_headers, ..
            } => {
                headers.extend(
                    extra_beta_headers
                        .iter()
                        // Skip blank entries so the joined header stays valid.
                        .filter(|header| !header.trim().is_empty())
                        .cloned(),
                );
            }
            _ => {}
        }

        if headers.is_empty() {
            None
        } else {
            Some(headers.join(","))
        }
    }

    /// The model ID to use for tool calls: a `Custom` model's
    /// `tool_override` when set, otherwise this model's [`Model::request_id`].
    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}
 561
 562/// Generate completion with streaming.
 563pub async fn stream_completion(
 564    client: &dyn HttpClient,
 565    api_url: &str,
 566    api_key: &str,
 567    request: Request,
 568    beta_headers: Option<String>,
 569) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 570    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 571        .await
 572        .map(|output| output.0)
 573}
 574
 575/// Generate completion without streaming.
 576pub async fn non_streaming_completion(
 577    client: &dyn HttpClient,
 578    api_url: &str,
 579    api_key: &str,
 580    request: Request,
 581    beta_headers: Option<String>,
 582) -> Result<Response, AnthropicError> {
 583    let (mut response, rate_limits) =
 584        send_request(client, api_url, api_key, &request, beta_headers).await?;
 585
 586    if response.status().is_success() {
 587        let mut body = String::new();
 588        response
 589            .body_mut()
 590            .read_to_string(&mut body)
 591            .await
 592            .map_err(AnthropicError::ReadResponse)?;
 593
 594        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 595    } else {
 596        Err(handle_error_response(response, rate_limits).await)
 597    }
 598}
 599
 600async fn send_request(
 601    client: &dyn HttpClient,
 602    api_url: &str,
 603    api_key: &str,
 604    request: impl Serialize,
 605    beta_headers: Option<String>,
 606) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 607    let uri = format!("{api_url}/v1/messages");
 608
 609    let mut request_builder = HttpRequest::builder()
 610        .method(Method::POST)
 611        .uri(uri)
 612        .header("Anthropic-Version", "2023-06-01")
 613        .header("X-Api-Key", api_key.trim())
 614        .header("Content-Type", "application/json");
 615
 616    if let Some(beta_headers) = beta_headers {
 617        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 618    }
 619
 620    let serialized_request =
 621        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 622    let request = request_builder
 623        .body(AsyncBody::from(serialized_request))
 624        .map_err(AnthropicError::BuildRequestBody)?;
 625
 626    let response = client
 627        .send(request)
 628        .await
 629        .map_err(AnthropicError::HttpSend)?;
 630
 631    let rate_limits = RateLimitInfo::from_headers(response.headers());
 632
 633    Ok((response, rate_limits))
 634}
 635
 636async fn handle_error_response(
 637    mut response: http::Response<AsyncBody>,
 638    rate_limits: RateLimitInfo,
 639) -> AnthropicError {
 640    if response.status().as_u16() == 529 {
 641        return AnthropicError::ServerOverloaded {
 642            retry_after: rate_limits.retry_after,
 643        };
 644    }
 645
 646    if let Some(retry_after) = rate_limits.retry_after {
 647        return AnthropicError::RateLimit { retry_after };
 648    }
 649
 650    let mut body = String::new();
 651    let read_result = response
 652        .body_mut()
 653        .read_to_string(&mut body)
 654        .await
 655        .map_err(AnthropicError::ReadResponse);
 656
 657    if let Err(err) = read_result {
 658        return err;
 659    }
 660
 661    match serde_json::from_str::<Event>(&body) {
 662        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 663        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 664            status_code: response.status(),
 665            message: body,
 666        },
 667    }
 668}
 669
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    // Maximum allowed for this resource in the current window.
    pub limit: usize,
    // Amount remaining in the current window.
    pub remaining: usize,
    // When the limit resets.
    pub reset: DateTime<Utc>,
}
 677
 678impl RateLimit {
 679    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 680        let limit =
 681            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 682        let remaining = get_header(
 683            &format!("anthropic-ratelimit-{resource}-remaining"),
 684            headers,
 685        )?
 686        .parse()?;
 687        let reset = DateTime::parse_from_rfc3339(get_header(
 688            &format!("anthropic-ratelimit-{resource}-reset"),
 689            headers,
 690        )?)?
 691        .to_utc();
 692
 693        Ok(Self {
 694            limit,
 695            remaining,
 696            reset,
 697        })
 698    }
 699}
 700
/// Rate-limit state parsed from response headers.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    // From the `retry-after` header, when present.
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
 710
 711impl RateLimitInfo {
 712    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 713        // Check if any rate limit headers exist
 714        let has_rate_limit_headers = headers
 715            .keys()
 716            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 717
 718        if !has_rate_limit_headers {
 719            return Self {
 720                retry_after: None,
 721                requests: None,
 722                tokens: None,
 723                input_tokens: None,
 724                output_tokens: None,
 725            };
 726        }
 727
 728        Self {
 729            retry_after: parse_retry_after(headers),
 730            requests: RateLimit::from_headers("requests", headers).ok(),
 731            tokens: RateLimit::from_headers("tokens", headers).ok(),
 732            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 733            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 734        }
 735    }
 736}
 737
 738/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 739/// seconds). Note that other services might specify an HTTP date or some other format for this
 740/// header. Returns `None` if the header is not present or cannot be parsed.
 741pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 742    headers
 743        .get("retry-after")
 744        .and_then(|v| v.to_str().ok())
 745        .and_then(|v| v.parse::<u64>().ok())
 746        .map(Duration::from_secs)
 747}
 748
 749fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 750    Ok(headers
 751        .get(key)
 752        .with_context(|| format!("missing header `{key}`"))?
 753        .to_str()?)
 754}
 755
/// Generate a completion with streaming, also returning rate-limit
/// information parsed from the response headers.
///
/// On success, the returned stream yields one [`Event`] per server-sent
/// event `data:` line; lines without a `data` prefix (e.g. `event:` lines)
/// are skipped. Non-success responses become an [`AnthropicError`].
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Same request shape as non-streaming, with `stream: true` added.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // Accept both `data: {...}` and `data:{...}`; drop
                        // any line that has neither prefix.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
 802
/// The kind of prompt cache to request; serialized lowercase.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
 808
/// A `cache_control` marker attached to request content to create a prompt-cache anchor.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
 814
/// A single conversation message: a role plus one or more content parts.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
 820
/// The author of a message; serialized lowercase (`user` / `assistant`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
 827
/// A content block inside a request [`Message`].
///
/// Serialized with a `"type"` tag, e.g. `{"type": "text", "text": "…"}`,
/// matching the Anthropic Messages API. Several variants carry an optional
/// [`CacheControl`] to mark the block as a prompt-cache anchor; the field is
/// omitted from JSON when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text content.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Extended-thinking content together with its signature string.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Thinking content the API redacted; carried as opaque data.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image attachment.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation requested by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, keyed back to the originating
    /// `tool_use` block via `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
 869
/// The payload of a `tool_result` block: either a bare string or a list of
/// typed parts.
///
/// NOTE: `#[serde(untagged)]` means deserialization tries the variants in
/// declaration order — keep `Plain` before `Multipart`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
 876
/// One part of a multipart tool result: text or an image.
///
/// Tagged by `"type"`, lowercase (`"text"` / `"image"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
 883
/// A content block in a model response.
///
/// Mirrors [`RequestContent`] but without `cache_control` fields, since
/// caching anchors only apply to request content.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// Plain generated text.
    #[serde(rename = "text")]
    Text { text: String },
    /// Extended-thinking output.
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// Redacted thinking, carried as opaque data.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool the model wants invoked, with its JSON input.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
 900
/// The source of an image content block.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Source kind; emitted under the JSON key `type`.
    #[serde(rename = "type")]
    pub source_type: String,
    /// MIME type of the image (e.g. `image/png`).
    pub media_type: String,
    /// Image payload — presumably base64-encoded when `source_type` is
    /// `"base64"`; confirm against callers.
    pub data: String,
}
 908
 909fn is_false(value: &bool) -> bool {
 910    !value
 911}
 912
/// A tool definition the model may invoke.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// JSON Schema describing the tool's input.
    pub input_schema: serde_json::Value,
    /// Omitted from the serialized request when `false` (via `is_false`).
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
 921
/// How the model should choose among the provided tools.
///
/// Tagged by `"type"`, lowercase: `{"type": "auto"}`, `{"type": "any"}`,
/// `{"type": "tool", "name": "…"}`, or `{"type": "none"}`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}
 930
/// Extended-thinking configuration for a request.
///
/// Tagged by `"type"`, lowercase (`"enabled"` / `"adaptive"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Thinking enabled, optionally bounded by a token budget.
    Enabled { budget_tokens: Option<u32> },
    /// Let the model decide how much to think.
    Adaptive,
}
 937
/// Output effort level.
///
/// Serialized in snake_case by both serde and strum's `FromStr`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
 947
/// Output configuration for a request; currently only the effort level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
 952
/// A value that may be a plain string or structured content blocks
/// (used for the `system` prompt).
///
/// NOTE: `#[serde(untagged)]` means deserialization tries the variants in
/// declaration order — keep `String` before `Content`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
 959
/// Request body for the Messages API.
///
/// Optional fields are omitted from the serialized JSON when `None`/empty,
/// so the wire payload only carries what the caller set.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Model identifier string.
    pub model: String,
    /// Maximum number of tokens to generate.
    pub max_tokens: u64,
    /// The conversation so far.
    pub messages: Vec<Message>,
    /// Tools available to the model; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Extended-thinking configuration.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// How the model should pick among `tools`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a plain string or content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    /// Sequences that stop generation; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    /// Sampling controls.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
 988
/// Generation speed setting; serialized in snake_case, defaulting to
/// `Standard`.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
 996
/// A [`Request`] with the `stream` flag added.
///
/// `#[serde(flatten)]` merges the base request's fields into the top level of
/// the JSON object, so the wire format is the plain request plus `"stream"`.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
1003
/// Request metadata; currently only an optional opaque user identifier.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
1008
/// Token accounting attached to responses and streaming events.
///
/// Every field is optional because streaming events report usage
/// incrementally (e.g. `message_delta` may carry only output tokens).
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    /// Tokens written to the prompt cache.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    /// Tokens served from the prompt cache.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
1020
/// A complete (non-streamed) Messages API response, also embedded in the
/// streaming `message_start` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    /// Response kind; emitted under the JSON key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    /// Why generation stopped, if it has (e.g. absent in `message_start`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// The matched stop sequence, when `stop_reason` indicates one.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
1035
/// A server-sent event in a streaming Messages API response.
///
/// Parsed from the JSON payload of each `data:` line, discriminated by the
/// `"type"` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// Opens the stream; carries the initial message envelope.
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    /// A new content block begins at `index`.
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// Incremental update to the content block at `index`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    /// The content block at `index` is complete.
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// Top-level message update (stop reason) plus usage so far.
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    /// The message is complete.
    #[serde(rename = "message_stop")]
    MessageStop,
    /// Keep-alive; carries no data.
    #[serde(rename = "ping")]
    Ping,
    /// An error surfaced mid-stream.
    #[serde(rename = "error")]
    Error { error: ApiError },
}
1059
/// The payload of a `content_block_delta` event, discriminated by `"type"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    /// Appended text for a text block.
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    /// Appended text for a thinking block.
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// Signature fragment for a thinking block.
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// Partial JSON for a `tool_use` block's input.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
1072
/// The `delta` payload of a `message_delta` event: final stop information.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
1078
/// Errors that can occur while talking to the Anthropic API, covering the
/// full request lifecycle: serializing and building the request, transport,
/// reading/parsing the response, and error responses from the server.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
1111
/// A structured error object returned by the Anthropic API, e.g. in the
/// streaming `error` event or an error response body.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Machine-readable error kind (see [`ApiErrorCode`]); JSON key `type`.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable description.
    pub message: String,
}
1119
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from the error's `type` string via `FromStr` (snake_case, per the
/// `strum` attribute).
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
1142
1143impl ApiError {
1144    pub fn code(&self) -> Option<ApiErrorCode> {
1145        ApiErrorCode::from_str(&self.error_type).ok()
1146    }
1147
1148    pub fn is_rate_limit_error(&self) -> bool {
1149        matches!(self.error_type.as_str(), "rate_limit_error")
1150    }
1151
1152    pub fn match_window_exceeded(&self) -> Option<u64> {
1153        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1154            return None;
1155        };
1156
1157        parse_prompt_too_long(&self.message)
1158    }
1159}
1160
/// Extracts the reported token count from a "prompt is too long" error
/// message.
///
/// Recognizes messages of the form `"prompt is too long: <N> tokens…"` and
/// returns `N`; any other message (or a non-numeric count) yields `None`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let remainder = message.strip_prefix("prompt is too long: ")?;
    let (count, _rest) = remainder.split_once(" tokens")?;
    count.parse().ok()
}
1169
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    pub model: String,
    pub messages: Vec<Message>,
    /// System prompt; omitted from JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Tool definitions; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}
1185
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Total input tokens the request would consume.
    pub input_tokens: u64,
}
1191
1192/// Count the number of tokens in a message without creating it.
1193pub async fn count_tokens(
1194    client: &dyn HttpClient,
1195    api_url: &str,
1196    api_key: &str,
1197    request: CountTokensRequest,
1198) -> Result<CountTokensResponse, AnthropicError> {
1199    let uri = format!("{api_url}/v1/messages/count_tokens");
1200
1201    let request_builder = HttpRequest::builder()
1202        .method(Method::POST)
1203        .uri(uri)
1204        .header("Anthropic-Version", "2023-06-01")
1205        .header("X-Api-Key", api_key.trim())
1206        .header("Content-Type", "application/json");
1207
1208    let serialized_request =
1209        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1210    let http_request = request_builder
1211        .body(AsyncBody::from(serialized_request))
1212        .map_err(AnthropicError::BuildRequestBody)?;
1213
1214    let mut response = client
1215        .send(http_request)
1216        .await
1217        .map_err(AnthropicError::HttpSend)?;
1218
1219    let rate_limits = RateLimitInfo::from_headers(response.headers());
1220
1221    if response.status().is_success() {
1222        let mut body = String::new();
1223        response
1224            .body_mut()
1225            .read_to_string(&mut body)
1226            .await
1227            .map_err(AnthropicError::ReadResponse)?;
1228
1229        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1230    } else {
1231        Err(handle_error_response(response, rate_limits).await)
1232    }
1233}
1234
#[test]
fn test_match_window_exceeded() {
    // Each case: (error_type, message, expected token count).
    let cases: &[(&str, &str, Option<u64>)] = &[
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(
            error.match_window_exceeded(),
            *expected,
            "case: {error_type} / {message}"
        );
    }
}