anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
  15pub mod batches;
  16
/// Base URL of the public Anthropic API, without a trailing slash.
///
/// The request helpers in this file append `/v1/messages` to whatever
/// `api_url` they are handed.
// NOTE(review): presumably callers pass this constant as the default
// `api_url` — confirm at the call sites.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// Beta flag that `Model::beta_headers` returns for the Claude Sonnet 4.5
/// 1M-context variants; beta flags are sent in the `Anthropic-Beta` header.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
  20
/// Prompt-caching parameters associated with a model.
///
/// Every built-in model reports the same values from
/// `Model::cache_configuration` (2048 / true / 4); custom models supply their
/// own, or none.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum total token count before caching
    // is attempted — the consumer is not visible here, confirm.
    pub min_total_token: u64,
    // NOTE(review): semantics are defined by the consumer of this struct —
    // confirm what "speculate" triggers.
    pub should_speculate: bool,
    // NOTE(review): presumably the maximum number of cache breakpoints placed
    // in a single request — confirm.
    pub max_cache_anchors: usize,
}
  28
/// Operating mode of a model: plain completion or extended thinking.
///
/// Mirrors `ModelMode` from the settings crate; conversions in both
/// directions are implemented in this file.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// Regular mode; this is the `Default::default()` value.
    #[default]
    Default,
    /// Thinking mode. The built-in `*Thinking` models report
    /// `Some(4_096)` as their budget from `Model::mode`.
    Thinking {
        budget_tokens: Option<u32>,
    },
}
  38
  39impl From<ModelMode> for AnthropicModelMode {
  40    fn from(value: ModelMode) -> Self {
  41        match value {
  42            ModelMode::Default => AnthropicModelMode::Default,
  43            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  44        }
  45    }
  46}
  47
  48impl From<AnthropicModelMode> for ModelMode {
  49    fn from(value: AnthropicModelMode) -> Self {
  50        match value {
  51            AnthropicModelMode::Default => ModelMode::Default,
  52            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  53        }
  54    }
  55}
  56
/// The Anthropic models known to this crate, plus a user-defined `Custom`
/// escape hatch.
///
/// Each built-in variant (de)serializes under the name given in its
/// `rename`, and additionally deserializes from the listed aliases. The
/// `*Thinking` variants report a thinking mode from `Model::mode`.
/// `Default::default()` is `ClaudeSonnet4_6`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    // The `-1m-context` names are accepted as aliases of the plain 4.6
    // variant; there is no separate 1M-context variant for Opus 4.6.
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6Thinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    // Unlike 4.6, Sonnet 4.5 has dedicated 1M-context variants; they are the
    // only built-in models for which `Model::beta_headers` returns a flag.
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContextThinking,
    // The `-1m-context` names are accepted as aliases of the plain 4.6
    // variant, mirroring Opus 4.6 above.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6Thinking,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    #[serde(rename = "custom")]
    Custom {
        // Returned verbatim by `Model::id` and `Model::request_id`.
        name: String,
        // Returned by `Model::max_token_count`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        // `Model::max_output_tokens` falls back to 4_096 when this is `None`.
        max_output_tokens: Option<u64>,
        // `Model::default_temperature` falls back to 1.0 when this is `None`.
        default_temperature: Option<f32>,
        // Extra beta flags; blank entries are dropped by `Model::beta_headers`.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
 161
 162impl Model {
 163    pub fn default_fast() -> Self {
 164        Self::ClaudeHaiku4_5
 165    }
 166
 167    pub fn from_id(id: &str) -> Result<Self> {
 168        if id.starts_with("claude-opus-4-6-1m-context-thinking") {
 169            return Ok(Self::ClaudeOpus4_6Thinking);
 170        }
 171
 172        if id.starts_with("claude-opus-4-6-1m-context") {
 173            return Ok(Self::ClaudeOpus4_6);
 174        }
 175
 176        if id.starts_with("claude-opus-4-6-thinking") {
 177            return Ok(Self::ClaudeOpus4_6Thinking);
 178        }
 179
 180        if id.starts_with("claude-opus-4-6") {
 181            return Ok(Self::ClaudeOpus4_6);
 182        }
 183
 184        if id.starts_with("claude-opus-4-5-thinking") {
 185            return Ok(Self::ClaudeOpus4_5Thinking);
 186        }
 187
 188        if id.starts_with("claude-opus-4-5") {
 189            return Ok(Self::ClaudeOpus4_5);
 190        }
 191
 192        if id.starts_with("claude-opus-4-1-thinking") {
 193            return Ok(Self::ClaudeOpus4_1Thinking);
 194        }
 195
 196        if id.starts_with("claude-opus-4-thinking") {
 197            return Ok(Self::ClaudeOpus4Thinking);
 198        }
 199
 200        if id.starts_with("claude-opus-4-1") {
 201            return Ok(Self::ClaudeOpus4_1);
 202        }
 203
 204        if id.starts_with("claude-opus-4") {
 205            return Ok(Self::ClaudeOpus4);
 206        }
 207
 208        if id.starts_with("claude-sonnet-4-6-1m-context-thinking") {
 209            return Ok(Self::ClaudeSonnet4_6Thinking);
 210        }
 211
 212        if id.starts_with("claude-sonnet-4-6-1m-context") {
 213            return Ok(Self::ClaudeSonnet4_6);
 214        }
 215
 216        if id.starts_with("claude-sonnet-4-6-thinking") {
 217            return Ok(Self::ClaudeSonnet4_6Thinking);
 218        }
 219
 220        if id.starts_with("claude-sonnet-4-6") {
 221            return Ok(Self::ClaudeSonnet4_6);
 222        }
 223
 224        if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
 225            return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
 226        }
 227
 228        if id.starts_with("claude-sonnet-4-5-1m-context") {
 229            return Ok(Self::ClaudeSonnet4_5_1mContext);
 230        }
 231
 232        if id.starts_with("claude-sonnet-4-5-thinking") {
 233            return Ok(Self::ClaudeSonnet4_5Thinking);
 234        }
 235
 236        if id.starts_with("claude-sonnet-4-5") {
 237            return Ok(Self::ClaudeSonnet4_5);
 238        }
 239
 240        if id.starts_with("claude-sonnet-4-thinking") {
 241            return Ok(Self::ClaudeSonnet4Thinking);
 242        }
 243
 244        if id.starts_with("claude-sonnet-4") {
 245            return Ok(Self::ClaudeSonnet4);
 246        }
 247
 248        if id.starts_with("claude-haiku-4-5-thinking") {
 249            return Ok(Self::ClaudeHaiku4_5Thinking);
 250        }
 251
 252        if id.starts_with("claude-haiku-4-5") {
 253            return Ok(Self::ClaudeHaiku4_5);
 254        }
 255
 256        if id.starts_with("claude-3-haiku") {
 257            return Ok(Self::Claude3Haiku);
 258        }
 259
 260        Err(anyhow!("invalid model ID: {id}"))
 261    }
 262
 263    pub fn id(&self) -> &str {
 264        match self {
 265            Self::ClaudeOpus4 => "claude-opus-4-latest",
 266            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 267            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 268            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 269            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 270            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
 271            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
 272            Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
 273            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 274            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 275            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 276            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 277            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
 278            Self::ClaudeSonnet4_5_1mContextThinking => {
 279                "claude-sonnet-4-5-1m-context-thinking-latest"
 280            }
 281            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
 282            Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking-latest",
 283            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 284            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 285            Self::Claude3Haiku => "claude-3-haiku-20240307",
 286            Self::Custom { name, .. } => name,
 287        }
 288    }
 289
 290    /// The id of the model that should be used for making API requests
 291    pub fn request_id(&self) -> &str {
 292        match self {
 293            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 294            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 295            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
 296            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => "claude-opus-4-6",
 297            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 298            Self::ClaudeSonnet4_5
 299            | Self::ClaudeSonnet4_5Thinking
 300            | Self::ClaudeSonnet4_5_1mContext
 301            | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
 302            Self::ClaudeSonnet4_6 | Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6",
 303            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 304            Self::Claude3Haiku => "claude-3-haiku-20240307",
 305            Self::Custom { name, .. } => name,
 306        }
 307    }
 308
 309    pub fn display_name(&self) -> &str {
 310        match self {
 311            Self::ClaudeOpus4 => "Claude Opus 4",
 312            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 313            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 314            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 315            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 316            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
 317            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
 318            Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
 319            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 320            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 321            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 322            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 323            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
 324            Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
 325            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
 326            Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
 327            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 328            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 329            Self::Claude3Haiku => "Claude 3 Haiku",
 330            Self::Custom {
 331                name, display_name, ..
 332            } => display_name.as_ref().unwrap_or(name),
 333        }
 334    }
 335
 336    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 337        match self {
 338            Self::ClaudeOpus4
 339            | Self::ClaudeOpus4_1
 340            | Self::ClaudeOpus4Thinking
 341            | Self::ClaudeOpus4_1Thinking
 342            | Self::ClaudeOpus4_5
 343            | Self::ClaudeOpus4_5Thinking
 344            | Self::ClaudeOpus4_6
 345            | Self::ClaudeOpus4_6Thinking
 346            | Self::ClaudeSonnet4
 347            | Self::ClaudeSonnet4Thinking
 348            | Self::ClaudeSonnet4_5
 349            | Self::ClaudeSonnet4_5Thinking
 350            | Self::ClaudeSonnet4_5_1mContext
 351            | Self::ClaudeSonnet4_5_1mContextThinking
 352            | Self::ClaudeSonnet4_6
 353            | Self::ClaudeSonnet4_6Thinking
 354            | Self::ClaudeHaiku4_5
 355            | Self::ClaudeHaiku4_5Thinking
 356            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 357                min_total_token: 2_048,
 358                should_speculate: true,
 359                max_cache_anchors: 4,
 360            }),
 361            Self::Custom {
 362                cache_configuration,
 363                ..
 364            } => cache_configuration.clone(),
 365        }
 366    }
 367
 368    pub fn max_token_count(&self) -> u64 {
 369        match self {
 370            Self::ClaudeOpus4
 371            | Self::ClaudeOpus4_1
 372            | Self::ClaudeOpus4Thinking
 373            | Self::ClaudeOpus4_1Thinking
 374            | Self::ClaudeOpus4_5
 375            | Self::ClaudeOpus4_5Thinking
 376            | Self::ClaudeSonnet4
 377            | Self::ClaudeSonnet4Thinking
 378            | Self::ClaudeSonnet4_5
 379            | Self::ClaudeSonnet4_5Thinking
 380            | Self::ClaudeHaiku4_5
 381            | Self::ClaudeHaiku4_5Thinking
 382            | Self::Claude3Haiku => 200_000,
 383            Self::ClaudeOpus4_6
 384            | Self::ClaudeOpus4_6Thinking
 385            | Self::ClaudeSonnet4_5_1mContext
 386            | Self::ClaudeSonnet4_5_1mContextThinking
 387            | Self::ClaudeSonnet4_6
 388            | Self::ClaudeSonnet4_6Thinking => 1_000_000,
 389            Self::Custom { max_tokens, .. } => *max_tokens,
 390        }
 391    }
 392
 393    pub fn max_output_tokens(&self) -> u64 {
 394        match self {
 395            Self::ClaudeOpus4
 396            | Self::ClaudeOpus4Thinking
 397            | Self::ClaudeOpus4_1
 398            | Self::ClaudeOpus4_1Thinking => 32_000,
 399            Self::ClaudeOpus4_5
 400            | Self::ClaudeOpus4_5Thinking
 401            | Self::ClaudeSonnet4
 402            | Self::ClaudeSonnet4Thinking
 403            | Self::ClaudeSonnet4_5
 404            | Self::ClaudeSonnet4_5Thinking
 405            | Self::ClaudeSonnet4_5_1mContext
 406            | Self::ClaudeSonnet4_5_1mContextThinking
 407            | Self::ClaudeSonnet4_6
 408            | Self::ClaudeSonnet4_6Thinking
 409            | Self::ClaudeHaiku4_5
 410            | Self::ClaudeHaiku4_5Thinking => 64_000,
 411            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => 128_000,
 412            Self::Claude3Haiku => 4_096,
 413            Self::Custom {
 414                max_output_tokens, ..
 415            } => max_output_tokens.unwrap_or(4_096),
 416        }
 417    }
 418
 419    pub fn default_temperature(&self) -> f32 {
 420        match self {
 421            Self::ClaudeOpus4
 422            | Self::ClaudeOpus4_1
 423            | Self::ClaudeOpus4Thinking
 424            | Self::ClaudeOpus4_1Thinking
 425            | Self::ClaudeOpus4_5
 426            | Self::ClaudeOpus4_5Thinking
 427            | Self::ClaudeOpus4_6
 428            | Self::ClaudeOpus4_6Thinking
 429            | Self::ClaudeSonnet4
 430            | Self::ClaudeSonnet4Thinking
 431            | Self::ClaudeSonnet4_5
 432            | Self::ClaudeSonnet4_5Thinking
 433            | Self::ClaudeSonnet4_5_1mContext
 434            | Self::ClaudeSonnet4_5_1mContextThinking
 435            | Self::ClaudeSonnet4_6
 436            | Self::ClaudeSonnet4_6Thinking
 437            | Self::ClaudeHaiku4_5
 438            | Self::ClaudeHaiku4_5Thinking
 439            | Self::Claude3Haiku => 1.0,
 440            Self::Custom {
 441                default_temperature,
 442                ..
 443            } => default_temperature.unwrap_or(1.0),
 444        }
 445    }
 446
 447    pub fn mode(&self) -> AnthropicModelMode {
 448        match self {
 449            Self::ClaudeOpus4
 450            | Self::ClaudeOpus4_1
 451            | Self::ClaudeOpus4_5
 452            | Self::ClaudeOpus4_6
 453            | Self::ClaudeSonnet4
 454            | Self::ClaudeSonnet4_5
 455            | Self::ClaudeSonnet4_5_1mContext
 456            | Self::ClaudeSonnet4_6
 457            | Self::ClaudeHaiku4_5
 458            | Self::Claude3Haiku => AnthropicModelMode::Default,
 459            Self::ClaudeOpus4Thinking
 460            | Self::ClaudeOpus4_1Thinking
 461            | Self::ClaudeOpus4_5Thinking
 462            | Self::ClaudeOpus4_6Thinking
 463            | Self::ClaudeSonnet4Thinking
 464            | Self::ClaudeSonnet4_5Thinking
 465            | Self::ClaudeSonnet4_5_1mContextThinking
 466            | Self::ClaudeSonnet4_6Thinking
 467            | Self::ClaudeHaiku4_5Thinking => AnthropicModelMode::Thinking {
 468                budget_tokens: Some(4_096),
 469            },
 470            Self::Custom { mode, .. } => mode.clone(),
 471        }
 472    }
 473
 474    pub fn beta_headers(&self) -> Option<String> {
 475        let mut headers = vec![];
 476
 477        match self {
 478            Self::ClaudeSonnet4_5_1mContext | Self::ClaudeSonnet4_5_1mContextThinking => {
 479                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
 480            }
 481            Self::Custom {
 482                extra_beta_headers, ..
 483            } => {
 484                headers.extend(
 485                    extra_beta_headers
 486                        .iter()
 487                        .filter(|header| !header.trim().is_empty())
 488                        .cloned(),
 489                );
 490            }
 491            _ => {}
 492        }
 493
 494        if headers.is_empty() {
 495            None
 496        } else {
 497            Some(headers.join(","))
 498        }
 499    }
 500
 501    pub fn tool_model_id(&self) -> &str {
 502        if let Self::Custom {
 503            tool_override: Some(tool_override),
 504            ..
 505        } = self
 506        {
 507            tool_override
 508        } else {
 509            self.request_id()
 510        }
 511    }
 512}
 513
 514/// Generate completion with streaming.
 515pub async fn stream_completion(
 516    client: &dyn HttpClient,
 517    api_url: &str,
 518    api_key: &str,
 519    request: Request,
 520    beta_headers: Option<String>,
 521) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 522    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 523        .await
 524        .map(|output| output.0)
 525}
 526
 527/// Generate completion without streaming.
 528pub async fn non_streaming_completion(
 529    client: &dyn HttpClient,
 530    api_url: &str,
 531    api_key: &str,
 532    request: Request,
 533    beta_headers: Option<String>,
 534) -> Result<Response, AnthropicError> {
 535    let (mut response, rate_limits) =
 536        send_request(client, api_url, api_key, &request, beta_headers).await?;
 537
 538    if response.status().is_success() {
 539        let mut body = String::new();
 540        response
 541            .body_mut()
 542            .read_to_string(&mut body)
 543            .await
 544            .map_err(AnthropicError::ReadResponse)?;
 545
 546        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 547    } else {
 548        Err(handle_error_response(response, rate_limits).await)
 549    }
 550}
 551
 552async fn send_request(
 553    client: &dyn HttpClient,
 554    api_url: &str,
 555    api_key: &str,
 556    request: impl Serialize,
 557    beta_headers: Option<String>,
 558) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 559    let uri = format!("{api_url}/v1/messages");
 560
 561    let mut request_builder = HttpRequest::builder()
 562        .method(Method::POST)
 563        .uri(uri)
 564        .header("Anthropic-Version", "2023-06-01")
 565        .header("X-Api-Key", api_key.trim())
 566        .header("Content-Type", "application/json");
 567
 568    if let Some(beta_headers) = beta_headers {
 569        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 570    }
 571
 572    let serialized_request =
 573        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 574    let request = request_builder
 575        .body(AsyncBody::from(serialized_request))
 576        .map_err(AnthropicError::BuildRequestBody)?;
 577
 578    let response = client
 579        .send(request)
 580        .await
 581        .map_err(AnthropicError::HttpSend)?;
 582
 583    let rate_limits = RateLimitInfo::from_headers(response.headers());
 584
 585    Ok((response, rate_limits))
 586}
 587
 588async fn handle_error_response(
 589    mut response: http::Response<AsyncBody>,
 590    rate_limits: RateLimitInfo,
 591) -> AnthropicError {
 592    if response.status().as_u16() == 529 {
 593        return AnthropicError::ServerOverloaded {
 594            retry_after: rate_limits.retry_after,
 595        };
 596    }
 597
 598    if let Some(retry_after) = rate_limits.retry_after {
 599        return AnthropicError::RateLimit { retry_after };
 600    }
 601
 602    let mut body = String::new();
 603    let read_result = response
 604        .body_mut()
 605        .read_to_string(&mut body)
 606        .await
 607        .map_err(AnthropicError::ReadResponse);
 608
 609    if let Err(err) = read_result {
 610        return err;
 611    }
 612
 613    match serde_json::from_str::<Event>(&body) {
 614        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 615        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 616            status_code: response.status(),
 617            message: body,
 618        },
 619    }
 620}
 621
/// An individual rate limit.
///
/// Built from the three `anthropic-ratelimit-{resource}-*` response headers
/// of a single resource.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `anthropic-ratelimit-{resource}-limit` header.
    pub limit: usize,
    /// Value of the `anthropic-ratelimit-{resource}-remaining` header.
    pub remaining: usize,
    /// Value of the `anthropic-ratelimit-{resource}-reset` header, parsed as
    /// an RFC 3339 timestamp and converted to UTC.
    pub reset: DateTime<Utc>,
}
 629
 630impl RateLimit {
 631    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 632        let limit =
 633            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 634        let remaining = get_header(
 635            &format!("anthropic-ratelimit-{resource}-remaining"),
 636            headers,
 637        )?
 638        .parse()?;
 639        let reset = DateTime::parse_from_rfc3339(get_header(
 640            &format!("anthropic-ratelimit-{resource}-reset"),
 641            headers,
 642        )?)?
 643        .to_utc();
 644
 645        Ok(Self {
 646            limit,
 647            remaining,
 648            reset,
 649        })
 650    }
 651}
 652
/// Rate-limit information extracted from the headers of an API response.
///
/// Each field is `None` when its header(s) are absent or cannot be parsed.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// The `retry-after` header, interpreted as a whole number of seconds.
    pub retry_after: Option<Duration>,
    /// Limit read from the `anthropic-ratelimit-requests-*` headers.
    pub requests: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-tokens-*` headers.
    pub tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-input-tokens-*` headers.
    pub input_tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-output-tokens-*` headers.
    pub output_tokens: Option<RateLimit>,
}
 662
 663impl RateLimitInfo {
 664    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 665        // Check if any rate limit headers exist
 666        let has_rate_limit_headers = headers
 667            .keys()
 668            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 669
 670        if !has_rate_limit_headers {
 671            return Self {
 672                retry_after: None,
 673                requests: None,
 674                tokens: None,
 675                input_tokens: None,
 676                output_tokens: None,
 677            };
 678        }
 679
 680        Self {
 681            retry_after: parse_retry_after(headers),
 682            requests: RateLimit::from_headers("requests", headers).ok(),
 683            tokens: RateLimit::from_headers("tokens", headers).ok(),
 684            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 685            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 686        }
 687    }
 688}
 689
 690/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 691/// seconds). Note that other services might specify an HTTP date or some other format for this
 692/// header. Returns `None` if the header is not present or cannot be parsed.
 693pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 694    headers
 695        .get("retry-after")
 696        .and_then(|v| v.to_str().ok())
 697        .and_then(|v| v.parse::<u64>().ok())
 698        .map(Duration::from_secs)
 699}
 700
 701fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 702    Ok(headers
 703        .get(key)
 704        .with_context(|| format!("missing header `{key}`"))?
 705        .to_str()?)
 706}
 707
 708pub async fn stream_completion_with_rate_limit_info(
 709    client: &dyn HttpClient,
 710    api_url: &str,
 711    api_key: &str,
 712    request: Request,
 713    beta_headers: Option<String>,
 714) -> Result<
 715    (
 716        BoxStream<'static, Result<Event, AnthropicError>>,
 717        Option<RateLimitInfo>,
 718    ),
 719    AnthropicError,
 720> {
 721    let request = StreamingRequest {
 722        base: request,
 723        stream: true,
 724    };
 725
 726    let (response, rate_limits) =
 727        send_request(client, api_url, api_key, &request, beta_headers).await?;
 728
 729    if response.status().is_success() {
 730        let reader = BufReader::new(response.into_body());
 731        let stream = reader
 732            .lines()
 733            .filter_map(|line| async move {
 734                match line {
 735                    Ok(line) => {
 736                        let line = line
 737                            .strip_prefix("data: ")
 738                            .or_else(|| line.strip_prefix("data:"))?;
 739
 740                        match serde_json::from_str(line) {
 741                            Ok(response) => Some(Ok(response)),
 742                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 743                        }
 744                    }
 745                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 746                }
 747            })
 748            .boxed();
 749        Ok((stream, Some(rate_limits)))
 750    } else {
 751        Err(handle_error_response(response, rate_limits).await)
 752    }
 753}
 754
/// Kind of cache control attached to a content block.
///
/// Serialized in lowercase, i.e. `"ephemeral"`.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
 760
/// Cache-control marker for a request content block.
///
/// Serializes as `{"type": "ephemeral"}`.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// Written under the JSON key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
 766
/// One message of a conversation: who produced it and its content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    /// Author of the message.
    pub role: Role,
    /// Content blocks, in order.
    pub content: Vec<RequestContent>,
}
 772
/// Author of a message.
///
/// Serialized in lowercase: `"user"` or `"assistant"`.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
 779
/// A content block sent as part of a request message.
///
/// Internally tagged: the variant is selected by the JSON `type` field.
/// Wherever a `cache_control` field exists, it is left out of the serialized
/// output when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A thinking block with its signature (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Opaque redacted thinking data (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image (`"type": "image"`).
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation (`"type": "tool_result"`).
    #[serde(rename = "tool_result")]
    ToolResult {
        // NOTE(review): presumably the `id` of the matching `ToolUse` block —
        // confirm against callers.
        tool_use_id: String,
        // Always serialized, and required when deserializing (no default).
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
 821
/// Content of a tool result: either a bare string or a list of parts.
///
/// Untagged: serialized as a JSON string or a JSON array respectively. When
/// deserializing, variants are tried in declaration order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
 828
/// One part of a multipart tool result.
///
/// Internally tagged by `type`, with lowercase variant names: `"text"` or
/// `"image"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
 835
/// A content block of a response.
///
/// Internally tagged: the variant is selected by the JSON `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text { text: String },
    /// Thinking text (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// Opaque redacted thinking data (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
 852
/// Source of an image content block.
// NOTE(review): presumably `source_type` is "base64", `media_type` a MIME
// type such as "image/png", and `data` the base64-encoded bytes — none of
// this is visible in this file, confirm against callers.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Written under the JSON key `type`.
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
 860
 861fn is_false(value: &bool) -> bool {
 862    !value
 863}
 864
/// A tool definition sent along with a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    /// Name of the tool.
    pub name: String,
    /// Description of the tool.
    pub description: String,
    /// Schema of the tool's input, kept as raw JSON
    /// (presumably a JSON Schema object; verify against callers).
    pub input_schema: serde_json::Value,
    /// Defaults to `false` when absent on input and is omitted from the output when `false`.
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
 873
/// Tool-selection setting for a request.
///
/// Internally tagged on `type` with lowercase variant names: `auto`, `any`, `tool`, `none`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Selects one specific tool by name.
    Tool { name: String },
    None,
}
 882
/// Thinking configuration for a request.
///
/// Internally tagged on `type` with lowercase variant names: `enabled` or `adaptive`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// `budget_tokens` has no `skip_serializing_if`, so `None` is written as `null`.
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}
 889
/// Effort level carried by [`OutputConfig`].
///
/// Serde and strum's `EnumString` both use the snake_case names `low`, `medium`, `high`
/// and `max`, so the same strings work for JSON and for `str::parse`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
 899
/// Output settings that can be attached to a [`Request`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    /// Not skipped when `None`, so an unset effort serializes as `"effort": null`.
    pub effort: Option<Effort>,
}
 904
/// A value that is either a bare string or a list of request content blocks.
///
/// `#[serde(untagged)]` writes no discriminator; on input serde tries `String` first and
/// falls back to `Content`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    /// A single string.
    String(String),
    /// A sequence of structured content blocks.
    Content(Vec<RequestContent>),
}
 911
/// Body of a message-creation request.
///
/// `model`, `max_tokens` and `messages` are always serialized. Every other field defaults
/// when absent on input and is left out of the output when empty or `None`.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Identifier of the model to use.
    pub model: String,
    /// Value sent as `max_tokens` (presumably the output-token cap; TODO confirm).
    pub max_tokens: u64,
    /// The conversation messages.
    pub messages: Vec<Message>,
    /// Tool definitions; omitted when the list is empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Thinking configuration, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Tool-selection setting, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a bare string or as content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Request metadata, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// Output settings, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    /// Stop sequences; omitted when the list is empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    /// Speed setting, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    /// Sampling temperature, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// `top_k` sampling parameter, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// `top_p` sampling parameter, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
 940
/// Speed setting for a request, serialized as `standard` or `fast`.
///
/// `Default` yields [`Speed::Standard`].
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
 948
/// A [`Request`] plus a `stream` flag.
///
/// `#[serde(flatten)]` inlines the fields of `base` next to `stream`, so the JSON is a single
/// flat object rather than a nested `base` object.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    /// The underlying request, flattened into this object.
    #[serde(flatten)]
    pub base: Request,
    /// Value sent as the `stream` field.
    pub stream: bool,
}
 955
/// Metadata attached to a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// Optional user identifier. Not skipped when `None`, so it serializes as `null`.
    pub user_id: Option<String>,
}
 960
/// Token counters reported by the API.
///
/// Every counter is optional: a missing field deserializes to `None`, and `None` is omitted
/// when serializing.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    /// Input token count, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    /// Output token count, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    /// Cache-creation input token count, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    /// Cache-read input token count, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
 972
/// A message object returned by the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    /// Identifier of the message.
    pub id: String,
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    /// Role of the message author.
    pub role: Role,
    /// Content blocks of the message.
    pub content: Vec<ResponseContent>,
    /// Model identifier reported in the response.
    pub model: String,
    /// Stop reason, if present; left as a free-form string rather than an enum.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// Stop sequence, if present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    /// Token usage for the message.
    pub usage: Usage,
}
 987
 988#[derive(Debug, Serialize, Deserialize)]
 989#[serde(tag = "type")]
 990pub enum Event {
 991    #[serde(rename = "message_start")]
 992    MessageStart { message: Response },
 993    #[serde(rename = "content_block_start")]
 994    ContentBlockStart {
 995        index: usize,
 996        content_block: ResponseContent,
 997    },
 998    #[serde(rename = "content_block_delta")]
 999    ContentBlockDelta { index: usize, delta: ContentDelta },
1000    #[serde(rename = "content_block_stop")]
1001    ContentBlockStop { index: usize },
1002    #[serde(rename = "message_delta")]
1003    MessageDelta { delta: MessageDelta, usage: Usage },
1004    #[serde(rename = "message_stop")]
1005    MessageStop,
1006    #[serde(rename = "ping")]
1007    Ping,
1008    #[serde(rename = "error")]
1009    Error { error: ApiError },
1010}
1011
1012#[derive(Debug, Serialize, Deserialize)]
1013#[serde(tag = "type")]
1014pub enum ContentDelta {
1015    #[serde(rename = "text_delta")]
1016    TextDelta { text: String },
1017    #[serde(rename = "thinking_delta")]
1018    ThinkingDelta { thinking: String },
1019    #[serde(rename = "signature_delta")]
1020    SignatureDelta { signature: String },
1021    #[serde(rename = "input_json_delta")]
1022    InputJsonDelta { partial_json: String },
1023}
1024
/// Payload of a `message_delta` event: the message-level stop information.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    /// Stop reason, if any; left as a free-form string.
    pub stop_reason: Option<String>,
    /// Stop sequence, if any.
    pub stop_sequence: Option<String>,
}
1030
/// Errors that can occur while talking to the Anthropic API.
///
/// The variants separate local failures (serializing, building, sending, reading,
/// deserializing) from failures reported by the server (HTTP status, rate limiting,
/// overload, structured API error).
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
1063
/// A structured error object returned by the API.
///
/// `Display` (via `thiserror`) renders as `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Error type string, serialized under the key `type` (e.g. `invalid_request_error`).
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable error message.
    pub message: String,
}
1071
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from the snake_case error type string through strum's `EnumString`
/// (e.g. `"rate_limit_error"` parses to [`ApiErrorCode::RateLimitError`]).
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
1094
1095impl ApiError {
1096    pub fn code(&self) -> Option<ApiErrorCode> {
1097        ApiErrorCode::from_str(&self.error_type).ok()
1098    }
1099
1100    pub fn is_rate_limit_error(&self) -> bool {
1101        matches!(self.error_type.as_str(), "rate_limit_error")
1102    }
1103
1104    pub fn match_window_exceeded(&self) -> Option<u64> {
1105        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1106            return None;
1107        };
1108
1109        parse_prompt_too_long(&self.message)
1110    }
1111}
1112
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens...`.
///
/// Returns `None` when the prefix is missing, when no `" tokens"` marker follows it, or when
/// the text between the two does not parse as a `u64`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    // Only the text before the first " tokens" is the count; anything after is ignored.
    let (count, _rest) = after_prefix.split_once(" tokens")?;
    count.parse().ok()
}
1121
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
///
/// Only `Serialize` is derived, so the `default` in the serde attributes below has no
/// effect here; `skip_serializing_if` is what keeps empty or `None` fields out of the JSON.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    /// Identifier of the model to count tokens for.
    pub model: String,
    /// The conversation messages to count.
    pub messages: Vec<Message>,
    /// System prompt, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Tool definitions; omitted when the list is empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Thinking configuration, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Tool-selection setting, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}
1137
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Number of input tokens reported for the submitted request.
    pub input_tokens: u64,
}
1143
1144/// Count the number of tokens in a message without creating it.
1145pub async fn count_tokens(
1146    client: &dyn HttpClient,
1147    api_url: &str,
1148    api_key: &str,
1149    request: CountTokensRequest,
1150) -> Result<CountTokensResponse, AnthropicError> {
1151    let uri = format!("{api_url}/v1/messages/count_tokens");
1152
1153    let request_builder = HttpRequest::builder()
1154        .method(Method::POST)
1155        .uri(uri)
1156        .header("Anthropic-Version", "2023-06-01")
1157        .header("X-Api-Key", api_key.trim())
1158        .header("Content-Type", "application/json");
1159
1160    let serialized_request =
1161        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1162    let http_request = request_builder
1163        .body(AsyncBody::from(serialized_request))
1164        .map_err(AnthropicError::BuildRequestBody)?;
1165
1166    let mut response = client
1167        .send(http_request)
1168        .await
1169        .map_err(AnthropicError::HttpSend)?;
1170
1171    let rate_limits = RateLimitInfo::from_headers(response.headers());
1172
1173    if response.status().is_success() {
1174        let mut body = String::new();
1175        response
1176            .body_mut()
1177            .read_to_string(&mut body)
1178            .await
1179            .map_err(AnthropicError::ReadResponse)?;
1180
1181        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1182    } else {
1183        Err(handle_error_response(response, rate_limits).await)
1184    }
1185}
1186
/// Checks `ApiError::match_window_exceeded` against a table of
/// (error type, message, expected token count) cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Count followed by a limit comparison.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Count with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message but the wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Non-numeric count.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}