anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
  15pub mod batches;
  16
  17pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  18
  19#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  20#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  21pub struct AnthropicModelCacheConfiguration {
  22    pub min_total_token: u64,
  23    pub should_speculate: bool,
  24    pub max_cache_anchors: usize,
  25}
  26
  27#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  28#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  29pub enum AnthropicModelMode {
  30    #[default]
  31    Default,
  32    Thinking {
  33        budget_tokens: Option<u32>,
  34    },
  35}
  36
  37impl From<ModelMode> for AnthropicModelMode {
  38    fn from(value: ModelMode) -> Self {
  39        match value {
  40            ModelMode::Default => AnthropicModelMode::Default,
  41            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  42        }
  43    }
  44}
  45
  46impl From<AnthropicModelMode> for ModelMode {
  47    fn from(value: AnthropicModelMode) -> Self {
  48        match value {
  49            AnthropicModelMode::Default => ModelMode::Default,
  50            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  51        }
  52    }
  53}
  54
  55#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  56#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
  57pub enum Model {
  58    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
  59    ClaudeOpus4,
  60    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
  61    ClaudeOpus4_1,
  62    #[serde(
  63        rename = "claude-opus-4-thinking",
  64        alias = "claude-opus-4-thinking-latest"
  65    )]
  66    ClaudeOpus4Thinking,
  67    #[serde(
  68        rename = "claude-opus-4-1-thinking",
  69        alias = "claude-opus-4-1-thinking-latest"
  70    )]
  71    ClaudeOpus4_1Thinking,
  72    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
  73    ClaudeOpus4_5,
  74    #[serde(
  75        rename = "claude-opus-4-5-thinking",
  76        alias = "claude-opus-4-5-thinking-latest"
  77    )]
  78    ClaudeOpus4_5Thinking,
  79    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
  80    ClaudeSonnet4,
  81    #[serde(
  82        rename = "claude-sonnet-4-thinking",
  83        alias = "claude-sonnet-4-thinking-latest"
  84    )]
  85    ClaudeSonnet4Thinking,
  86    #[default]
  87    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
  88    ClaudeSonnet4_5,
  89    #[serde(
  90        rename = "claude-sonnet-4-5-thinking",
  91        alias = "claude-sonnet-4-5-thinking-latest"
  92    )]
  93    ClaudeSonnet4_5Thinking,
  94    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
  95    Claude3_7Sonnet,
  96    #[serde(
  97        rename = "claude-3-7-sonnet-thinking",
  98        alias = "claude-3-7-sonnet-thinking-latest"
  99    )]
 100    Claude3_7SonnetThinking,
 101    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
 102    Claude3_5Sonnet,
 103    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
 104    ClaudeHaiku4_5,
 105    #[serde(
 106        rename = "claude-haiku-4-5-thinking",
 107        alias = "claude-haiku-4-5-thinking-latest"
 108    )]
 109    ClaudeHaiku4_5Thinking,
 110    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
 111    Claude3_5Haiku,
 112    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
 113    Claude3Opus,
 114    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
 115    Claude3Sonnet,
 116    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
 117    Claude3Haiku,
 118    #[serde(rename = "custom")]
 119    Custom {
 120        name: String,
 121        max_tokens: u64,
 122        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
 123        display_name: Option<String>,
 124        /// Override this model with a different Anthropic model for tool calls.
 125        tool_override: Option<String>,
 126        /// Indicates whether this custom model supports caching.
 127        cache_configuration: Option<AnthropicModelCacheConfiguration>,
 128        max_output_tokens: Option<u64>,
 129        default_temperature: Option<f32>,
 130        #[serde(default)]
 131        extra_beta_headers: Vec<String>,
 132        #[serde(default)]
 133        mode: AnthropicModelMode,
 134    },
 135}
 136
 137impl Model {
 138    pub fn default_fast() -> Self {
 139        Self::Claude3_5Haiku
 140    }
 141
 142    pub fn from_id(id: &str) -> Result<Self> {
 143        if id.starts_with("claude-opus-4-5-thinking") {
 144            return Ok(Self::ClaudeOpus4_5Thinking);
 145        }
 146
 147        if id.starts_with("claude-opus-4-5") {
 148            return Ok(Self::ClaudeOpus4_5);
 149        }
 150
 151        if id.starts_with("claude-opus-4-1-thinking") {
 152            return Ok(Self::ClaudeOpus4_1Thinking);
 153        }
 154
 155        if id.starts_with("claude-opus-4-thinking") {
 156            return Ok(Self::ClaudeOpus4Thinking);
 157        }
 158
 159        if id.starts_with("claude-opus-4-1") {
 160            return Ok(Self::ClaudeOpus4_1);
 161        }
 162
 163        if id.starts_with("claude-opus-4") {
 164            return Ok(Self::ClaudeOpus4);
 165        }
 166
 167        if id.starts_with("claude-sonnet-4-5-thinking") {
 168            return Ok(Self::ClaudeSonnet4_5Thinking);
 169        }
 170
 171        if id.starts_with("claude-sonnet-4-5") {
 172            return Ok(Self::ClaudeSonnet4_5);
 173        }
 174
 175        if id.starts_with("claude-sonnet-4-thinking") {
 176            return Ok(Self::ClaudeSonnet4Thinking);
 177        }
 178
 179        if id.starts_with("claude-sonnet-4") {
 180            return Ok(Self::ClaudeSonnet4);
 181        }
 182
 183        if id.starts_with("claude-3-7-sonnet-thinking") {
 184            return Ok(Self::Claude3_7SonnetThinking);
 185        }
 186
 187        if id.starts_with("claude-3-7-sonnet") {
 188            return Ok(Self::Claude3_7Sonnet);
 189        }
 190
 191        if id.starts_with("claude-3-5-sonnet") {
 192            return Ok(Self::Claude3_5Sonnet);
 193        }
 194
 195        if id.starts_with("claude-haiku-4-5-thinking") {
 196            return Ok(Self::ClaudeHaiku4_5Thinking);
 197        }
 198
 199        if id.starts_with("claude-haiku-4-5") {
 200            return Ok(Self::ClaudeHaiku4_5);
 201        }
 202
 203        if id.starts_with("claude-3-5-haiku") {
 204            return Ok(Self::Claude3_5Haiku);
 205        }
 206
 207        if id.starts_with("claude-3-opus") {
 208            return Ok(Self::Claude3Opus);
 209        }
 210
 211        if id.starts_with("claude-3-sonnet") {
 212            return Ok(Self::Claude3Sonnet);
 213        }
 214
 215        if id.starts_with("claude-3-haiku") {
 216            return Ok(Self::Claude3Haiku);
 217        }
 218
 219        Err(anyhow!("invalid model ID: {id}"))
 220    }
 221
 222    pub fn id(&self) -> &str {
 223        match self {
 224            Self::ClaudeOpus4 => "claude-opus-4-latest",
 225            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 226            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 227            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 228            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 229            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
 230            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 231            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 232            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 233            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 234            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 235            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
 236            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
 237            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 238            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 239            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 240            Self::Claude3Opus => "claude-3-opus-latest",
 241            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 242            Self::Claude3Haiku => "claude-3-haiku-20240307",
 243            Self::Custom { name, .. } => name,
 244        }
 245    }
 246
 247    /// The id of the model that should be used for making API requests
 248    pub fn request_id(&self) -> &str {
 249        match self {
 250            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 251            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 252            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
 253            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 254            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
 255            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 256            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
 257            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 258            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 259            Self::Claude3Opus => "claude-3-opus-latest",
 260            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 261            Self::Claude3Haiku => "claude-3-haiku-20240307",
 262            Self::Custom { name, .. } => name,
 263        }
 264    }
 265
 266    pub fn display_name(&self) -> &str {
 267        match self {
 268            Self::ClaudeOpus4 => "Claude Opus 4",
 269            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 270            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 271            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 272            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 273            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
 274            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 275            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 276            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 277            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 278            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
 279            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
 280            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
 281            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 282            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 283            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
 284            Self::Claude3Opus => "Claude 3 Opus",
 285            Self::Claude3Sonnet => "Claude 3 Sonnet",
 286            Self::Claude3Haiku => "Claude 3 Haiku",
 287            Self::Custom {
 288                name, display_name, ..
 289            } => display_name.as_ref().unwrap_or(name),
 290        }
 291    }
 292
 293    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 294        match self {
 295            Self::ClaudeOpus4
 296            | Self::ClaudeOpus4_1
 297            | Self::ClaudeOpus4Thinking
 298            | Self::ClaudeOpus4_1Thinking
 299            | Self::ClaudeOpus4_5
 300            | Self::ClaudeOpus4_5Thinking
 301            | Self::ClaudeSonnet4
 302            | Self::ClaudeSonnet4Thinking
 303            | Self::ClaudeSonnet4_5
 304            | Self::ClaudeSonnet4_5Thinking
 305            | Self::Claude3_5Sonnet
 306            | Self::ClaudeHaiku4_5
 307            | Self::ClaudeHaiku4_5Thinking
 308            | Self::Claude3_5Haiku
 309            | Self::Claude3_7Sonnet
 310            | Self::Claude3_7SonnetThinking
 311            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 312                min_total_token: 2_048,
 313                should_speculate: true,
 314                max_cache_anchors: 4,
 315            }),
 316            Self::Custom {
 317                cache_configuration,
 318                ..
 319            } => cache_configuration.clone(),
 320            _ => None,
 321        }
 322    }
 323
 324    pub fn max_token_count(&self) -> u64 {
 325        match self {
 326            Self::ClaudeOpus4
 327            | Self::ClaudeOpus4_1
 328            | Self::ClaudeOpus4Thinking
 329            | Self::ClaudeOpus4_1Thinking
 330            | Self::ClaudeOpus4_5
 331            | Self::ClaudeOpus4_5Thinking
 332            | Self::ClaudeSonnet4
 333            | Self::ClaudeSonnet4Thinking
 334            | Self::ClaudeSonnet4_5
 335            | Self::ClaudeSonnet4_5Thinking
 336            | Self::Claude3_5Sonnet
 337            | Self::ClaudeHaiku4_5
 338            | Self::ClaudeHaiku4_5Thinking
 339            | Self::Claude3_5Haiku
 340            | Self::Claude3_7Sonnet
 341            | Self::Claude3_7SonnetThinking
 342            | Self::Claude3Opus
 343            | Self::Claude3Sonnet
 344            | Self::Claude3Haiku => 200_000,
 345            Self::Custom { max_tokens, .. } => *max_tokens,
 346        }
 347    }
 348
 349    pub fn max_output_tokens(&self) -> u64 {
 350        match self {
 351            Self::ClaudeOpus4
 352            | Self::ClaudeOpus4_1
 353            | Self::ClaudeOpus4Thinking
 354            | Self::ClaudeOpus4_1Thinking
 355            | Self::ClaudeOpus4_5
 356            | Self::ClaudeOpus4_5Thinking
 357            | Self::ClaudeSonnet4
 358            | Self::ClaudeSonnet4Thinking
 359            | Self::ClaudeSonnet4_5
 360            | Self::ClaudeSonnet4_5Thinking
 361            | Self::Claude3_5Sonnet
 362            | Self::Claude3_7Sonnet
 363            | Self::Claude3_7SonnetThinking
 364            | Self::Claude3_5Haiku => 8_192,
 365            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
 366            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
 367            Self::Custom {
 368                max_output_tokens, ..
 369            } => max_output_tokens.unwrap_or(4_096),
 370        }
 371    }
 372
 373    pub fn default_temperature(&self) -> f32 {
 374        match self {
 375            Self::ClaudeOpus4
 376            | Self::ClaudeOpus4_1
 377            | Self::ClaudeOpus4Thinking
 378            | Self::ClaudeOpus4_1Thinking
 379            | Self::ClaudeOpus4_5
 380            | Self::ClaudeOpus4_5Thinking
 381            | Self::ClaudeSonnet4
 382            | Self::ClaudeSonnet4Thinking
 383            | Self::ClaudeSonnet4_5
 384            | Self::ClaudeSonnet4_5Thinking
 385            | Self::Claude3_5Sonnet
 386            | Self::Claude3_7Sonnet
 387            | Self::Claude3_7SonnetThinking
 388            | Self::ClaudeHaiku4_5
 389            | Self::ClaudeHaiku4_5Thinking
 390            | Self::Claude3_5Haiku
 391            | Self::Claude3Opus
 392            | Self::Claude3Sonnet
 393            | Self::Claude3Haiku => 1.0,
 394            Self::Custom {
 395                default_temperature,
 396                ..
 397            } => default_temperature.unwrap_or(1.0),
 398        }
 399    }
 400
 401    pub fn mode(&self) -> AnthropicModelMode {
 402        match self {
 403            Self::ClaudeOpus4
 404            | Self::ClaudeOpus4_1
 405            | Self::ClaudeOpus4_5
 406            | Self::ClaudeSonnet4
 407            | Self::ClaudeSonnet4_5
 408            | Self::Claude3_5Sonnet
 409            | Self::Claude3_7Sonnet
 410            | Self::ClaudeHaiku4_5
 411            | Self::Claude3_5Haiku
 412            | Self::Claude3Opus
 413            | Self::Claude3Sonnet
 414            | Self::Claude3Haiku => AnthropicModelMode::Default,
 415            Self::ClaudeOpus4Thinking
 416            | Self::ClaudeOpus4_1Thinking
 417            | Self::ClaudeOpus4_5Thinking
 418            | Self::ClaudeSonnet4Thinking
 419            | Self::ClaudeSonnet4_5Thinking
 420            | Self::ClaudeHaiku4_5Thinking
 421            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
 422                budget_tokens: Some(4_096),
 423            },
 424            Self::Custom { mode, .. } => mode.clone(),
 425        }
 426    }
 427
 428    pub fn beta_headers(&self) -> Option<String> {
 429        let mut headers = vec![];
 430
 431        match self {
 432            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
 433                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
 434                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
 435                headers.push("token-efficient-tools-2025-02-19".to_string());
 436            }
 437            Self::Custom {
 438                extra_beta_headers, ..
 439            } => {
 440                headers.extend(
 441                    extra_beta_headers
 442                        .iter()
 443                        .filter(|header| !header.trim().is_empty())
 444                        .cloned(),
 445                );
 446            }
 447            _ => {}
 448        }
 449
 450        if headers.is_empty() {
 451            None
 452        } else {
 453            Some(headers.join(","))
 454        }
 455    }
 456
 457    pub fn tool_model_id(&self) -> &str {
 458        if let Self::Custom {
 459            tool_override: Some(tool_override),
 460            ..
 461        } = self
 462        {
 463            tool_override
 464        } else {
 465            self.request_id()
 466        }
 467    }
 468}
 469
 470/// Generate completion with streaming.
 471pub async fn stream_completion(
 472    client: &dyn HttpClient,
 473    api_url: &str,
 474    api_key: &str,
 475    request: Request,
 476    beta_headers: Option<String>,
 477) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 478    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 479        .await
 480        .map(|output| output.0)
 481}
 482
 483/// Generate completion without streaming.
 484pub async fn non_streaming_completion(
 485    client: &dyn HttpClient,
 486    api_url: &str,
 487    api_key: &str,
 488    request: Request,
 489    beta_headers: Option<String>,
 490) -> Result<Response, AnthropicError> {
 491    let (mut response, rate_limits) =
 492        send_request(client, api_url, api_key, &request, beta_headers).await?;
 493
 494    if response.status().is_success() {
 495        let mut body = String::new();
 496        response
 497            .body_mut()
 498            .read_to_string(&mut body)
 499            .await
 500            .map_err(AnthropicError::ReadResponse)?;
 501
 502        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 503    } else {
 504        Err(handle_error_response(response, rate_limits).await)
 505    }
 506}
 507
 508async fn send_request(
 509    client: &dyn HttpClient,
 510    api_url: &str,
 511    api_key: &str,
 512    request: impl Serialize,
 513    beta_headers: Option<String>,
 514) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 515    let uri = format!("{api_url}/v1/messages");
 516
 517    let mut request_builder = HttpRequest::builder()
 518        .method(Method::POST)
 519        .uri(uri)
 520        .header("Anthropic-Version", "2023-06-01")
 521        .header("X-Api-Key", api_key.trim())
 522        .header("Content-Type", "application/json");
 523
 524    if let Some(beta_headers) = beta_headers {
 525        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 526    }
 527
 528    let serialized_request =
 529        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 530    let request = request_builder
 531        .body(AsyncBody::from(serialized_request))
 532        .map_err(AnthropicError::BuildRequestBody)?;
 533
 534    let response = client
 535        .send(request)
 536        .await
 537        .map_err(AnthropicError::HttpSend)?;
 538
 539    let rate_limits = RateLimitInfo::from_headers(response.headers());
 540
 541    Ok((response, rate_limits))
 542}
 543
 544async fn handle_error_response(
 545    mut response: http::Response<AsyncBody>,
 546    rate_limits: RateLimitInfo,
 547) -> AnthropicError {
 548    if response.status().as_u16() == 529 {
 549        return AnthropicError::ServerOverloaded {
 550            retry_after: rate_limits.retry_after,
 551        };
 552    }
 553
 554    if let Some(retry_after) = rate_limits.retry_after {
 555        return AnthropicError::RateLimit { retry_after };
 556    }
 557
 558    let mut body = String::new();
 559    let read_result = response
 560        .body_mut()
 561        .read_to_string(&mut body)
 562        .await
 563        .map_err(AnthropicError::ReadResponse);
 564
 565    if let Err(err) = read_result {
 566        return err;
 567    }
 568
 569    match serde_json::from_str::<Event>(&body) {
 570        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 571        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 572            status_code: response.status(),
 573            message: body,
 574        },
 575    }
 576}
 577
 578/// An individual rate limit.
 579#[derive(Debug)]
 580pub struct RateLimit {
 581    pub limit: usize,
 582    pub remaining: usize,
 583    pub reset: DateTime<Utc>,
 584}
 585
 586impl RateLimit {
 587    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 588        let limit =
 589            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 590        let remaining = get_header(
 591            &format!("anthropic-ratelimit-{resource}-remaining"),
 592            headers,
 593        )?
 594        .parse()?;
 595        let reset = DateTime::parse_from_rfc3339(get_header(
 596            &format!("anthropic-ratelimit-{resource}-reset"),
 597            headers,
 598        )?)?
 599        .to_utc();
 600
 601        Ok(Self {
 602            limit,
 603            remaining,
 604            reset,
 605        })
 606    }
 607}
 608
 609/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
 610#[derive(Debug)]
 611pub struct RateLimitInfo {
 612    pub retry_after: Option<Duration>,
 613    pub requests: Option<RateLimit>,
 614    pub tokens: Option<RateLimit>,
 615    pub input_tokens: Option<RateLimit>,
 616    pub output_tokens: Option<RateLimit>,
 617}
 618
 619impl RateLimitInfo {
 620    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 621        // Check if any rate limit headers exist
 622        let has_rate_limit_headers = headers
 623            .keys()
 624            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 625
 626        if !has_rate_limit_headers {
 627            return Self {
 628                retry_after: None,
 629                requests: None,
 630                tokens: None,
 631                input_tokens: None,
 632                output_tokens: None,
 633            };
 634        }
 635
 636        Self {
 637            retry_after: parse_retry_after(headers),
 638            requests: RateLimit::from_headers("requests", headers).ok(),
 639            tokens: RateLimit::from_headers("tokens", headers).ok(),
 640            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 641            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 642        }
 643    }
 644}
 645
 646/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 647/// seconds). Note that other services might specify an HTTP date or some other format for this
 648/// header. Returns `None` if the header is not present or cannot be parsed.
 649pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 650    headers
 651        .get("retry-after")
 652        .and_then(|v| v.to_str().ok())
 653        .and_then(|v| v.parse::<u64>().ok())
 654        .map(Duration::from_secs)
 655}
 656
 657fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 658    Ok(headers
 659        .get(key)
 660        .with_context(|| format!("missing header `{key}`"))?
 661        .to_str()?)
 662}
 663
 664pub async fn stream_completion_with_rate_limit_info(
 665    client: &dyn HttpClient,
 666    api_url: &str,
 667    api_key: &str,
 668    request: Request,
 669    beta_headers: Option<String>,
 670) -> Result<
 671    (
 672        BoxStream<'static, Result<Event, AnthropicError>>,
 673        Option<RateLimitInfo>,
 674    ),
 675    AnthropicError,
 676> {
 677    let request = StreamingRequest {
 678        base: request,
 679        stream: true,
 680    };
 681
 682    let (response, rate_limits) =
 683        send_request(client, api_url, api_key, &request, beta_headers).await?;
 684
 685    if response.status().is_success() {
 686        let reader = BufReader::new(response.into_body());
 687        let stream = reader
 688            .lines()
 689            .filter_map(|line| async move {
 690                match line {
 691                    Ok(line) => {
 692                        let line = line.strip_prefix("data: ")?;
 693                        match serde_json::from_str(line) {
 694                            Ok(response) => Some(Ok(response)),
 695                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 696                        }
 697                    }
 698                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 699                }
 700            })
 701            .boxed();
 702        Ok((stream, Some(rate_limits)))
 703    } else {
 704        Err(handle_error_response(response, rate_limits).await)
 705    }
 706}
 707
 708#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 709#[serde(rename_all = "lowercase")]
 710pub enum CacheControlType {
 711    Ephemeral,
 712}
 713
 714#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 715pub struct CacheControl {
 716    #[serde(rename = "type")]
 717    pub cache_type: CacheControlType,
 718}
 719
 720#[derive(Debug, Serialize, Deserialize)]
 721pub struct Message {
 722    pub role: Role,
 723    pub content: Vec<RequestContent>,
 724}
 725
 726#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 727#[serde(rename_all = "lowercase")]
 728pub enum Role {
 729    User,
 730    Assistant,
 731}
 732
 733#[derive(Debug, Serialize, Deserialize)]
 734#[serde(tag = "type")]
 735pub enum RequestContent {
 736    #[serde(rename = "text")]
 737    Text {
 738        text: String,
 739        #[serde(skip_serializing_if = "Option::is_none")]
 740        cache_control: Option<CacheControl>,
 741    },
 742    #[serde(rename = "thinking")]
 743    Thinking {
 744        thinking: String,
 745        signature: String,
 746        #[serde(skip_serializing_if = "Option::is_none")]
 747        cache_control: Option<CacheControl>,
 748    },
 749    #[serde(rename = "redacted_thinking")]
 750    RedactedThinking { data: String },
 751    #[serde(rename = "image")]
 752    Image {
 753        source: ImageSource,
 754        #[serde(skip_serializing_if = "Option::is_none")]
 755        cache_control: Option<CacheControl>,
 756    },
 757    #[serde(rename = "tool_use")]
 758    ToolUse {
 759        id: String,
 760        name: String,
 761        input: serde_json::Value,
 762        #[serde(skip_serializing_if = "Option::is_none")]
 763        cache_control: Option<CacheControl>,
 764    },
 765    #[serde(rename = "tool_result")]
 766    ToolResult {
 767        tool_use_id: String,
 768        is_error: bool,
 769        content: ToolResultContent,
 770        #[serde(skip_serializing_if = "Option::is_none")]
 771        cache_control: Option<CacheControl>,
 772    },
 773}
 774
 775#[derive(Debug, Serialize, Deserialize)]
 776#[serde(untagged)]
 777pub enum ToolResultContent {
 778    Plain(String),
 779    Multipart(Vec<ToolResultPart>),
 780}
 781
 782#[derive(Debug, Serialize, Deserialize)]
 783#[serde(tag = "type", rename_all = "lowercase")]
 784pub enum ToolResultPart {
 785    Text { text: String },
 786    Image { source: ImageSource },
 787}
 788
 789#[derive(Debug, Serialize, Deserialize)]
 790#[serde(tag = "type")]
 791pub enum ResponseContent {
 792    #[serde(rename = "text")]
 793    Text { text: String },
 794    #[serde(rename = "thinking")]
 795    Thinking { thinking: String },
 796    #[serde(rename = "redacted_thinking")]
 797    RedactedThinking { data: String },
 798    #[serde(rename = "tool_use")]
 799    ToolUse {
 800        id: String,
 801        name: String,
 802        input: serde_json::Value,
 803    },
 804}
 805
 806#[derive(Debug, Serialize, Deserialize)]
 807pub struct ImageSource {
 808    #[serde(rename = "type")]
 809    pub source_type: String,
 810    pub media_type: String,
 811    pub data: String,
 812}
 813
 814#[derive(Debug, Serialize, Deserialize)]
 815pub struct Tool {
 816    pub name: String,
 817    pub description: String,
 818    pub input_schema: serde_json::Value,
 819}
 820
 821#[derive(Debug, Serialize, Deserialize)]
 822#[serde(tag = "type", rename_all = "lowercase")]
 823pub enum ToolChoice {
 824    Auto,
 825    Any,
 826    Tool { name: String },
 827    None,
 828}
 829
 830#[derive(Debug, Serialize, Deserialize)]
 831#[serde(tag = "type", rename_all = "lowercase")]
 832pub enum Thinking {
 833    Enabled { budget_tokens: Option<u32> },
 834}
 835
 836#[derive(Debug, Serialize, Deserialize)]
 837#[serde(untagged)]
 838pub enum StringOrContents {
 839    String(String),
 840    Content(Vec<RequestContent>),
 841}
 842
 843#[derive(Debug, Serialize, Deserialize)]
 844pub struct Request {
 845    pub model: String,
 846    pub max_tokens: u64,
 847    pub messages: Vec<Message>,
 848    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 849    pub tools: Vec<Tool>,
 850    #[serde(default, skip_serializing_if = "Option::is_none")]
 851    pub thinking: Option<Thinking>,
 852    #[serde(default, skip_serializing_if = "Option::is_none")]
 853    pub tool_choice: Option<ToolChoice>,
 854    #[serde(default, skip_serializing_if = "Option::is_none")]
 855    pub system: Option<StringOrContents>,
 856    #[serde(default, skip_serializing_if = "Option::is_none")]
 857    pub metadata: Option<Metadata>,
 858    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 859    pub stop_sequences: Vec<String>,
 860    #[serde(default, skip_serializing_if = "Option::is_none")]
 861    pub temperature: Option<f32>,
 862    #[serde(default, skip_serializing_if = "Option::is_none")]
 863    pub top_k: Option<u32>,
 864    #[serde(default, skip_serializing_if = "Option::is_none")]
 865    pub top_p: Option<f32>,
 866}
 867
 868#[derive(Debug, Serialize, Deserialize)]
 869struct StreamingRequest {
 870    #[serde(flatten)]
 871    pub base: Request,
 872    pub stream: bool,
 873}
 874
 875#[derive(Debug, Serialize, Deserialize)]
 876pub struct Metadata {
 877    pub user_id: Option<String>,
 878}
 879
 880#[derive(Debug, Serialize, Deserialize, Default)]
 881pub struct Usage {
 882    #[serde(default, skip_serializing_if = "Option::is_none")]
 883    pub input_tokens: Option<u64>,
 884    #[serde(default, skip_serializing_if = "Option::is_none")]
 885    pub output_tokens: Option<u64>,
 886    #[serde(default, skip_serializing_if = "Option::is_none")]
 887    pub cache_creation_input_tokens: Option<u64>,
 888    #[serde(default, skip_serializing_if = "Option::is_none")]
 889    pub cache_read_input_tokens: Option<u64>,
 890}
 891
 892#[derive(Debug, Serialize, Deserialize)]
 893pub struct Response {
 894    pub id: String,
 895    #[serde(rename = "type")]
 896    pub response_type: String,
 897    pub role: Role,
 898    pub content: Vec<ResponseContent>,
 899    pub model: String,
 900    #[serde(default, skip_serializing_if = "Option::is_none")]
 901    pub stop_reason: Option<String>,
 902    #[serde(default, skip_serializing_if = "Option::is_none")]
 903    pub stop_sequence: Option<String>,
 904    pub usage: Usage,
 905}
 906
 907#[derive(Debug, Serialize, Deserialize)]
 908#[serde(tag = "type")]
 909pub enum Event {
 910    #[serde(rename = "message_start")]
 911    MessageStart { message: Response },
 912    #[serde(rename = "content_block_start")]
 913    ContentBlockStart {
 914        index: usize,
 915        content_block: ResponseContent,
 916    },
 917    #[serde(rename = "content_block_delta")]
 918    ContentBlockDelta { index: usize, delta: ContentDelta },
 919    #[serde(rename = "content_block_stop")]
 920    ContentBlockStop { index: usize },
 921    #[serde(rename = "message_delta")]
 922    MessageDelta { delta: MessageDelta, usage: Usage },
 923    #[serde(rename = "message_stop")]
 924    MessageStop,
 925    #[serde(rename = "ping")]
 926    Ping,
 927    #[serde(rename = "error")]
 928    Error { error: ApiError },
 929}
 930
 931#[derive(Debug, Serialize, Deserialize)]
 932#[serde(tag = "type")]
 933pub enum ContentDelta {
 934    #[serde(rename = "text_delta")]
 935    TextDelta { text: String },
 936    #[serde(rename = "thinking_delta")]
 937    ThinkingDelta { thinking: String },
 938    #[serde(rename = "signature_delta")]
 939    SignatureDelta { signature: String },
 940    #[serde(rename = "input_json_delta")]
 941    InputJsonDelta { partial_json: String },
 942}
 943
 944#[derive(Debug, Serialize, Deserialize)]
 945pub struct MessageDelta {
 946    pub stop_reason: Option<String>,
 947    pub stop_sequence: Option<String>,
 948}
 949
 950#[derive(Debug)]
 951pub enum AnthropicError {
 952    /// Failed to serialize the HTTP request body to JSON
 953    SerializeRequest(serde_json::Error),
 954
 955    /// Failed to construct the HTTP request body
 956    BuildRequestBody(http::Error),
 957
 958    /// Failed to send the HTTP request
 959    HttpSend(anyhow::Error),
 960
 961    /// Failed to deserialize the response from JSON
 962    DeserializeResponse(serde_json::Error),
 963
 964    /// Failed to read from response stream
 965    ReadResponse(io::Error),
 966
 967    /// HTTP error response from the API
 968    HttpResponseError {
 969        status_code: StatusCode,
 970        message: String,
 971    },
 972
 973    /// Rate limit exceeded
 974    RateLimit { retry_after: Duration },
 975
 976    /// Server overloaded
 977    ServerOverloaded { retry_after: Option<Duration> },
 978
 979    /// API returned an error response
 980    ApiError(ApiError),
 981}
 982
 983#[derive(Debug, Serialize, Deserialize, Error)]
 984#[error("Anthropic API Error: {error_type}: {message}")]
 985pub struct ApiError {
 986    #[serde(rename = "type")]
 987    pub error_type: String,
 988    pub message: String,
 989}
 990
 991/// An Anthropic API error code.
 992/// <https://docs.anthropic.com/en/api/errors#http-errors>
 993#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
 994#[strum(serialize_all = "snake_case")]
 995pub enum ApiErrorCode {
 996    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
 997    InvalidRequestError,
 998    /// 401 - `authentication_error`: There's an issue with your API key.
 999    AuthenticationError,
1000    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
1001    PermissionError,
1002    /// 404 - `not_found_error`: The requested resource was not found.
1003    NotFoundError,
1004    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
1005    RequestTooLarge,
1006    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
1007    RateLimitError,
1008    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
1009    ApiError,
1010    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
1011    OverloadedError,
1012}
1013
1014impl ApiError {
1015    pub fn code(&self) -> Option<ApiErrorCode> {
1016        ApiErrorCode::from_str(&self.error_type).ok()
1017    }
1018
1019    pub fn is_rate_limit_error(&self) -> bool {
1020        matches!(self.error_type.as_str(), "rate_limit_error")
1021    }
1022
1023    pub fn match_window_exceeded(&self) -> Option<u64> {
1024        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1025            return None;
1026        };
1027
1028        parse_prompt_too_long(&self.message)
1029    }
1030}
1031
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens…`.
///
/// The message must start with the exact prefix `prompt is too long: `, and
/// the text between that prefix and the first occurrence of ` tokens` must
/// parse as a `u64`. Returns `None` if any of these conditions fails.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    // Anything following " tokens" (e.g. " > 200000") is ignored.
    let (count, _rest) = after_prefix.split_once(" tokens")?;
    count.parse().ok()
}
1040
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Well-formed message with a trailing limit.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Well-formed message with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, but the wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Right shape, but the count is not a number.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}