anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
  15pub mod batches;
  16
  17pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  18
  19#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  20#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  21pub struct AnthropicModelCacheConfiguration {
  22    pub min_total_token: u64,
  23    pub should_speculate: bool,
  24    pub max_cache_anchors: usize,
  25}
  26
  27#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  28#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  29pub enum AnthropicModelMode {
  30    #[default]
  31    Default,
  32    Thinking {
  33        budget_tokens: Option<u32>,
  34    },
  35}
  36
  37impl From<ModelMode> for AnthropicModelMode {
  38    fn from(value: ModelMode) -> Self {
  39        match value {
  40            ModelMode::Default => AnthropicModelMode::Default,
  41            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  42        }
  43    }
  44}
  45
  46impl From<AnthropicModelMode> for ModelMode {
  47    fn from(value: AnthropicModelMode) -> Self {
  48        match value {
  49            AnthropicModelMode::Default => ModelMode::Default,
  50            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  51        }
  52    }
  53}
  54
  55#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  56#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
  57pub enum Model {
  58    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
  59    ClaudeOpus4,
  60    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
  61    ClaudeOpus4_1,
  62    #[serde(
  63        rename = "claude-opus-4-thinking",
  64        alias = "claude-opus-4-thinking-latest"
  65    )]
  66    ClaudeOpus4Thinking,
  67    #[serde(
  68        rename = "claude-opus-4-1-thinking",
  69        alias = "claude-opus-4-1-thinking-latest"
  70    )]
  71    ClaudeOpus4_1Thinking,
  72    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
  73    ClaudeOpus4_5,
  74    #[serde(
  75        rename = "claude-opus-4-5-thinking",
  76        alias = "claude-opus-4-5-thinking-latest"
  77    )]
  78    ClaudeOpus4_5Thinking,
  79    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
  80    ClaudeSonnet4,
  81    #[serde(
  82        rename = "claude-sonnet-4-thinking",
  83        alias = "claude-sonnet-4-thinking-latest"
  84    )]
  85    ClaudeSonnet4Thinking,
  86    #[default]
  87    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
  88    ClaudeSonnet4_5,
  89    #[serde(
  90        rename = "claude-sonnet-4-5-thinking",
  91        alias = "claude-sonnet-4-5-thinking-latest"
  92    )]
  93    ClaudeSonnet4_5Thinking,
  94    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
  95    Claude3_7Sonnet,
  96    #[serde(
  97        rename = "claude-3-7-sonnet-thinking",
  98        alias = "claude-3-7-sonnet-thinking-latest"
  99    )]
 100    Claude3_7SonnetThinking,
 101    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
 102    Claude3_5Sonnet,
 103    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
 104    ClaudeHaiku4_5,
 105    #[serde(
 106        rename = "claude-haiku-4-5-thinking",
 107        alias = "claude-haiku-4-5-thinking-latest"
 108    )]
 109    ClaudeHaiku4_5Thinking,
 110    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
 111    Claude3_5Haiku,
 112    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
 113    Claude3Opus,
 114    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
 115    Claude3Sonnet,
 116    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
 117    Claude3Haiku,
 118    #[serde(rename = "custom")]
 119    Custom {
 120        name: String,
 121        max_tokens: u64,
 122        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
 123        display_name: Option<String>,
 124        /// Override this model with a different Anthropic model for tool calls.
 125        tool_override: Option<String>,
 126        /// Indicates whether this custom model supports caching.
 127        cache_configuration: Option<AnthropicModelCacheConfiguration>,
 128        max_output_tokens: Option<u64>,
 129        default_temperature: Option<f32>,
 130        #[serde(default)]
 131        extra_beta_headers: Vec<String>,
 132        #[serde(default)]
 133        mode: AnthropicModelMode,
 134    },
 135}
 136
 137impl Model {
 138    pub fn default_fast() -> Self {
 139        Self::Claude3_5Haiku
 140    }
 141
 142    pub fn from_id(id: &str) -> Result<Self> {
 143        if id.starts_with("claude-opus-4-5-thinking") {
 144            return Ok(Self::ClaudeOpus4_5Thinking);
 145        }
 146
 147        if id.starts_with("claude-opus-4-5") {
 148            return Ok(Self::ClaudeOpus4_5);
 149        }
 150
 151        if id.starts_with("claude-opus-4-1-thinking") {
 152            return Ok(Self::ClaudeOpus4_1Thinking);
 153        }
 154
 155        if id.starts_with("claude-opus-4-thinking") {
 156            return Ok(Self::ClaudeOpus4Thinking);
 157        }
 158
 159        if id.starts_with("claude-opus-4-1") {
 160            return Ok(Self::ClaudeOpus4_1);
 161        }
 162
 163        if id.starts_with("claude-opus-4") {
 164            return Ok(Self::ClaudeOpus4);
 165        }
 166
 167        if id.starts_with("claude-sonnet-4-5-thinking") {
 168            return Ok(Self::ClaudeSonnet4_5Thinking);
 169        }
 170
 171        if id.starts_with("claude-sonnet-4-5") {
 172            return Ok(Self::ClaudeSonnet4_5);
 173        }
 174
 175        if id.starts_with("claude-sonnet-4-thinking") {
 176            return Ok(Self::ClaudeSonnet4Thinking);
 177        }
 178
 179        if id.starts_with("claude-sonnet-4") {
 180            return Ok(Self::ClaudeSonnet4);
 181        }
 182
 183        if id.starts_with("claude-3-7-sonnet-thinking") {
 184            return Ok(Self::Claude3_7SonnetThinking);
 185        }
 186
 187        if id.starts_with("claude-3-7-sonnet") {
 188            return Ok(Self::Claude3_7Sonnet);
 189        }
 190
 191        if id.starts_with("claude-3-5-sonnet") {
 192            return Ok(Self::Claude3_5Sonnet);
 193        }
 194
 195        if id.starts_with("claude-haiku-4-5-thinking") {
 196            return Ok(Self::ClaudeHaiku4_5Thinking);
 197        }
 198
 199        if id.starts_with("claude-haiku-4-5") {
 200            return Ok(Self::ClaudeHaiku4_5);
 201        }
 202
 203        if id.starts_with("claude-3-5-haiku") {
 204            return Ok(Self::Claude3_5Haiku);
 205        }
 206
 207        if id.starts_with("claude-3-opus") {
 208            return Ok(Self::Claude3Opus);
 209        }
 210
 211        if id.starts_with("claude-3-sonnet") {
 212            return Ok(Self::Claude3Sonnet);
 213        }
 214
 215        if id.starts_with("claude-3-haiku") {
 216            return Ok(Self::Claude3Haiku);
 217        }
 218
 219        Err(anyhow!("invalid model ID: {id}"))
 220    }
 221
 222    pub fn id(&self) -> &str {
 223        match self {
 224            Self::ClaudeOpus4 => "claude-opus-4-latest",
 225            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 226            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 227            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 228            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 229            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
 230            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 231            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 232            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 233            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 234            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 235            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
 236            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
 237            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 238            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 239            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 240            Self::Claude3Opus => "claude-3-opus-latest",
 241            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 242            Self::Claude3Haiku => "claude-3-haiku-20240307",
 243            Self::Custom { name, .. } => name,
 244        }
 245    }
 246
 247    /// The id of the model that should be used for making API requests
 248    pub fn request_id(&self) -> &str {
 249        match self {
 250            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 251            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 252            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
 253            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 254            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
 255            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 256            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
 257            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 258            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 259            Self::Claude3Opus => "claude-3-opus-latest",
 260            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 261            Self::Claude3Haiku => "claude-3-haiku-20240307",
 262            Self::Custom { name, .. } => name,
 263        }
 264    }
 265
 266    pub fn display_name(&self) -> &str {
 267        match self {
 268            Self::ClaudeOpus4 => "Claude Opus 4",
 269            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 270            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 271            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 272            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 273            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
 274            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 275            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 276            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 277            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 278            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
 279            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
 280            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
 281            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 282            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 283            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
 284            Self::Claude3Opus => "Claude 3 Opus",
 285            Self::Claude3Sonnet => "Claude 3 Sonnet",
 286            Self::Claude3Haiku => "Claude 3 Haiku",
 287            Self::Custom {
 288                name, display_name, ..
 289            } => display_name.as_ref().unwrap_or(name),
 290        }
 291    }
 292
 293    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 294        match self {
 295            Self::ClaudeOpus4
 296            | Self::ClaudeOpus4_1
 297            | Self::ClaudeOpus4Thinking
 298            | Self::ClaudeOpus4_1Thinking
 299            | Self::ClaudeOpus4_5
 300            | Self::ClaudeOpus4_5Thinking
 301            | Self::ClaudeSonnet4
 302            | Self::ClaudeSonnet4Thinking
 303            | Self::ClaudeSonnet4_5
 304            | Self::ClaudeSonnet4_5Thinking
 305            | Self::Claude3_5Sonnet
 306            | Self::ClaudeHaiku4_5
 307            | Self::ClaudeHaiku4_5Thinking
 308            | Self::Claude3_5Haiku
 309            | Self::Claude3_7Sonnet
 310            | Self::Claude3_7SonnetThinking
 311            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 312                min_total_token: 2_048,
 313                should_speculate: true,
 314                max_cache_anchors: 4,
 315            }),
 316            Self::Custom {
 317                cache_configuration,
 318                ..
 319            } => cache_configuration.clone(),
 320            _ => None,
 321        }
 322    }
 323
 324    pub fn max_token_count(&self) -> u64 {
 325        match self {
 326            Self::ClaudeOpus4
 327            | Self::ClaudeOpus4_1
 328            | Self::ClaudeOpus4Thinking
 329            | Self::ClaudeOpus4_1Thinking
 330            | Self::ClaudeOpus4_5
 331            | Self::ClaudeOpus4_5Thinking
 332            | Self::ClaudeSonnet4
 333            | Self::ClaudeSonnet4Thinking
 334            | Self::ClaudeSonnet4_5
 335            | Self::ClaudeSonnet4_5Thinking
 336            | Self::Claude3_5Sonnet
 337            | Self::ClaudeHaiku4_5
 338            | Self::ClaudeHaiku4_5Thinking
 339            | Self::Claude3_5Haiku
 340            | Self::Claude3_7Sonnet
 341            | Self::Claude3_7SonnetThinking
 342            | Self::Claude3Opus
 343            | Self::Claude3Sonnet
 344            | Self::Claude3Haiku => 200_000,
 345            Self::Custom { max_tokens, .. } => *max_tokens,
 346        }
 347    }
 348
 349    pub fn max_output_tokens(&self) -> u64 {
 350        match self {
 351            Self::ClaudeOpus4
 352            | Self::ClaudeOpus4_1
 353            | Self::ClaudeOpus4Thinking
 354            | Self::ClaudeOpus4_1Thinking
 355            | Self::ClaudeOpus4_5
 356            | Self::ClaudeOpus4_5Thinking
 357            | Self::ClaudeSonnet4
 358            | Self::ClaudeSonnet4Thinking
 359            | Self::ClaudeSonnet4_5
 360            | Self::ClaudeSonnet4_5Thinking
 361            | Self::Claude3_5Sonnet
 362            | Self::Claude3_7Sonnet
 363            | Self::Claude3_7SonnetThinking
 364            | Self::Claude3_5Haiku => 8_192,
 365            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
 366            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
 367            Self::Custom {
 368                max_output_tokens, ..
 369            } => max_output_tokens.unwrap_or(4_096),
 370        }
 371    }
 372
 373    pub fn default_temperature(&self) -> f32 {
 374        match self {
 375            Self::ClaudeOpus4
 376            | Self::ClaudeOpus4_1
 377            | Self::ClaudeOpus4Thinking
 378            | Self::ClaudeOpus4_1Thinking
 379            | Self::ClaudeOpus4_5
 380            | Self::ClaudeOpus4_5Thinking
 381            | Self::ClaudeSonnet4
 382            | Self::ClaudeSonnet4Thinking
 383            | Self::ClaudeSonnet4_5
 384            | Self::ClaudeSonnet4_5Thinking
 385            | Self::Claude3_5Sonnet
 386            | Self::Claude3_7Sonnet
 387            | Self::Claude3_7SonnetThinking
 388            | Self::ClaudeHaiku4_5
 389            | Self::ClaudeHaiku4_5Thinking
 390            | Self::Claude3_5Haiku
 391            | Self::Claude3Opus
 392            | Self::Claude3Sonnet
 393            | Self::Claude3Haiku => 1.0,
 394            Self::Custom {
 395                default_temperature,
 396                ..
 397            } => default_temperature.unwrap_or(1.0),
 398        }
 399    }
 400
 401    pub fn mode(&self) -> AnthropicModelMode {
 402        match self {
 403            Self::ClaudeOpus4
 404            | Self::ClaudeOpus4_1
 405            | Self::ClaudeOpus4_5
 406            | Self::ClaudeSonnet4
 407            | Self::ClaudeSonnet4_5
 408            | Self::Claude3_5Sonnet
 409            | Self::Claude3_7Sonnet
 410            | Self::ClaudeHaiku4_5
 411            | Self::Claude3_5Haiku
 412            | Self::Claude3Opus
 413            | Self::Claude3Sonnet
 414            | Self::Claude3Haiku => AnthropicModelMode::Default,
 415            Self::ClaudeOpus4Thinking
 416            | Self::ClaudeOpus4_1Thinking
 417            | Self::ClaudeOpus4_5Thinking
 418            | Self::ClaudeSonnet4Thinking
 419            | Self::ClaudeSonnet4_5Thinking
 420            | Self::ClaudeHaiku4_5Thinking
 421            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
 422                budget_tokens: Some(4_096),
 423            },
 424            Self::Custom { mode, .. } => mode.clone(),
 425        }
 426    }
 427
 428    pub fn beta_headers(&self) -> Option<String> {
 429        let mut headers = vec![];
 430
 431        match self {
 432            Self::ClaudeOpus4
 433            | Self::ClaudeOpus4_1
 434            | Self::ClaudeOpus4_5
 435            | Self::ClaudeSonnet4
 436            | Self::ClaudeSonnet4_5
 437            | Self::ClaudeOpus4Thinking
 438            | Self::ClaudeOpus4_1Thinking
 439            | Self::ClaudeOpus4_5Thinking
 440            | Self::ClaudeSonnet4Thinking
 441            | Self::ClaudeSonnet4_5Thinking => {
 442                // Fine-grained tool streaming for newer models
 443                headers.push("fine-grained-tool-streaming-2025-05-14".to_string());
 444            }
 445            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
 446                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
 447                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
 448                headers.push("token-efficient-tools-2025-02-19".to_string());
 449                headers.push("fine-grained-tool-streaming-2025-05-14".to_string());
 450            }
 451            Self::Custom {
 452                extra_beta_headers, ..
 453            } => {
 454                headers.extend(
 455                    extra_beta_headers
 456                        .iter()
 457                        .filter(|header| !header.trim().is_empty())
 458                        .cloned(),
 459                );
 460            }
 461            _ => {}
 462        }
 463
 464        if headers.is_empty() {
 465            None
 466        } else {
 467            Some(headers.join(","))
 468        }
 469    }
 470
 471    pub fn tool_model_id(&self) -> &str {
 472        if let Self::Custom {
 473            tool_override: Some(tool_override),
 474            ..
 475        } = self
 476        {
 477            tool_override
 478        } else {
 479            self.request_id()
 480        }
 481    }
 482}
 483
 484/// Generate completion with streaming.
 485pub async fn stream_completion(
 486    client: &dyn HttpClient,
 487    api_url: &str,
 488    api_key: &str,
 489    request: Request,
 490    beta_headers: Option<String>,
 491) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 492    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 493        .await
 494        .map(|output| output.0)
 495}
 496
 497/// Generate completion without streaming.
 498pub async fn non_streaming_completion(
 499    client: &dyn HttpClient,
 500    api_url: &str,
 501    api_key: &str,
 502    request: Request,
 503    beta_headers: Option<String>,
 504) -> Result<Response, AnthropicError> {
 505    let (mut response, rate_limits) =
 506        send_request(client, api_url, api_key, &request, beta_headers).await?;
 507
 508    if response.status().is_success() {
 509        let mut body = String::new();
 510        response
 511            .body_mut()
 512            .read_to_string(&mut body)
 513            .await
 514            .map_err(AnthropicError::ReadResponse)?;
 515
 516        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 517    } else {
 518        Err(handle_error_response(response, rate_limits).await)
 519    }
 520}
 521
 522async fn send_request(
 523    client: &dyn HttpClient,
 524    api_url: &str,
 525    api_key: &str,
 526    request: impl Serialize,
 527    beta_headers: Option<String>,
 528) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 529    let uri = format!("{api_url}/v1/messages");
 530
 531    let mut request_builder = HttpRequest::builder()
 532        .method(Method::POST)
 533        .uri(uri)
 534        .header("Anthropic-Version", "2023-06-01")
 535        .header("X-Api-Key", api_key.trim())
 536        .header("Content-Type", "application/json");
 537
 538    if let Some(beta_headers) = beta_headers {
 539        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 540    }
 541
 542    let serialized_request =
 543        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 544    let request = request_builder
 545        .body(AsyncBody::from(serialized_request))
 546        .map_err(AnthropicError::BuildRequestBody)?;
 547
 548    let response = client
 549        .send(request)
 550        .await
 551        .map_err(AnthropicError::HttpSend)?;
 552
 553    let rate_limits = RateLimitInfo::from_headers(response.headers());
 554
 555    Ok((response, rate_limits))
 556}
 557
 558async fn handle_error_response(
 559    mut response: http::Response<AsyncBody>,
 560    rate_limits: RateLimitInfo,
 561) -> AnthropicError {
 562    if response.status().as_u16() == 529 {
 563        return AnthropicError::ServerOverloaded {
 564            retry_after: rate_limits.retry_after,
 565        };
 566    }
 567
 568    if let Some(retry_after) = rate_limits.retry_after {
 569        return AnthropicError::RateLimit { retry_after };
 570    }
 571
 572    let mut body = String::new();
 573    let read_result = response
 574        .body_mut()
 575        .read_to_string(&mut body)
 576        .await
 577        .map_err(AnthropicError::ReadResponse);
 578
 579    if let Err(err) = read_result {
 580        return err;
 581    }
 582
 583    match serde_json::from_str::<Event>(&body) {
 584        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 585        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 586            status_code: response.status(),
 587            message: body,
 588        },
 589    }
 590}
 591
 592/// An individual rate limit.
 593#[derive(Debug)]
 594pub struct RateLimit {
 595    pub limit: usize,
 596    pub remaining: usize,
 597    pub reset: DateTime<Utc>,
 598}
 599
 600impl RateLimit {
 601    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 602        let limit =
 603            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 604        let remaining = get_header(
 605            &format!("anthropic-ratelimit-{resource}-remaining"),
 606            headers,
 607        )?
 608        .parse()?;
 609        let reset = DateTime::parse_from_rfc3339(get_header(
 610            &format!("anthropic-ratelimit-{resource}-reset"),
 611            headers,
 612        )?)?
 613        .to_utc();
 614
 615        Ok(Self {
 616            limit,
 617            remaining,
 618            reset,
 619        })
 620    }
 621}
 622
 623/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
 624#[derive(Debug)]
 625pub struct RateLimitInfo {
 626    pub retry_after: Option<Duration>,
 627    pub requests: Option<RateLimit>,
 628    pub tokens: Option<RateLimit>,
 629    pub input_tokens: Option<RateLimit>,
 630    pub output_tokens: Option<RateLimit>,
 631}
 632
 633impl RateLimitInfo {
 634    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 635        // Check if any rate limit headers exist
 636        let has_rate_limit_headers = headers
 637            .keys()
 638            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 639
 640        if !has_rate_limit_headers {
 641            return Self {
 642                retry_after: None,
 643                requests: None,
 644                tokens: None,
 645                input_tokens: None,
 646                output_tokens: None,
 647            };
 648        }
 649
 650        Self {
 651            retry_after: parse_retry_after(headers),
 652            requests: RateLimit::from_headers("requests", headers).ok(),
 653            tokens: RateLimit::from_headers("tokens", headers).ok(),
 654            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 655            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 656        }
 657    }
 658}
 659
 660/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 661/// seconds). Note that other services might specify an HTTP date or some other format for this
 662/// header. Returns `None` if the header is not present or cannot be parsed.
 663pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 664    headers
 665        .get("retry-after")
 666        .and_then(|v| v.to_str().ok())
 667        .and_then(|v| v.parse::<u64>().ok())
 668        .map(Duration::from_secs)
 669}
 670
 671fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 672    Ok(headers
 673        .get(key)
 674        .with_context(|| format!("missing header `{key}`"))?
 675        .to_str()?)
 676}
 677
 678pub async fn stream_completion_with_rate_limit_info(
 679    client: &dyn HttpClient,
 680    api_url: &str,
 681    api_key: &str,
 682    request: Request,
 683    beta_headers: Option<String>,
 684) -> Result<
 685    (
 686        BoxStream<'static, Result<Event, AnthropicError>>,
 687        Option<RateLimitInfo>,
 688    ),
 689    AnthropicError,
 690> {
 691    let request = StreamingRequest {
 692        base: request,
 693        stream: true,
 694    };
 695
 696    let (response, rate_limits) =
 697        send_request(client, api_url, api_key, &request, beta_headers).await?;
 698
 699    if response.status().is_success() {
 700        let reader = BufReader::new(response.into_body());
 701        let stream = reader
 702            .lines()
 703            .filter_map(|line| async move {
 704                match line {
 705                    Ok(line) => {
 706                        let line = line.strip_prefix("data: ")?;
 707                        match serde_json::from_str(line) {
 708                            Ok(response) => Some(Ok(response)),
 709                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 710                        }
 711                    }
 712                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 713                }
 714            })
 715            .boxed();
 716        Ok((stream, Some(rate_limits)))
 717    } else {
 718        Err(handle_error_response(response, rate_limits).await)
 719    }
 720}
 721
 722#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 723#[serde(rename_all = "lowercase")]
 724pub enum CacheControlType {
 725    Ephemeral,
 726}
 727
 728#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 729pub struct CacheControl {
 730    #[serde(rename = "type")]
 731    pub cache_type: CacheControlType,
 732}
 733
 734#[derive(Debug, Serialize, Deserialize)]
 735pub struct Message {
 736    pub role: Role,
 737    pub content: Vec<RequestContent>,
 738}
 739
 740#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 741#[serde(rename_all = "lowercase")]
 742pub enum Role {
 743    User,
 744    Assistant,
 745}
 746
 747#[derive(Debug, Serialize, Deserialize)]
 748#[serde(tag = "type")]
 749pub enum RequestContent {
 750    #[serde(rename = "text")]
 751    Text {
 752        text: String,
 753        #[serde(skip_serializing_if = "Option::is_none")]
 754        cache_control: Option<CacheControl>,
 755    },
 756    #[serde(rename = "thinking")]
 757    Thinking {
 758        thinking: String,
 759        signature: String,
 760        #[serde(skip_serializing_if = "Option::is_none")]
 761        cache_control: Option<CacheControl>,
 762    },
 763    #[serde(rename = "redacted_thinking")]
 764    RedactedThinking { data: String },
 765    #[serde(rename = "image")]
 766    Image {
 767        source: ImageSource,
 768        #[serde(skip_serializing_if = "Option::is_none")]
 769        cache_control: Option<CacheControl>,
 770    },
 771    #[serde(rename = "tool_use")]
 772    ToolUse {
 773        id: String,
 774        name: String,
 775        input: serde_json::Value,
 776        #[serde(skip_serializing_if = "Option::is_none")]
 777        cache_control: Option<CacheControl>,
 778    },
 779    #[serde(rename = "tool_result")]
 780    ToolResult {
 781        tool_use_id: String,
 782        is_error: bool,
 783        content: ToolResultContent,
 784        #[serde(skip_serializing_if = "Option::is_none")]
 785        cache_control: Option<CacheControl>,
 786    },
 787}
 788
 789#[derive(Debug, Serialize, Deserialize)]
 790#[serde(untagged)]
 791pub enum ToolResultContent {
 792    Plain(String),
 793    Multipart(Vec<ToolResultPart>),
 794}
 795
 796#[derive(Debug, Serialize, Deserialize)]
 797#[serde(tag = "type", rename_all = "lowercase")]
 798pub enum ToolResultPart {
 799    Text { text: String },
 800    Image { source: ImageSource },
 801}
 802
 803#[derive(Debug, Serialize, Deserialize)]
 804#[serde(tag = "type")]
 805pub enum ResponseContent {
 806    #[serde(rename = "text")]
 807    Text { text: String },
 808    #[serde(rename = "thinking")]
 809    Thinking { thinking: String },
 810    #[serde(rename = "redacted_thinking")]
 811    RedactedThinking { data: String },
 812    #[serde(rename = "tool_use")]
 813    ToolUse {
 814        id: String,
 815        name: String,
 816        input: serde_json::Value,
 817    },
 818}
 819
 820#[derive(Debug, Serialize, Deserialize)]
 821pub struct ImageSource {
 822    #[serde(rename = "type")]
 823    pub source_type: String,
 824    pub media_type: String,
 825    pub data: String,
 826}
 827
 828#[derive(Debug, Serialize, Deserialize)]
 829pub struct Tool {
 830    pub name: String,
 831    pub description: String,
 832    pub input_schema: serde_json::Value,
 833}
 834
 835#[derive(Debug, Serialize, Deserialize)]
 836#[serde(tag = "type", rename_all = "lowercase")]
 837pub enum ToolChoice {
 838    Auto,
 839    Any,
 840    Tool { name: String },
 841    None,
 842}
 843
 844#[derive(Debug, Serialize, Deserialize)]
 845#[serde(tag = "type", rename_all = "lowercase")]
 846pub enum Thinking {
 847    Enabled { budget_tokens: Option<u32> },
 848}
 849
 850#[derive(Debug, Serialize, Deserialize)]
 851#[serde(untagged)]
 852pub enum StringOrContents {
 853    String(String),
 854    Content(Vec<RequestContent>),
 855}
 856
 857#[derive(Debug, Serialize, Deserialize)]
 858pub struct Request {
 859    pub model: String,
 860    pub max_tokens: u64,
 861    pub messages: Vec<Message>,
 862    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 863    pub tools: Vec<Tool>,
 864    #[serde(default, skip_serializing_if = "Option::is_none")]
 865    pub thinking: Option<Thinking>,
 866    #[serde(default, skip_serializing_if = "Option::is_none")]
 867    pub tool_choice: Option<ToolChoice>,
 868    #[serde(default, skip_serializing_if = "Option::is_none")]
 869    pub system: Option<StringOrContents>,
 870    #[serde(default, skip_serializing_if = "Option::is_none")]
 871    pub metadata: Option<Metadata>,
 872    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 873    pub stop_sequences: Vec<String>,
 874    #[serde(default, skip_serializing_if = "Option::is_none")]
 875    pub temperature: Option<f32>,
 876    #[serde(default, skip_serializing_if = "Option::is_none")]
 877    pub top_k: Option<u32>,
 878    #[serde(default, skip_serializing_if = "Option::is_none")]
 879    pub top_p: Option<f32>,
 880}
 881
 882#[derive(Debug, Serialize, Deserialize)]
 883struct StreamingRequest {
 884    #[serde(flatten)]
 885    pub base: Request,
 886    pub stream: bool,
 887}
 888
 889#[derive(Debug, Serialize, Deserialize)]
 890pub struct Metadata {
 891    pub user_id: Option<String>,
 892}
 893
 894#[derive(Debug, Serialize, Deserialize, Default)]
 895pub struct Usage {
 896    #[serde(default, skip_serializing_if = "Option::is_none")]
 897    pub input_tokens: Option<u64>,
 898    #[serde(default, skip_serializing_if = "Option::is_none")]
 899    pub output_tokens: Option<u64>,
 900    #[serde(default, skip_serializing_if = "Option::is_none")]
 901    pub cache_creation_input_tokens: Option<u64>,
 902    #[serde(default, skip_serializing_if = "Option::is_none")]
 903    pub cache_read_input_tokens: Option<u64>,
 904}
 905
 906#[derive(Debug, Serialize, Deserialize)]
 907pub struct Response {
 908    pub id: String,
 909    #[serde(rename = "type")]
 910    pub response_type: String,
 911    pub role: Role,
 912    pub content: Vec<ResponseContent>,
 913    pub model: String,
 914    #[serde(default, skip_serializing_if = "Option::is_none")]
 915    pub stop_reason: Option<String>,
 916    #[serde(default, skip_serializing_if = "Option::is_none")]
 917    pub stop_sequence: Option<String>,
 918    pub usage: Usage,
 919}
 920
 921#[derive(Debug, Serialize, Deserialize)]
 922#[serde(tag = "type")]
 923pub enum Event {
 924    #[serde(rename = "message_start")]
 925    MessageStart { message: Response },
 926    #[serde(rename = "content_block_start")]
 927    ContentBlockStart {
 928        index: usize,
 929        content_block: ResponseContent,
 930    },
 931    #[serde(rename = "content_block_delta")]
 932    ContentBlockDelta { index: usize, delta: ContentDelta },
 933    #[serde(rename = "content_block_stop")]
 934    ContentBlockStop { index: usize },
 935    #[serde(rename = "message_delta")]
 936    MessageDelta { delta: MessageDelta, usage: Usage },
 937    #[serde(rename = "message_stop")]
 938    MessageStop,
 939    #[serde(rename = "ping")]
 940    Ping,
 941    #[serde(rename = "error")]
 942    Error { error: ApiError },
 943}
 944
 945#[derive(Debug, Serialize, Deserialize)]
 946#[serde(tag = "type")]
 947pub enum ContentDelta {
 948    #[serde(rename = "text_delta")]
 949    TextDelta { text: String },
 950    #[serde(rename = "thinking_delta")]
 951    ThinkingDelta { thinking: String },
 952    #[serde(rename = "signature_delta")]
 953    SignatureDelta { signature: String },
 954    #[serde(rename = "input_json_delta")]
 955    InputJsonDelta { partial_json: String },
 956}
 957
 958#[derive(Debug, Serialize, Deserialize)]
 959pub struct MessageDelta {
 960    pub stop_reason: Option<String>,
 961    pub stop_sequence: Option<String>,
 962}
 963
 964#[derive(Debug)]
 965pub enum AnthropicError {
 966    /// Failed to serialize the HTTP request body to JSON
 967    SerializeRequest(serde_json::Error),
 968
 969    /// Failed to construct the HTTP request body
 970    BuildRequestBody(http::Error),
 971
 972    /// Failed to send the HTTP request
 973    HttpSend(anyhow::Error),
 974
 975    /// Failed to deserialize the response from JSON
 976    DeserializeResponse(serde_json::Error),
 977
 978    /// Failed to read from response stream
 979    ReadResponse(io::Error),
 980
 981    /// HTTP error response from the API
 982    HttpResponseError {
 983        status_code: StatusCode,
 984        message: String,
 985    },
 986
 987    /// Rate limit exceeded
 988    RateLimit { retry_after: Duration },
 989
 990    /// Server overloaded
 991    ServerOverloaded { retry_after: Option<Duration> },
 992
 993    /// API returned an error response
 994    ApiError(ApiError),
 995}
 996
 997#[derive(Debug, Serialize, Deserialize, Error)]
 998#[error("Anthropic API Error: {error_type}: {message}")]
 999pub struct ApiError {
1000    #[serde(rename = "type")]
1001    pub error_type: String,
1002    pub message: String,
1003}
1004
1005/// An Anthropic API error code.
1006/// <https://docs.anthropic.com/en/api/errors#http-errors>
1007#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
1008#[strum(serialize_all = "snake_case")]
1009pub enum ApiErrorCode {
1010    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
1011    InvalidRequestError,
1012    /// 401 - `authentication_error`: There's an issue with your API key.
1013    AuthenticationError,
1014    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
1015    PermissionError,
1016    /// 404 - `not_found_error`: The requested resource was not found.
1017    NotFoundError,
1018    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
1019    RequestTooLarge,
1020    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
1021    RateLimitError,
1022    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
1023    ApiError,
1024    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
1025    OverloadedError,
1026}
1027
1028impl ApiError {
1029    pub fn code(&self) -> Option<ApiErrorCode> {
1030        ApiErrorCode::from_str(&self.error_type).ok()
1031    }
1032
1033    pub fn is_rate_limit_error(&self) -> bool {
1034        matches!(self.error_type.as_str(), "rate_limit_error")
1035    }
1036
1037    pub fn match_window_exceeded(&self) -> Option<u64> {
1038        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1039            return None;
1040        };
1041
1042        parse_prompt_too_long(&self.message)
1043    }
1044}
1045
/// Pulls the token count out of a `"prompt is too long: <N> tokens..."` message.
///
/// The message must start with `"prompt is too long: "`, and the text between
/// that prefix and the first `" tokens"` must parse as a `u64`. Anything after
/// `" tokens"` is ignored. Returns `None` if any of these steps fails.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _trailing) = after_prefix.split_once(" tokens")?;
    token_count.parse().ok()
}
1054
/// Exercises `ApiError::match_window_exceeded` across matching messages,
/// non-matching messages, and a non-matching error type.
#[test]
fn test_match_window_exceeded() {
    // Builds an `ApiError` from the two strings and runs the method under test.
    let exceeded = |error_type: &str, message: &str| {
        ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        }
        .match_window_exceeded()
    };

    // Trailing text after " tokens" is ignored.
    assert_eq!(
        exceeded(
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000"
        ),
        Some(220_000)
    );

    // Nothing after " tokens" is fine too.
    assert_eq!(
        exceeded("invalid_request_error", "prompt is too long: 1234953 tokens"),
        Some(1234953)
    );

    // Right error type, unrelated message.
    assert_eq!(
        exceeded("invalid_request_error", "not a prompt length error"),
        None
    );

    // Matching message, but the error type is not `invalid_request_error`.
    assert_eq!(
        exceeded("rate_limit_error", "prompt is too long: 12345 tokens"),
        None
    );

    // The token count is not a number.
    assert_eq!(
        exceeded("invalid_request_error", "prompt is too long: invalid tokens"),
        None
    );
}