anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
  15pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  16
  17#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  18#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  19pub struct AnthropicModelCacheConfiguration {
  20    pub min_total_token: u64,
  21    pub should_speculate: bool,
  22    pub max_cache_anchors: usize,
  23}
  24
  25#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  26#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  27pub enum AnthropicModelMode {
  28    #[default]
  29    Default,
  30    Thinking {
  31        budget_tokens: Option<u32>,
  32    },
  33}
  34
  35impl From<ModelMode> for AnthropicModelMode {
  36    fn from(value: ModelMode) -> Self {
  37        match value {
  38            ModelMode::Default => AnthropicModelMode::Default,
  39            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  40        }
  41    }
  42}
  43
  44impl From<AnthropicModelMode> for ModelMode {
  45    fn from(value: AnthropicModelMode) -> Self {
  46        match value {
  47            AnthropicModelMode::Default => ModelMode::Default,
  48            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  49        }
  50    }
  51}
  52
  53#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  54#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
  55pub enum Model {
  56    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
  57    ClaudeOpus4,
  58    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
  59    ClaudeOpus4_1,
  60    #[serde(
  61        rename = "claude-opus-4-thinking",
  62        alias = "claude-opus-4-thinking-latest"
  63    )]
  64    ClaudeOpus4Thinking,
  65    #[serde(
  66        rename = "claude-opus-4-1-thinking",
  67        alias = "claude-opus-4-1-thinking-latest"
  68    )]
  69    ClaudeOpus4_1Thinking,
  70    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
  71    ClaudeOpus4_5,
  72    #[serde(
  73        rename = "claude-opus-4-5-thinking",
  74        alias = "claude-opus-4-5-thinking-latest"
  75    )]
  76    ClaudeOpus4_5Thinking,
  77    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
  78    ClaudeSonnet4,
  79    #[serde(
  80        rename = "claude-sonnet-4-thinking",
  81        alias = "claude-sonnet-4-thinking-latest"
  82    )]
  83    ClaudeSonnet4Thinking,
  84    #[default]
  85    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
  86    ClaudeSonnet4_5,
  87    #[serde(
  88        rename = "claude-sonnet-4-5-thinking",
  89        alias = "claude-sonnet-4-5-thinking-latest"
  90    )]
  91    ClaudeSonnet4_5Thinking,
  92    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
  93    Claude3_7Sonnet,
  94    #[serde(
  95        rename = "claude-3-7-sonnet-thinking",
  96        alias = "claude-3-7-sonnet-thinking-latest"
  97    )]
  98    Claude3_7SonnetThinking,
  99    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
 100    Claude3_5Sonnet,
 101    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
 102    ClaudeHaiku4_5,
 103    #[serde(
 104        rename = "claude-haiku-4-5-thinking",
 105        alias = "claude-haiku-4-5-thinking-latest"
 106    )]
 107    ClaudeHaiku4_5Thinking,
 108    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
 109    Claude3_5Haiku,
 110    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
 111    Claude3Opus,
 112    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
 113    Claude3Sonnet,
 114    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
 115    Claude3Haiku,
 116    #[serde(rename = "custom")]
 117    Custom {
 118        name: String,
 119        max_tokens: u64,
 120        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
 121        display_name: Option<String>,
 122        /// Override this model with a different Anthropic model for tool calls.
 123        tool_override: Option<String>,
 124        /// Indicates whether this custom model supports caching.
 125        cache_configuration: Option<AnthropicModelCacheConfiguration>,
 126        max_output_tokens: Option<u64>,
 127        default_temperature: Option<f32>,
 128        #[serde(default)]
 129        extra_beta_headers: Vec<String>,
 130        #[serde(default)]
 131        mode: AnthropicModelMode,
 132    },
 133}
 134
 135impl Model {
 136    pub fn default_fast() -> Self {
 137        Self::Claude3_5Haiku
 138    }
 139
 140    pub fn from_id(id: &str) -> Result<Self> {
 141        if id.starts_with("claude-opus-4-5-thinking") {
 142            return Ok(Self::ClaudeOpus4_5Thinking);
 143        }
 144
 145        if id.starts_with("claude-opus-4-5") {
 146            return Ok(Self::ClaudeOpus4_5);
 147        }
 148
 149        if id.starts_with("claude-opus-4-1-thinking") {
 150            return Ok(Self::ClaudeOpus4_1Thinking);
 151        }
 152
 153        if id.starts_with("claude-opus-4-thinking") {
 154            return Ok(Self::ClaudeOpus4Thinking);
 155        }
 156
 157        if id.starts_with("claude-opus-4-1") {
 158            return Ok(Self::ClaudeOpus4_1);
 159        }
 160
 161        if id.starts_with("claude-opus-4") {
 162            return Ok(Self::ClaudeOpus4);
 163        }
 164
 165        if id.starts_with("claude-sonnet-4-5-thinking") {
 166            return Ok(Self::ClaudeSonnet4_5Thinking);
 167        }
 168
 169        if id.starts_with("claude-sonnet-4-5") {
 170            return Ok(Self::ClaudeSonnet4_5);
 171        }
 172
 173        if id.starts_with("claude-sonnet-4-thinking") {
 174            return Ok(Self::ClaudeSonnet4Thinking);
 175        }
 176
 177        if id.starts_with("claude-sonnet-4") {
 178            return Ok(Self::ClaudeSonnet4);
 179        }
 180
 181        if id.starts_with("claude-3-7-sonnet-thinking") {
 182            return Ok(Self::Claude3_7SonnetThinking);
 183        }
 184
 185        if id.starts_with("claude-3-7-sonnet") {
 186            return Ok(Self::Claude3_7Sonnet);
 187        }
 188
 189        if id.starts_with("claude-3-5-sonnet") {
 190            return Ok(Self::Claude3_5Sonnet);
 191        }
 192
 193        if id.starts_with("claude-haiku-4-5-thinking") {
 194            return Ok(Self::ClaudeHaiku4_5Thinking);
 195        }
 196
 197        if id.starts_with("claude-haiku-4-5") {
 198            return Ok(Self::ClaudeHaiku4_5);
 199        }
 200
 201        if id.starts_with("claude-3-5-haiku") {
 202            return Ok(Self::Claude3_5Haiku);
 203        }
 204
 205        if id.starts_with("claude-3-opus") {
 206            return Ok(Self::Claude3Opus);
 207        }
 208
 209        if id.starts_with("claude-3-sonnet") {
 210            return Ok(Self::Claude3Sonnet);
 211        }
 212
 213        if id.starts_with("claude-3-haiku") {
 214            return Ok(Self::Claude3Haiku);
 215        }
 216
 217        Err(anyhow!("invalid model ID: {id}"))
 218    }
 219
 220    pub fn id(&self) -> &str {
 221        match self {
 222            Self::ClaudeOpus4 => "claude-opus-4-latest",
 223            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 224            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 225            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 226            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 227            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
 228            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 229            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 230            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 231            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 232            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 233            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
 234            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
 235            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 236            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 237            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 238            Self::Claude3Opus => "claude-3-opus-latest",
 239            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 240            Self::Claude3Haiku => "claude-3-haiku-20240307",
 241            Self::Custom { name, .. } => name,
 242        }
 243    }
 244
 245    /// The id of the model that should be used for making API requests
 246    pub fn request_id(&self) -> &str {
 247        match self {
 248            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 249            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 250            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
 251            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 252            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
 253            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 254            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
 255            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 256            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 257            Self::Claude3Opus => "claude-3-opus-latest",
 258            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 259            Self::Claude3Haiku => "claude-3-haiku-20240307",
 260            Self::Custom { name, .. } => name,
 261        }
 262    }
 263
 264    pub fn display_name(&self) -> &str {
 265        match self {
 266            Self::ClaudeOpus4 => "Claude Opus 4",
 267            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 268            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 269            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 270            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 271            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
 272            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 273            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 274            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 275            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 276            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
 277            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
 278            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
 279            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 280            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 281            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
 282            Self::Claude3Opus => "Claude 3 Opus",
 283            Self::Claude3Sonnet => "Claude 3 Sonnet",
 284            Self::Claude3Haiku => "Claude 3 Haiku",
 285            Self::Custom {
 286                name, display_name, ..
 287            } => display_name.as_ref().unwrap_or(name),
 288        }
 289    }
 290
 291    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 292        match self {
 293            Self::ClaudeOpus4
 294            | Self::ClaudeOpus4_1
 295            | Self::ClaudeOpus4Thinking
 296            | Self::ClaudeOpus4_1Thinking
 297            | Self::ClaudeOpus4_5
 298            | Self::ClaudeOpus4_5Thinking
 299            | Self::ClaudeSonnet4
 300            | Self::ClaudeSonnet4Thinking
 301            | Self::ClaudeSonnet4_5
 302            | Self::ClaudeSonnet4_5Thinking
 303            | Self::Claude3_5Sonnet
 304            | Self::ClaudeHaiku4_5
 305            | Self::ClaudeHaiku4_5Thinking
 306            | Self::Claude3_5Haiku
 307            | Self::Claude3_7Sonnet
 308            | Self::Claude3_7SonnetThinking
 309            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 310                min_total_token: 2_048,
 311                should_speculate: true,
 312                max_cache_anchors: 4,
 313            }),
 314            Self::Custom {
 315                cache_configuration,
 316                ..
 317            } => cache_configuration.clone(),
 318            _ => None,
 319        }
 320    }
 321
 322    pub fn max_token_count(&self) -> u64 {
 323        match self {
 324            Self::ClaudeOpus4
 325            | Self::ClaudeOpus4_1
 326            | Self::ClaudeOpus4Thinking
 327            | Self::ClaudeOpus4_1Thinking
 328            | Self::ClaudeOpus4_5
 329            | Self::ClaudeOpus4_5Thinking
 330            | Self::ClaudeSonnet4
 331            | Self::ClaudeSonnet4Thinking
 332            | Self::ClaudeSonnet4_5
 333            | Self::ClaudeSonnet4_5Thinking
 334            | Self::Claude3_5Sonnet
 335            | Self::ClaudeHaiku4_5
 336            | Self::ClaudeHaiku4_5Thinking
 337            | Self::Claude3_5Haiku
 338            | Self::Claude3_7Sonnet
 339            | Self::Claude3_7SonnetThinking
 340            | Self::Claude3Opus
 341            | Self::Claude3Sonnet
 342            | Self::Claude3Haiku => 200_000,
 343            Self::Custom { max_tokens, .. } => *max_tokens,
 344        }
 345    }
 346
 347    pub fn max_output_tokens(&self) -> u64 {
 348        match self {
 349            Self::ClaudeOpus4
 350            | Self::ClaudeOpus4_1
 351            | Self::ClaudeOpus4Thinking
 352            | Self::ClaudeOpus4_1Thinking
 353            | Self::ClaudeOpus4_5
 354            | Self::ClaudeOpus4_5Thinking
 355            | Self::ClaudeSonnet4
 356            | Self::ClaudeSonnet4Thinking
 357            | Self::ClaudeSonnet4_5
 358            | Self::ClaudeSonnet4_5Thinking
 359            | Self::Claude3_5Sonnet
 360            | Self::Claude3_7Sonnet
 361            | Self::Claude3_7SonnetThinking
 362            | Self::Claude3_5Haiku => 8_192,
 363            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
 364            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
 365            Self::Custom {
 366                max_output_tokens, ..
 367            } => max_output_tokens.unwrap_or(4_096),
 368        }
 369    }
 370
 371    pub fn default_temperature(&self) -> f32 {
 372        match self {
 373            Self::ClaudeOpus4
 374            | Self::ClaudeOpus4_1
 375            | Self::ClaudeOpus4Thinking
 376            | Self::ClaudeOpus4_1Thinking
 377            | Self::ClaudeOpus4_5
 378            | Self::ClaudeOpus4_5Thinking
 379            | Self::ClaudeSonnet4
 380            | Self::ClaudeSonnet4Thinking
 381            | Self::ClaudeSonnet4_5
 382            | Self::ClaudeSonnet4_5Thinking
 383            | Self::Claude3_5Sonnet
 384            | Self::Claude3_7Sonnet
 385            | Self::Claude3_7SonnetThinking
 386            | Self::ClaudeHaiku4_5
 387            | Self::ClaudeHaiku4_5Thinking
 388            | Self::Claude3_5Haiku
 389            | Self::Claude3Opus
 390            | Self::Claude3Sonnet
 391            | Self::Claude3Haiku => 1.0,
 392            Self::Custom {
 393                default_temperature,
 394                ..
 395            } => default_temperature.unwrap_or(1.0),
 396        }
 397    }
 398
 399    pub fn mode(&self) -> AnthropicModelMode {
 400        match self {
 401            Self::ClaudeOpus4
 402            | Self::ClaudeOpus4_1
 403            | Self::ClaudeOpus4_5
 404            | Self::ClaudeSonnet4
 405            | Self::ClaudeSonnet4_5
 406            | Self::Claude3_5Sonnet
 407            | Self::Claude3_7Sonnet
 408            | Self::ClaudeHaiku4_5
 409            | Self::Claude3_5Haiku
 410            | Self::Claude3Opus
 411            | Self::Claude3Sonnet
 412            | Self::Claude3Haiku => AnthropicModelMode::Default,
 413            Self::ClaudeOpus4Thinking
 414            | Self::ClaudeOpus4_1Thinking
 415            | Self::ClaudeOpus4_5Thinking
 416            | Self::ClaudeSonnet4Thinking
 417            | Self::ClaudeSonnet4_5Thinking
 418            | Self::ClaudeHaiku4_5Thinking
 419            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
 420                budget_tokens: Some(4_096),
 421            },
 422            Self::Custom { mode, .. } => mode.clone(),
 423        }
 424    }
 425
 426    pub const DEFAULT_BETA_HEADERS: &[&str] = &["prompt-caching-2024-07-31"];
 427
 428    pub fn beta_headers(&self) -> String {
 429        let mut headers = Self::DEFAULT_BETA_HEADERS
 430            .iter()
 431            .map(|header| header.to_string())
 432            .collect::<Vec<_>>();
 433
 434        match self {
 435            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
 436                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
 437                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
 438                headers.push("token-efficient-tools-2025-02-19".to_string());
 439            }
 440            Self::Custom {
 441                extra_beta_headers, ..
 442            } => {
 443                headers.extend(
 444                    extra_beta_headers
 445                        .iter()
 446                        .filter(|header| !header.trim().is_empty())
 447                        .cloned(),
 448                );
 449            }
 450            _ => {}
 451        }
 452
 453        headers.join(",")
 454    }
 455
 456    pub fn tool_model_id(&self) -> &str {
 457        if let Self::Custom {
 458            tool_override: Some(tool_override),
 459            ..
 460        } = self
 461        {
 462            tool_override
 463        } else {
 464            self.request_id()
 465        }
 466    }
 467}
 468
 469pub async fn complete(
 470    client: &dyn HttpClient,
 471    api_url: &str,
 472    api_key: &str,
 473    request: Request,
 474    beta_headers: String,
 475) -> Result<Response, AnthropicError> {
 476    let uri = format!("{api_url}/v1/messages");
 477    let request_builder = HttpRequest::builder()
 478        .method(Method::POST)
 479        .uri(uri)
 480        .header("Anthropic-Version", "2023-06-01")
 481        .header("Anthropic-Beta", beta_headers)
 482        .header("X-Api-Key", api_key.trim())
 483        .header("Content-Type", "application/json");
 484
 485    let serialized_request =
 486        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 487    let request = request_builder
 488        .body(AsyncBody::from(serialized_request))
 489        .map_err(AnthropicError::BuildRequestBody)?;
 490
 491    let mut response = client
 492        .send(request)
 493        .await
 494        .map_err(AnthropicError::HttpSend)?;
 495    let status_code = response.status();
 496    let mut body = String::new();
 497    response
 498        .body_mut()
 499        .read_to_string(&mut body)
 500        .await
 501        .map_err(AnthropicError::ReadResponse)?;
 502
 503    if status_code.is_success() {
 504        Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
 505    } else {
 506        Err(AnthropicError::HttpResponseError {
 507            status_code,
 508            message: body,
 509        })
 510    }
 511}
 512
 513pub async fn stream_completion(
 514    client: &dyn HttpClient,
 515    api_url: &str,
 516    api_key: &str,
 517    request: Request,
 518    beta_headers: String,
 519) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 520    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 521        .await
 522        .map(|output| output.0)
 523}
 524
 525/// An individual rate limit.
 526#[derive(Debug)]
 527pub struct RateLimit {
 528    pub limit: usize,
 529    pub remaining: usize,
 530    pub reset: DateTime<Utc>,
 531}
 532
 533impl RateLimit {
 534    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 535        let limit =
 536            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 537        let remaining = get_header(
 538            &format!("anthropic-ratelimit-{resource}-remaining"),
 539            headers,
 540        )?
 541        .parse()?;
 542        let reset = DateTime::parse_from_rfc3339(get_header(
 543            &format!("anthropic-ratelimit-{resource}-reset"),
 544            headers,
 545        )?)?
 546        .to_utc();
 547
 548        Ok(Self {
 549            limit,
 550            remaining,
 551            reset,
 552        })
 553    }
 554}
 555
 556/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
 557#[derive(Debug)]
 558pub struct RateLimitInfo {
 559    pub retry_after: Option<Duration>,
 560    pub requests: Option<RateLimit>,
 561    pub tokens: Option<RateLimit>,
 562    pub input_tokens: Option<RateLimit>,
 563    pub output_tokens: Option<RateLimit>,
 564}
 565
 566impl RateLimitInfo {
 567    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 568        // Check if any rate limit headers exist
 569        let has_rate_limit_headers = headers
 570            .keys()
 571            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 572
 573        if !has_rate_limit_headers {
 574            return Self {
 575                retry_after: None,
 576                requests: None,
 577                tokens: None,
 578                input_tokens: None,
 579                output_tokens: None,
 580            };
 581        }
 582
 583        Self {
 584            retry_after: parse_retry_after(headers),
 585            requests: RateLimit::from_headers("requests", headers).ok(),
 586            tokens: RateLimit::from_headers("tokens", headers).ok(),
 587            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 588            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 589        }
 590    }
 591}
 592
 593/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 594/// seconds). Note that other services might specify an HTTP date or some other format for this
 595/// header. Returns `None` if the header is not present or cannot be parsed.
 596pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 597    headers
 598        .get("retry-after")
 599        .and_then(|v| v.to_str().ok())
 600        .and_then(|v| v.parse::<u64>().ok())
 601        .map(Duration::from_secs)
 602}
 603
 604fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 605    Ok(headers
 606        .get(key)
 607        .with_context(|| format!("missing header `{key}`"))?
 608        .to_str()?)
 609}
 610
 611pub async fn stream_completion_with_rate_limit_info(
 612    client: &dyn HttpClient,
 613    api_url: &str,
 614    api_key: &str,
 615    request: Request,
 616    beta_headers: String,
 617) -> Result<
 618    (
 619        BoxStream<'static, Result<Event, AnthropicError>>,
 620        Option<RateLimitInfo>,
 621    ),
 622    AnthropicError,
 623> {
 624    let request = StreamingRequest {
 625        base: request,
 626        stream: true,
 627    };
 628    let uri = format!("{api_url}/v1/messages");
 629
 630    let request_builder = HttpRequest::builder()
 631        .method(Method::POST)
 632        .uri(uri)
 633        .header("Anthropic-Version", "2023-06-01")
 634        .header("Anthropic-Beta", beta_headers)
 635        .header("X-Api-Key", api_key.trim())
 636        .header("Content-Type", "application/json");
 637    let serialized_request =
 638        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 639    let request = request_builder
 640        .body(AsyncBody::from(serialized_request))
 641        .map_err(AnthropicError::BuildRequestBody)?;
 642
 643    let mut response = client
 644        .send(request)
 645        .await
 646        .map_err(AnthropicError::HttpSend)?;
 647    let rate_limits = RateLimitInfo::from_headers(response.headers());
 648    if response.status().is_success() {
 649        let reader = BufReader::new(response.into_body());
 650        let stream = reader
 651            .lines()
 652            .filter_map(|line| async move {
 653                match line {
 654                    Ok(line) => {
 655                        let line = line.strip_prefix("data: ")?;
 656                        match serde_json::from_str(line) {
 657                            Ok(response) => Some(Ok(response)),
 658                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 659                        }
 660                    }
 661                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 662                }
 663            })
 664            .boxed();
 665        Ok((stream, Some(rate_limits)))
 666    } else if response.status().as_u16() == 529 {
 667        Err(AnthropicError::ServerOverloaded {
 668            retry_after: rate_limits.retry_after,
 669        })
 670    } else if let Some(retry_after) = rate_limits.retry_after {
 671        Err(AnthropicError::RateLimit { retry_after })
 672    } else {
 673        let mut body = String::new();
 674        response
 675            .body_mut()
 676            .read_to_string(&mut body)
 677            .await
 678            .map_err(AnthropicError::ReadResponse)?;
 679
 680        match serde_json::from_str::<Event>(&body) {
 681            Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
 682            Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
 683                status_code: response.status(),
 684                message: body,
 685            }),
 686        }
 687    }
 688}
 689
 690#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 691#[serde(rename_all = "lowercase")]
 692pub enum CacheControlType {
 693    Ephemeral,
 694}
 695
 696#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 697pub struct CacheControl {
 698    #[serde(rename = "type")]
 699    pub cache_type: CacheControlType,
 700}
 701
 702#[derive(Debug, Serialize, Deserialize)]
 703pub struct Message {
 704    pub role: Role,
 705    pub content: Vec<RequestContent>,
 706}
 707
 708#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 709#[serde(rename_all = "lowercase")]
 710pub enum Role {
 711    User,
 712    Assistant,
 713}
 714
 715#[derive(Debug, Serialize, Deserialize)]
 716#[serde(tag = "type")]
 717pub enum RequestContent {
 718    #[serde(rename = "text")]
 719    Text {
 720        text: String,
 721        #[serde(skip_serializing_if = "Option::is_none")]
 722        cache_control: Option<CacheControl>,
 723    },
 724    #[serde(rename = "thinking")]
 725    Thinking {
 726        thinking: String,
 727        signature: String,
 728        #[serde(skip_serializing_if = "Option::is_none")]
 729        cache_control: Option<CacheControl>,
 730    },
 731    #[serde(rename = "redacted_thinking")]
 732    RedactedThinking { data: String },
 733    #[serde(rename = "image")]
 734    Image {
 735        source: ImageSource,
 736        #[serde(skip_serializing_if = "Option::is_none")]
 737        cache_control: Option<CacheControl>,
 738    },
 739    #[serde(rename = "tool_use")]
 740    ToolUse {
 741        id: String,
 742        name: String,
 743        input: serde_json::Value,
 744        #[serde(skip_serializing_if = "Option::is_none")]
 745        cache_control: Option<CacheControl>,
 746    },
 747    #[serde(rename = "tool_result")]
 748    ToolResult {
 749        tool_use_id: String,
 750        is_error: bool,
 751        content: ToolResultContent,
 752        #[serde(skip_serializing_if = "Option::is_none")]
 753        cache_control: Option<CacheControl>,
 754    },
 755}
 756
 757#[derive(Debug, Serialize, Deserialize)]
 758#[serde(untagged)]
 759pub enum ToolResultContent {
 760    Plain(String),
 761    Multipart(Vec<ToolResultPart>),
 762}
 763
 764#[derive(Debug, Serialize, Deserialize)]
 765#[serde(tag = "type", rename_all = "lowercase")]
 766pub enum ToolResultPart {
 767    Text { text: String },
 768    Image { source: ImageSource },
 769}
 770
 771#[derive(Debug, Serialize, Deserialize)]
 772#[serde(tag = "type")]
 773pub enum ResponseContent {
 774    #[serde(rename = "text")]
 775    Text { text: String },
 776    #[serde(rename = "thinking")]
 777    Thinking { thinking: String },
 778    #[serde(rename = "redacted_thinking")]
 779    RedactedThinking { data: String },
 780    #[serde(rename = "tool_use")]
 781    ToolUse {
 782        id: String,
 783        name: String,
 784        input: serde_json::Value,
 785    },
 786}
 787
 788#[derive(Debug, Serialize, Deserialize)]
 789pub struct ImageSource {
 790    #[serde(rename = "type")]
 791    pub source_type: String,
 792    pub media_type: String,
 793    pub data: String,
 794}
 795
 796#[derive(Debug, Serialize, Deserialize)]
 797pub struct Tool {
 798    pub name: String,
 799    pub description: String,
 800    pub input_schema: serde_json::Value,
 801}
 802
 803#[derive(Debug, Serialize, Deserialize)]
 804#[serde(tag = "type", rename_all = "lowercase")]
 805pub enum ToolChoice {
 806    Auto,
 807    Any,
 808    Tool { name: String },
 809    None,
 810}
 811
 812#[derive(Debug, Serialize, Deserialize)]
 813#[serde(tag = "type", rename_all = "lowercase")]
 814pub enum Thinking {
 815    Enabled { budget_tokens: Option<u32> },
 816}
 817
 818#[derive(Debug, Serialize, Deserialize)]
 819#[serde(untagged)]
 820pub enum StringOrContents {
 821    String(String),
 822    Content(Vec<RequestContent>),
 823}
 824
 825#[derive(Debug, Serialize, Deserialize)]
 826pub struct Request {
 827    pub model: String,
 828    pub max_tokens: u64,
 829    pub messages: Vec<Message>,
 830    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 831    pub tools: Vec<Tool>,
 832    #[serde(default, skip_serializing_if = "Option::is_none")]
 833    pub thinking: Option<Thinking>,
 834    #[serde(default, skip_serializing_if = "Option::is_none")]
 835    pub tool_choice: Option<ToolChoice>,
 836    #[serde(default, skip_serializing_if = "Option::is_none")]
 837    pub system: Option<StringOrContents>,
 838    #[serde(default, skip_serializing_if = "Option::is_none")]
 839    pub metadata: Option<Metadata>,
 840    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 841    pub stop_sequences: Vec<String>,
 842    #[serde(default, skip_serializing_if = "Option::is_none")]
 843    pub temperature: Option<f32>,
 844    #[serde(default, skip_serializing_if = "Option::is_none")]
 845    pub top_k: Option<u32>,
 846    #[serde(default, skip_serializing_if = "Option::is_none")]
 847    pub top_p: Option<f32>,
 848}
 849
 850#[derive(Debug, Serialize, Deserialize)]
 851struct StreamingRequest {
 852    #[serde(flatten)]
 853    pub base: Request,
 854    pub stream: bool,
 855}
 856
 857#[derive(Debug, Serialize, Deserialize)]
 858pub struct Metadata {
 859    pub user_id: Option<String>,
 860}
 861
 862#[derive(Debug, Serialize, Deserialize, Default)]
 863pub struct Usage {
 864    #[serde(default, skip_serializing_if = "Option::is_none")]
 865    pub input_tokens: Option<u64>,
 866    #[serde(default, skip_serializing_if = "Option::is_none")]
 867    pub output_tokens: Option<u64>,
 868    #[serde(default, skip_serializing_if = "Option::is_none")]
 869    pub cache_creation_input_tokens: Option<u64>,
 870    #[serde(default, skip_serializing_if = "Option::is_none")]
 871    pub cache_read_input_tokens: Option<u64>,
 872}
 873
 874#[derive(Debug, Serialize, Deserialize)]
 875pub struct Response {
 876    pub id: String,
 877    #[serde(rename = "type")]
 878    pub response_type: String,
 879    pub role: Role,
 880    pub content: Vec<ResponseContent>,
 881    pub model: String,
 882    #[serde(default, skip_serializing_if = "Option::is_none")]
 883    pub stop_reason: Option<String>,
 884    #[serde(default, skip_serializing_if = "Option::is_none")]
 885    pub stop_sequence: Option<String>,
 886    pub usage: Usage,
 887}
 888
 889#[derive(Debug, Serialize, Deserialize)]
 890#[serde(tag = "type")]
 891pub enum Event {
 892    #[serde(rename = "message_start")]
 893    MessageStart { message: Response },
 894    #[serde(rename = "content_block_start")]
 895    ContentBlockStart {
 896        index: usize,
 897        content_block: ResponseContent,
 898    },
 899    #[serde(rename = "content_block_delta")]
 900    ContentBlockDelta { index: usize, delta: ContentDelta },
 901    #[serde(rename = "content_block_stop")]
 902    ContentBlockStop { index: usize },
 903    #[serde(rename = "message_delta")]
 904    MessageDelta { delta: MessageDelta, usage: Usage },
 905    #[serde(rename = "message_stop")]
 906    MessageStop,
 907    #[serde(rename = "ping")]
 908    Ping,
 909    #[serde(rename = "error")]
 910    Error { error: ApiError },
 911}
 912
 913#[derive(Debug, Serialize, Deserialize)]
 914#[serde(tag = "type")]
 915pub enum ContentDelta {
 916    #[serde(rename = "text_delta")]
 917    TextDelta { text: String },
 918    #[serde(rename = "thinking_delta")]
 919    ThinkingDelta { thinking: String },
 920    #[serde(rename = "signature_delta")]
 921    SignatureDelta { signature: String },
 922    #[serde(rename = "input_json_delta")]
 923    InputJsonDelta { partial_json: String },
 924}
 925
 926#[derive(Debug, Serialize, Deserialize)]
 927pub struct MessageDelta {
 928    pub stop_reason: Option<String>,
 929    pub stop_sequence: Option<String>,
 930}
 931
 932#[derive(Debug)]
 933pub enum AnthropicError {
 934    /// Failed to serialize the HTTP request body to JSON
 935    SerializeRequest(serde_json::Error),
 936
 937    /// Failed to construct the HTTP request body
 938    BuildRequestBody(http::Error),
 939
 940    /// Failed to send the HTTP request
 941    HttpSend(anyhow::Error),
 942
 943    /// Failed to deserialize the response from JSON
 944    DeserializeResponse(serde_json::Error),
 945
 946    /// Failed to read from response stream
 947    ReadResponse(io::Error),
 948
 949    /// HTTP error response from the API
 950    HttpResponseError {
 951        status_code: StatusCode,
 952        message: String,
 953    },
 954
 955    /// Rate limit exceeded
 956    RateLimit { retry_after: Duration },
 957
 958    /// Server overloaded
 959    ServerOverloaded { retry_after: Option<Duration> },
 960
 961    /// API returned an error response
 962    ApiError(ApiError),
 963}
 964
 965#[derive(Debug, Serialize, Deserialize, Error)]
 966#[error("Anthropic API Error: {error_type}: {message}")]
 967pub struct ApiError {
 968    #[serde(rename = "type")]
 969    pub error_type: String,
 970    pub message: String,
 971}
 972
 973/// An Anthropic API error code.
 974/// <https://docs.anthropic.com/en/api/errors#http-errors>
 975#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
 976#[strum(serialize_all = "snake_case")]
 977pub enum ApiErrorCode {
 978    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
 979    InvalidRequestError,
 980    /// 401 - `authentication_error`: There's an issue with your API key.
 981    AuthenticationError,
 982    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
 983    PermissionError,
 984    /// 404 - `not_found_error`: The requested resource was not found.
 985    NotFoundError,
 986    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
 987    RequestTooLarge,
 988    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
 989    RateLimitError,
 990    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
 991    ApiError,
 992    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
 993    OverloadedError,
 994}
 995
 996impl ApiError {
 997    pub fn code(&self) -> Option<ApiErrorCode> {
 998        ApiErrorCode::from_str(&self.error_type).ok()
 999    }
1000
1001    pub fn is_rate_limit_error(&self) -> bool {
1002        matches!(self.error_type.as_str(), "rate_limit_error")
1003    }
1004
1005    pub fn match_window_exceeded(&self) -> Option<u64> {
1006        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1007            return None;
1008        };
1009
1010        parse_prompt_too_long(&self.message)
1011    }
1012}
1013
/// Extracts the token count from a `prompt is too long: <N> tokens…` message.
///
/// The message must start with `"prompt is too long: "`. The text between that
/// prefix and the first `" tokens"` is parsed as a `u64`. Returns `None` when
/// the prefix or the `" tokens"` marker is missing, or when the text in between
/// is not a valid `u64`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _rest) = after_prefix.split_once(" tokens")?;
    token_count.parse().ok()
}
1022
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases = [
        // Count followed by extra text after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Count with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        (
            "invalid_request_error",
            "not a prompt length error",
            None,
        ),
        // Matching message, wrong error type.
        (
            "rate_limit_error",
            "prompt is too long: 12345 tokens",
            None,
        ),
        // Non-numeric count.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}