anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
/// Base URL of Anthropic's hosted API.
///
/// The request functions in this file take an `api_url` argument and append `/v1/messages`
/// to it; presumably this constant is the default value for that argument — confirm at the
/// call sites.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  16
  17#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  18#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  19pub struct AnthropicModelCacheConfiguration {
  20    pub min_total_token: u64,
  21    pub should_speculate: bool,
  22    pub max_cache_anchors: usize,
  23}
  24
  25#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  26#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  27pub enum AnthropicModelMode {
  28    #[default]
  29    Default,
  30    Thinking {
  31        budget_tokens: Option<u32>,
  32    },
  33}
  34
  35impl From<ModelMode> for AnthropicModelMode {
  36    fn from(value: ModelMode) -> Self {
  37        match value {
  38            ModelMode::Default => AnthropicModelMode::Default,
  39            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  40        }
  41    }
  42}
  43
  44impl From<AnthropicModelMode> for ModelMode {
  45    fn from(value: AnthropicModelMode) -> Self {
  46        match value {
  47            AnthropicModelMode::Default => ModelMode::Default,
  48            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  49        }
  50    }
  51}
  52
  53#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  54#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
  55pub enum Model {
  56    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
  57    ClaudeOpus4,
  58    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
  59    ClaudeOpus4_1,
  60    #[serde(
  61        rename = "claude-opus-4-thinking",
  62        alias = "claude-opus-4-thinking-latest"
  63    )]
  64    ClaudeOpus4Thinking,
  65    #[serde(
  66        rename = "claude-opus-4-1-thinking",
  67        alias = "claude-opus-4-1-thinking-latest"
  68    )]
  69    ClaudeOpus4_1Thinking,
  70    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
  71    ClaudeSonnet4,
  72    #[serde(
  73        rename = "claude-sonnet-4-thinking",
  74        alias = "claude-sonnet-4-thinking-latest"
  75    )]
  76    ClaudeSonnet4Thinking,
  77    #[default]
  78    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
  79    ClaudeSonnet4_5,
  80    #[serde(
  81        rename = "claude-sonnet-4-5-thinking",
  82        alias = "claude-sonnet-4-5-thinking-latest"
  83    )]
  84    ClaudeSonnet4_5Thinking,
  85    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
  86    Claude3_7Sonnet,
  87    #[serde(
  88        rename = "claude-3-7-sonnet-thinking",
  89        alias = "claude-3-7-sonnet-thinking-latest"
  90    )]
  91    Claude3_7SonnetThinking,
  92    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
  93    Claude3_5Sonnet,
  94    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
  95    ClaudeHaiku4_5,
  96    #[serde(
  97        rename = "claude-haiku-4-5-thinking",
  98        alias = "claude-haiku-4-5-thinking-latest"
  99    )]
 100    ClaudeHaiku4_5Thinking,
 101    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
 102    Claude3_5Haiku,
 103    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
 104    Claude3Opus,
 105    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
 106    Claude3Sonnet,
 107    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
 108    Claude3Haiku,
 109    #[serde(rename = "custom")]
 110    Custom {
 111        name: String,
 112        max_tokens: u64,
 113        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
 114        display_name: Option<String>,
 115        /// Override this model with a different Anthropic model for tool calls.
 116        tool_override: Option<String>,
 117        /// Indicates whether this custom model supports caching.
 118        cache_configuration: Option<AnthropicModelCacheConfiguration>,
 119        max_output_tokens: Option<u64>,
 120        default_temperature: Option<f32>,
 121        #[serde(default)]
 122        extra_beta_headers: Vec<String>,
 123        #[serde(default)]
 124        mode: AnthropicModelMode,
 125    },
 126}
 127
 128impl Model {
 129    pub fn default_fast() -> Self {
 130        Self::Claude3_5Haiku
 131    }
 132
 133    pub fn from_id(id: &str) -> Result<Self> {
 134        if id.starts_with("claude-opus-4-1-thinking") {
 135            return Ok(Self::ClaudeOpus4_1Thinking);
 136        }
 137
 138        if id.starts_with("claude-opus-4-thinking") {
 139            return Ok(Self::ClaudeOpus4Thinking);
 140        }
 141
 142        if id.starts_with("claude-opus-4-1") {
 143            return Ok(Self::ClaudeOpus4_1);
 144        }
 145
 146        if id.starts_with("claude-opus-4") {
 147            return Ok(Self::ClaudeOpus4);
 148        }
 149
 150        if id.starts_with("claude-sonnet-4-5-thinking") {
 151            return Ok(Self::ClaudeSonnet4_5Thinking);
 152        }
 153
 154        if id.starts_with("claude-sonnet-4-5") {
 155            return Ok(Self::ClaudeSonnet4_5);
 156        }
 157
 158        if id.starts_with("claude-sonnet-4-thinking") {
 159            return Ok(Self::ClaudeSonnet4Thinking);
 160        }
 161
 162        if id.starts_with("claude-sonnet-4") {
 163            return Ok(Self::ClaudeSonnet4);
 164        }
 165
 166        if id.starts_with("claude-3-7-sonnet-thinking") {
 167            return Ok(Self::Claude3_7SonnetThinking);
 168        }
 169
 170        if id.starts_with("claude-3-7-sonnet") {
 171            return Ok(Self::Claude3_7Sonnet);
 172        }
 173
 174        if id.starts_with("claude-3-5-sonnet") {
 175            return Ok(Self::Claude3_5Sonnet);
 176        }
 177
 178        if id.starts_with("claude-haiku-4-5-thinking") {
 179            return Ok(Self::ClaudeHaiku4_5Thinking);
 180        }
 181
 182        if id.starts_with("claude-haiku-4-5") {
 183            return Ok(Self::ClaudeHaiku4_5);
 184        }
 185
 186        if id.starts_with("claude-3-5-haiku") {
 187            return Ok(Self::Claude3_5Haiku);
 188        }
 189
 190        if id.starts_with("claude-3-opus") {
 191            return Ok(Self::Claude3Opus);
 192        }
 193
 194        if id.starts_with("claude-3-sonnet") {
 195            return Ok(Self::Claude3Sonnet);
 196        }
 197
 198        if id.starts_with("claude-3-haiku") {
 199            return Ok(Self::Claude3Haiku);
 200        }
 201
 202        Err(anyhow!("invalid model ID: {id}"))
 203    }
 204
 205    pub fn id(&self) -> &str {
 206        match self {
 207            Self::ClaudeOpus4 => "claude-opus-4-latest",
 208            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 209            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 210            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 211            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 212            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 213            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 214            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 215            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 216            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
 217            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
 218            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 219            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 220            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 221            Self::Claude3Opus => "claude-3-opus-latest",
 222            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 223            Self::Claude3Haiku => "claude-3-haiku-20240307",
 224            Self::Custom { name, .. } => name,
 225        }
 226    }
 227
 228    /// The id of the model that should be used for making API requests
 229    pub fn request_id(&self) -> &str {
 230        match self {
 231            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 232            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 233            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 234            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
 235            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 236            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
 237            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 238            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 239            Self::Claude3Opus => "claude-3-opus-latest",
 240            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 241            Self::Claude3Haiku => "claude-3-haiku-20240307",
 242            Self::Custom { name, .. } => name,
 243        }
 244    }
 245
 246    pub fn display_name(&self) -> &str {
 247        match self {
 248            Self::ClaudeOpus4 => "Claude Opus 4",
 249            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 250            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 251            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 252            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 253            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 254            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 255            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 256            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
 257            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
 258            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
 259            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 260            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 261            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
 262            Self::Claude3Opus => "Claude 3 Opus",
 263            Self::Claude3Sonnet => "Claude 3 Sonnet",
 264            Self::Claude3Haiku => "Claude 3 Haiku",
 265            Self::Custom {
 266                name, display_name, ..
 267            } => display_name.as_ref().unwrap_or(name),
 268        }
 269    }
 270
 271    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 272        match self {
 273            Self::ClaudeOpus4
 274            | Self::ClaudeOpus4_1
 275            | Self::ClaudeOpus4Thinking
 276            | Self::ClaudeOpus4_1Thinking
 277            | Self::ClaudeSonnet4
 278            | Self::ClaudeSonnet4Thinking
 279            | Self::ClaudeSonnet4_5
 280            | Self::ClaudeSonnet4_5Thinking
 281            | Self::Claude3_5Sonnet
 282            | Self::ClaudeHaiku4_5
 283            | Self::ClaudeHaiku4_5Thinking
 284            | Self::Claude3_5Haiku
 285            | Self::Claude3_7Sonnet
 286            | Self::Claude3_7SonnetThinking
 287            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 288                min_total_token: 2_048,
 289                should_speculate: true,
 290                max_cache_anchors: 4,
 291            }),
 292            Self::Custom {
 293                cache_configuration,
 294                ..
 295            } => cache_configuration.clone(),
 296            _ => None,
 297        }
 298    }
 299
 300    pub fn max_token_count(&self) -> u64 {
 301        match self {
 302            Self::ClaudeOpus4
 303            | Self::ClaudeOpus4_1
 304            | Self::ClaudeOpus4Thinking
 305            | Self::ClaudeOpus4_1Thinking
 306            | Self::ClaudeSonnet4
 307            | Self::ClaudeSonnet4Thinking
 308            | Self::ClaudeSonnet4_5
 309            | Self::ClaudeSonnet4_5Thinking
 310            | Self::Claude3_5Sonnet
 311            | Self::ClaudeHaiku4_5
 312            | Self::ClaudeHaiku4_5Thinking
 313            | Self::Claude3_5Haiku
 314            | Self::Claude3_7Sonnet
 315            | Self::Claude3_7SonnetThinking
 316            | Self::Claude3Opus
 317            | Self::Claude3Sonnet
 318            | Self::Claude3Haiku => 200_000,
 319            Self::Custom { max_tokens, .. } => *max_tokens,
 320        }
 321    }
 322
 323    pub fn max_output_tokens(&self) -> u64 {
 324        match self {
 325            Self::ClaudeOpus4
 326            | Self::ClaudeOpus4_1
 327            | Self::ClaudeOpus4Thinking
 328            | Self::ClaudeOpus4_1Thinking
 329            | Self::ClaudeSonnet4
 330            | Self::ClaudeSonnet4Thinking
 331            | Self::ClaudeSonnet4_5
 332            | Self::ClaudeSonnet4_5Thinking
 333            | Self::Claude3_5Sonnet
 334            | Self::Claude3_7Sonnet
 335            | Self::Claude3_7SonnetThinking
 336            | Self::Claude3_5Haiku => 8_192,
 337            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
 338            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
 339            Self::Custom {
 340                max_output_tokens, ..
 341            } => max_output_tokens.unwrap_or(4_096),
 342        }
 343    }
 344
 345    pub fn default_temperature(&self) -> f32 {
 346        match self {
 347            Self::ClaudeOpus4
 348            | Self::ClaudeOpus4_1
 349            | Self::ClaudeOpus4Thinking
 350            | Self::ClaudeOpus4_1Thinking
 351            | Self::ClaudeSonnet4
 352            | Self::ClaudeSonnet4Thinking
 353            | Self::ClaudeSonnet4_5
 354            | Self::ClaudeSonnet4_5Thinking
 355            | Self::Claude3_5Sonnet
 356            | Self::Claude3_7Sonnet
 357            | Self::Claude3_7SonnetThinking
 358            | Self::ClaudeHaiku4_5
 359            | Self::ClaudeHaiku4_5Thinking
 360            | Self::Claude3_5Haiku
 361            | Self::Claude3Opus
 362            | Self::Claude3Sonnet
 363            | Self::Claude3Haiku => 1.0,
 364            Self::Custom {
 365                default_temperature,
 366                ..
 367            } => default_temperature.unwrap_or(1.0),
 368        }
 369    }
 370
 371    pub fn mode(&self) -> AnthropicModelMode {
 372        match self {
 373            Self::ClaudeOpus4
 374            | Self::ClaudeOpus4_1
 375            | Self::ClaudeSonnet4
 376            | Self::ClaudeSonnet4_5
 377            | Self::Claude3_5Sonnet
 378            | Self::Claude3_7Sonnet
 379            | Self::ClaudeHaiku4_5
 380            | Self::Claude3_5Haiku
 381            | Self::Claude3Opus
 382            | Self::Claude3Sonnet
 383            | Self::Claude3Haiku => AnthropicModelMode::Default,
 384            Self::ClaudeOpus4Thinking
 385            | Self::ClaudeOpus4_1Thinking
 386            | Self::ClaudeSonnet4Thinking
 387            | Self::ClaudeSonnet4_5Thinking
 388            | Self::ClaudeHaiku4_5Thinking
 389            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
 390                budget_tokens: Some(4_096),
 391            },
 392            Self::Custom { mode, .. } => mode.clone(),
 393        }
 394    }
 395
 396    pub fn beta_headers(&self) -> String {
 397        let mut headers = vec![];
 398
 399        match self {
 400            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
 401                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
 402                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
 403                headers.push("token-efficient-tools-2025-02-19".to_string());
 404            }
 405            Self::Custom {
 406                extra_beta_headers, ..
 407            } => {
 408                headers.extend(
 409                    extra_beta_headers
 410                        .iter()
 411                        .filter(|header| !header.trim().is_empty())
 412                        .cloned(),
 413                );
 414            }
 415            _ => {}
 416        }
 417
 418        headers.join(",")
 419    }
 420
 421    pub fn tool_model_id(&self) -> &str {
 422        if let Self::Custom {
 423            tool_override: Some(tool_override),
 424            ..
 425        } = self
 426        {
 427            tool_override
 428        } else {
 429            self.request_id()
 430        }
 431    }
 432}
 433
 434pub async fn complete(
 435    client: &dyn HttpClient,
 436    api_url: &str,
 437    api_key: &str,
 438    request: Request,
 439    beta_headers: String,
 440) -> Result<Response, AnthropicError> {
 441    let uri = format!("{api_url}/v1/messages");
 442    let request_builder = HttpRequest::builder()
 443        .method(Method::POST)
 444        .uri(uri)
 445        .header("Anthropic-Version", "2023-06-01")
 446        .header("Anthropic-Beta", beta_headers)
 447        .header("X-Api-Key", api_key.trim())
 448        .header("Content-Type", "application/json");
 449
 450    let serialized_request =
 451        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 452    let request = request_builder
 453        .body(AsyncBody::from(serialized_request))
 454        .map_err(AnthropicError::BuildRequestBody)?;
 455
 456    let mut response = client
 457        .send(request)
 458        .await
 459        .map_err(AnthropicError::HttpSend)?;
 460    let status_code = response.status();
 461    let mut body = String::new();
 462    response
 463        .body_mut()
 464        .read_to_string(&mut body)
 465        .await
 466        .map_err(AnthropicError::ReadResponse)?;
 467
 468    if status_code.is_success() {
 469        Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
 470    } else {
 471        Err(AnthropicError::HttpResponseError {
 472            status_code,
 473            message: body,
 474        })
 475    }
 476}
 477
 478pub async fn stream_completion(
 479    client: &dyn HttpClient,
 480    api_url: &str,
 481    api_key: &str,
 482    request: Request,
 483    beta_headers: String,
 484) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 485    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 486        .await
 487        .map(|output| output.0)
 488}
 489
 490/// An individual rate limit.
 491#[derive(Debug)]
 492pub struct RateLimit {
 493    pub limit: usize,
 494    pub remaining: usize,
 495    pub reset: DateTime<Utc>,
 496}
 497
 498impl RateLimit {
 499    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 500        let limit =
 501            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 502        let remaining = get_header(
 503            &format!("anthropic-ratelimit-{resource}-remaining"),
 504            headers,
 505        )?
 506        .parse()?;
 507        let reset = DateTime::parse_from_rfc3339(get_header(
 508            &format!("anthropic-ratelimit-{resource}-reset"),
 509            headers,
 510        )?)?
 511        .to_utc();
 512
 513        Ok(Self {
 514            limit,
 515            remaining,
 516            reset,
 517        })
 518    }
 519}
 520
 521/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
 522#[derive(Debug)]
 523pub struct RateLimitInfo {
 524    pub retry_after: Option<Duration>,
 525    pub requests: Option<RateLimit>,
 526    pub tokens: Option<RateLimit>,
 527    pub input_tokens: Option<RateLimit>,
 528    pub output_tokens: Option<RateLimit>,
 529}
 530
 531impl RateLimitInfo {
 532    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 533        // Check if any rate limit headers exist
 534        let has_rate_limit_headers = headers
 535            .keys()
 536            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 537
 538        if !has_rate_limit_headers {
 539            return Self {
 540                retry_after: None,
 541                requests: None,
 542                tokens: None,
 543                input_tokens: None,
 544                output_tokens: None,
 545            };
 546        }
 547
 548        Self {
 549            retry_after: parse_retry_after(headers),
 550            requests: RateLimit::from_headers("requests", headers).ok(),
 551            tokens: RateLimit::from_headers("tokens", headers).ok(),
 552            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 553            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 554        }
 555    }
 556}
 557
 558/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 559/// seconds). Note that other services might specify an HTTP date or some other format for this
 560/// header. Returns `None` if the header is not present or cannot be parsed.
 561pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 562    headers
 563        .get("retry-after")
 564        .and_then(|v| v.to_str().ok())
 565        .and_then(|v| v.parse::<u64>().ok())
 566        .map(Duration::from_secs)
 567}
 568
 569fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 570    Ok(headers
 571        .get(key)
 572        .with_context(|| format!("missing header `{key}`"))?
 573        .to_str()?)
 574}
 575
 576pub async fn stream_completion_with_rate_limit_info(
 577    client: &dyn HttpClient,
 578    api_url: &str,
 579    api_key: &str,
 580    request: Request,
 581    beta_headers: String,
 582) -> Result<
 583    (
 584        BoxStream<'static, Result<Event, AnthropicError>>,
 585        Option<RateLimitInfo>,
 586    ),
 587    AnthropicError,
 588> {
 589    let request = StreamingRequest {
 590        base: request,
 591        stream: true,
 592    };
 593    let uri = format!("{api_url}/v1/messages");
 594
 595    let request_builder = HttpRequest::builder()
 596        .method(Method::POST)
 597        .uri(uri)
 598        .header("Anthropic-Version", "2023-06-01")
 599        .header("Anthropic-Beta", beta_headers)
 600        .header("X-Api-Key", api_key.trim())
 601        .header("Content-Type", "application/json");
 602    let serialized_request =
 603        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 604    let request = request_builder
 605        .body(AsyncBody::from(serialized_request))
 606        .map_err(AnthropicError::BuildRequestBody)?;
 607
 608    let mut response = client
 609        .send(request)
 610        .await
 611        .map_err(AnthropicError::HttpSend)?;
 612    let rate_limits = RateLimitInfo::from_headers(response.headers());
 613    if response.status().is_success() {
 614        let reader = BufReader::new(response.into_body());
 615        let stream = reader
 616            .lines()
 617            .filter_map(|line| async move {
 618                match line {
 619                    Ok(line) => {
 620                        let line = line.strip_prefix("data: ")?;
 621                        match serde_json::from_str(line) {
 622                            Ok(response) => Some(Ok(response)),
 623                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 624                        }
 625                    }
 626                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 627                }
 628            })
 629            .boxed();
 630        Ok((stream, Some(rate_limits)))
 631    } else if response.status().as_u16() == 529 {
 632        Err(AnthropicError::ServerOverloaded {
 633            retry_after: rate_limits.retry_after,
 634        })
 635    } else if let Some(retry_after) = rate_limits.retry_after {
 636        Err(AnthropicError::RateLimit { retry_after })
 637    } else {
 638        let mut body = String::new();
 639        response
 640            .body_mut()
 641            .read_to_string(&mut body)
 642            .await
 643            .map_err(AnthropicError::ReadResponse)?;
 644
 645        match serde_json::from_str::<Event>(&body) {
 646            Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
 647            Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
 648                status_code: response.status(),
 649                message: body,
 650            }),
 651        }
 652    }
 653}
 654
 655#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 656#[serde(rename_all = "lowercase")]
 657pub enum CacheControlType {
 658    Ephemeral,
 659}
 660
 661#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 662pub struct CacheControl {
 663    #[serde(rename = "type")]
 664    pub cache_type: CacheControlType,
 665}
 666
 667#[derive(Debug, Serialize, Deserialize)]
 668pub struct Message {
 669    pub role: Role,
 670    pub content: Vec<RequestContent>,
 671}
 672
 673#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 674#[serde(rename_all = "lowercase")]
 675pub enum Role {
 676    User,
 677    Assistant,
 678}
 679
 680#[derive(Debug, Serialize, Deserialize)]
 681#[serde(tag = "type")]
 682pub enum RequestContent {
 683    #[serde(rename = "text")]
 684    Text {
 685        text: String,
 686        #[serde(skip_serializing_if = "Option::is_none")]
 687        cache_control: Option<CacheControl>,
 688    },
 689    #[serde(rename = "thinking")]
 690    Thinking {
 691        thinking: String,
 692        signature: String,
 693        #[serde(skip_serializing_if = "Option::is_none")]
 694        cache_control: Option<CacheControl>,
 695    },
 696    #[serde(rename = "redacted_thinking")]
 697    RedactedThinking { data: String },
 698    #[serde(rename = "image")]
 699    Image {
 700        source: ImageSource,
 701        #[serde(skip_serializing_if = "Option::is_none")]
 702        cache_control: Option<CacheControl>,
 703    },
 704    #[serde(rename = "tool_use")]
 705    ToolUse {
 706        id: String,
 707        name: String,
 708        input: serde_json::Value,
 709        #[serde(skip_serializing_if = "Option::is_none")]
 710        cache_control: Option<CacheControl>,
 711    },
 712    #[serde(rename = "tool_result")]
 713    ToolResult {
 714        tool_use_id: String,
 715        is_error: bool,
 716        content: ToolResultContent,
 717        #[serde(skip_serializing_if = "Option::is_none")]
 718        cache_control: Option<CacheControl>,
 719    },
 720}
 721
 722#[derive(Debug, Serialize, Deserialize)]
 723#[serde(untagged)]
 724pub enum ToolResultContent {
 725    Plain(String),
 726    Multipart(Vec<ToolResultPart>),
 727}
 728
 729#[derive(Debug, Serialize, Deserialize)]
 730#[serde(tag = "type", rename_all = "lowercase")]
 731pub enum ToolResultPart {
 732    Text { text: String },
 733    Image { source: ImageSource },
 734}
 735
 736#[derive(Debug, Serialize, Deserialize)]
 737#[serde(tag = "type")]
 738pub enum ResponseContent {
 739    #[serde(rename = "text")]
 740    Text { text: String },
 741    #[serde(rename = "thinking")]
 742    Thinking { thinking: String },
 743    #[serde(rename = "redacted_thinking")]
 744    RedactedThinking { data: String },
 745    #[serde(rename = "tool_use")]
 746    ToolUse {
 747        id: String,
 748        name: String,
 749        input: serde_json::Value,
 750    },
 751}
 752
 753#[derive(Debug, Serialize, Deserialize)]
 754pub struct ImageSource {
 755    #[serde(rename = "type")]
 756    pub source_type: String,
 757    pub media_type: String,
 758    pub data: String,
 759}
 760
 761#[derive(Debug, Serialize, Deserialize)]
 762pub struct Tool {
 763    pub name: String,
 764    pub description: String,
 765    pub input_schema: serde_json::Value,
 766}
 767
 768#[derive(Debug, Serialize, Deserialize)]
 769#[serde(tag = "type", rename_all = "lowercase")]
 770pub enum ToolChoice {
 771    Auto,
 772    Any,
 773    Tool { name: String },
 774    None,
 775}
 776
 777#[derive(Debug, Serialize, Deserialize)]
 778#[serde(tag = "type", rename_all = "lowercase")]
 779pub enum Thinking {
 780    Enabled { budget_tokens: Option<u32> },
 781}
 782
 783#[derive(Debug, Serialize, Deserialize)]
 784#[serde(untagged)]
 785pub enum StringOrContents {
 786    String(String),
 787    Content(Vec<RequestContent>),
 788}
 789
 790#[derive(Debug, Serialize, Deserialize)]
 791pub struct Request {
 792    pub model: String,
 793    pub max_tokens: u64,
 794    pub messages: Vec<Message>,
 795    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 796    pub tools: Vec<Tool>,
 797    #[serde(default, skip_serializing_if = "Option::is_none")]
 798    pub thinking: Option<Thinking>,
 799    #[serde(default, skip_serializing_if = "Option::is_none")]
 800    pub tool_choice: Option<ToolChoice>,
 801    #[serde(default, skip_serializing_if = "Option::is_none")]
 802    pub system: Option<StringOrContents>,
 803    #[serde(default, skip_serializing_if = "Option::is_none")]
 804    pub metadata: Option<Metadata>,
 805    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 806    pub stop_sequences: Vec<String>,
 807    #[serde(default, skip_serializing_if = "Option::is_none")]
 808    pub temperature: Option<f32>,
 809    #[serde(default, skip_serializing_if = "Option::is_none")]
 810    pub top_k: Option<u32>,
 811    #[serde(default, skip_serializing_if = "Option::is_none")]
 812    pub top_p: Option<f32>,
 813}
 814
 815#[derive(Debug, Serialize, Deserialize)]
 816struct StreamingRequest {
 817    #[serde(flatten)]
 818    pub base: Request,
 819    pub stream: bool,
 820}
 821
 822#[derive(Debug, Serialize, Deserialize)]
 823pub struct Metadata {
 824    pub user_id: Option<String>,
 825}
 826
 827#[derive(Debug, Serialize, Deserialize, Default)]
 828pub struct Usage {
 829    #[serde(default, skip_serializing_if = "Option::is_none")]
 830    pub input_tokens: Option<u64>,
 831    #[serde(default, skip_serializing_if = "Option::is_none")]
 832    pub output_tokens: Option<u64>,
 833    #[serde(default, skip_serializing_if = "Option::is_none")]
 834    pub cache_creation_input_tokens: Option<u64>,
 835    #[serde(default, skip_serializing_if = "Option::is_none")]
 836    pub cache_read_input_tokens: Option<u64>,
 837}
 838
 839#[derive(Debug, Serialize, Deserialize)]
 840pub struct Response {
 841    pub id: String,
 842    #[serde(rename = "type")]
 843    pub response_type: String,
 844    pub role: Role,
 845    pub content: Vec<ResponseContent>,
 846    pub model: String,
 847    #[serde(default, skip_serializing_if = "Option::is_none")]
 848    pub stop_reason: Option<String>,
 849    #[serde(default, skip_serializing_if = "Option::is_none")]
 850    pub stop_sequence: Option<String>,
 851    pub usage: Usage,
 852}
 853
 854#[derive(Debug, Serialize, Deserialize)]
 855#[serde(tag = "type")]
 856pub enum Event {
 857    #[serde(rename = "message_start")]
 858    MessageStart { message: Response },
 859    #[serde(rename = "content_block_start")]
 860    ContentBlockStart {
 861        index: usize,
 862        content_block: ResponseContent,
 863    },
 864    #[serde(rename = "content_block_delta")]
 865    ContentBlockDelta { index: usize, delta: ContentDelta },
 866    #[serde(rename = "content_block_stop")]
 867    ContentBlockStop { index: usize },
 868    #[serde(rename = "message_delta")]
 869    MessageDelta { delta: MessageDelta, usage: Usage },
 870    #[serde(rename = "message_stop")]
 871    MessageStop,
 872    #[serde(rename = "ping")]
 873    Ping,
 874    #[serde(rename = "error")]
 875    Error { error: ApiError },
 876}
 877
 878#[derive(Debug, Serialize, Deserialize)]
 879#[serde(tag = "type")]
 880pub enum ContentDelta {
 881    #[serde(rename = "text_delta")]
 882    TextDelta { text: String },
 883    #[serde(rename = "thinking_delta")]
 884    ThinkingDelta { thinking: String },
 885    #[serde(rename = "signature_delta")]
 886    SignatureDelta { signature: String },
 887    #[serde(rename = "input_json_delta")]
 888    InputJsonDelta { partial_json: String },
 889}
 890
/// The `delta` payload of [`Event::MessageDelta`].
///
/// Neither field has `skip_serializing_if`, so a `None` value is serialized
/// as an explicit `null`.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    /// Stop reason, if one is present.
    pub stop_reason: Option<String>,
    /// Stop sequence, if one is present.
    pub stop_sequence: Option<String>,
}
 896
/// Failure modes of a request to the Anthropic API.
///
/// The variants cover building and sending the request (serialization, body
/// construction, transport), reading and decoding the response, and error
/// conditions reported by the server (generic HTTP errors, rate limiting,
/// overload, and structured API errors).
///
/// Only `Debug` is derived at this definition.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API, with its status code and a message
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded; `retry_after` is always present for this variant
    RateLimit { retry_after: Duration },

    /// Server overloaded; `retry_after` may be absent for this variant
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
 929
/// Structured error object from the API; also the payload of [`Event::Error`].
///
/// Its `Display` output is `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Value of the JSON `type` key, kept as a raw string. Use
    /// [`ApiError::code`] to map it onto an [`ApiErrorCode`].
    #[serde(rename = "type")]
    pub error_type: String,
    /// Error message text.
    pub message: String,
}
 937
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// The derived `FromStr` implementation (`EnumString` with
/// `serialize_all = "snake_case"`) parses the snake_case form of each variant
/// name, e.g. `"rate_limit_error"` becomes [`ApiErrorCode::RateLimitError`].
/// Strings that match no variant fail to parse.
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
 960
 961impl ApiError {
 962    pub fn code(&self) -> Option<ApiErrorCode> {
 963        ApiErrorCode::from_str(&self.error_type).ok()
 964    }
 965
 966    pub fn is_rate_limit_error(&self) -> bool {
 967        matches!(self.error_type.as_str(), "rate_limit_error")
 968    }
 969
 970    pub fn match_window_exceeded(&self) -> Option<u64> {
 971        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
 972            return None;
 973        };
 974
 975        parse_prompt_too_long(&self.message)
 976    }
 977}
 978
 979pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
 980    message
 981        .strip_prefix("prompt is too long: ")?
 982        .split_once(" tokens")?
 983        .0
 984        .parse()
 985        .ok()
 986}
 987
 988#[test]
 989fn test_match_window_exceeded() {
 990    let error = ApiError {
 991        error_type: "invalid_request_error".to_string(),
 992        message: "prompt is too long: 220000 tokens > 200000".to_string(),
 993    };
 994    assert_eq!(error.match_window_exceeded(), Some(220_000));
 995
 996    let error = ApiError {
 997        error_type: "invalid_request_error".to_string(),
 998        message: "prompt is too long: 1234953 tokens".to_string(),
 999    };
1000    assert_eq!(error.match_window_exceeded(), Some(1234953));
1001
1002    let error = ApiError {
1003        error_type: "invalid_request_error".to_string(),
1004        message: "not a prompt length error".to_string(),
1005    };
1006    assert_eq!(error.match_window_exceeded(), None);
1007
1008    let error = ApiError {
1009        error_type: "rate_limit_error".to_string(),
1010        message: "prompt is too long: 12345 tokens".to_string(),
1011    };
1012    assert_eq!(error.match_window_exceeded(), None);
1013
1014    let error = ApiError {
1015        error_type: "invalid_request_error".to_string(),
1016        message: "prompt is too long: invalid tokens".to_string(),
1017    };
1018    assert_eq!(error.match_window_exceeded(), None);
1019}