anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
/// Base URL of Anthropic's hosted API (scheme and host only, no trailing slash).
// NOTE(review): presumably the default `api_url` handed to `complete` /
// `stream_completion`, which append `/v1/messages` — confirm at call sites.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  16
// Prompt-cache tuning values attached to a model.
//
// Built-in models get a fixed value from `Model::cache_configuration`; custom
// models carry their own optional copy in `Model::Custom::cache_configuration`.
// (Plain `//` comments are used here so the derived JSON schema is unchanged.)
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum total token count before caching is
    // attempted — inferred from the name, confirm against the consumer.
    pub min_total_token: u64,
    // NOTE(review): presumably enables speculative cache writes — confirm.
    pub should_speculate: bool,
    // NOTE(review): presumably the maximum number of cache breakpoints per
    // request — confirm.
    pub max_cache_anchors: usize,
}
  24
// Mode a model runs in: plain (`Default`) or with thinking enabled.
//
// Mirrors `ModelMode` from the settings crate; the `From` impls in this file
// convert between the two variant-for-variant.
// (Plain `//` comments are used here so the derived JSON schema is unchanged.)
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    #[default]
    Default,
    Thinking {
        // Optional token budget for thinking; `None` leaves it unspecified.
        budget_tokens: Option<u32>,
    },
}
  34
  35impl From<ModelMode> for AnthropicModelMode {
  36    fn from(value: ModelMode) -> Self {
  37        match value {
  38            ModelMode::Default => AnthropicModelMode::Default,
  39            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  40        }
  41    }
  42}
  43
  44impl From<AnthropicModelMode> for ModelMode {
  45    fn from(value: AnthropicModelMode) -> Self {
  46        match value {
  47            AnthropicModelMode::Default => ModelMode::Default,
  48            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  49        }
  50    }
  51}
  52
// The set of models this crate knows about, plus a user-defined `Custom` escape hatch.
//
// Each built-in variant (de)serializes under its `rename` string and also accepts
// the corresponding `-latest` alias on input. Variants ending in `Thinking` are the
// same underlying model with thinking mode enabled (see `Model::request_id` and
// `Model::mode`). `ClaudeSonnet4_5` is the `Default`.
// (Plain `//` comments are used for new notes so the derived JSON schema is unchanged.)
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    // --- Claude Opus 4 / 4.1 ---
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    // --- Claude Sonnet 4 / 4.5 ---
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    // --- Claude 3.7 / 3.5 Sonnet ---
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    // --- Haiku family ---
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    // --- Claude 3 ---
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    // User-defined model; every accessor on `Model` reads its answer from these fields.
    #[serde(rename = "custom")]
    Custom {
        // Returned verbatim by `Model::id` and `Model::request_id`.
        name: String,
        // Returned by `Model::max_token_count`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        // `Model::max_output_tokens` falls back to 4_096 when this is `None`.
        max_output_tokens: Option<u64>,
        // `Model::default_temperature` falls back to 1.0 when this is `None`.
        default_temperature: Option<f32>,
        // Extra entries appended to the beta header list by `Model::beta_headers`;
        // defaults to empty when absent from the serialized form.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        // Defaults to `AnthropicModelMode::Default` when absent from the serialized form.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
 127
 128impl Model {
 129    pub const fn default_fast() -> Self {
 130        Self::Claude3_5Haiku
 131    }
 132
 133    pub fn from_id(id: &str) -> Result<Self> {
 134        if id.starts_with("claude-opus-4-1-thinking") {
 135            return Ok(Self::ClaudeOpus4_1Thinking);
 136        }
 137
 138        if id.starts_with("claude-opus-4-thinking") {
 139            return Ok(Self::ClaudeOpus4Thinking);
 140        }
 141
 142        if id.starts_with("claude-opus-4-1") {
 143            return Ok(Self::ClaudeOpus4_1);
 144        }
 145
 146        if id.starts_with("claude-opus-4") {
 147            return Ok(Self::ClaudeOpus4);
 148        }
 149
 150        if id.starts_with("claude-sonnet-4-5-thinking") {
 151            return Ok(Self::ClaudeSonnet4_5Thinking);
 152        }
 153
 154        if id.starts_with("claude-sonnet-4-5") {
 155            return Ok(Self::ClaudeSonnet4_5);
 156        }
 157
 158        if id.starts_with("claude-sonnet-4-thinking") {
 159            return Ok(Self::ClaudeSonnet4Thinking);
 160        }
 161
 162        if id.starts_with("claude-sonnet-4") {
 163            return Ok(Self::ClaudeSonnet4);
 164        }
 165
 166        if id.starts_with("claude-3-7-sonnet-thinking") {
 167            return Ok(Self::Claude3_7SonnetThinking);
 168        }
 169
 170        if id.starts_with("claude-3-7-sonnet") {
 171            return Ok(Self::Claude3_7Sonnet);
 172        }
 173
 174        if id.starts_with("claude-3-5-sonnet") {
 175            return Ok(Self::Claude3_5Sonnet);
 176        }
 177
 178        if id.starts_with("claude-haiku-4-5-thinking") {
 179            return Ok(Self::ClaudeHaiku4_5Thinking);
 180        }
 181
 182        if id.starts_with("claude-haiku-4-5") {
 183            return Ok(Self::ClaudeHaiku4_5);
 184        }
 185
 186        if id.starts_with("claude-3-5-haiku") {
 187            return Ok(Self::Claude3_5Haiku);
 188        }
 189
 190        if id.starts_with("claude-3-opus") {
 191            return Ok(Self::Claude3Opus);
 192        }
 193
 194        if id.starts_with("claude-3-sonnet") {
 195            return Ok(Self::Claude3Sonnet);
 196        }
 197
 198        if id.starts_with("claude-3-haiku") {
 199            return Ok(Self::Claude3Haiku);
 200        }
 201
 202        Err(anyhow!("invalid model ID: {id}"))
 203    }
 204
 205    pub fn id(&self) -> &str {
 206        match self {
 207            Self::ClaudeOpus4 => "claude-opus-4-latest",
 208            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 209            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 210            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 211            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 212            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 213            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 214            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 215            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 216            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
 217            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
 218            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 219            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 220            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 221            Self::Claude3Opus => "claude-3-opus-latest",
 222            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 223            Self::Claude3Haiku => "claude-3-haiku-20240307",
 224            Self::Custom { name, .. } => name,
 225        }
 226    }
 227
 228    /// The id of the model that should be used for making API requests
 229    pub fn request_id(&self) -> &str {
 230        match self {
 231            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 232            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 233            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 234            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
 235            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 236            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
 237            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 238            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 239            Self::Claude3Opus => "claude-3-opus-latest",
 240            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 241            Self::Claude3Haiku => "claude-3-haiku-20240307",
 242            Self::Custom { name, .. } => name,
 243        }
 244    }
 245
 246    pub fn display_name(&self) -> &str {
 247        match self {
 248            Self::ClaudeOpus4 => "Claude Opus 4",
 249            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 250            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 251            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 252            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 253            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 254            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 255            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 256            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
 257            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
 258            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
 259            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 260            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 261            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
 262            Self::Claude3Opus => "Claude 3 Opus",
 263            Self::Claude3Sonnet => "Claude 3 Sonnet",
 264            Self::Claude3Haiku => "Claude 3 Haiku",
 265            Self::Custom {
 266                name, display_name, ..
 267            } => display_name.as_ref().unwrap_or(name),
 268        }
 269    }
 270
 271    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 272        match self {
 273            Self::ClaudeOpus4
 274            | Self::ClaudeOpus4_1
 275            | Self::ClaudeOpus4Thinking
 276            | Self::ClaudeOpus4_1Thinking
 277            | Self::ClaudeSonnet4
 278            | Self::ClaudeSonnet4Thinking
 279            | Self::ClaudeSonnet4_5
 280            | Self::ClaudeSonnet4_5Thinking
 281            | Self::Claude3_5Sonnet
 282            | Self::ClaudeHaiku4_5
 283            | Self::ClaudeHaiku4_5Thinking
 284            | Self::Claude3_5Haiku
 285            | Self::Claude3_7Sonnet
 286            | Self::Claude3_7SonnetThinking
 287            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 288                min_total_token: 2_048,
 289                should_speculate: true,
 290                max_cache_anchors: 4,
 291            }),
 292            Self::Custom {
 293                cache_configuration,
 294                ..
 295            } => cache_configuration.clone(),
 296            _ => None,
 297        }
 298    }
 299
 300    pub const fn max_token_count(&self) -> u64 {
 301        match self {
 302            Self::ClaudeOpus4
 303            | Self::ClaudeOpus4_1
 304            | Self::ClaudeOpus4Thinking
 305            | Self::ClaudeOpus4_1Thinking
 306            | Self::ClaudeSonnet4
 307            | Self::ClaudeSonnet4Thinking
 308            | Self::ClaudeSonnet4_5
 309            | Self::ClaudeSonnet4_5Thinking
 310            | Self::Claude3_5Sonnet
 311            | Self::ClaudeHaiku4_5
 312            | Self::ClaudeHaiku4_5Thinking
 313            | Self::Claude3_5Haiku
 314            | Self::Claude3_7Sonnet
 315            | Self::Claude3_7SonnetThinking
 316            | Self::Claude3Opus
 317            | Self::Claude3Sonnet
 318            | Self::Claude3Haiku => 200_000,
 319            Self::Custom { max_tokens, .. } => *max_tokens,
 320        }
 321    }
 322
 323    pub fn max_output_tokens(&self) -> u64 {
 324        match self {
 325            Self::ClaudeOpus4
 326            | Self::ClaudeOpus4_1
 327            | Self::ClaudeOpus4Thinking
 328            | Self::ClaudeOpus4_1Thinking
 329            | Self::ClaudeSonnet4
 330            | Self::ClaudeSonnet4Thinking
 331            | Self::ClaudeSonnet4_5
 332            | Self::ClaudeSonnet4_5Thinking
 333            | Self::Claude3_5Sonnet
 334            | Self::Claude3_7Sonnet
 335            | Self::Claude3_7SonnetThinking
 336            | Self::Claude3_5Haiku => 8_192,
 337            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
 338            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
 339            Self::Custom {
 340                max_output_tokens, ..
 341            } => max_output_tokens.unwrap_or(4_096),
 342        }
 343    }
 344
 345    pub fn default_temperature(&self) -> f32 {
 346        match self {
 347            Self::ClaudeOpus4
 348            | Self::ClaudeOpus4_1
 349            | Self::ClaudeOpus4Thinking
 350            | Self::ClaudeOpus4_1Thinking
 351            | Self::ClaudeSonnet4
 352            | Self::ClaudeSonnet4Thinking
 353            | Self::ClaudeSonnet4_5
 354            | Self::ClaudeSonnet4_5Thinking
 355            | Self::Claude3_5Sonnet
 356            | Self::Claude3_7Sonnet
 357            | Self::Claude3_7SonnetThinking
 358            | Self::ClaudeHaiku4_5
 359            | Self::ClaudeHaiku4_5Thinking
 360            | Self::Claude3_5Haiku
 361            | Self::Claude3Opus
 362            | Self::Claude3Sonnet
 363            | Self::Claude3Haiku => 1.0,
 364            Self::Custom {
 365                default_temperature,
 366                ..
 367            } => default_temperature.unwrap_or(1.0),
 368        }
 369    }
 370
 371    pub fn mode(&self) -> AnthropicModelMode {
 372        match self {
 373            Self::ClaudeOpus4
 374            | Self::ClaudeOpus4_1
 375            | Self::ClaudeSonnet4
 376            | Self::ClaudeSonnet4_5
 377            | Self::Claude3_5Sonnet
 378            | Self::Claude3_7Sonnet
 379            | Self::ClaudeHaiku4_5
 380            | Self::Claude3_5Haiku
 381            | Self::Claude3Opus
 382            | Self::Claude3Sonnet
 383            | Self::Claude3Haiku => AnthropicModelMode::Default,
 384            Self::ClaudeOpus4Thinking
 385            | Self::ClaudeOpus4_1Thinking
 386            | Self::ClaudeSonnet4Thinking
 387            | Self::ClaudeSonnet4_5Thinking
 388            | Self::ClaudeHaiku4_5Thinking
 389            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
 390                budget_tokens: Some(4_096),
 391            },
 392            Self::Custom { mode, .. } => mode.clone(),
 393        }
 394    }
 395
 396    pub const DEFAULT_BETA_HEADERS: &[&str] = &["prompt-caching-2024-07-31"];
 397
 398    pub fn beta_headers(&self) -> String {
 399        let mut headers = Self::DEFAULT_BETA_HEADERS
 400            .iter()
 401            .map(|header| header.to_string())
 402            .collect::<Vec<_>>();
 403
 404        match self {
 405            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
 406                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
 407                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
 408                headers.push("token-efficient-tools-2025-02-19".to_string());
 409            }
 410            Self::Custom {
 411                extra_beta_headers, ..
 412            } => {
 413                headers.extend(
 414                    extra_beta_headers
 415                        .iter()
 416                        .filter(|header| !header.trim().is_empty())
 417                        .cloned(),
 418                );
 419            }
 420            _ => {}
 421        }
 422
 423        headers.join(",")
 424    }
 425
 426    pub fn tool_model_id(&self) -> &str {
 427        if let Self::Custom {
 428            tool_override: Some(tool_override),
 429            ..
 430        } = self
 431        {
 432            tool_override
 433        } else {
 434            self.request_id()
 435        }
 436    }
 437}
 438
 439pub async fn complete(
 440    client: &dyn HttpClient,
 441    api_url: &str,
 442    api_key: &str,
 443    request: Request,
 444    beta_headers: String,
 445) -> Result<Response, AnthropicError> {
 446    let uri = format!("{api_url}/v1/messages");
 447    let request_builder = HttpRequest::builder()
 448        .method(Method::POST)
 449        .uri(uri)
 450        .header("Anthropic-Version", "2023-06-01")
 451        .header("Anthropic-Beta", beta_headers)
 452        .header("X-Api-Key", api_key.trim())
 453        .header("Content-Type", "application/json");
 454
 455    let serialized_request =
 456        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 457    let request = request_builder
 458        .body(AsyncBody::from(serialized_request))
 459        .map_err(AnthropicError::BuildRequestBody)?;
 460
 461    let mut response = client
 462        .send(request)
 463        .await
 464        .map_err(AnthropicError::HttpSend)?;
 465    let status_code = response.status();
 466    let mut body = String::new();
 467    response
 468        .body_mut()
 469        .read_to_string(&mut body)
 470        .await
 471        .map_err(AnthropicError::ReadResponse)?;
 472
 473    if status_code.is_success() {
 474        Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
 475    } else {
 476        Err(AnthropicError::HttpResponseError {
 477            status_code,
 478            message: body,
 479        })
 480    }
 481}
 482
 483pub async fn stream_completion(
 484    client: &dyn HttpClient,
 485    api_url: &str,
 486    api_key: &str,
 487    request: Request,
 488    beta_headers: String,
 489) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 490    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 491        .await
 492        .map(|output| output.0)
 493}
 494
/// An individual rate limit.
///
/// Populated from the three `anthropic-ratelimit-{resource}-*` response
/// headers (`limit`, `remaining`, `reset`) for one resource.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `-limit` header.
    pub limit: usize,
    /// Value of the `-remaining` header.
    pub remaining: usize,
    /// Value of the `-reset` header, parsed as RFC 3339 and converted to UTC.
    pub reset: DateTime<Utc>,
}
 502
 503impl RateLimit {
 504    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 505        let limit =
 506            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 507        let remaining = get_header(
 508            &format!("anthropic-ratelimit-{resource}-remaining"),
 509            headers,
 510        )?
 511        .parse()?;
 512        let reset = DateTime::parse_from_rfc3339(get_header(
 513            &format!("anthropic-ratelimit-{resource}-reset"),
 514            headers,
 515        )?)?
 516        .to_utc();
 517
 518        Ok(Self {
 519            limit,
 520            remaining,
 521            reset,
 522        })
 523    }
 524}
 525
/// Rate-limit details extracted from a response's headers.
///
/// Every field is optional: a field is `None` when its headers are absent or
/// fail to parse.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// The `retry-after` header interpreted as whole seconds.
    pub retry_after: Option<Duration>,
    /// Limit read from the `anthropic-ratelimit-requests-*` headers.
    pub requests: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-tokens-*` headers.
    pub tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-input-tokens-*` headers.
    pub input_tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-output-tokens-*` headers.
    pub output_tokens: Option<RateLimit>,
}
 535
 536impl RateLimitInfo {
 537    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 538        // Check if any rate limit headers exist
 539        let has_rate_limit_headers = headers
 540            .keys()
 541            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 542
 543        if !has_rate_limit_headers {
 544            return Self {
 545                retry_after: None,
 546                requests: None,
 547                tokens: None,
 548                input_tokens: None,
 549                output_tokens: None,
 550            };
 551        }
 552
 553        Self {
 554            retry_after: parse_retry_after(headers),
 555            requests: RateLimit::from_headers("requests", headers).ok(),
 556            tokens: RateLimit::from_headers("tokens", headers).ok(),
 557            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 558            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 559        }
 560    }
 561}
 562
 563/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 564/// seconds). Note that other services might specify an HTTP date or some other format for this
 565/// header. Returns `None` if the header is not present or cannot be parsed.
 566pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 567    headers
 568        .get("retry-after")
 569        .and_then(|v| v.to_str().ok())
 570        .and_then(|v| v.parse::<u64>().ok())
 571        .map(Duration::from_secs)
 572}
 573
 574fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 575    Ok(headers
 576        .get(key)
 577        .with_context(|| format!("missing header `{key}`"))?
 578        .to_str()?)
 579}
 580
 581pub async fn stream_completion_with_rate_limit_info(
 582    client: &dyn HttpClient,
 583    api_url: &str,
 584    api_key: &str,
 585    request: Request,
 586    beta_headers: String,
 587) -> Result<
 588    (
 589        BoxStream<'static, Result<Event, AnthropicError>>,
 590        Option<RateLimitInfo>,
 591    ),
 592    AnthropicError,
 593> {
 594    let request = StreamingRequest {
 595        base: request,
 596        stream: true,
 597    };
 598    let uri = format!("{api_url}/v1/messages");
 599
 600    let request_builder = HttpRequest::builder()
 601        .method(Method::POST)
 602        .uri(uri)
 603        .header("Anthropic-Version", "2023-06-01")
 604        .header("Anthropic-Beta", beta_headers)
 605        .header("X-Api-Key", api_key.trim())
 606        .header("Content-Type", "application/json");
 607    let serialized_request =
 608        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 609    let request = request_builder
 610        .body(AsyncBody::from(serialized_request))
 611        .map_err(AnthropicError::BuildRequestBody)?;
 612
 613    let mut response = client
 614        .send(request)
 615        .await
 616        .map_err(AnthropicError::HttpSend)?;
 617    let rate_limits = RateLimitInfo::from_headers(response.headers());
 618    if response.status().is_success() {
 619        let reader = BufReader::new(response.into_body());
 620        let stream = reader
 621            .lines()
 622            .filter_map(|line| async move {
 623                match line {
 624                    Ok(line) => {
 625                        let line = line.strip_prefix("data: ")?;
 626                        match serde_json::from_str(line) {
 627                            Ok(response) => Some(Ok(response)),
 628                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 629                        }
 630                    }
 631                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 632                }
 633            })
 634            .boxed();
 635        Ok((stream, Some(rate_limits)))
 636    } else if response.status().as_u16() == 529 {
 637        Err(AnthropicError::ServerOverloaded {
 638            retry_after: rate_limits.retry_after,
 639        })
 640    } else if let Some(retry_after) = rate_limits.retry_after {
 641        Err(AnthropicError::RateLimit { retry_after })
 642    } else {
 643        let mut body = String::new();
 644        response
 645            .body_mut()
 646            .read_to_string(&mut body)
 647            .await
 648            .map_err(AnthropicError::ReadResponse)?;
 649
 650        match serde_json::from_str::<Event>(&body) {
 651            Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
 652            Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
 653                status_code: response.status(),
 654                message: body,
 655            }),
 656        }
 657    }
 658}
 659
 660#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 661#[serde(rename_all = "lowercase")]
 662pub enum CacheControlType {
 663    Ephemeral,
 664}
 665
/// Cache-control marker that can be attached to a [`RequestContent`] block.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
 671
/// One message of a [`Request`]: the speaker's role and its content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    /// Who the message is from.
    pub role: Role,
    /// The content blocks making up the message, in order.
    pub content: Vec<RequestContent>,
}
 677
 678#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 679#[serde(rename_all = "lowercase")]
 680pub enum Role {
 681    User,
 682    Assistant,
 683}
 684
 685#[derive(Debug, Serialize, Deserialize)]
 686#[serde(tag = "type")]
 687pub enum RequestContent {
 688    #[serde(rename = "text")]
 689    Text {
 690        text: String,
 691        #[serde(skip_serializing_if = "Option::is_none")]
 692        cache_control: Option<CacheControl>,
 693    },
 694    #[serde(rename = "thinking")]
 695    Thinking {
 696        thinking: String,
 697        signature: String,
 698        #[serde(skip_serializing_if = "Option::is_none")]
 699        cache_control: Option<CacheControl>,
 700    },
 701    #[serde(rename = "redacted_thinking")]
 702    RedactedThinking { data: String },
 703    #[serde(rename = "image")]
 704    Image {
 705        source: ImageSource,
 706        #[serde(skip_serializing_if = "Option::is_none")]
 707        cache_control: Option<CacheControl>,
 708    },
 709    #[serde(rename = "tool_use")]
 710    ToolUse {
 711        id: String,
 712        name: String,
 713        input: serde_json::Value,
 714        #[serde(skip_serializing_if = "Option::is_none")]
 715        cache_control: Option<CacheControl>,
 716    },
 717    #[serde(rename = "tool_result")]
 718    ToolResult {
 719        tool_use_id: String,
 720        is_error: bool,
 721        content: ToolResultContent,
 722        #[serde(skip_serializing_if = "Option::is_none")]
 723        cache_control: Option<CacheControl>,
 724    },
 725}
 726
/// Payload of a tool result: either a bare string or a list of parts.
///
/// Untagged, so the JSON shape alone (string vs. array) selects the variant;
/// variants are tried in declaration order when deserializing.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    /// Serialized as a JSON string.
    Plain(String),
    /// Serialized as a JSON array of [`ToolResultPart`]s.
    Multipart(Vec<ToolResultPart>),
}
 733
 734#[derive(Debug, Serialize, Deserialize)]
 735#[serde(tag = "type", rename_all = "lowercase")]
 736pub enum ToolResultPart {
 737    Text { text: String },
 738    Image { source: ImageSource },
 739}
 740
 741#[derive(Debug, Serialize, Deserialize)]
 742#[serde(tag = "type")]
 743pub enum ResponseContent {
 744    #[serde(rename = "text")]
 745    Text { text: String },
 746    #[serde(rename = "thinking")]
 747    Thinking { thinking: String },
 748    #[serde(rename = "redacted_thinking")]
 749    RedactedThinking { data: String },
 750    #[serde(rename = "tool_use")]
 751    ToolUse {
 752        id: String,
 753        name: String,
 754        input: serde_json::Value,
 755    },
 756}
 757
/// Describes where an image's bytes come from and what format they are in.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Serialized under the JSON key `type`.
    // NOTE(review): presumably the source kind (e.g. an inline encoding) — confirm
    // the accepted values against the API documentation.
    #[serde(rename = "type")]
    pub source_type: String,
    // NOTE(review): presumably a MIME type such as an image format — confirm.
    pub media_type: String,
    // NOTE(review): presumably the encoded image payload — confirm the encoding.
    pub data: String,
}
 765
/// A tool definition sent in [`Request::tools`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    /// Name of the tool.
    pub name: String,
    /// Free-text description of the tool.
    pub description: String,
    /// Arbitrary JSON describing the tool's input.
    // NOTE(review): presumably a JSON Schema object — confirm against the API docs.
    pub input_schema: serde_json::Value,
}
 772
 773#[derive(Debug, Serialize, Deserialize)]
 774#[serde(tag = "type", rename_all = "lowercase")]
 775pub enum ToolChoice {
 776    Auto,
 777    Any,
 778    Tool { name: String },
 779    None,
 780}
 781
 782#[derive(Debug, Serialize, Deserialize)]
 783#[serde(tag = "type", rename_all = "lowercase")]
 784pub enum Thinking {
 785    Enabled { budget_tokens: Option<u32> },
 786}
 787
/// Either a plain string or a list of content blocks; used for
/// [`Request::system`].
///
/// Untagged, so the JSON shape alone (string vs. array) selects the variant.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    /// Serialized as a JSON string.
    String(String),
    /// Serialized as a JSON array of [`RequestContent`] blocks.
    Content(Vec<RequestContent>),
}
 794
/// Body of a request to the `/v1/messages` endpoint.
///
/// `model`, `max_tokens` and `messages` are always serialized. Every other
/// field is omitted from the JSON when empty or `None`, and defaults to
/// empty/`None` when absent on deserialization.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Identifier of the model to run.
    pub model: String,
    /// Upper bound on the number of tokens to generate.
    pub max_tokens: u64,
    /// The conversation so far.
    pub messages: Vec<Message>,
    /// Tools offered to the model; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Thinking configuration; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Tool-selection setting; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a string or as content blocks; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Request metadata; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// Stop sequences; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    /// Sampling temperature; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Top-k sampling parameter; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Top-p sampling parameter; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
 819
/// A [`Request`] plus the `stream` flag, as sent by the streaming entry point.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    /// Flattened so the request's fields sit at the top level of the JSON
    /// object, alongside `stream`.
    #[serde(flatten)]
    pub base: Request,
    /// Serialized as the top-level `stream` field.
    pub stream: bool,
}
 826
/// Optional metadata attached to a [`Request`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// Identifier of the end user. Has no `skip_serializing_if`, so `None` is
    /// serialized as an explicit `null`.
    pub user_id: Option<String>,
}
 831
/// Token counts reported with a response or a streamed message delta.
///
/// Every field is optional: absent fields deserialize to `None`, and `None`
/// fields are omitted when serializing.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    /// Number of input tokens.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    /// Number of output tokens.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    // NOTE(review): presumably input tokens written to the prompt cache — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    // NOTE(review): presumably input tokens served from the prompt cache — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
 843
/// A complete message returned by [`complete`], also carried by
/// [`Event::MessageStart`] when streaming.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    /// Identifier of the response message.
    pub id: String,
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    /// Author of the message.
    pub role: Role,
    /// The content blocks of the message, in order.
    pub content: Vec<ResponseContent>,
    /// Identifier of the model that produced the message.
    pub model: String,
    /// Why generation stopped; `None` when absent, omitted when serializing `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// The stop sequence that was hit; `None` when absent, omitted when serializing `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    /// Token usage for the message.
    pub usage: Usage,
}
 858
 859#[derive(Debug, Serialize, Deserialize)]
 860#[serde(tag = "type")]
 861pub enum Event {
 862    #[serde(rename = "message_start")]
 863    MessageStart { message: Response },
 864    #[serde(rename = "content_block_start")]
 865    ContentBlockStart {
 866        index: usize,
 867        content_block: ResponseContent,
 868    },
 869    #[serde(rename = "content_block_delta")]
 870    ContentBlockDelta { index: usize, delta: ContentDelta },
 871    #[serde(rename = "content_block_stop")]
 872    ContentBlockStop { index: usize },
 873    #[serde(rename = "message_delta")]
 874    MessageDelta { delta: MessageDelta, usage: Usage },
 875    #[serde(rename = "message_stop")]
 876    MessageStop,
 877    #[serde(rename = "ping")]
 878    Ping,
 879    #[serde(rename = "error")]
 880    Error { error: ApiError },
 881}
 882
 883#[derive(Debug, Serialize, Deserialize)]
 884#[serde(tag = "type")]
 885pub enum ContentDelta {
 886    #[serde(rename = "text_delta")]
 887    TextDelta { text: String },
 888    #[serde(rename = "thinking_delta")]
 889    ThinkingDelta { thinking: String },
 890    #[serde(rename = "signature_delta")]
 891    SignatureDelta { signature: String },
 892    #[serde(rename = "input_json_delta")]
 893    InputJsonDelta { partial_json: String },
 894}
 895
 896#[derive(Debug, Serialize, Deserialize)]
 897pub struct MessageDelta {
 898    pub stop_reason: Option<String>,
 899    pub stop_sequence: Option<String>,
 900}
 901
 902#[derive(Debug)]
 903pub enum AnthropicError {
 904    /// Failed to serialize the HTTP request body to JSON
 905    SerializeRequest(serde_json::Error),
 906
 907    /// Failed to construct the HTTP request body
 908    BuildRequestBody(http::Error),
 909
 910    /// Failed to send the HTTP request
 911    HttpSend(anyhow::Error),
 912
 913    /// Failed to deserialize the response from JSON
 914    DeserializeResponse(serde_json::Error),
 915
 916    /// Failed to read from response stream
 917    ReadResponse(io::Error),
 918
 919    /// HTTP error response from the API
 920    HttpResponseError {
 921        status_code: StatusCode,
 922        message: String,
 923    },
 924
 925    /// Rate limit exceeded
 926    RateLimit { retry_after: Duration },
 927
 928    /// Server overloaded
 929    ServerOverloaded { retry_after: Option<Duration> },
 930
 931    /// API returned an error response
 932    ApiError(ApiError),
 933}
 934
 935#[derive(Debug, Serialize, Deserialize, Error)]
 936#[error("Anthropic API Error: {error_type}: {message}")]
 937pub struct ApiError {
 938    #[serde(rename = "type")]
 939    pub error_type: String,
 940    pub message: String,
 941}
 942
 943/// An Anthropic API error code.
 944/// <https://docs.anthropic.com/en/api/errors#http-errors>
 945#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
 946#[strum(serialize_all = "snake_case")]
 947pub enum ApiErrorCode {
 948    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
 949    InvalidRequestError,
 950    /// 401 - `authentication_error`: There's an issue with your API key.
 951    AuthenticationError,
 952    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
 953    PermissionError,
 954    /// 404 - `not_found_error`: The requested resource was not found.
 955    NotFoundError,
 956    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
 957    RequestTooLarge,
 958    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
 959    RateLimitError,
 960    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
 961    ApiError,
 962    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
 963    OverloadedError,
 964}
 965
 966impl ApiError {
 967    pub fn code(&self) -> Option<ApiErrorCode> {
 968        ApiErrorCode::from_str(&self.error_type).ok()
 969    }
 970
 971    pub fn is_rate_limit_error(&self) -> bool {
 972        matches!(self.error_type.as_str(), "rate_limit_error")
 973    }
 974
 975    pub fn match_window_exceeded(&self) -> Option<u64> {
 976        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
 977            return None;
 978        };
 979
 980        parse_prompt_too_long(&self.message)
 981    }
 982}
 983
 984pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
 985    message
 986        .strip_prefix("prompt is too long: ")?
 987        .split_once(" tokens")?
 988        .0
 989        .parse()
 990        .ok()
 991}
 992
 993#[test]
 994fn test_match_window_exceeded() {
 995    let error = ApiError {
 996        error_type: "invalid_request_error".to_string(),
 997        message: "prompt is too long: 220000 tokens > 200000".to_string(),
 998    };
 999    assert_eq!(error.match_window_exceeded(), Some(220_000));
1000
1001    let error = ApiError {
1002        error_type: "invalid_request_error".to_string(),
1003        message: "prompt is too long: 1234953 tokens".to_string(),
1004    };
1005    assert_eq!(error.match_window_exceeded(), Some(1234953));
1006
1007    let error = ApiError {
1008        error_type: "invalid_request_error".to_string(),
1009        message: "not a prompt length error".to_string(),
1010    };
1011    assert_eq!(error.match_window_exceeded(), None);
1012
1013    let error = ApiError {
1014        error_type: "rate_limit_error".to_string(),
1015        message: "prompt is too long: 12345 tokens".to_string(),
1016    };
1017    assert_eq!(error.match_window_exceeded(), None);
1018
1019    let error = ApiError {
1020        error_type: "invalid_request_error".to_string(),
1021        message: "prompt is too long: invalid tokens".to_string(),
1022    };
1023    assert_eq!(error.match_window_exceeded(), None);
1024}