anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
/// Base URL of Anthropic's public API (scheme and host only, no trailing slash).
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  16
/// Prompt-caching parameters associated with an Anthropic model.
///
/// NOTE(review): the code that consumes these fields is not in this file; the per-field
/// notes below are inferred from the field names and should be confirmed at the call sites.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    /// Presumably the minimum total token count before caching is attempted — TODO confirm.
    pub min_total_token: u64,
    /// Presumably whether cache entries may be created speculatively — TODO confirm.
    pub should_speculate: bool,
    /// Presumably the maximum number of cache anchors used per request — TODO confirm.
    pub max_cache_anchors: usize,
}
  24
/// Operating mode of an Anthropic model: plain completion or thinking.
///
/// Mirrors the settings-level `ModelMode`; conversions exist in both directions.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No thinking; this is the `Default::default()` value.
    #[default]
    Default,
    /// Thinking mode.
    Thinking {
        /// Optional token budget for thinking; `None` leaves it unspecified.
        budget_tokens: Option<u32>,
    },
}
  34
  35impl From<ModelMode> for AnthropicModelMode {
  36    fn from(value: ModelMode) -> Self {
  37        match value {
  38            ModelMode::Default => AnthropicModelMode::Default,
  39            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  40        }
  41    }
  42}
  43
  44impl From<AnthropicModelMode> for ModelMode {
  45    fn from(value: AnthropicModelMode) -> Self {
  46        match value {
  47            AnthropicModelMode::Default => ModelMode::Default,
  48            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  49        }
  50    }
  51}
  52
/// The Anthropic models known to this crate, plus a user-defined [`Model::Custom`] variant.
///
/// Each built-in variant serializes under its `rename` string and additionally accepts the
/// corresponding `-latest` alias when deserializing. `ClaudeSonnet4_5` is the `Default`.
/// The `*Thinking` variants are separate entries for the same model family.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    // The variant returned by `Model::default()`.
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-defined model; its fields supply everything the built-in variants hard-code.
    #[serde(rename = "custom")]
    Custom {
        /// Model name; used verbatim as both the model id and the request id.
        name: String,
        /// Value reported by `max_token_count` for this model.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        /// Output token limit; `max_output_tokens()` falls back to 4096 when unset.
        max_output_tokens: Option<u64>,
        /// Default sampling temperature; `default_temperature()` falls back to 1.0 when unset.
        default_temperature: Option<f32>,
        /// Extra beta header values for this model; defaults to empty when absent.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        /// Model mode; defaults to `AnthropicModelMode::Default` when absent.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
 134
 135impl Model {
 136    pub fn default_fast() -> Self {
 137        Self::Claude3_5Haiku
 138    }
 139
 140    pub fn from_id(id: &str) -> Result<Self> {
 141        if id.starts_with("claude-opus-4-5-thinking") {
 142            return Ok(Self::ClaudeOpus4_5Thinking);
 143        }
 144
 145        if id.starts_with("claude-opus-4-5") {
 146            return Ok(Self::ClaudeOpus4_5);
 147        }
 148
 149        if id.starts_with("claude-opus-4-1-thinking") {
 150            return Ok(Self::ClaudeOpus4_1Thinking);
 151        }
 152
 153        if id.starts_with("claude-opus-4-thinking") {
 154            return Ok(Self::ClaudeOpus4Thinking);
 155        }
 156
 157        if id.starts_with("claude-opus-4-1") {
 158            return Ok(Self::ClaudeOpus4_1);
 159        }
 160
 161        if id.starts_with("claude-opus-4") {
 162            return Ok(Self::ClaudeOpus4);
 163        }
 164
 165        if id.starts_with("claude-sonnet-4-5-thinking") {
 166            return Ok(Self::ClaudeSonnet4_5Thinking);
 167        }
 168
 169        if id.starts_with("claude-sonnet-4-5") {
 170            return Ok(Self::ClaudeSonnet4_5);
 171        }
 172
 173        if id.starts_with("claude-sonnet-4-thinking") {
 174            return Ok(Self::ClaudeSonnet4Thinking);
 175        }
 176
 177        if id.starts_with("claude-sonnet-4") {
 178            return Ok(Self::ClaudeSonnet4);
 179        }
 180
 181        if id.starts_with("claude-3-7-sonnet-thinking") {
 182            return Ok(Self::Claude3_7SonnetThinking);
 183        }
 184
 185        if id.starts_with("claude-3-7-sonnet") {
 186            return Ok(Self::Claude3_7Sonnet);
 187        }
 188
 189        if id.starts_with("claude-3-5-sonnet") {
 190            return Ok(Self::Claude3_5Sonnet);
 191        }
 192
 193        if id.starts_with("claude-haiku-4-5-thinking") {
 194            return Ok(Self::ClaudeHaiku4_5Thinking);
 195        }
 196
 197        if id.starts_with("claude-haiku-4-5") {
 198            return Ok(Self::ClaudeHaiku4_5);
 199        }
 200
 201        if id.starts_with("claude-3-5-haiku") {
 202            return Ok(Self::Claude3_5Haiku);
 203        }
 204
 205        if id.starts_with("claude-3-opus") {
 206            return Ok(Self::Claude3Opus);
 207        }
 208
 209        if id.starts_with("claude-3-sonnet") {
 210            return Ok(Self::Claude3Sonnet);
 211        }
 212
 213        if id.starts_with("claude-3-haiku") {
 214            return Ok(Self::Claude3Haiku);
 215        }
 216
 217        Err(anyhow!("invalid model ID: {id}"))
 218    }
 219
 220    pub fn id(&self) -> &str {
 221        match self {
 222            Self::ClaudeOpus4 => "claude-opus-4-latest",
 223            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 224            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
 225            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
 226            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 227            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
 228            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 229            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
 230            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 231            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
 232            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 233            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
 234            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
 235            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 236            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
 237            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 238            Self::Claude3Opus => "claude-3-opus-latest",
 239            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 240            Self::Claude3Haiku => "claude-3-haiku-20240307",
 241            Self::Custom { name, .. } => name,
 242        }
 243    }
 244
 245    /// The id of the model that should be used for making API requests
 246    pub fn request_id(&self) -> &str {
 247        match self {
 248            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
 249            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
 250            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
 251            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
 252            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
 253            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
 254            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
 255            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
 256            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
 257            Self::Claude3Opus => "claude-3-opus-latest",
 258            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
 259            Self::Claude3Haiku => "claude-3-haiku-20240307",
 260            Self::Custom { name, .. } => name,
 261        }
 262    }
 263
 264    pub fn display_name(&self) -> &str {
 265        match self {
 266            Self::ClaudeOpus4 => "Claude Opus 4",
 267            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 268            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
 269            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
 270            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 271            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
 272            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 273            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
 274            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 275            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
 276            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
 277            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
 278            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
 279            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 280            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
 281            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
 282            Self::Claude3Opus => "Claude 3 Opus",
 283            Self::Claude3Sonnet => "Claude 3 Sonnet",
 284            Self::Claude3Haiku => "Claude 3 Haiku",
 285            Self::Custom {
 286                name, display_name, ..
 287            } => display_name.as_ref().unwrap_or(name),
 288        }
 289    }
 290
 291    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 292        match self {
 293            Self::ClaudeOpus4
 294            | Self::ClaudeOpus4_1
 295            | Self::ClaudeOpus4Thinking
 296            | Self::ClaudeOpus4_1Thinking
 297            | Self::ClaudeOpus4_5
 298            | Self::ClaudeOpus4_5Thinking
 299            | Self::ClaudeSonnet4
 300            | Self::ClaudeSonnet4Thinking
 301            | Self::ClaudeSonnet4_5
 302            | Self::ClaudeSonnet4_5Thinking
 303            | Self::Claude3_5Sonnet
 304            | Self::ClaudeHaiku4_5
 305            | Self::ClaudeHaiku4_5Thinking
 306            | Self::Claude3_5Haiku
 307            | Self::Claude3_7Sonnet
 308            | Self::Claude3_7SonnetThinking
 309            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 310                min_total_token: 2_048,
 311                should_speculate: true,
 312                max_cache_anchors: 4,
 313            }),
 314            Self::Custom {
 315                cache_configuration,
 316                ..
 317            } => cache_configuration.clone(),
 318            _ => None,
 319        }
 320    }
 321
 322    pub fn max_token_count(&self) -> u64 {
 323        match self {
 324            Self::ClaudeOpus4
 325            | Self::ClaudeOpus4_1
 326            | Self::ClaudeOpus4Thinking
 327            | Self::ClaudeOpus4_1Thinking
 328            | Self::ClaudeOpus4_5
 329            | Self::ClaudeOpus4_5Thinking
 330            | Self::ClaudeSonnet4
 331            | Self::ClaudeSonnet4Thinking
 332            | Self::ClaudeSonnet4_5
 333            | Self::ClaudeSonnet4_5Thinking
 334            | Self::Claude3_5Sonnet
 335            | Self::ClaudeHaiku4_5
 336            | Self::ClaudeHaiku4_5Thinking
 337            | Self::Claude3_5Haiku
 338            | Self::Claude3_7Sonnet
 339            | Self::Claude3_7SonnetThinking
 340            | Self::Claude3Opus
 341            | Self::Claude3Sonnet
 342            | Self::Claude3Haiku => 200_000,
 343            Self::Custom { max_tokens, .. } => *max_tokens,
 344        }
 345    }
 346
 347    pub fn max_output_tokens(&self) -> u64 {
 348        match self {
 349            Self::ClaudeOpus4
 350            | Self::ClaudeOpus4_1
 351            | Self::ClaudeOpus4Thinking
 352            | Self::ClaudeOpus4_1Thinking
 353            | Self::ClaudeOpus4_5
 354            | Self::ClaudeOpus4_5Thinking
 355            | Self::ClaudeSonnet4
 356            | Self::ClaudeSonnet4Thinking
 357            | Self::ClaudeSonnet4_5
 358            | Self::ClaudeSonnet4_5Thinking
 359            | Self::Claude3_5Sonnet
 360            | Self::Claude3_7Sonnet
 361            | Self::Claude3_7SonnetThinking
 362            | Self::Claude3_5Haiku => 8_192,
 363            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
 364            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
 365            Self::Custom {
 366                max_output_tokens, ..
 367            } => max_output_tokens.unwrap_or(4_096),
 368        }
 369    }
 370
 371    pub fn default_temperature(&self) -> f32 {
 372        match self {
 373            Self::ClaudeOpus4
 374            | Self::ClaudeOpus4_1
 375            | Self::ClaudeOpus4Thinking
 376            | Self::ClaudeOpus4_1Thinking
 377            | Self::ClaudeOpus4_5
 378            | Self::ClaudeOpus4_5Thinking
 379            | Self::ClaudeSonnet4
 380            | Self::ClaudeSonnet4Thinking
 381            | Self::ClaudeSonnet4_5
 382            | Self::ClaudeSonnet4_5Thinking
 383            | Self::Claude3_5Sonnet
 384            | Self::Claude3_7Sonnet
 385            | Self::Claude3_7SonnetThinking
 386            | Self::ClaudeHaiku4_5
 387            | Self::ClaudeHaiku4_5Thinking
 388            | Self::Claude3_5Haiku
 389            | Self::Claude3Opus
 390            | Self::Claude3Sonnet
 391            | Self::Claude3Haiku => 1.0,
 392            Self::Custom {
 393                default_temperature,
 394                ..
 395            } => default_temperature.unwrap_or(1.0),
 396        }
 397    }
 398
 399    pub fn mode(&self) -> AnthropicModelMode {
 400        match self {
 401            Self::ClaudeOpus4
 402            | Self::ClaudeOpus4_1
 403            | Self::ClaudeOpus4_5
 404            | Self::ClaudeSonnet4
 405            | Self::ClaudeSonnet4_5
 406            | Self::Claude3_5Sonnet
 407            | Self::Claude3_7Sonnet
 408            | Self::ClaudeHaiku4_5
 409            | Self::Claude3_5Haiku
 410            | Self::Claude3Opus
 411            | Self::Claude3Sonnet
 412            | Self::Claude3Haiku => AnthropicModelMode::Default,
 413            Self::ClaudeOpus4Thinking
 414            | Self::ClaudeOpus4_1Thinking
 415            | Self::ClaudeOpus4_5Thinking
 416            | Self::ClaudeSonnet4Thinking
 417            | Self::ClaudeSonnet4_5Thinking
 418            | Self::ClaudeHaiku4_5Thinking
 419            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
 420                budget_tokens: Some(4_096),
 421            },
 422            Self::Custom { mode, .. } => mode.clone(),
 423        }
 424    }
 425
 426    pub fn beta_headers(&self) -> Option<String> {
 427        let mut headers = vec![];
 428
 429        match self {
 430            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
 431                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
 432                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
 433                headers.push("token-efficient-tools-2025-02-19".to_string());
 434            }
 435            Self::Custom {
 436                extra_beta_headers, ..
 437            } => {
 438                headers.extend(
 439                    extra_beta_headers
 440                        .iter()
 441                        .filter(|header| !header.trim().is_empty())
 442                        .cloned(),
 443                );
 444            }
 445            _ => {}
 446        }
 447
 448        if headers.is_empty() {
 449            None
 450        } else {
 451            Some(headers.join(","))
 452        }
 453    }
 454
 455    pub fn tool_model_id(&self) -> &str {
 456        if let Self::Custom {
 457            tool_override: Some(tool_override),
 458            ..
 459        } = self
 460        {
 461            tool_override
 462        } else {
 463            self.request_id()
 464        }
 465    }
 466}
 467
 468pub async fn stream_completion(
 469    client: &dyn HttpClient,
 470    api_url: &str,
 471    api_key: &str,
 472    request: Request,
 473    beta_headers: Option<String>,
 474) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 475    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 476        .await
 477        .map(|output| output.0)
 478}
 479
/// An individual rate limit.
///
/// Populated from the `anthropic-ratelimit-{resource}-limit`, `-remaining` and `-reset`
/// response headers of a single resource.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `-limit` header.
    pub limit: usize,
    /// Value of the `-remaining` header.
    pub remaining: usize,
    /// Value of the `-reset` header (an RFC 3339 timestamp), converted to UTC.
    pub reset: DateTime<Utc>,
}
 487
 488impl RateLimit {
 489    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 490        let limit =
 491            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 492        let remaining = get_header(
 493            &format!("anthropic-ratelimit-{resource}-remaining"),
 494            headers,
 495        )?
 496        .parse()?;
 497        let reset = DateTime::parse_from_rfc3339(get_header(
 498            &format!("anthropic-ratelimit-{resource}-reset"),
 499            headers,
 500        )?)?
 501        .to_utc();
 502
 503        Ok(Self {
 504            limit,
 505            remaining,
 506            reset,
 507        })
 508    }
 509}
 510
/// Rate-limit information extracted from a response's headers.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Parsed `retry-after` header, when present and a whole number of seconds.
    pub retry_after: Option<Duration>,
    /// Limit for the `requests` resource, when its headers parsed successfully.
    pub requests: Option<RateLimit>,
    /// Limit for the `tokens` resource, when its headers parsed successfully.
    pub tokens: Option<RateLimit>,
    /// Limit for the `input-tokens` resource, when its headers parsed successfully.
    pub input_tokens: Option<RateLimit>,
    /// Limit for the `output-tokens` resource, when its headers parsed successfully.
    pub output_tokens: Option<RateLimit>,
}
 520
 521impl RateLimitInfo {
 522    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 523        // Check if any rate limit headers exist
 524        let has_rate_limit_headers = headers
 525            .keys()
 526            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 527
 528        if !has_rate_limit_headers {
 529            return Self {
 530                retry_after: None,
 531                requests: None,
 532                tokens: None,
 533                input_tokens: None,
 534                output_tokens: None,
 535            };
 536        }
 537
 538        Self {
 539            retry_after: parse_retry_after(headers),
 540            requests: RateLimit::from_headers("requests", headers).ok(),
 541            tokens: RateLimit::from_headers("tokens", headers).ok(),
 542            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 543            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 544        }
 545    }
 546}
 547
 548/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 549/// seconds). Note that other services might specify an HTTP date or some other format for this
 550/// header. Returns `None` if the header is not present or cannot be parsed.
 551pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 552    headers
 553        .get("retry-after")
 554        .and_then(|v| v.to_str().ok())
 555        .and_then(|v| v.parse::<u64>().ok())
 556        .map(Duration::from_secs)
 557}
 558
 559fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 560    Ok(headers
 561        .get(key)
 562        .with_context(|| format!("missing header `{key}`"))?
 563        .to_str()?)
 564}
 565
 566pub async fn stream_completion_with_rate_limit_info(
 567    client: &dyn HttpClient,
 568    api_url: &str,
 569    api_key: &str,
 570    request: Request,
 571    beta_headers: Option<String>,
 572) -> Result<
 573    (
 574        BoxStream<'static, Result<Event, AnthropicError>>,
 575        Option<RateLimitInfo>,
 576    ),
 577    AnthropicError,
 578> {
 579    let request = StreamingRequest {
 580        base: request,
 581        stream: true,
 582    };
 583    let uri = format!("{api_url}/v1/messages");
 584
 585    let mut request_builder = HttpRequest::builder()
 586        .method(Method::POST)
 587        .uri(uri)
 588        .header("Anthropic-Version", "2023-06-01")
 589        .header("X-Api-Key", api_key.trim())
 590        .header("Content-Type", "application/json");
 591
 592    if let Some(beta_headers) = beta_headers {
 593        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 594    }
 595
 596    let serialized_request =
 597        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 598    let request = request_builder
 599        .body(AsyncBody::from(serialized_request))
 600        .map_err(AnthropicError::BuildRequestBody)?;
 601
 602    let mut response = client
 603        .send(request)
 604        .await
 605        .map_err(AnthropicError::HttpSend)?;
 606    let rate_limits = RateLimitInfo::from_headers(response.headers());
 607    if response.status().is_success() {
 608        let reader = BufReader::new(response.into_body());
 609        let stream = reader
 610            .lines()
 611            .filter_map(|line| async move {
 612                match line {
 613                    Ok(line) => {
 614                        let line = line.strip_prefix("data: ")?;
 615                        match serde_json::from_str(line) {
 616                            Ok(response) => Some(Ok(response)),
 617                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 618                        }
 619                    }
 620                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 621                }
 622            })
 623            .boxed();
 624        Ok((stream, Some(rate_limits)))
 625    } else if response.status().as_u16() == 529 {
 626        Err(AnthropicError::ServerOverloaded {
 627            retry_after: rate_limits.retry_after,
 628        })
 629    } else if let Some(retry_after) = rate_limits.retry_after {
 630        Err(AnthropicError::RateLimit { retry_after })
 631    } else {
 632        let mut body = String::new();
 633        response
 634            .body_mut()
 635            .read_to_string(&mut body)
 636            .await
 637            .map_err(AnthropicError::ReadResponse)?;
 638
 639        match serde_json::from_str::<Event>(&body) {
 640            Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
 641            Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
 642                status_code: response.status(),
 643                message: body,
 644            }),
 645        }
 646    }
 647}
 648
/// Kind of cache control; variant names are (de)serialized in lowercase.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    /// Serialized as `"ephemeral"`.
    Ephemeral,
}
 654
/// Cache-control marker that can be attached to request content blocks.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// The kind of cache control; appears as `type` in JSON.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
 660
/// A single request message: who authored it and its content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    /// Author of the message.
    pub role: Role,
    /// The content blocks that make up the message.
    pub content: Vec<RequestContent>,
}
 666
/// Author of a message; (de)serialized in lowercase (`user`, `assistant`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
 673
/// A content block inside a request [`Message`], discriminated by a `type` field in JSON.
///
/// Wherever a variant has a `cache_control` field, it is left out of the serialized form
/// when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text (`type: "text"`).
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A thinking block together with its signature (`type: "thinking"`).
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A redacted thinking block carried as an opaque `data` string
    /// (`type: "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image (`type: "image"`).
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation with its JSON input (`type: "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, linked by `tool_use_id` (`type: "tool_result"`).
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
 715
/// Payload of a tool result. Untagged: serialized either as a bare string or as an array
/// of parts.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    /// A single string.
    Plain(String),
    /// A list of text and/or image parts.
    Multipart(Vec<ToolResultPart>),
}
 722
/// One part of a multipart tool result, discriminated by a lowercase `type` field
/// (`text` or `image`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
 729
/// A content block received in a response, discriminated by a `type` field in JSON.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// Plain text (`type: "text"`).
    #[serde(rename = "text")]
    Text { text: String },
    /// A thinking block (`type: "thinking"`).
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// A redacted thinking block carried as an opaque `data` string
    /// (`type: "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation requested by the model (`type: "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
 746
/// Describes where an image's data comes from and what format it is in.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Kind of source; appears as `type` in JSON.
    #[serde(rename = "type")]
    pub source_type: String,
    /// Media type of the image. NOTE(review): presumably a MIME type such as `image/png` —
    /// confirm against callers.
    pub media_type: String,
    /// The image payload. NOTE(review): presumably base64-encoded — confirm against callers.
    pub data: String,
}
 754
/// A tool definition sent with a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    /// Name of the tool.
    pub name: String,
    /// Description of the tool.
    pub description: String,
    /// Schema of the tool's input, as arbitrary JSON.
    pub input_schema: serde_json::Value,
}
 761
/// Tool-selection setting for a request, discriminated by a lowercase `type` field
/// (`auto`, `any`, `tool`, `none`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Selects the tool with the given `name`.
    Tool { name: String },
    None,
}
 770
/// Thinking configuration for a request, discriminated by a lowercase `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Thinking is enabled (`type: "enabled"`) with an optional token budget.
    Enabled { budget_tokens: Option<u32> },
}
 776
/// A value that is either a bare string or a list of content blocks. Untagged: the JSON
/// form is the string or the array itself.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
 783
/// Body of a messages request.
///
/// Apart from `model`, `max_tokens` and `messages`, every field defaults when absent on
/// deserialization and is omitted on serialization when empty (`Vec`) or `None`.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Identifier of the model to run.
    pub model: String,
    /// Token limit sent with the request.
    pub max_tokens: u64,
    /// The conversation so far.
    pub messages: Vec<Message>,
    /// Tools made available to the model.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Thinking configuration.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Tool-selection setting.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a string or as content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Request metadata.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// Stop sequences.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    /// Sampling temperature.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Top-k sampling parameter.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Top-p sampling parameter.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
 808
/// A [`Request`] plus a `stream` flag. The request's fields are flattened, so `stream`
/// appears alongside them at the top level of the JSON body.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    /// The underlying request; its fields are inlined into this struct's JSON object.
    #[serde(flatten)]
    pub base: Request,
    /// Streaming flag sent with the request.
    pub stream: bool,
}
 815
/// Metadata attached to a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// Optional user identifier.
    pub user_id: Option<String>,
}
 820
/// Token usage counters. Every counter is optional: it defaults to `None` when absent on
/// deserialization and is omitted on serialization when `None`.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    /// Input token count.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    /// Output token count.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    /// Input tokens counted towards cache creation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    /// Input tokens counted as cache reads.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
 832
/// A message object as returned by the API; carried by the `message_start` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    /// Identifier of the response.
    pub id: String,
    /// Object type; appears as `type` in JSON.
    #[serde(rename = "type")]
    pub response_type: String,
    /// Author of the message.
    pub role: Role,
    /// Content blocks of the message.
    pub content: Vec<ResponseContent>,
    /// Identifier of the model that produced the message.
    pub model: String,
    /// Stop reason, when present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// Stop sequence, when present.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    /// Token usage reported with the message.
    pub usage: Usage,
}
 847
/// A streaming event, discriminated by a `type` field in JSON.
///
/// This is what each `data: ` line of a streaming response is decoded into; an error
/// response body is also decoded as an `Event` to look for the `error` variant.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// `message_start`: carries the message object.
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    /// `content_block_start`: a content block begins at `index`.
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// `content_block_delta`: an incremental update for the block at `index`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    /// `content_block_stop`: the block at `index` is complete.
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// `message_delta`: message-level update together with usage counters.
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    /// `message_stop`: carries no payload.
    #[serde(rename = "message_stop")]
    MessageStop,
    /// `ping`: carries no payload.
    #[serde(rename = "ping")]
    Ping,
    /// `error`: an error reported by the API.
    #[serde(rename = "error")]
    Error { error: ApiError },
}
 871
 872#[derive(Debug, Serialize, Deserialize)]
 873#[serde(tag = "type")]
 874pub enum ContentDelta {
 875    #[serde(rename = "text_delta")]
 876    TextDelta { text: String },
 877    #[serde(rename = "thinking_delta")]
 878    ThinkingDelta { thinking: String },
 879    #[serde(rename = "signature_delta")]
 880    SignatureDelta { signature: String },
 881    #[serde(rename = "input_json_delta")]
 882    InputJsonDelta { partial_json: String },
 883}
 884
 885#[derive(Debug, Serialize, Deserialize)]
 886pub struct MessageDelta {
 887    pub stop_reason: Option<String>,
 888    pub stop_sequence: Option<String>,
 889}
 890
 891#[derive(Debug)]
 892pub enum AnthropicError {
 893    /// Failed to serialize the HTTP request body to JSON
 894    SerializeRequest(serde_json::Error),
 895
 896    /// Failed to construct the HTTP request body
 897    BuildRequestBody(http::Error),
 898
 899    /// Failed to send the HTTP request
 900    HttpSend(anyhow::Error),
 901
 902    /// Failed to deserialize the response from JSON
 903    DeserializeResponse(serde_json::Error),
 904
 905    /// Failed to read from response stream
 906    ReadResponse(io::Error),
 907
 908    /// HTTP error response from the API
 909    HttpResponseError {
 910        status_code: StatusCode,
 911        message: String,
 912    },
 913
 914    /// Rate limit exceeded
 915    RateLimit { retry_after: Duration },
 916
 917    /// Server overloaded
 918    ServerOverloaded { retry_after: Option<Duration> },
 919
 920    /// API returned an error response
 921    ApiError(ApiError),
 922}
 923
 924#[derive(Debug, Serialize, Deserialize, Error)]
 925#[error("Anthropic API Error: {error_type}: {message}")]
 926pub struct ApiError {
 927    #[serde(rename = "type")]
 928    pub error_type: String,
 929    pub message: String,
 930}
 931
 932/// An Anthropic API error code.
 933/// <https://docs.anthropic.com/en/api/errors#http-errors>
 934#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
 935#[strum(serialize_all = "snake_case")]
 936pub enum ApiErrorCode {
 937    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
 938    InvalidRequestError,
 939    /// 401 - `authentication_error`: There's an issue with your API key.
 940    AuthenticationError,
 941    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
 942    PermissionError,
 943    /// 404 - `not_found_error`: The requested resource was not found.
 944    NotFoundError,
 945    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
 946    RequestTooLarge,
 947    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
 948    RateLimitError,
 949    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
 950    ApiError,
 951    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
 952    OverloadedError,
 953}
 954
 955impl ApiError {
 956    pub fn code(&self) -> Option<ApiErrorCode> {
 957        ApiErrorCode::from_str(&self.error_type).ok()
 958    }
 959
 960    pub fn is_rate_limit_error(&self) -> bool {
 961        matches!(self.error_type.as_str(), "rate_limit_error")
 962    }
 963
 964    pub fn match_window_exceeded(&self) -> Option<u64> {
 965        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
 966            return None;
 967        };
 968
 969        parse_prompt_too_long(&self.message)
 970    }
 971}
 972
 973pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
 974    message
 975        .strip_prefix("prompt is too long: ")?
 976        .split_once(" tokens")?
 977        .0
 978        .parse()
 979        .ok()
 980}
 981
 982#[test]
 983fn test_match_window_exceeded() {
 984    let error = ApiError {
 985        error_type: "invalid_request_error".to_string(),
 986        message: "prompt is too long: 220000 tokens > 200000".to_string(),
 987    };
 988    assert_eq!(error.match_window_exceeded(), Some(220_000));
 989
 990    let error = ApiError {
 991        error_type: "invalid_request_error".to_string(),
 992        message: "prompt is too long: 1234953 tokens".to_string(),
 993    };
 994    assert_eq!(error.match_window_exceeded(), Some(1234953));
 995
 996    let error = ApiError {
 997        error_type: "invalid_request_error".to_string(),
 998        message: "not a prompt length error".to_string(),
 999    };
1000    assert_eq!(error.match_window_exceeded(), None);
1001
1002    let error = ApiError {
1003        error_type: "rate_limit_error".to_string(),
1004        message: "prompt is too long: 12345 tokens".to_string(),
1005    };
1006    assert_eq!(error.match_window_exceeded(), None);
1007
1008    let error = ApiError {
1009        error_type: "invalid_request_error".to_string(),
1010        message: "prompt is too long: invalid tokens".to_string(),
1011    };
1012    assert_eq!(error.match_window_exceeded(), None);
1013}