anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
  12use strum::{EnumIter, EnumString};
  13use thiserror::Error;
  14
  15pub mod batches;
  16
/// Base URL of the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// `Anthropic-Beta` header value that enables the 1M-token context window
/// (used for [`Model::ClaudeSonnet4_5_1mContext`] in [`Model::beta_headers`]).
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
  20
/// Prompt-caching parameters for a model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Presumably the minimum prompt size (in tokens) before caching is worth
    // attempting — TODO confirm at the call site that consumes this.
    pub min_total_token: u64,
    // NOTE(review): looks like a flag for speculative cache writes; verify
    // against the caller.
    pub should_speculate: bool,
    // Maximum number of cache breakpoints ("anchors") allowed per request.
    pub max_cache_anchors: usize,
}
  28
/// How a model's extended-thinking capability should be driven.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No extended thinking.
    #[default]
    Default,
    /// Extended thinking with an optional explicit token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    /// Model-managed thinking — presumably maps to [`Thinking::Adaptive`] in
    /// the request; confirm at the request-building call site.
    AdaptiveThinking,
}
  39
  40impl From<ModelMode> for AnthropicModelMode {
  41    fn from(value: ModelMode) -> Self {
  42        match value {
  43            ModelMode::Default => AnthropicModelMode::Default,
  44            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
  45        }
  46    }
  47}
  48
  49impl From<AnthropicModelMode> for ModelMode {
  50    fn from(value: AnthropicModelMode) -> Self {
  51        match value {
  52            AnthropicModelMode::Default => ModelMode::Default,
  53            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
  54            AnthropicModelMode::AdaptiveThinking => ModelMode::Default,
  55        }
  56    }
  57}
  58
/// The set of Anthropic models known to this crate.
///
/// The serde aliases accept the `-latest`, `-thinking`, and `-1m-context`
/// spellings of each id so previously-saved ids still deserialize to the
/// right variant.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    /// Sonnet 4.5 with the 1M-token context beta enabled; see
    /// [`CONTEXT_1M_BETA_HEADER`] and [`Model::beta_headers`].
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest",
        alias = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    /// The default model.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model outside the built-in list.
    #[serde(rename = "custom")]
    Custom {
        /// The model id sent to the API.
        name: String,
        /// Context token limit reported by [`Model::max_token_count`].
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        /// Extra values joined into the `Anthropic-Beta` header.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
 154
impl Model {
    /// The default "fast" model (used where latency matters more than quality).
    pub fn default_fast() -> Self {
        Self::ClaudeHaiku4_5
    }

    /// Resolves a model id string to a known variant by prefix matching.
    ///
    /// Longer prefixes are deliberately checked before shorter ones (e.g.
    /// `claude-opus-4-6` before `claude-opus-4`) so that more specific ids
    /// are not swallowed by a shorter family prefix. Do not reorder.
    pub fn from_id(id: &str) -> Result<Self> {
        if id.starts_with("claude-opus-4-6") {
            return Ok(Self::ClaudeOpus4_6);
        }

        if id.starts_with("claude-opus-4-5") {
            return Ok(Self::ClaudeOpus4_5);
        }

        if id.starts_with("claude-opus-4-1") {
            return Ok(Self::ClaudeOpus4_1);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-6") {
            return Ok(Self::ClaudeSonnet4_6);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context") {
            return Ok(Self::ClaudeSonnet4_5_1mContext);
        }

        if id.starts_with("claude-sonnet-4-5") {
            return Ok(Self::ClaudeSonnet4_5);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-haiku-4-5") {
            return Ok(Self::ClaudeHaiku4_5);
        }

        if id.starts_with("claude-3-haiku") {
            return Ok(Self::Claude3Haiku);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }

    /// The model's canonical id (mostly `-latest` spellings). Contrast with
    /// [`Self::request_id`], which returns the dated API snapshot ids.
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6 => "claude-opus-4-6",
            Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
            // The 1M-context variant hits the same underlying model; the
            // larger window is selected via the beta header instead.
            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-20250929",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// Human-readable name shown in the UI.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    /// Prompt-caching configuration. All built-in models share the same
    /// settings; custom models supply their own (or `None`).
    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
        }
    }

    /// Maximum context window size, in tokens.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => 200_000,
            Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_5_1mContext | Self::ClaudeSonnet4_6 => {
                1_000_000
            }
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    /// Maximum number of output tokens per request.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5 => 64_000,
            Self::ClaudeOpus4_6 => 128_000,
            Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    /// Default sampling temperature (1.0 unless a custom model overrides it).
    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    /// The default thinking mode for this model: adaptive where supported,
    /// otherwise budgeted thinking (4096 tokens), otherwise none.
    pub fn mode(&self) -> AnthropicModelMode {
        if self.supports_adaptive_thinking() {
            AnthropicModelMode::AdaptiveThinking
        } else if self.supports_thinking() {
            AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            }
        } else {
            AnthropicModelMode::Default
        }
    }

    /// Whether the model supports extended thinking at all.
    pub fn supports_thinking(&self) -> bool {
        matches!(
            self,
            Self::ClaudeOpus4
                | Self::ClaudeOpus4_1
                | Self::ClaudeOpus4_5
                | Self::ClaudeOpus4_6
                | Self::ClaudeSonnet4
                | Self::ClaudeSonnet4_5
                | Self::ClaudeSonnet4_5_1mContext
                | Self::ClaudeSonnet4_6
                | Self::ClaudeHaiku4_5
        )
    }

    /// Whether the model supports adaptive (model-managed) thinking.
    pub fn supports_adaptive_thinking(&self) -> bool {
        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
    }

    /// Comma-joined value for the `Anthropic-Beta` request header, or `None`
    /// when no beta features apply. Blank entries from custom models are
    /// filtered out.
    pub fn beta_headers(&self) -> Option<String> {
        let mut headers = vec![];

        match self {
            Self::ClaudeSonnet4_5_1mContext => {
                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
            }
            Self::Custom {
                extra_beta_headers, ..
            } => {
                headers.extend(
                    extra_beta_headers
                        .iter()
                        .filter(|header| !header.trim().is_empty())
                        .cloned(),
                );
            }
            _ => {}
        }

        if headers.is_empty() {
            None
        } else {
            Some(headers.join(","))
        }
    }

    /// The model id to use for tool calls: a custom model's `tool_override`
    /// when present, otherwise the regular [`Self::request_id`].
    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}
 399
 400/// Generate completion with streaming.
 401pub async fn stream_completion(
 402    client: &dyn HttpClient,
 403    api_url: &str,
 404    api_key: &str,
 405    request: Request,
 406    beta_headers: Option<String>,
 407) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 408    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 409        .await
 410        .map(|output| output.0)
 411}
 412
 413/// Generate completion without streaming.
 414pub async fn non_streaming_completion(
 415    client: &dyn HttpClient,
 416    api_url: &str,
 417    api_key: &str,
 418    request: Request,
 419    beta_headers: Option<String>,
 420) -> Result<Response, AnthropicError> {
 421    let (mut response, rate_limits) =
 422        send_request(client, api_url, api_key, &request, beta_headers).await?;
 423
 424    if response.status().is_success() {
 425        let mut body = String::new();
 426        response
 427            .body_mut()
 428            .read_to_string(&mut body)
 429            .await
 430            .map_err(AnthropicError::ReadResponse)?;
 431
 432        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 433    } else {
 434        Err(handle_error_response(response, rate_limits).await)
 435    }
 436}
 437
 438async fn send_request(
 439    client: &dyn HttpClient,
 440    api_url: &str,
 441    api_key: &str,
 442    request: impl Serialize,
 443    beta_headers: Option<String>,
 444) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 445    let uri = format!("{api_url}/v1/messages");
 446
 447    let mut request_builder = HttpRequest::builder()
 448        .method(Method::POST)
 449        .uri(uri)
 450        .header("Anthropic-Version", "2023-06-01")
 451        .header("X-Api-Key", api_key.trim())
 452        .header("Content-Type", "application/json");
 453
 454    if let Some(beta_headers) = beta_headers {
 455        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 456    }
 457
 458    let serialized_request =
 459        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 460    let request = request_builder
 461        .body(AsyncBody::from(serialized_request))
 462        .map_err(AnthropicError::BuildRequestBody)?;
 463
 464    let response = client
 465        .send(request)
 466        .await
 467        .map_err(AnthropicError::HttpSend)?;
 468
 469    let rate_limits = RateLimitInfo::from_headers(response.headers());
 470
 471    Ok((response, rate_limits))
 472}
 473
 474async fn handle_error_response(
 475    mut response: http::Response<AsyncBody>,
 476    rate_limits: RateLimitInfo,
 477) -> AnthropicError {
 478    if response.status().as_u16() == 529 {
 479        return AnthropicError::ServerOverloaded {
 480            retry_after: rate_limits.retry_after,
 481        };
 482    }
 483
 484    if let Some(retry_after) = rate_limits.retry_after {
 485        return AnthropicError::RateLimit { retry_after };
 486    }
 487
 488    let mut body = String::new();
 489    let read_result = response
 490        .body_mut()
 491        .read_to_string(&mut body)
 492        .await
 493        .map_err(AnthropicError::ReadResponse);
 494
 495    if let Err(err) = read_result {
 496        return err;
 497    }
 498
 499    match serde_json::from_str::<Event>(&body) {
 500        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 501        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 502            status_code: response.status(),
 503            message: body,
 504        },
 505    }
 506}
 507
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    // Total allowance for the resource within the current window.
    pub limit: usize,
    // How much of the allowance remains.
    pub remaining: usize,
    // When the allowance resets (from the server's RFC 3339 header).
    pub reset: DateTime<Utc>,
}
 515
 516impl RateLimit {
 517    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 518        let limit =
 519            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 520        let remaining = get_header(
 521            &format!("anthropic-ratelimit-{resource}-remaining"),
 522            headers,
 523        )?
 524        .parse()?;
 525        let reset = DateTime::parse_from_rfc3339(get_header(
 526            &format!("anthropic-ratelimit-{resource}-reset"),
 527            headers,
 528        )?)?
 529        .to_utc();
 530
 531        Ok(Self {
 532            limit,
 533            remaining,
 534            reset,
 535        })
 536    }
 537}
 538
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    // Parsed from the `retry-after` header, when present.
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
 548
 549impl RateLimitInfo {
 550    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 551        // Check if any rate limit headers exist
 552        let has_rate_limit_headers = headers
 553            .keys()
 554            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 555
 556        if !has_rate_limit_headers {
 557            return Self {
 558                retry_after: None,
 559                requests: None,
 560                tokens: None,
 561                input_tokens: None,
 562                output_tokens: None,
 563            };
 564        }
 565
 566        Self {
 567            retry_after: parse_retry_after(headers),
 568            requests: RateLimit::from_headers("requests", headers).ok(),
 569            tokens: RateLimit::from_headers("tokens", headers).ok(),
 570            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 571            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 572        }
 573    }
 574}
 575
 576/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 577/// seconds). Note that other services might specify an HTTP date or some other format for this
 578/// header. Returns `None` if the header is not present or cannot be parsed.
 579pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 580    headers
 581        .get("retry-after")
 582        .and_then(|v| v.to_str().ok())
 583        .and_then(|v| v.parse::<u64>().ok())
 584        .map(Duration::from_secs)
 585}
 586
 587fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 588    Ok(headers
 589        .get(key)
 590        .with_context(|| format!("missing header `{key}`"))?
 591        .to_str()?)
 592}
 593
/// Generate a streaming completion, also returning rate-limit info parsed
/// from the response headers when the request succeeds.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Wrap the caller's request with `stream: true` so the API responds
    // with server-sent events instead of a single JSON body.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // SSE data lines are prefixed with "data: " (or a
                        // bare "data:"); all other lines are dropped by the
                        // `?` short-circuit returning `None`.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
 640
/// The kind of cache-control marker; only `ephemeral` exists today.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
 646
/// Marks a content block as a prompt-cache breakpoint.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
 652
/// A single conversation turn in a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
 658
/// The author of a [`Message`]; serialized lowercase (`user` / `assistant`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
 665
/// A content block sent to the API, tagged by its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A previously-produced thinking block, replayed with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Opaque, server-encrypted thinking content.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image attachment.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation issued by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a prior tool invocation, keyed by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
 707
/// A tool result body: either a bare string or a list of typed parts.
/// Untagged, so deserialization tries each representation in order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
 714
/// One part of a multipart tool result (text or image).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
 721
/// A content block produced by the model, tagged by its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
 738
/// An image payload.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    // NOTE(review): presumably "base64" or "url" per the API — confirm at
    // the construction sites; SOURCE does not show the accepted values.
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
 746
 747fn is_false(value: &bool) -> bool {
 748    !value
 749}
 750
/// A tool definition advertised to the model.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    // JSON Schema describing the tool's input.
    pub input_schema: serde_json::Value,
    // Omitted from JSON when false (the default).
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
 759
/// How the model is allowed to choose tools for a request.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Force a specific tool by name.
    Tool { name: String },
    None,
}
 768
/// The `thinking` request parameter.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Extended thinking with an optional token budget.
    Enabled { budget_tokens: Option<u32> },
    /// Model-managed ("adaptive") thinking.
    Adaptive,
}
 775
/// Output effort level, serialized snake_case for both serde and strum.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
 785
/// The `output_config` request parameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
 790
/// A value that may be a bare string or a list of content blocks
/// (used for the `system` prompt). Untagged for flexible deserialization.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
 797
/// The body of a `POST /v1/messages` request. Optional fields and empty
/// collections are omitted from the serialized JSON.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
 826
/// The `speed` request parameter; defaults to `standard`.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
 834
/// A [`Request`] flattened together with the `stream` flag, as expected by
/// the streaming endpoint.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
 841
/// Request metadata (currently just an opaque user identifier).
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
 846
/// Token accounting reported by the API; all fields are optional because
/// different events populate different subsets.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
 858
/// A complete (non-streaming) Messages API response.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
 873
/// A server-sent event from the streaming Messages API, tagged by `type`.
/// Error payloads also arrive in this envelope (see `handle_error_response`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}
 897
/// An incremental update to one content block within a streaming response.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// A fragment of the JSON input for an in-progress tool call.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
 910
/// Top-level message fields delivered near the end of a stream.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
 916
/// Errors that can occur while building or sending a request to the
/// Anthropic API, or while reading and interpreting its response.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
 949
/// An error payload returned by the Anthropic API.
///
/// `error_type` holds the raw error code string (e.g.
/// `"invalid_request_error"`); see [`ApiErrorCode`] for the known values.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
 957
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// The wire format of each code is the snake_case form of the variant name,
/// as configured by `EnumString` with `serialize_all = "snake_case"`.
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
 980
 981impl ApiError {
 982    pub fn code(&self) -> Option<ApiErrorCode> {
 983        ApiErrorCode::from_str(&self.error_type).ok()
 984    }
 985
 986    pub fn is_rate_limit_error(&self) -> bool {
 987        matches!(self.error_type.as_str(), "rate_limit_error")
 988    }
 989
 990    pub fn match_window_exceeded(&self) -> Option<u64> {
 991        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
 992            return None;
 993        };
 994
 995        parse_prompt_too_long(&self.message)
 996    }
 997}
 998
 999pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
1000    message
1001        .strip_prefix("prompt is too long: ")?
1002        .split_once(" tokens")?
1003        .0
1004        .parse()
1005        .ok()
1006}
1007
1008/// Request body for the token counting API.
1009/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
1010#[derive(Debug, Serialize)]
1011pub struct CountTokensRequest {
1012    pub model: String,
1013    pub messages: Vec<Message>,
1014    #[serde(default, skip_serializing_if = "Option::is_none")]
1015    pub system: Option<StringOrContents>,
1016    #[serde(default, skip_serializing_if = "Vec::is_empty")]
1017    pub tools: Vec<Tool>,
1018    #[serde(default, skip_serializing_if = "Option::is_none")]
1019    pub thinking: Option<Thinking>,
1020    #[serde(default, skip_serializing_if = "Option::is_none")]
1021    pub tool_choice: Option<ToolChoice>,
1022}
1023
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Number of input tokens the supplied request would consume.
    pub input_tokens: u64,
}
1029
1030/// Count the number of tokens in a message without creating it.
1031pub async fn count_tokens(
1032    client: &dyn HttpClient,
1033    api_url: &str,
1034    api_key: &str,
1035    request: CountTokensRequest,
1036) -> Result<CountTokensResponse, AnthropicError> {
1037    let uri = format!("{api_url}/v1/messages/count_tokens");
1038
1039    let request_builder = HttpRequest::builder()
1040        .method(Method::POST)
1041        .uri(uri)
1042        .header("Anthropic-Version", "2023-06-01")
1043        .header("X-Api-Key", api_key.trim())
1044        .header("Content-Type", "application/json");
1045
1046    let serialized_request =
1047        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1048    let http_request = request_builder
1049        .body(AsyncBody::from(serialized_request))
1050        .map_err(AnthropicError::BuildRequestBody)?;
1051
1052    let mut response = client
1053        .send(http_request)
1054        .await
1055        .map_err(AnthropicError::HttpSend)?;
1056
1057    let rate_limits = RateLimitInfo::from_headers(response.headers());
1058
1059    if response.status().is_success() {
1060        let mut body = String::new();
1061        response
1062            .body_mut()
1063            .read_to_string(&mut body)
1064            .await
1065            .map_err(AnthropicError::ReadResponse)?;
1066
1067        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1068    } else {
1069        Err(handle_error_response(response, rate_limits).await)
1070    }
1071}
1072
#[test]
fn test_match_window_exceeded() {
    // (error_type, message, expected extracted token count)
    let cases = [
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        // Wrong error type: the token count must not be extracted.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Matching prefix but non-numeric count.
        ("invalid_request_error", "prompt is too long: invalid tokens", None),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}