anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11use strum::{EnumIter, EnumString};
  12use thiserror::Error;
  13
// Declares the public `batches` submodule; its contents live in a separate file.
pub mod batches;

/// Base URL of Anthropic's hosted API, without a trailing slash.
///
/// `send_request` appends `/v1/messages` to whatever base URL it is handed.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// Beta identifier that `Model::beta_headers` returns for `Model::ClaudeSonnet4_5_1mContext`.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
  19
// Prompt-caching parameters attached to a model, as returned by `Model::cache_configuration`.
// Every built-in model reports `min_total_token: 2_048`, `should_speculate: true` and
// `max_cache_anchors: 4`; custom models supply their own value (or none).
// (`//` rather than `///` so the schemars-derived schema text is left unchanged.)
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum prompt size, in tokens, before caching is
    // attempted — the consumer is outside this file; confirm.
    pub min_total_token: u64,
    // NOTE(review): consumer not visible here; meaning of "speculate" to be confirmed.
    pub should_speculate: bool,
    // NOTE(review): presumably the maximum number of cache breakpoints per request — confirm.
    pub max_cache_anchors: usize,
}
  27
// Reasoning mode of a model. `Model::mode` yields `AdaptiveThinking` for models that support
// adaptive thinking, `Thinking { budget_tokens: Some(4_096) }` for models that only support
// thinking, and `Default` for everything else.
// (`//` rather than `///` so the schemars-derived schema text is left unchanged.)
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    // Value produced by `Default::default()`.
    #[default]
    Default,
    // Thinking with an optional token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    // Thinking without an explicit budget field.
    AdaptiveThinking,
}
  38
// The Anthropic models this crate knows about, plus a user-defined `Custom` variant.
//
// Each built-in variant serializes as its `rename` string and additionally deserializes from
// every listed `alias`; the `-latest`, `-thinking` and (where present) `-1m-context` spellings
// therefore all collapse onto the same variant. `ClaudeSonnet4_6` is the `Default`.
// (`//` rather than `///` for new comments so the schemars-derived schema text is unchanged.)
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    // Unlike the 4.6 models, the 1M-context flavour of Sonnet 4.5 is its own variant.
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest",
        alias = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    // User-defined model; every accessor on `Model` reads its answer from these fields.
    #[serde(rename = "custom")]
    Custom {
        // Returned verbatim by `Model::id` and `Model::request_id`.
        name: String,
        // Returned by `Model::max_token_count`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        // `Model::max_output_tokens` falls back to 4_096 when this is `None`.
        max_output_tokens: Option<u64>,
        // `Model::default_temperature` falls back to 1.0 when this is `None`.
        default_temperature: Option<f32>,
        // Non-blank entries are comma-joined by `Model::beta_headers`. Defaults to empty.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        // NOTE(review): `Model::mode` as written in this file never reads this field, so a
        // custom model always reports `AnthropicModelMode::Default` there — confirm intended.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
 134
 135impl Model {
 136    pub fn default_fast() -> Self {
 137        Self::ClaudeHaiku4_5
 138    }
 139
 140    pub fn from_id(id: &str) -> Result<Self> {
 141        if id.starts_with("claude-opus-4-6") {
 142            return Ok(Self::ClaudeOpus4_6);
 143        }
 144
 145        if id.starts_with("claude-opus-4-5") {
 146            return Ok(Self::ClaudeOpus4_5);
 147        }
 148
 149        if id.starts_with("claude-opus-4-1") {
 150            return Ok(Self::ClaudeOpus4_1);
 151        }
 152
 153        if id.starts_with("claude-opus-4") {
 154            return Ok(Self::ClaudeOpus4);
 155        }
 156
 157        if id.starts_with("claude-sonnet-4-6") {
 158            return Ok(Self::ClaudeSonnet4_6);
 159        }
 160
 161        if id.starts_with("claude-sonnet-4-5-1m-context") {
 162            return Ok(Self::ClaudeSonnet4_5_1mContext);
 163        }
 164
 165        if id.starts_with("claude-sonnet-4-5") {
 166            return Ok(Self::ClaudeSonnet4_5);
 167        }
 168
 169        if id.starts_with("claude-sonnet-4") {
 170            return Ok(Self::ClaudeSonnet4);
 171        }
 172
 173        if id.starts_with("claude-haiku-4-5") {
 174            return Ok(Self::ClaudeHaiku4_5);
 175        }
 176
 177        if id.starts_with("claude-3-haiku") {
 178            return Ok(Self::Claude3Haiku);
 179        }
 180
 181        Err(anyhow!("invalid model ID: {id}"))
 182    }
 183
 184    pub fn id(&self) -> &str {
 185        match self {
 186            Self::ClaudeOpus4 => "claude-opus-4-latest",
 187            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 188            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 189            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
 190            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 191            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 192            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
 193            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
 194            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 195            Self::Claude3Haiku => "claude-3-haiku-20240307",
 196            Self::Custom { name, .. } => name,
 197        }
 198    }
 199
 200    /// The id of the model that should be used for making API requests
 201    pub fn request_id(&self) -> &str {
 202        match self {
 203            Self::ClaudeOpus4 => "claude-opus-4-20250514",
 204            Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
 205            Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
 206            Self::ClaudeOpus4_6 => "claude-opus-4-6",
 207            Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
 208            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-20250929",
 209            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
 210            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
 211            Self::Claude3Haiku => "claude-3-haiku-20240307",
 212            Self::Custom { name, .. } => name,
 213        }
 214    }
 215
 216    pub fn display_name(&self) -> &str {
 217        match self {
 218            Self::ClaudeOpus4 => "Claude Opus 4",
 219            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 220            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 221            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
 222            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 223            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 224            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
 225            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
 226            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 227            Self::Claude3Haiku => "Claude 3 Haiku",
 228            Self::Custom {
 229                name, display_name, ..
 230            } => display_name.as_ref().unwrap_or(name),
 231        }
 232    }
 233
 234    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 235        match self {
 236            Self::ClaudeOpus4
 237            | Self::ClaudeOpus4_1
 238            | Self::ClaudeOpus4_5
 239            | Self::ClaudeOpus4_6
 240            | Self::ClaudeSonnet4
 241            | Self::ClaudeSonnet4_5
 242            | Self::ClaudeSonnet4_5_1mContext
 243            | Self::ClaudeSonnet4_6
 244            | Self::ClaudeHaiku4_5
 245            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 246                min_total_token: 2_048,
 247                should_speculate: true,
 248                max_cache_anchors: 4,
 249            }),
 250            Self::Custom {
 251                cache_configuration,
 252                ..
 253            } => cache_configuration.clone(),
 254        }
 255    }
 256
 257    pub fn max_token_count(&self) -> u64 {
 258        match self {
 259            Self::ClaudeOpus4
 260            | Self::ClaudeOpus4_1
 261            | Self::ClaudeOpus4_5
 262            | Self::ClaudeSonnet4
 263            | Self::ClaudeSonnet4_5
 264            | Self::ClaudeHaiku4_5
 265            | Self::Claude3Haiku => 200_000,
 266            Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_5_1mContext | Self::ClaudeSonnet4_6 => {
 267                1_000_000
 268            }
 269            Self::Custom { max_tokens, .. } => *max_tokens,
 270        }
 271    }
 272
 273    pub fn max_output_tokens(&self) -> u64 {
 274        match self {
 275            Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
 276            Self::ClaudeOpus4_5
 277            | Self::ClaudeSonnet4
 278            | Self::ClaudeSonnet4_5
 279            | Self::ClaudeSonnet4_5_1mContext
 280            | Self::ClaudeSonnet4_6
 281            | Self::ClaudeHaiku4_5 => 64_000,
 282            Self::ClaudeOpus4_6 => 128_000,
 283            Self::Claude3Haiku => 4_096,
 284            Self::Custom {
 285                max_output_tokens, ..
 286            } => max_output_tokens.unwrap_or(4_096),
 287        }
 288    }
 289
 290    pub fn default_temperature(&self) -> f32 {
 291        match self {
 292            Self::ClaudeOpus4
 293            | Self::ClaudeOpus4_1
 294            | Self::ClaudeOpus4_5
 295            | Self::ClaudeOpus4_6
 296            | Self::ClaudeSonnet4
 297            | Self::ClaudeSonnet4_5
 298            | Self::ClaudeSonnet4_5_1mContext
 299            | Self::ClaudeSonnet4_6
 300            | Self::ClaudeHaiku4_5
 301            | Self::Claude3Haiku => 1.0,
 302            Self::Custom {
 303                default_temperature,
 304                ..
 305            } => default_temperature.unwrap_or(1.0),
 306        }
 307    }
 308
 309    pub fn mode(&self) -> AnthropicModelMode {
 310        if self.supports_adaptive_thinking() {
 311            AnthropicModelMode::AdaptiveThinking
 312        } else if self.supports_thinking() {
 313            AnthropicModelMode::Thinking {
 314                budget_tokens: Some(4_096),
 315            }
 316        } else {
 317            AnthropicModelMode::Default
 318        }
 319    }
 320
 321    pub fn supports_thinking(&self) -> bool {
 322        matches!(
 323            self,
 324            Self::ClaudeOpus4
 325                | Self::ClaudeOpus4_1
 326                | Self::ClaudeOpus4_5
 327                | Self::ClaudeOpus4_6
 328                | Self::ClaudeSonnet4
 329                | Self::ClaudeSonnet4_5
 330                | Self::ClaudeSonnet4_5_1mContext
 331                | Self::ClaudeSonnet4_6
 332                | Self::ClaudeHaiku4_5
 333        )
 334    }
 335
 336    pub fn supports_adaptive_thinking(&self) -> bool {
 337        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
 338    }
 339
 340    pub fn beta_headers(&self) -> Option<String> {
 341        let mut headers = vec![];
 342
 343        match self {
 344            Self::ClaudeSonnet4_5_1mContext => {
 345                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
 346            }
 347            Self::Custom {
 348                extra_beta_headers, ..
 349            } => {
 350                headers.extend(
 351                    extra_beta_headers
 352                        .iter()
 353                        .filter(|header| !header.trim().is_empty())
 354                        .cloned(),
 355                );
 356            }
 357            _ => {}
 358        }
 359
 360        if headers.is_empty() {
 361            None
 362        } else {
 363            Some(headers.join(","))
 364        }
 365    }
 366
 367    pub fn tool_model_id(&self) -> &str {
 368        if let Self::Custom {
 369            tool_override: Some(tool_override),
 370            ..
 371        } = self
 372        {
 373            tool_override
 374        } else {
 375            self.request_id()
 376        }
 377    }
 378}
 379
 380/// Generate completion with streaming.
 381pub async fn stream_completion(
 382    client: &dyn HttpClient,
 383    api_url: &str,
 384    api_key: &str,
 385    request: Request,
 386    beta_headers: Option<String>,
 387) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 388    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 389        .await
 390        .map(|output| output.0)
 391}
 392
 393/// Generate completion without streaming.
 394pub async fn non_streaming_completion(
 395    client: &dyn HttpClient,
 396    api_url: &str,
 397    api_key: &str,
 398    request: Request,
 399    beta_headers: Option<String>,
 400) -> Result<Response, AnthropicError> {
 401    let (mut response, rate_limits) =
 402        send_request(client, api_url, api_key, &request, beta_headers).await?;
 403
 404    if response.status().is_success() {
 405        let mut body = String::new();
 406        response
 407            .body_mut()
 408            .read_to_string(&mut body)
 409            .await
 410            .map_err(AnthropicError::ReadResponse)?;
 411
 412        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 413    } else {
 414        Err(handle_error_response(response, rate_limits).await)
 415    }
 416}
 417
 418async fn send_request(
 419    client: &dyn HttpClient,
 420    api_url: &str,
 421    api_key: &str,
 422    request: impl Serialize,
 423    beta_headers: Option<String>,
 424) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 425    let uri = format!("{api_url}/v1/messages");
 426
 427    let mut request_builder = HttpRequest::builder()
 428        .method(Method::POST)
 429        .uri(uri)
 430        .header("Anthropic-Version", "2023-06-01")
 431        .header("X-Api-Key", api_key.trim())
 432        .header("Content-Type", "application/json");
 433
 434    if let Some(beta_headers) = beta_headers {
 435        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 436    }
 437
 438    let serialized_request =
 439        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 440    let request = request_builder
 441        .body(AsyncBody::from(serialized_request))
 442        .map_err(AnthropicError::BuildRequestBody)?;
 443
 444    let response = client
 445        .send(request)
 446        .await
 447        .map_err(AnthropicError::HttpSend)?;
 448
 449    let rate_limits = RateLimitInfo::from_headers(response.headers());
 450
 451    Ok((response, rate_limits))
 452}
 453
 454async fn handle_error_response(
 455    mut response: http::Response<AsyncBody>,
 456    rate_limits: RateLimitInfo,
 457) -> AnthropicError {
 458    if response.status().as_u16() == 529 {
 459        return AnthropicError::ServerOverloaded {
 460            retry_after: rate_limits.retry_after,
 461        };
 462    }
 463
 464    if let Some(retry_after) = rate_limits.retry_after {
 465        return AnthropicError::RateLimit { retry_after };
 466    }
 467
 468    let mut body = String::new();
 469    let read_result = response
 470        .body_mut()
 471        .read_to_string(&mut body)
 472        .await
 473        .map_err(AnthropicError::ReadResponse);
 474
 475    if let Err(err) = read_result {
 476        return err;
 477    }
 478
 479    match serde_json::from_str::<Event>(&body) {
 480        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 481        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 482            status_code: response.status(),
 483            message: body,
 484        },
 485    }
 486}
 487
/// An individual rate limit.
///
/// Built by `RateLimit::from_headers` from the three `anthropic-ratelimit-{resource}-*`
/// response headers of one resource.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `anthropic-ratelimit-{resource}-limit` header.
    pub limit: usize,
    /// Value of the `anthropic-ratelimit-{resource}-remaining` header.
    pub remaining: usize,
    /// Value of the `anthropic-ratelimit-{resource}-reset` header (RFC 3339), converted to UTC.
    pub reset: DateTime<Utc>,
}
 495
 496impl RateLimit {
 497    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 498        let limit =
 499            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 500        let remaining = get_header(
 501            &format!("anthropic-ratelimit-{resource}-remaining"),
 502            headers,
 503        )?
 504        .parse()?;
 505        let reset = DateTime::parse_from_rfc3339(get_header(
 506            &format!("anthropic-ratelimit-{resource}-reset"),
 507            headers,
 508        )?)?
 509        .to_utc();
 510
 511        Ok(Self {
 512            limit,
 513            remaining,
 514            reset,
 515        })
 516    }
 517}
 518
/// Rate-limit details extracted from a response's headers.
///
/// Every field is optional: a limit is `None` whenever its headers are absent or fail to parse.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// The `retry-after` header, read as a whole number of seconds.
    pub retry_after: Option<Duration>,
    /// Limit read from the `anthropic-ratelimit-requests-*` headers.
    pub requests: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-tokens-*` headers.
    pub tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-input-tokens-*` headers.
    pub input_tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-output-tokens-*` headers.
    pub output_tokens: Option<RateLimit>,
}
 528
 529impl RateLimitInfo {
 530    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 531        // Check if any rate limit headers exist
 532        let has_rate_limit_headers = headers
 533            .keys()
 534            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 535
 536        if !has_rate_limit_headers {
 537            return Self {
 538                retry_after: None,
 539                requests: None,
 540                tokens: None,
 541                input_tokens: None,
 542                output_tokens: None,
 543            };
 544        }
 545
 546        Self {
 547            retry_after: parse_retry_after(headers),
 548            requests: RateLimit::from_headers("requests", headers).ok(),
 549            tokens: RateLimit::from_headers("tokens", headers).ok(),
 550            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 551            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 552        }
 553    }
 554}
 555
 556/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 557/// seconds). Note that other services might specify an HTTP date or some other format for this
 558/// header. Returns `None` if the header is not present or cannot be parsed.
 559pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 560    headers
 561        .get("retry-after")
 562        .and_then(|v| v.to_str().ok())
 563        .and_then(|v| v.parse::<u64>().ok())
 564        .map(Duration::from_secs)
 565}
 566
 567fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 568    Ok(headers
 569        .get(key)
 570        .with_context(|| format!("missing header `{key}`"))?
 571        .to_str()?)
 572}
 573
 574pub async fn stream_completion_with_rate_limit_info(
 575    client: &dyn HttpClient,
 576    api_url: &str,
 577    api_key: &str,
 578    request: Request,
 579    beta_headers: Option<String>,
 580) -> Result<
 581    (
 582        BoxStream<'static, Result<Event, AnthropicError>>,
 583        Option<RateLimitInfo>,
 584    ),
 585    AnthropicError,
 586> {
 587    let request = StreamingRequest {
 588        base: request,
 589        stream: true,
 590    };
 591
 592    let (response, rate_limits) =
 593        send_request(client, api_url, api_key, &request, beta_headers).await?;
 594
 595    if response.status().is_success() {
 596        let reader = BufReader::new(response.into_body());
 597        let stream = reader
 598            .lines()
 599            .filter_map(|line| async move {
 600                match line {
 601                    Ok(line) => {
 602                        let line = line
 603                            .strip_prefix("data: ")
 604                            .or_else(|| line.strip_prefix("data:"))?;
 605
 606                        match serde_json::from_str(line) {
 607                            Ok(response) => Some(Ok(response)),
 608                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 609                        }
 610                    }
 611                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 612                }
 613            })
 614            .boxed();
 615        Ok((stream, Some(rate_limits)))
 616    } else {
 617        Err(handle_error_response(response, rate_limits).await)
 618    }
 619}
 620
/// Kind of cache control attached to a content block.
///
/// Variant names are (de)serialized in lowercase, so `Ephemeral` is written as `"ephemeral"`.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
 626
/// Cache-control marker that can be attached to a [`RequestContent`] block.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// (De)serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
 632
/// One message of a [`Request`]: who is speaking and the content blocks they contributed.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    /// Author of the message.
    pub role: Role,
    /// Content blocks, in order.
    pub content: Vec<RequestContent>,
}
 638
/// Author of a message; (de)serialized in lowercase as `"user"` or `"assistant"`.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
 645
/// A content block sent as part of a request message.
///
/// Internally tagged: the variant is identified by a `type` field carrying the `rename` value.
/// Where present, `cache_control` is left out of the serialized output when it is `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A thinking block together with its signature (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Redacted thinking, carried as an opaque `data` string (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image (`"type": "image"`).
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The outcome of a tool invocation (`"type": "tool_result"`).
    #[serde(rename = "tool_result")]
    ToolResult {
        // NOTE(review): presumably the `id` of the matching `ToolUse` block — confirm.
        tool_use_id: String,
        // Always serialized, including when `false`.
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
 687
/// Payload of a tool result.
///
/// Untagged: `Plain` is written as a bare JSON string and `Multipart` as a JSON array. When
/// deserializing, the variants are tried in declaration order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
 694
/// One element of a multipart tool result.
///
/// Internally tagged via `type`, with lowercase variant names (`"text"`, `"image"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
 701
/// A content block received from the API.
///
/// Internally tagged: the variant is identified by a `type` field carrying the `rename` value.
/// Unlike [`RequestContent`], these blocks carry no `cache_control` and `Thinking` has no
/// `signature` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// `"type": "text"`.
    #[serde(rename = "text")]
    Text { text: String },
    /// `"type": "thinking"`.
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// `"type": "redacted_thinking"`.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// `"type": "tool_use"`.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
 718
/// Where an image's data comes from and how it is encoded.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// (De)serialized under the JSON key `type`.
    // NOTE(review): presumably "base64" for inline data — the value is set by callers; confirm.
    #[serde(rename = "type")]
    pub source_type: String,
    // NOTE(review): presumably a MIME type such as "image/png" — confirm against callers.
    pub media_type: String,
    /// The image payload as a string.
    pub data: String,
}
 726
 727fn is_false(value: &bool) -> bool {
 728    !value
 729}
 730
/// A tool definition included in a [`Request`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// Schema of the tool's input, as arbitrary JSON.
    pub input_schema: serde_json::Value,
    /// Defaults to `false` when absent and is omitted from the serialized output when `false`.
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
 739
/// Tool-selection setting of a [`Request`].
///
/// Internally tagged via `type`, with lowercase variant names: `"auto"`, `"any"`, `"tool"`
/// (which also carries the tool's `name`) and `"none"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}
 748
/// Thinking setting of a [`Request`].
///
/// Internally tagged via `type`, with lowercase variant names (`"enabled"`, `"adaptive"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    // `budget_tokens` has no `skip_serializing_if`, so `None` is serialized as `null`.
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}
 755
/// Effort level carried by [`OutputConfig`].
///
/// Uses snake_case names (`"low"`, `"medium"`, `"high"`, `"max"`) both for serde and for the
/// strum-derived `FromStr` implementation.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
 765
/// Output settings of a [`Request`]; currently only the effort level.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    // No `skip_serializing_if` here, so `None` is serialized as `null`.
    pub effort: Option<Effort>,
}
 770
/// Either a plain string or a list of content blocks; used for [`Request::system`].
///
/// Untagged: `String` is written as a bare JSON string and `Content` as a JSON array. When
/// deserializing, the variants are tried in declaration order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
 777
/// Body of a request to the messages endpoint.
///
/// `model`, `max_tokens` and `messages` are always serialized. Every other field defaults
/// when absent on deserialization and is left out of the serialized output when it is empty
/// (`Vec`) or `None` (`Option`).
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Model identifier, as a plain string.
    pub model: String,
    // NOTE(review): presumably the cap on generated tokens — confirm against the API docs.
    pub max_tokens: u64,
    /// The conversation so far.
    pub messages: Vec<Message>,
    /// Tool definitions; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, either as one string or as content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    /// Stop sequences; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
 806
/// Speed setting of a [`Request`]; (de)serialized in snake_case (`"standard"`, `"fast"`).
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    /// Value produced by `Default::default()`.
    #[default]
    Standard,
    Fast,
}
 814
/// A [`Request`] with an additional `stream` flag.
///
/// `base` is flattened, so its fields appear at the top level of the JSON object next to
/// `stream`. `stream_completion_with_rate_limit_info` builds this with `stream: true`.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
 821
/// Metadata attached to a [`Request`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    // No `skip_serializing_if` here, so `None` is serialized as `null`.
    pub user_id: Option<String>,
}
 826
/// Token usage counters reported by the API.
///
/// Every counter is optional: a missing field deserializes to `None`, and `None` fields are
/// left out when serializing.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
 838
/// A complete message returned by the API.
///
/// This is the return value of `non_streaming_completion` and the payload of
/// `Event::MessageStart`.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    /// (De)serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    /// Defaults to `None` when absent; omitted from the serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// Defaults to `None` when absent; omitted from the serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
 853
/// One event of a streaming completion, decoded from a `data:` line of the response body.
///
/// Internally tagged: the variant is identified by a `type` field carrying the `rename` value.
/// `handle_error_response` also uses the `Error` variant to decode error response bodies.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// `"type": "message_start"`.
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    /// `"type": "content_block_start"`.
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// `"type": "content_block_delta"`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    /// `"type": "content_block_stop"`.
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// `"type": "message_delta"`.
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    /// `"type": "message_stop"`.
    #[serde(rename = "message_stop")]
    MessageStop,
    /// `"type": "ping"`.
    #[serde(rename = "ping")]
    Ping,
    /// `"type": "error"`.
    #[serde(rename = "error")]
    Error { error: ApiError },
}
 877
/// Incremental update carried by `Event::ContentBlockDelta`.
///
/// Internally tagged: the variant is identified by a `type` field carrying the `rename` value.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    /// `"type": "text_delta"`.
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    /// `"type": "thinking_delta"`.
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// `"type": "signature_delta"`.
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// `"type": "input_json_delta"`; `partial_json` is a fragment and need not parse alone.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
 890
/// Message-level update carried by `Event::MessageDelta`.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
 896
/// Errors returned by the request helpers in this file.
///
/// `send_request` produces the serialize/build/send variants; `handle_error_response` maps
/// non-success HTTP responses onto the remaining ones.
// NOTE(review): only `Debug` is derived here; any `Display`/`Error` impl would have to live
// outside the part of the file visible in this review.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
 929
/// Structured error object reported by the Anthropic API.
///
/// Its `Display` output is `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Raw error kind string; stored under the JSON key `type`.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Error message text from the API.
    pub message: String,
}
 937
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from the snake_case error type string (for example
/// `"rate_limit_error"`) through the derived `FromStr` implementation.
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
 960
 961impl ApiError {
 962    pub fn code(&self) -> Option<ApiErrorCode> {
 963        ApiErrorCode::from_str(&self.error_type).ok()
 964    }
 965
 966    pub fn is_rate_limit_error(&self) -> bool {
 967        matches!(self.error_type.as_str(), "rate_limit_error")
 968    }
 969
 970    pub fn match_window_exceeded(&self) -> Option<u64> {
 971        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
 972            return None;
 973        };
 974
 975        parse_prompt_too_long(&self.message)
 976    }
 977}
 978
 979pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
 980    message
 981        .strip_prefix("prompt is too long: ")?
 982        .split_once(" tokens")?
 983        .0
 984        .parse()
 985        .ok()
 986}
 987
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
///
/// Optional fields are left out of the serialized JSON when they are `None`
/// (or, for `tools`, when the list is empty).
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    /// Model name, sent as the JSON `model` field.
    pub model: String,
    /// Messages whose tokens are to be counted.
    pub messages: Vec<Message>,
    /// Optional system prompt; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Tool definitions; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Optional thinking configuration; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Optional tool choice; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}
1003
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Value of the `input_tokens` field in the response body.
    pub input_tokens: u64,
}
1009
1010/// Count the number of tokens in a message without creating it.
1011pub async fn count_tokens(
1012    client: &dyn HttpClient,
1013    api_url: &str,
1014    api_key: &str,
1015    request: CountTokensRequest,
1016) -> Result<CountTokensResponse, AnthropicError> {
1017    let uri = format!("{api_url}/v1/messages/count_tokens");
1018
1019    let request_builder = HttpRequest::builder()
1020        .method(Method::POST)
1021        .uri(uri)
1022        .header("Anthropic-Version", "2023-06-01")
1023        .header("X-Api-Key", api_key.trim())
1024        .header("Content-Type", "application/json");
1025
1026    let serialized_request =
1027        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1028    let http_request = request_builder
1029        .body(AsyncBody::from(serialized_request))
1030        .map_err(AnthropicError::BuildRequestBody)?;
1031
1032    let mut response = client
1033        .send(http_request)
1034        .await
1035        .map_err(AnthropicError::HttpSend)?;
1036
1037    let rate_limits = RateLimitInfo::from_headers(response.headers());
1038
1039    if response.status().is_success() {
1040        let mut body = String::new();
1041        response
1042            .body_mut()
1043            .read_to_string(&mut body)
1044            .await
1045            .map_err(AnthropicError::ReadResponse)?;
1046
1047        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1048    } else {
1049        Err(handle_error_response(response, rate_limits).await)
1050    }
1051}
1052
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Well-formed message with a trailing limit.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Well-formed message with nothing after the marker.
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message but the wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Non-numeric token count.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(
            error.match_window_exceeded(),
            expected,
            "error_type={error_type:?}, message={message:?}"
        );
    }
}