anthropic.rs

   1use std::io;
   2use std::str::FromStr;
   3use std::time::Duration;
   4
   5use anyhow::{Context as _, Result, anyhow};
   6use chrono::{DateTime, Utc};
   7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
   8use http_client::http::{self, HeaderMap, HeaderValue};
   9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
  10use serde::{Deserialize, Serialize};
  11use strum::{EnumIter, EnumString};
  12use thiserror::Error;
  13
  14pub mod batches;
  15pub mod completion;
  16
  17pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
  18
  19#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  20#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  21pub struct AnthropicModelCacheConfiguration {
  22    pub min_total_token: u64,
  23    pub should_speculate: bool,
  24    pub max_cache_anchors: usize,
  25}
  26
  27#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  28#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  29pub enum AnthropicModelMode {
  30    #[default]
  31    Default,
  32    Thinking {
  33        budget_tokens: Option<u32>,
  34    },
  35    AdaptiveThinking,
  36}
  37
  38#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  39#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
  40pub enum Model {
  41    #[serde(
  42        rename = "claude-opus-4",
  43        alias = "claude-opus-4-latest",
  44        alias = "claude-opus-4-thinking",
  45        alias = "claude-opus-4-thinking-latest"
  46    )]
  47    ClaudeOpus4,
  48    #[serde(
  49        rename = "claude-opus-4-1",
  50        alias = "claude-opus-4-1-latest",
  51        alias = "claude-opus-4-1-thinking",
  52        alias = "claude-opus-4-1-thinking-latest"
  53    )]
  54    ClaudeOpus4_1,
  55    #[serde(
  56        rename = "claude-opus-4-5",
  57        alias = "claude-opus-4-5-latest",
  58        alias = "claude-opus-4-5-thinking",
  59        alias = "claude-opus-4-5-thinking-latest"
  60    )]
  61    ClaudeOpus4_5,
  62    #[serde(
  63        rename = "claude-opus-4-6",
  64        alias = "claude-opus-4-6-latest",
  65        alias = "claude-opus-4-6-1m-context",
  66        alias = "claude-opus-4-6-1m-context-latest",
  67        alias = "claude-opus-4-6-thinking",
  68        alias = "claude-opus-4-6-thinking-latest",
  69        alias = "claude-opus-4-6-1m-context-thinking",
  70        alias = "claude-opus-4-6-1m-context-thinking-latest"
  71    )]
  72    ClaudeOpus4_6,
  73    #[serde(
  74        rename = "claude-sonnet-4",
  75        alias = "claude-sonnet-4-latest",
  76        alias = "claude-sonnet-4-thinking",
  77        alias = "claude-sonnet-4-thinking-latest"
  78    )]
  79    ClaudeSonnet4,
  80    #[serde(
  81        rename = "claude-sonnet-4-5",
  82        alias = "claude-sonnet-4-5-latest",
  83        alias = "claude-sonnet-4-5-thinking",
  84        alias = "claude-sonnet-4-5-thinking-latest"
  85    )]
  86    ClaudeSonnet4_5,
  87    #[default]
  88    #[serde(
  89        rename = "claude-sonnet-4-6",
  90        alias = "claude-sonnet-4-6-latest",
  91        alias = "claude-sonnet-4-6-1m-context",
  92        alias = "claude-sonnet-4-6-1m-context-latest",
  93        alias = "claude-sonnet-4-6-thinking",
  94        alias = "claude-sonnet-4-6-thinking-latest",
  95        alias = "claude-sonnet-4-6-1m-context-thinking",
  96        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
  97    )]
  98    ClaudeSonnet4_6,
  99    #[serde(
 100        rename = "claude-haiku-4-5",
 101        alias = "claude-haiku-4-5-latest",
 102        alias = "claude-haiku-4-5-thinking",
 103        alias = "claude-haiku-4-5-thinking-latest"
 104    )]
 105    ClaudeHaiku4_5,
 106    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
 107    Claude3Haiku,
 108    #[serde(rename = "custom")]
 109    Custom {
 110        name: String,
 111        max_tokens: u64,
 112        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
 113        display_name: Option<String>,
 114        /// Override this model with a different Anthropic model for tool calls.
 115        tool_override: Option<String>,
 116        /// Indicates whether this custom model supports caching.
 117        cache_configuration: Option<AnthropicModelCacheConfiguration>,
 118        max_output_tokens: Option<u64>,
 119        default_temperature: Option<f32>,
 120        #[serde(default)]
 121        extra_beta_headers: Vec<String>,
 122        #[serde(default)]
 123        mode: AnthropicModelMode,
 124    },
 125}
 126
 127impl Model {
 128    pub fn default_fast() -> Self {
 129        Self::ClaudeHaiku4_5
 130    }
 131
 132    pub fn from_id(id: &str) -> Result<Self> {
 133        if id.starts_with("claude-opus-4-6") {
 134            return Ok(Self::ClaudeOpus4_6);
 135        }
 136
 137        if id.starts_with("claude-opus-4-5") {
 138            return Ok(Self::ClaudeOpus4_5);
 139        }
 140
 141        if id.starts_with("claude-opus-4-1") {
 142            return Ok(Self::ClaudeOpus4_1);
 143        }
 144
 145        if id.starts_with("claude-opus-4") {
 146            return Ok(Self::ClaudeOpus4);
 147        }
 148
 149        if id.starts_with("claude-sonnet-4-6") {
 150            return Ok(Self::ClaudeSonnet4_6);
 151        }
 152
 153        if id.starts_with("claude-sonnet-4-5") {
 154            return Ok(Self::ClaudeSonnet4_5);
 155        }
 156
 157        if id.starts_with("claude-sonnet-4") {
 158            return Ok(Self::ClaudeSonnet4);
 159        }
 160
 161        if id.starts_with("claude-haiku-4-5") {
 162            return Ok(Self::ClaudeHaiku4_5);
 163        }
 164
 165        if id.starts_with("claude-3-haiku") {
 166            return Ok(Self::Claude3Haiku);
 167        }
 168
 169        Err(anyhow!("invalid model ID: {id}"))
 170    }
 171
 172    pub fn id(&self) -> &str {
 173        match self {
 174            Self::ClaudeOpus4 => "claude-opus-4-latest",
 175            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
 176            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
 177            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
 178            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
 179            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
 180            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
 181            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
 182            Self::Claude3Haiku => "claude-3-haiku-20240307",
 183            Self::Custom { name, .. } => name,
 184        }
 185    }
 186
 187    /// The id of the model that should be used for making API requests
 188    pub fn request_id(&self) -> &str {
 189        match self {
 190            Self::ClaudeOpus4 => "claude-opus-4-20250514",
 191            Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
 192            Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
 193            Self::ClaudeOpus4_6 => "claude-opus-4-6",
 194            Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
 195            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-20250929",
 196            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
 197            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
 198            Self::Claude3Haiku => "claude-3-haiku-20240307",
 199            Self::Custom { name, .. } => name,
 200        }
 201    }
 202
 203    pub fn display_name(&self) -> &str {
 204        match self {
 205            Self::ClaudeOpus4 => "Claude Opus 4",
 206            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 207            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 208            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
 209            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 210            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 211            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
 212            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 213            Self::Claude3Haiku => "Claude 3 Haiku",
 214            Self::Custom {
 215                name, display_name, ..
 216            } => display_name.as_ref().unwrap_or(name),
 217        }
 218    }
 219
 220    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
 221        match self {
 222            Self::ClaudeOpus4
 223            | Self::ClaudeOpus4_1
 224            | Self::ClaudeOpus4_5
 225            | Self::ClaudeOpus4_6
 226            | Self::ClaudeSonnet4
 227            | Self::ClaudeSonnet4_5
 228            | Self::ClaudeSonnet4_6
 229            | Self::ClaudeHaiku4_5
 230            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
 231                min_total_token: 2_048,
 232                should_speculate: true,
 233                max_cache_anchors: 4,
 234            }),
 235            Self::Custom {
 236                cache_configuration,
 237                ..
 238            } => cache_configuration.clone(),
 239        }
 240    }
 241
 242    pub fn max_token_count(&self) -> u64 {
 243        match self {
 244            Self::ClaudeOpus4
 245            | Self::ClaudeOpus4_1
 246            | Self::ClaudeOpus4_5
 247            | Self::ClaudeSonnet4
 248            | Self::ClaudeSonnet4_5
 249            | Self::ClaudeHaiku4_5
 250            | Self::Claude3Haiku => 200_000,
 251            Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6 => 1_000_000,
 252            Self::Custom { max_tokens, .. } => *max_tokens,
 253        }
 254    }
 255
 256    pub fn max_output_tokens(&self) -> u64 {
 257        match self {
 258            Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
 259            Self::ClaudeOpus4_5
 260            | Self::ClaudeSonnet4
 261            | Self::ClaudeSonnet4_5
 262            | Self::ClaudeSonnet4_6
 263            | Self::ClaudeHaiku4_5 => 64_000,
 264            Self::ClaudeOpus4_6 => 128_000,
 265            Self::Claude3Haiku => 4_096,
 266            Self::Custom {
 267                max_output_tokens, ..
 268            } => max_output_tokens.unwrap_or(4_096),
 269        }
 270    }
 271
 272    pub fn default_temperature(&self) -> f32 {
 273        match self {
 274            Self::ClaudeOpus4
 275            | Self::ClaudeOpus4_1
 276            | Self::ClaudeOpus4_5
 277            | Self::ClaudeOpus4_6
 278            | Self::ClaudeSonnet4
 279            | Self::ClaudeSonnet4_5
 280            | Self::ClaudeSonnet4_6
 281            | Self::ClaudeHaiku4_5
 282            | Self::Claude3Haiku => 1.0,
 283            Self::Custom {
 284                default_temperature,
 285                ..
 286            } => default_temperature.unwrap_or(1.0),
 287        }
 288    }
 289
 290    pub fn mode(&self) -> AnthropicModelMode {
 291        if self.supports_adaptive_thinking() {
 292            AnthropicModelMode::AdaptiveThinking
 293        } else if self.supports_thinking() {
 294            AnthropicModelMode::Thinking {
 295                budget_tokens: Some(4_096),
 296            }
 297        } else {
 298            AnthropicModelMode::Default
 299        }
 300    }
 301
 302    pub fn supports_thinking(&self) -> bool {
 303        matches!(
 304            self,
 305            Self::ClaudeOpus4
 306                | Self::ClaudeOpus4_1
 307                | Self::ClaudeOpus4_5
 308                | Self::ClaudeOpus4_6
 309                | Self::ClaudeSonnet4
 310                | Self::ClaudeSonnet4_5
 311                | Self::ClaudeSonnet4_6
 312                | Self::ClaudeHaiku4_5
 313        )
 314    }
 315
 316    pub fn supports_adaptive_thinking(&self) -> bool {
 317        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
 318    }
 319
 320    pub fn beta_headers(&self) -> Option<String> {
 321        let mut headers = vec![];
 322
 323        match self {
 324            Self::Custom {
 325                extra_beta_headers, ..
 326            } => {
 327                headers.extend(
 328                    extra_beta_headers
 329                        .iter()
 330                        .filter(|header| !header.trim().is_empty())
 331                        .cloned(),
 332                );
 333            }
 334            _ => {}
 335        }
 336
 337        if headers.is_empty() {
 338            None
 339        } else {
 340            Some(headers.join(","))
 341        }
 342    }
 343
 344    pub fn tool_model_id(&self) -> &str {
 345        if let Self::Custom {
 346            tool_override: Some(tool_override),
 347            ..
 348        } = self
 349        {
 350            tool_override
 351        } else {
 352            self.request_id()
 353        }
 354    }
 355}
 356
 357/// Generate completion with streaming.
 358pub async fn stream_completion(
 359    client: &dyn HttpClient,
 360    api_url: &str,
 361    api_key: &str,
 362    request: Request,
 363    beta_headers: Option<String>,
 364) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
 365    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
 366        .await
 367        .map(|output| output.0)
 368}
 369
 370/// Generate completion without streaming.
 371pub async fn non_streaming_completion(
 372    client: &dyn HttpClient,
 373    api_url: &str,
 374    api_key: &str,
 375    request: Request,
 376    beta_headers: Option<String>,
 377) -> Result<Response, AnthropicError> {
 378    let (mut response, rate_limits) =
 379        send_request(client, api_url, api_key, &request, beta_headers).await?;
 380
 381    if response.status().is_success() {
 382        let mut body = String::new();
 383        response
 384            .body_mut()
 385            .read_to_string(&mut body)
 386            .await
 387            .map_err(AnthropicError::ReadResponse)?;
 388
 389        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
 390    } else {
 391        Err(handle_error_response(response, rate_limits).await)
 392    }
 393}
 394
 395async fn send_request(
 396    client: &dyn HttpClient,
 397    api_url: &str,
 398    api_key: &str,
 399    request: impl Serialize,
 400    beta_headers: Option<String>,
 401) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
 402    let uri = format!("{api_url}/v1/messages");
 403
 404    let mut request_builder = HttpRequest::builder()
 405        .method(Method::POST)
 406        .uri(uri)
 407        .header("Anthropic-Version", "2023-06-01")
 408        .header("X-Api-Key", api_key.trim())
 409        .header("Content-Type", "application/json");
 410
 411    if let Some(beta_headers) = beta_headers {
 412        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
 413    }
 414
 415    let serialized_request =
 416        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
 417    let request = request_builder
 418        .body(AsyncBody::from(serialized_request))
 419        .map_err(AnthropicError::BuildRequestBody)?;
 420
 421    let response = client
 422        .send(request)
 423        .await
 424        .map_err(AnthropicError::HttpSend)?;
 425
 426    let rate_limits = RateLimitInfo::from_headers(response.headers());
 427
 428    Ok((response, rate_limits))
 429}
 430
 431async fn handle_error_response(
 432    mut response: http::Response<AsyncBody>,
 433    rate_limits: RateLimitInfo,
 434) -> AnthropicError {
 435    if response.status().as_u16() == 529 {
 436        return AnthropicError::ServerOverloaded {
 437            retry_after: rate_limits.retry_after,
 438        };
 439    }
 440
 441    if let Some(retry_after) = rate_limits.retry_after {
 442        return AnthropicError::RateLimit { retry_after };
 443    }
 444
 445    let mut body = String::new();
 446    let read_result = response
 447        .body_mut()
 448        .read_to_string(&mut body)
 449        .await
 450        .map_err(AnthropicError::ReadResponse);
 451
 452    if let Err(err) = read_result {
 453        return err;
 454    }
 455
 456    match serde_json::from_str::<Event>(&body) {
 457        Ok(Event::Error { error }) => AnthropicError::ApiError(error),
 458        Ok(_) | Err(_) => AnthropicError::HttpResponseError {
 459            status_code: response.status(),
 460            message: body,
 461        },
 462    }
 463}
 464
 465/// An individual rate limit.
 466#[derive(Debug)]
 467pub struct RateLimit {
 468    pub limit: usize,
 469    pub remaining: usize,
 470    pub reset: DateTime<Utc>,
 471}
 472
 473impl RateLimit {
 474    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
 475        let limit =
 476            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
 477        let remaining = get_header(
 478            &format!("anthropic-ratelimit-{resource}-remaining"),
 479            headers,
 480        )?
 481        .parse()?;
 482        let reset = DateTime::parse_from_rfc3339(get_header(
 483            &format!("anthropic-ratelimit-{resource}-reset"),
 484            headers,
 485        )?)?
 486        .to_utc();
 487
 488        Ok(Self {
 489            limit,
 490            remaining,
 491            reset,
 492        })
 493    }
 494}
 495
 496/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
 497#[derive(Debug)]
 498pub struct RateLimitInfo {
 499    pub retry_after: Option<Duration>,
 500    pub requests: Option<RateLimit>,
 501    pub tokens: Option<RateLimit>,
 502    pub input_tokens: Option<RateLimit>,
 503    pub output_tokens: Option<RateLimit>,
 504}
 505
 506impl RateLimitInfo {
 507    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
 508        // Check if any rate limit headers exist
 509        let has_rate_limit_headers = headers
 510            .keys()
 511            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
 512
 513        if !has_rate_limit_headers {
 514            return Self {
 515                retry_after: None,
 516                requests: None,
 517                tokens: None,
 518                input_tokens: None,
 519                output_tokens: None,
 520            };
 521        }
 522
 523        Self {
 524            retry_after: parse_retry_after(headers),
 525            requests: RateLimit::from_headers("requests", headers).ok(),
 526            tokens: RateLimit::from_headers("tokens", headers).ok(),
 527            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
 528            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
 529        }
 530    }
 531}
 532
 533/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
 534/// seconds). Note that other services might specify an HTTP date or some other format for this
 535/// header. Returns `None` if the header is not present or cannot be parsed.
 536pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
 537    headers
 538        .get("retry-after")
 539        .and_then(|v| v.to_str().ok())
 540        .and_then(|v| v.parse::<u64>().ok())
 541        .map(Duration::from_secs)
 542}
 543
 544fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
 545    Ok(headers
 546        .get(key)
 547        .with_context(|| format!("missing header `{key}`"))?
 548        .to_str()?)
 549}
 550
 551pub async fn stream_completion_with_rate_limit_info(
 552    client: &dyn HttpClient,
 553    api_url: &str,
 554    api_key: &str,
 555    request: Request,
 556    beta_headers: Option<String>,
 557) -> Result<
 558    (
 559        BoxStream<'static, Result<Event, AnthropicError>>,
 560        Option<RateLimitInfo>,
 561    ),
 562    AnthropicError,
 563> {
 564    let request = StreamingRequest {
 565        base: request,
 566        stream: true,
 567    };
 568
 569    let (response, rate_limits) =
 570        send_request(client, api_url, api_key, &request, beta_headers).await?;
 571
 572    if response.status().is_success() {
 573        let reader = BufReader::new(response.into_body());
 574        let stream = reader
 575            .lines()
 576            .filter_map(|line| async move {
 577                match line {
 578                    Ok(line) => {
 579                        let line = line
 580                            .strip_prefix("data: ")
 581                            .or_else(|| line.strip_prefix("data:"))?;
 582
 583                        match serde_json::from_str(line) {
 584                            Ok(response) => Some(Ok(response)),
 585                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
 586                        }
 587                    }
 588                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
 589                }
 590            })
 591            .boxed();
 592        Ok((stream, Some(rate_limits)))
 593    } else {
 594        Err(handle_error_response(response, rate_limits).await)
 595    }
 596}
 597
 598#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 599#[serde(rename_all = "lowercase")]
 600pub enum CacheControlType {
 601    Ephemeral,
 602}
 603
 604#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
 605pub struct CacheControl {
 606    #[serde(rename = "type")]
 607    pub cache_type: CacheControlType,
 608}
 609
 610#[derive(Debug, Serialize, Deserialize)]
 611pub struct Message {
 612    pub role: Role,
 613    pub content: Vec<RequestContent>,
 614}
 615
 616#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
 617#[serde(rename_all = "lowercase")]
 618pub enum Role {
 619    User,
 620    Assistant,
 621}
 622
 623#[derive(Debug, Serialize, Deserialize)]
 624#[serde(tag = "type")]
 625pub enum RequestContent {
 626    #[serde(rename = "text")]
 627    Text {
 628        text: String,
 629        #[serde(skip_serializing_if = "Option::is_none")]
 630        cache_control: Option<CacheControl>,
 631    },
 632    #[serde(rename = "thinking")]
 633    Thinking {
 634        thinking: String,
 635        signature: String,
 636        #[serde(skip_serializing_if = "Option::is_none")]
 637        cache_control: Option<CacheControl>,
 638    },
 639    #[serde(rename = "redacted_thinking")]
 640    RedactedThinking { data: String },
 641    #[serde(rename = "image")]
 642    Image {
 643        source: ImageSource,
 644        #[serde(skip_serializing_if = "Option::is_none")]
 645        cache_control: Option<CacheControl>,
 646    },
 647    #[serde(rename = "tool_use")]
 648    ToolUse {
 649        id: String,
 650        name: String,
 651        input: serde_json::Value,
 652        #[serde(skip_serializing_if = "Option::is_none")]
 653        cache_control: Option<CacheControl>,
 654    },
 655    #[serde(rename = "tool_result")]
 656    ToolResult {
 657        tool_use_id: String,
 658        is_error: bool,
 659        content: ToolResultContent,
 660        #[serde(skip_serializing_if = "Option::is_none")]
 661        cache_control: Option<CacheControl>,
 662    },
 663}
 664
 665#[derive(Debug, Serialize, Deserialize)]
 666#[serde(untagged)]
 667pub enum ToolResultContent {
 668    Plain(String),
 669    Multipart(Vec<ToolResultPart>),
 670}
 671
 672#[derive(Debug, Serialize, Deserialize)]
 673#[serde(tag = "type", rename_all = "lowercase")]
 674pub enum ToolResultPart {
 675    Text { text: String },
 676    Image { source: ImageSource },
 677}
 678
 679#[derive(Debug, Serialize, Deserialize)]
 680#[serde(tag = "type")]
 681pub enum ResponseContent {
 682    #[serde(rename = "text")]
 683    Text { text: String },
 684    #[serde(rename = "thinking")]
 685    Thinking { thinking: String },
 686    #[serde(rename = "redacted_thinking")]
 687    RedactedThinking { data: String },
 688    #[serde(rename = "tool_use")]
 689    ToolUse {
 690        id: String,
 691        name: String,
 692        input: serde_json::Value,
 693    },
 694}
 695
 696#[derive(Debug, Serialize, Deserialize)]
 697pub struct ImageSource {
 698    #[serde(rename = "type")]
 699    pub source_type: String,
 700    pub media_type: String,
 701    pub data: String,
 702}
 703
 704fn is_false(value: &bool) -> bool {
 705    !value
 706}
 707
 708#[derive(Debug, Serialize, Deserialize)]
 709pub struct Tool {
 710    pub name: String,
 711    pub description: String,
 712    pub input_schema: serde_json::Value,
 713    #[serde(default, skip_serializing_if = "is_false")]
 714    pub eager_input_streaming: bool,
 715}
 716
 717#[derive(Debug, Serialize, Deserialize)]
 718#[serde(tag = "type", rename_all = "lowercase")]
 719pub enum ToolChoice {
 720    Auto,
 721    Any,
 722    Tool { name: String },
 723    None,
 724}
 725
 726#[derive(Debug, Serialize, Deserialize)]
 727#[serde(tag = "type", rename_all = "lowercase")]
 728pub enum Thinking {
 729    Enabled { budget_tokens: Option<u32> },
 730    Adaptive,
 731}
 732
 733#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
 734#[serde(rename_all = "snake_case")]
 735#[strum(serialize_all = "snake_case")]
 736pub enum Effort {
 737    Low,
 738    Medium,
 739    High,
 740    Max,
 741}
 742
 743#[derive(Debug, Clone, Serialize, Deserialize)]
 744pub struct OutputConfig {
 745    pub effort: Option<Effort>,
 746}
 747
 748#[derive(Debug, Serialize, Deserialize)]
 749#[serde(untagged)]
 750pub enum StringOrContents {
 751    String(String),
 752    Content(Vec<RequestContent>),
 753}
 754
 755#[derive(Debug, Serialize, Deserialize)]
 756pub struct Request {
 757    pub model: String,
 758    pub max_tokens: u64,
 759    pub messages: Vec<Message>,
 760    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 761    pub tools: Vec<Tool>,
 762    #[serde(default, skip_serializing_if = "Option::is_none")]
 763    pub thinking: Option<Thinking>,
 764    #[serde(default, skip_serializing_if = "Option::is_none")]
 765    pub tool_choice: Option<ToolChoice>,
 766    #[serde(default, skip_serializing_if = "Option::is_none")]
 767    pub system: Option<StringOrContents>,
 768    #[serde(default, skip_serializing_if = "Option::is_none")]
 769    pub metadata: Option<Metadata>,
 770    #[serde(default, skip_serializing_if = "Option::is_none")]
 771    pub output_config: Option<OutputConfig>,
 772    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 773    pub stop_sequences: Vec<String>,
 774    #[serde(default, skip_serializing_if = "Option::is_none")]
 775    pub speed: Option<Speed>,
 776    #[serde(default, skip_serializing_if = "Option::is_none")]
 777    pub temperature: Option<f32>,
 778    #[serde(default, skip_serializing_if = "Option::is_none")]
 779    pub top_k: Option<u32>,
 780    #[serde(default, skip_serializing_if = "Option::is_none")]
 781    pub top_p: Option<f32>,
 782}
 783
 784#[derive(Debug, Default, Serialize, Deserialize)]
 785#[serde(rename_all = "snake_case")]
 786pub enum Speed {
 787    #[default]
 788    Standard,
 789    Fast,
 790}
 791
 792#[derive(Debug, Serialize, Deserialize)]
 793pub struct StreamingRequest {
 794    #[serde(flatten)]
 795    pub base: Request,
 796    pub stream: bool,
 797}
 798
 799#[derive(Debug, Serialize, Deserialize)]
 800pub struct Metadata {
 801    pub user_id: Option<String>,
 802}
 803
 804#[derive(Debug, Serialize, Deserialize, Default)]
 805pub struct Usage {
 806    #[serde(default, skip_serializing_if = "Option::is_none")]
 807    pub input_tokens: Option<u64>,
 808    #[serde(default, skip_serializing_if = "Option::is_none")]
 809    pub output_tokens: Option<u64>,
 810    #[serde(default, skip_serializing_if = "Option::is_none")]
 811    pub cache_creation_input_tokens: Option<u64>,
 812    #[serde(default, skip_serializing_if = "Option::is_none")]
 813    pub cache_read_input_tokens: Option<u64>,
 814}
 815
 816#[derive(Debug, Serialize, Deserialize)]
 817pub struct Response {
 818    pub id: String,
 819    #[serde(rename = "type")]
 820    pub response_type: String,
 821    pub role: Role,
 822    pub content: Vec<ResponseContent>,
 823    pub model: String,
 824    #[serde(default, skip_serializing_if = "Option::is_none")]
 825    pub stop_reason: Option<String>,
 826    #[serde(default, skip_serializing_if = "Option::is_none")]
 827    pub stop_sequence: Option<String>,
 828    pub usage: Usage,
 829}
 830
 831#[derive(Debug, Serialize, Deserialize)]
 832#[serde(tag = "type")]
 833pub enum Event {
 834    #[serde(rename = "message_start")]
 835    MessageStart { message: Response },
 836    #[serde(rename = "content_block_start")]
 837    ContentBlockStart {
 838        index: usize,
 839        content_block: ResponseContent,
 840    },
 841    #[serde(rename = "content_block_delta")]
 842    ContentBlockDelta { index: usize, delta: ContentDelta },
 843    #[serde(rename = "content_block_stop")]
 844    ContentBlockStop { index: usize },
 845    #[serde(rename = "message_delta")]
 846    MessageDelta { delta: MessageDelta, usage: Usage },
 847    #[serde(rename = "message_stop")]
 848    MessageStop,
 849    #[serde(rename = "ping")]
 850    Ping,
 851    #[serde(rename = "error")]
 852    Error { error: ApiError },
 853}
 854
 855#[derive(Debug, Serialize, Deserialize)]
 856#[serde(tag = "type")]
 857pub enum ContentDelta {
 858    #[serde(rename = "text_delta")]
 859    TextDelta { text: String },
 860    #[serde(rename = "thinking_delta")]
 861    ThinkingDelta { thinking: String },
 862    #[serde(rename = "signature_delta")]
 863    SignatureDelta { signature: String },
 864    #[serde(rename = "input_json_delta")]
 865    InputJsonDelta { partial_json: String },
 866}
 867
 868#[derive(Debug, Serialize, Deserialize)]
 869pub struct MessageDelta {
 870    pub stop_reason: Option<String>,
 871    pub stop_sequence: Option<String>,
 872}
 873
 874#[derive(Debug)]
 875pub enum AnthropicError {
 876    /// Failed to serialize the HTTP request body to JSON
 877    SerializeRequest(serde_json::Error),
 878
 879    /// Failed to construct the HTTP request body
 880    BuildRequestBody(http::Error),
 881
 882    /// Failed to send the HTTP request
 883    HttpSend(anyhow::Error),
 884
 885    /// Failed to deserialize the response from JSON
 886    DeserializeResponse(serde_json::Error),
 887
 888    /// Failed to read from response stream
 889    ReadResponse(io::Error),
 890
 891    /// HTTP error response from the API
 892    HttpResponseError {
 893        status_code: StatusCode,
 894        message: String,
 895    },
 896
 897    /// Rate limit exceeded
 898    RateLimit { retry_after: Duration },
 899
 900    /// Server overloaded
 901    ServerOverloaded { retry_after: Option<Duration> },
 902
 903    /// API returned an error response
 904    ApiError(ApiError),
 905}
 906
 907#[derive(Debug, Serialize, Deserialize, Error)]
 908#[error("Anthropic API Error: {error_type}: {message}")]
 909pub struct ApiError {
 910    #[serde(rename = "type")]
 911    pub error_type: String,
 912    pub message: String,
 913}
 914
 915/// An Anthropic API error code.
 916/// <https://docs.anthropic.com/en/api/errors#http-errors>
 917#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
 918#[strum(serialize_all = "snake_case")]
 919pub enum ApiErrorCode {
 920    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
 921    InvalidRequestError,
 922    /// 401 - `authentication_error`: There's an issue with your API key.
 923    AuthenticationError,
 924    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
 925    PermissionError,
 926    /// 404 - `not_found_error`: The requested resource was not found.
 927    NotFoundError,
 928    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
 929    RequestTooLarge,
 930    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
 931    RateLimitError,
 932    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
 933    ApiError,
 934    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
 935    OverloadedError,
 936}
 937
 938impl ApiError {
 939    pub fn code(&self) -> Option<ApiErrorCode> {
 940        ApiErrorCode::from_str(&self.error_type).ok()
 941    }
 942
 943    pub fn is_rate_limit_error(&self) -> bool {
 944        matches!(self.error_type.as_str(), "rate_limit_error")
 945    }
 946
 947    pub fn match_window_exceeded(&self) -> Option<u64> {
 948        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
 949            return None;
 950        };
 951
 952        parse_prompt_too_long(&self.message)
 953    }
 954}
 955
 956pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
 957    message
 958        .strip_prefix("prompt is too long: ")?
 959        .split_once(" tokens")?
 960        .0
 961        .parse()
 962        .ok()
 963}
 964
 965/// Request body for the token counting API.
 966/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
 967#[derive(Debug, Serialize)]
 968pub struct CountTokensRequest {
 969    pub model: String,
 970    pub messages: Vec<Message>,
 971    #[serde(default, skip_serializing_if = "Option::is_none")]
 972    pub system: Option<StringOrContents>,
 973    #[serde(default, skip_serializing_if = "Vec::is_empty")]
 974    pub tools: Vec<Tool>,
 975    #[serde(default, skip_serializing_if = "Option::is_none")]
 976    pub thinking: Option<Thinking>,
 977    #[serde(default, skip_serializing_if = "Option::is_none")]
 978    pub tool_choice: Option<ToolChoice>,
 979}
 980
 981/// Response from the token counting API.
 982#[derive(Debug, Deserialize)]
 983pub struct CountTokensResponse {
 984    pub input_tokens: u64,
 985}
 986
 987/// Count the number of tokens in a message without creating it.
 988pub async fn count_tokens(
 989    client: &dyn HttpClient,
 990    api_url: &str,
 991    api_key: &str,
 992    request: CountTokensRequest,
 993) -> Result<CountTokensResponse, AnthropicError> {
 994    let uri = format!("{api_url}/v1/messages/count_tokens");
 995
 996    let request_builder = HttpRequest::builder()
 997        .method(Method::POST)
 998        .uri(uri)
 999        .header("Anthropic-Version", "2023-06-01")
1000        .header("X-Api-Key", api_key.trim())
1001        .header("Content-Type", "application/json");
1002
1003    let serialized_request =
1004        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1005    let http_request = request_builder
1006        .body(AsyncBody::from(serialized_request))
1007        .map_err(AnthropicError::BuildRequestBody)?;
1008
1009    let mut response = client
1010        .send(http_request)
1011        .await
1012        .map_err(AnthropicError::HttpSend)?;
1013
1014    let rate_limits = RateLimitInfo::from_headers(response.headers());
1015
1016    if response.status().is_success() {
1017        let mut body = String::new();
1018        response
1019            .body_mut()
1020            .read_to_string(&mut body)
1021            .await
1022            .map_err(AnthropicError::ReadResponse)?;
1023
1024        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1025    } else {
1026        Err(handle_error_response(response, rate_limits).await)
1027    }
1028}
1029
1030// -- Conversions from/to `language_model_core` types --
1031
1032impl From<language_model_core::Speed> for Speed {
1033    fn from(speed: language_model_core::Speed) -> Self {
1034        match speed {
1035            language_model_core::Speed::Standard => Speed::Standard,
1036            language_model_core::Speed::Fast => Speed::Fast,
1037        }
1038    }
1039}
1040
1041impl From<AnthropicError> for language_model_core::LanguageModelCompletionError {
1042    fn from(error: AnthropicError) -> Self {
1043        let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
1044        match error {
1045            AnthropicError::SerializeRequest(error) => Self::SerializeRequest { provider, error },
1046            AnthropicError::BuildRequestBody(error) => Self::BuildRequestBody { provider, error },
1047            AnthropicError::HttpSend(error) => Self::HttpSend { provider, error },
1048            AnthropicError::DeserializeResponse(error) => {
1049                Self::DeserializeResponse { provider, error }
1050            }
1051            AnthropicError::ReadResponse(error) => Self::ApiReadResponseError { provider, error },
1052            AnthropicError::HttpResponseError {
1053                status_code,
1054                message,
1055            } => Self::HttpResponseError {
1056                provider,
1057                status_code,
1058                message,
1059            },
1060            AnthropicError::RateLimit { retry_after } => Self::RateLimitExceeded {
1061                provider,
1062                retry_after: Some(retry_after),
1063            },
1064            AnthropicError::ServerOverloaded { retry_after } => Self::ServerOverloaded {
1065                provider,
1066                retry_after,
1067            },
1068            AnthropicError::ApiError(api_error) => api_error.into(),
1069        }
1070    }
1071}
1072
1073impl From<ApiError> for language_model_core::LanguageModelCompletionError {
1074    fn from(error: ApiError) -> Self {
1075        use ApiErrorCode::*;
1076        let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
1077        match error.code() {
1078            Some(code) => match code {
1079                InvalidRequestError => Self::BadRequestFormat {
1080                    provider,
1081                    message: error.message,
1082                },
1083                AuthenticationError => Self::AuthenticationError {
1084                    provider,
1085                    message: error.message,
1086                },
1087                PermissionError => Self::PermissionError {
1088                    provider,
1089                    message: error.message,
1090                },
1091                NotFoundError => Self::ApiEndpointNotFound { provider },
1092                RequestTooLarge => Self::PromptTooLarge {
1093                    tokens: language_model_core::parse_prompt_too_long(&error.message),
1094                },
1095                RateLimitError => Self::RateLimitExceeded {
1096                    provider,
1097                    retry_after: None,
1098                },
1099                ApiError => Self::ApiInternalServerError {
1100                    provider,
1101                    message: error.message,
1102                },
1103                OverloadedError => Self::ServerOverloaded {
1104                    provider,
1105                    retry_after: None,
1106                },
1107            },
1108            None => Self::Other(error.into()),
1109        }
1110    }
1111}
1112
/// Checks `ApiError::match_window_exceeded` against well-formed messages,
/// unrelated messages, a non-matching error type, and a non-numeric count.
#[test]
fn test_match_window_exceeded() {
    // (error_type, message, expected token count)
    let cases: [(&str, &str, Option<u64>); 5] = [
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}