1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11use strum::{EnumIter, EnumString};
12use thiserror::Error;
13
14pub mod batches;
15pub mod completion;
16
/// Default base URL for the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
18
/// Prompt-caching settings for a model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    /// Minimum total token count before caching is worthwhile.
    pub min_total_token: u64,
    /// Whether cache anchors may be placed speculatively.
    pub should_speculate: bool,
    /// Maximum number of cache anchors allowed in a single request.
    pub max_cache_anchors: usize,
}
26
/// How a model's extended-thinking capability should be driven.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No extended thinking.
    #[default]
    Default,
    /// Extended thinking with an optional fixed token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    /// Extended thinking where the model chooses its own budget.
    AdaptiveThinking,
}
37
/// The set of Anthropic models known to this crate.
///
/// The serde aliases accept the `-latest`, `-thinking`, and `-1m-context` ID
/// spellings that may appear in stored settings; they all deserialize to the
/// same variant.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-7",
        alias = "claude-opus-4-7-latest",
        alias = "claude-opus-4-7-1m-context",
        alias = "claude-opus-4-7-1m-context-latest",
        alias = "claude-opus-4-7-thinking",
        alias = "claude-opus-4-7-thinking-latest",
        alias = "claude-opus-4-7-1m-context-thinking",
        alias = "claude-opus-4-7-1m-context-thinking-latest"
    )]
    ClaudeOpus4_7,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    /// The default model.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model that is not hard-coded in this crate.
    #[serde(rename = "custom")]
    Custom {
        /// The model ID sent to the API.
        name: String,
        /// Maximum context window size, in tokens.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the agent panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        /// Maximum number of output tokens; falls back to 4096 when unset.
        max_output_tokens: Option<u64>,
        /// Default sampling temperature; falls back to 1.0 when unset.
        default_temperature: Option<f32>,
        /// Extra values to send in the `Anthropic-Beta` header.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        /// The model's thinking mode.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
137
138impl Model {
    /// The default model for fast, inexpensive requests.
    pub fn default_fast() -> Self {
        Self::ClaudeHaiku4_5
    }
142
143 pub fn from_id(id: &str) -> Result<Self> {
144 if id.starts_with("claude-opus-4-7") {
145 return Ok(Self::ClaudeOpus4_7);
146 }
147
148 if id.starts_with("claude-opus-4-6") {
149 return Ok(Self::ClaudeOpus4_6);
150 }
151
152 if id.starts_with("claude-opus-4-5") {
153 return Ok(Self::ClaudeOpus4_5);
154 }
155
156 if id.starts_with("claude-opus-4-1") {
157 return Ok(Self::ClaudeOpus4_1);
158 }
159
160 if id.starts_with("claude-opus-4") {
161 return Ok(Self::ClaudeOpus4);
162 }
163
164 if id.starts_with("claude-sonnet-4-6") {
165 return Ok(Self::ClaudeSonnet4_6);
166 }
167
168 if id.starts_with("claude-sonnet-4-5") {
169 return Ok(Self::ClaudeSonnet4_5);
170 }
171
172 if id.starts_with("claude-sonnet-4") {
173 return Ok(Self::ClaudeSonnet4);
174 }
175
176 if id.starts_with("claude-haiku-4-5") {
177 return Ok(Self::ClaudeHaiku4_5);
178 }
179
180 if id.starts_with("claude-3-haiku") {
181 return Ok(Self::Claude3Haiku);
182 }
183
184 Err(anyhow!("invalid model ID: {id}"))
185 }
186
    /// The stable, user-facing model identifier (generally the `-latest`
    /// alias). Use [`Self::request_id`] for the ID sent in API requests.
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeOpus4_7 => "claude-opus-4-7-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }
202
    /// The id of the model that should be used for making API requests
    /// (a dated snapshot for older models, the bare ID for newer ones).
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6 => "claude-opus-4-6",
            Self::ClaudeOpus4_7 => "claude-opus-4-7",
            Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-20250929",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }
219
    /// Human-readable name shown in the UI; custom models fall back to their
    /// raw `name` when no `display_name` is configured.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeOpus4_7 => "Claude Opus 4.7",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }
237
238 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
239 match self {
240 Self::ClaudeOpus4
241 | Self::ClaudeOpus4_1
242 | Self::ClaudeOpus4_5
243 | Self::ClaudeOpus4_6
244 | Self::ClaudeOpus4_7
245 | Self::ClaudeSonnet4
246 | Self::ClaudeSonnet4_5
247 | Self::ClaudeSonnet4_6
248 | Self::ClaudeHaiku4_5
249 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
250 min_total_token: 2_048,
251 should_speculate: true,
252 max_cache_anchors: 4,
253 }),
254 Self::Custom {
255 cache_configuration,
256 ..
257 } => cache_configuration.clone(),
258 }
259 }
260
    /// Maximum context window size, in tokens. The newest Opus/Sonnet
    /// generations get the 1M-token window; older built-ins get 200k.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => 200_000,
            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 | Self::ClaudeSonnet4_6 => 1_000_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }
274
    /// Maximum number of output tokens the model may generate in one request.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5 => 64_000,
            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 => 128_000,
            Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }
290
291 pub fn default_temperature(&self) -> f32 {
292 match self {
293 Self::ClaudeOpus4
294 | Self::ClaudeOpus4_1
295 | Self::ClaudeOpus4_5
296 | Self::ClaudeOpus4_6
297 | Self::ClaudeOpus4_7
298 | Self::ClaudeSonnet4
299 | Self::ClaudeSonnet4_5
300 | Self::ClaudeSonnet4_6
301 | Self::ClaudeHaiku4_5
302 | Self::Claude3Haiku => 1.0,
303 Self::Custom {
304 default_temperature,
305 ..
306 } => default_temperature.unwrap_or(1.0),
307 }
308 }
309
    /// Returns the thinking mode for this model.
    ///
    /// Custom models report their configured mode. Built-in models prefer
    /// adaptive thinking, then fixed-budget thinking (4096 tokens), based on
    /// the `supports_*` capability checks below.
    pub fn mode(&self) -> AnthropicModelMode {
        match self {
            Self::Custom { mode, .. } => mode.clone(),
            _ if self.supports_adaptive_thinking() => AnthropicModelMode::AdaptiveThinking,
            _ if self.supports_thinking() => AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            _ => AnthropicModelMode::Default,
        }
    }
320
321 pub fn supports_thinking(&self) -> bool {
322 match self {
323 Self::Custom { mode, .. } => {
324 matches!(
325 mode,
326 AnthropicModelMode::Thinking { .. } | AnthropicModelMode::AdaptiveThinking
327 )
328 }
329 _ => matches!(
330 self,
331 Self::ClaudeOpus4
332 | Self::ClaudeOpus4_1
333 | Self::ClaudeOpus4_5
334 | Self::ClaudeOpus4_6
335 | Self::ClaudeOpus4_7
336 | Self::ClaudeSonnet4
337 | Self::ClaudeSonnet4_5
338 | Self::ClaudeSonnet4_6
339 | Self::ClaudeHaiku4_5
340 ),
341 }
342 }
343
    /// Whether this model accepts the `speed` request parameter (see
    /// [`Speed`]); currently only Opus 4.6 and Sonnet 4.6.
    pub fn supports_speed(&self) -> bool {
        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
    }
347
    /// Whether this model supports adaptive thinking, where the model picks
    /// its own thinking budget.
    pub fn supports_adaptive_thinking(&self) -> bool {
        match self {
            Self::Custom { mode, .. } => matches!(mode, AnthropicModelMode::AdaptiveThinking),
            _ => matches!(
                self,
                Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 | Self::ClaudeSonnet4_6
            ),
        }
    }
357
358 pub fn beta_headers(&self) -> Option<String> {
359 let mut headers = vec![];
360
361 match self {
362 Self::Custom {
363 extra_beta_headers, ..
364 } => {
365 headers.extend(
366 extra_beta_headers
367 .iter()
368 .filter(|header| !header.trim().is_empty())
369 .cloned(),
370 );
371 }
372 _ => {}
373 }
374
375 if headers.is_empty() {
376 None
377 } else {
378 Some(headers.join(","))
379 }
380 }
381
382 pub fn tool_model_id(&self) -> &str {
383 if let Self::Custom {
384 tool_override: Some(tool_override),
385 ..
386 } = self
387 {
388 tool_override
389 } else {
390 self.request_id()
391 }
392 }
393}
394
/// Generate completion with streaming.
///
/// Convenience wrapper around [`stream_completion_with_rate_limit_info`]
/// that discards the rate-limit headers and returns only the event stream.
pub async fn stream_completion(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
        .await
        .map(|output| output.0)
}
407
/// Generate completion without streaming.
///
/// Sends the request via [`send_request`], reads the whole body, and
/// deserializes it into a [`Response`]. Non-success statuses are converted
/// into the most specific [`AnthropicError`] by [`handle_error_response`].
pub async fn non_streaming_completion(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<Response, AnthropicError> {
    let (mut response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
432
/// Builds and sends a POST to `{api_url}/v1/messages`, returning the raw
/// response together with any rate-limit info parsed from its headers.
///
/// The API key is trimmed before use so stray whitespace in stored settings
/// doesn't produce an invalid header. `beta_headers`, when present, is sent
/// verbatim as the `Anthropic-Beta` header.
async fn send_request(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: impl Serialize,
    beta_headers: Option<String>,
) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
    let uri = format!("{api_url}/v1/messages");

    let mut request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Anthropic-Version", "2023-06-01")
        .header("X-Api-Key", api_key.trim())
        .header("Content-Type", "application/json");

    if let Some(beta_headers) = beta_headers {
        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
    }

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;

    // Rate-limit headers are present on both success and error responses.
    let rate_limits = RateLimitInfo::from_headers(response.headers());

    Ok((response, rate_limits))
}
468
469async fn handle_error_response(
470 mut response: http::Response<AsyncBody>,
471 rate_limits: RateLimitInfo,
472) -> AnthropicError {
473 if response.status().as_u16() == 529 {
474 return AnthropicError::ServerOverloaded {
475 retry_after: rate_limits.retry_after,
476 };
477 }
478
479 if let Some(retry_after) = rate_limits.retry_after {
480 return AnthropicError::RateLimit { retry_after };
481 }
482
483 let mut body = String::new();
484 let read_result = response
485 .body_mut()
486 .read_to_string(&mut body)
487 .await
488 .map_err(AnthropicError::ReadResponse);
489
490 if let Err(err) = read_result {
491 return err;
492 }
493
494 match serde_json::from_str::<Event>(&body) {
495 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
496 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
497 status_code: response.status(),
498 message: body,
499 },
500 }
501}
502
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    /// Total allowance for this resource in the current window.
    pub limit: usize,
    /// Remaining allowance in the current window.
    pub remaining: usize,
    /// When the window resets (UTC).
    pub reset: DateTime<Utc>,
}
510
511impl RateLimit {
512 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
513 let limit =
514 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
515 let remaining = get_header(
516 &format!("anthropic-ratelimit-{resource}-remaining"),
517 headers,
518 )?
519 .parse()?;
520 let reset = DateTime::parse_from_rfc3339(get_header(
521 &format!("anthropic-ratelimit-{resource}-reset"),
522 headers,
523 )?)?
524 .to_utc();
525
526 Ok(Self {
527 limit,
528 remaining,
529 reset,
530 })
531 }
532}
533
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Delay suggested by the `retry-after` header, if present.
    pub retry_after: Option<Duration>,
    /// Request-count rate limit, if reported.
    pub requests: Option<RateLimit>,
    /// Combined-token rate limit, if reported.
    pub tokens: Option<RateLimit>,
    /// Input-token rate limit, if reported.
    pub input_tokens: Option<RateLimit>,
    /// Output-token rate limit, if reported.
    pub output_tokens: Option<RateLimit>,
}
543
544impl RateLimitInfo {
545 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
546 // Check if any rate limit headers exist
547 let has_rate_limit_headers = headers
548 .keys()
549 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
550
551 if !has_rate_limit_headers {
552 return Self {
553 retry_after: None,
554 requests: None,
555 tokens: None,
556 input_tokens: None,
557 output_tokens: None,
558 };
559 }
560
561 Self {
562 retry_after: parse_retry_after(headers),
563 requests: RateLimit::from_headers("requests", headers).ok(),
564 tokens: RateLimit::from_headers("tokens", headers).ok(),
565 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
566 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
567 }
568 }
569}
570
571/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
572/// seconds). Note that other services might specify an HTTP date or some other format for this
573/// header. Returns `None` if the header is not present or cannot be parsed.
574pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
575 headers
576 .get("retry-after")
577 .and_then(|v| v.to_str().ok())
578 .and_then(|v| v.parse::<u64>().ok())
579 .map(Duration::from_secs)
580}
581
582fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
583 Ok(headers
584 .get(key)
585 .with_context(|| format!("missing header `{key}`"))?
586 .to_str()?)
587}
588
/// Generate completion with streaming, also returning any rate-limit
/// information parsed from the response headers.
///
/// On success the body is consumed as a server-sent-event stream: each line
/// prefixed with `data:` is deserialized into an [`Event`]; lines without
/// that prefix (blank keep-alives, `event:` lines) are silently dropped.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Same payload as the non-streaming endpoint, plus `"stream": true`.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // Accept both "data: {...}" and "data:{...}" framings;
                        // any other line is dropped from the stream.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
635
/// Cache-control strategy for a content block; only `ephemeral` exists today.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
641
/// Marks a content block as a prompt-cache anchor.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
647
/// A single conversation turn in a Messages API request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
653
/// Who authored a message; the Messages API only has these two roles
/// (system prompts are carried separately in [`Request::system`]).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
660
/// A content block sent to the API as part of a [`Message`].
///
/// Most variants carry an optional `cache_control` that marks the block as a
/// prompt-cache anchor (omitted from JSON when `None`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A previously returned thinking block, echoed back with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Opaque, encrypted thinking content echoed back verbatim.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An inline image.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation previously produced by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of running a tool, correlated by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
702
/// Tool output: either a bare string or a list of typed parts.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
709
/// One part of a multipart tool result.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
716
/// A content block returned by the API (the response-side counterpart of
/// [`RequestContent`], without cache-control fields).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
733
/// An image payload.
///
/// NOTE(review): field semantics (e.g. whether `data` is base64 and which
/// `source_type`/`media_type` values are accepted) are not visible here —
/// confirm against the Anthropic API docs before relying on them.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
741
/// Serde helper for `skip_serializing_if`: omit boolean fields that are `false`.
fn is_false(value: &bool) -> bool {
    !*value
}
745
/// Definition of a tool the model may call.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// JSON Schema describing the tool's input object.
    pub input_schema: serde_json::Value,
    /// Serialized only when `true`, so the field is absent for the common case.
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
754
/// Controls whether/which tool the model must call.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    /// The model decides whether to call a tool.
    Auto,
    /// The model must call some tool.
    Any,
    /// The model must call the named tool.
    Tool { name: String },
    /// The model must not call any tool.
    None,
}
763
/// Request-level extended-thinking configuration
/// (wire form of [`AnthropicModelMode`]'s non-default variants).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}
770
/// Output effort level, serialized in snake_case for both serde and strum.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
780
/// Output tuning options for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
785
/// A system prompt: either a bare string or a list of content blocks.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
792
/// Request body for the Messages API.
///
/// Optional fields are omitted from the serialized JSON when unset so the
/// API's own defaults apply.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Model ID to send (see [`Model::request_id`]).
    pub model: String,
    /// Maximum number of tokens to generate.
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
821
/// Generation speed tier; only some models accept this
/// (see [`Model::supports_speed`]).
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
829
/// A [`Request`] plus the `stream` flag, flattened into one JSON object.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
836
/// Request metadata forwarded to the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// Opaque identifier for the end user, for abuse detection on Anthropic's side.
    pub user_id: Option<String>,
}
841
/// Token accounting reported by the API; individual counters may be absent
/// depending on the event that carries them.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
853
/// A complete (non-streaming) Messages API response; also carried inside the
/// streaming `message_start` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    /// The model that actually served the request.
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
868
/// Server-sent events emitted by the streaming Messages API.
///
/// Also reused to parse error bodies on non-success responses, since those
/// use the same `{"type": "error", ...}` envelope.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    /// Keep-alive; carries no data.
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}
892
/// Incremental update to a content block, addressed by index in the
/// enclosing `content_block_delta` event.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// Partial JSON fragment of a tool-use `input`; fragments concatenate
    /// into a complete JSON document.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
905
/// Top-level message fields delivered at the end of a stream.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
911
/// Errors that can occur while talking to the Anthropic API.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
944
/// An error payload returned by the API; `error_type` maps onto
/// [`ApiErrorCode`] via [`ApiError::code`].
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
952
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
975
976impl ApiError {
977 pub fn code(&self) -> Option<ApiErrorCode> {
978 ApiErrorCode::from_str(&self.error_type).ok()
979 }
980
981 pub fn is_rate_limit_error(&self) -> bool {
982 matches!(self.error_type.as_str(), "rate_limit_error")
983 }
984
985 pub fn match_window_exceeded(&self) -> Option<u64> {
986 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
987 return None;
988 };
989
990 parse_prompt_too_long(&self.message)
991 }
992}
993
/// Extracts the token count from an Anthropic "prompt is too long: N tokens…"
/// error message, or `None` if the message has a different shape.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    let (count, _) = rest.split_once(" tokens")?;
    count.parse().ok()
}
1002
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
///
/// NOTE(review): the `#[serde(default)]` attributes have no effect on a
/// Serialize-only struct; they are kept for symmetry with `Request`.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    pub model: String,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}
1018
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Number of input tokens the given request would consume.
    pub input_tokens: u64,
}
1024
/// Count the number of tokens in a message without creating it.
///
/// POSTs to `{api_url}/v1/messages/count_tokens` with the same auth and
/// version headers as [`send_request`] (duplicated here because that helper
/// hardcodes the `/v1/messages` path). Error responses are converted via
/// [`handle_error_response`].
pub async fn count_tokens(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: CountTokensRequest,
) -> Result<CountTokensResponse, AnthropicError> {
    let uri = format!("{api_url}/v1/messages/count_tokens");

    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Anthropic-Version", "2023-06-01")
        .header("X-Api-Key", api_key.trim())
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let http_request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(http_request)
        .await
        .map_err(AnthropicError::HttpSend)?;

    // Parsed even on success so error handling below can use retry info.
    let rate_limits = RateLimitInfo::from_headers(response.headers());

    if response.status().is_success() {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
1067
1068// -- Conversions from/to `language_model_core` types --
1069
/// Converts the provider-agnostic speed setting into the Anthropic wire type.
impl From<language_model_core::Speed> for Speed {
    fn from(speed: language_model_core::Speed) -> Self {
        match speed {
            language_model_core::Speed::Standard => Speed::Standard,
            language_model_core::Speed::Fast => Speed::Fast,
        }
    }
}
1078
/// Maps transport-level Anthropic errors onto the provider-agnostic
/// completion error type, tagging each with the Anthropic provider name.
impl From<AnthropicError> for language_model_core::LanguageModelCompletionError {
    fn from(error: AnthropicError) -> Self {
        let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
        match error {
            AnthropicError::SerializeRequest(error) => Self::SerializeRequest { provider, error },
            AnthropicError::BuildRequestBody(error) => Self::BuildRequestBody { provider, error },
            AnthropicError::HttpSend(error) => Self::HttpSend { provider, error },
            AnthropicError::DeserializeResponse(error) => {
                Self::DeserializeResponse { provider, error }
            }
            AnthropicError::ReadResponse(error) => Self::ApiReadResponseError { provider, error },
            AnthropicError::HttpResponseError {
                status_code,
                message,
            } => Self::HttpResponseError {
                provider,
                status_code,
                message,
            },
            AnthropicError::RateLimit { retry_after } => Self::RateLimitExceeded {
                provider,
                retry_after: Some(retry_after),
            },
            AnthropicError::ServerOverloaded { retry_after } => Self::ServerOverloaded {
                provider,
                retry_after,
            },
            // API-level errors get the finer-grained mapping below.
            AnthropicError::ApiError(api_error) => api_error.into(),
        }
    }
}
1110
1111impl From<ApiError> for language_model_core::LanguageModelCompletionError {
1112 fn from(error: ApiError) -> Self {
1113 use ApiErrorCode::*;
1114 let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
1115 match error.code() {
1116 Some(code) => match code {
1117 InvalidRequestError => Self::BadRequestFormat {
1118 provider,
1119 message: error.message,
1120 },
1121 AuthenticationError => Self::AuthenticationError {
1122 provider,
1123 message: error.message,
1124 },
1125 PermissionError => Self::PermissionError {
1126 provider,
1127 message: error.message,
1128 },
1129 NotFoundError => Self::ApiEndpointNotFound { provider },
1130 RequestTooLarge => Self::PromptTooLarge {
1131 tokens: language_model_core::parse_prompt_too_long(&error.message),
1132 },
1133 RateLimitError => Self::RateLimitExceeded {
1134 provider,
1135 retry_after: None,
1136 },
1137 ApiError => Self::ApiInternalServerError {
1138 provider,
1139 message: error.message,
1140 },
1141 OverloadedError => Self::ServerOverloaded {
1142 provider,
1143 retry_after: None,
1144 },
1145 },
1146 None => Self::Other(error.into()),
1147 }
1148 }
1149}
1150
#[test]
fn custom_mode_thinking_is_preserved() {
    // A custom model configured with an explicit thinking budget must report
    // that exact mode back and advertise thinking support.
    let mode = AnthropicModelMode::Thinking {
        budget_tokens: Some(2048),
    };
    let model = Model::Custom {
        name: String::from("my-custom-model"),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: Vec::new(),
        mode: mode.clone(),
    };
    assert_eq!(model.mode(), mode);
    assert!(model.supports_thinking());
}
1174
#[test]
fn custom_mode_adaptive_is_preserved() {
    // Adaptive thinking on a custom model must round-trip through `mode()` and
    // imply both the adaptive and the general thinking capability.
    let model = Model::Custom {
        name: String::from("my-custom-model"),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: Vec::new(),
        mode: AnthropicModelMode::AdaptiveThinking,
    };
    assert!(model.supports_adaptive_thinking());
    assert!(model.supports_thinking());
    assert_eq!(model.mode(), AnthropicModelMode::AdaptiveThinking);
}
1192
#[test]
fn custom_mode_default_disables_thinking() {
    // With the default mode, a custom model must not report any thinking support.
    let model = Model::Custom {
        name: String::from("my-custom-model"),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: Vec::new(),
        mode: AnthropicModelMode::Default,
    };
    assert!(!model.supports_adaptive_thinking());
    assert!(!model.supports_thinking());
}
1209
#[test]
fn test_match_window_exceeded() {
    // Builds an `ApiError` from the given type/message and runs the
    // context-window-exceeded matcher on it.
    let window_exceeded = |error_type: &str, message: &str| {
        ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        }
        .match_window_exceeded()
    };

    // Token count is parsed from the "N tokens > limit" form.
    assert_eq!(
        window_exceeded(
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000"
        ),
        Some(220_000)
    );
    // Token count is also parsed when no limit suffix is present.
    assert_eq!(
        window_exceeded("invalid_request_error", "prompt is too long: 1234953 tokens"),
        Some(1234953)
    );
    // An unrelated message yields no match.
    assert_eq!(
        window_exceeded("invalid_request_error", "not a prompt length error"),
        None
    );
    // Only `invalid_request_error` types are considered, even with a matching message.
    assert_eq!(
        window_exceeded("rate_limit_error", "prompt is too long: 12345 tokens"),
        None
    );
    // Non-numeric token counts are rejected.
    assert_eq!(
        window_exceeded("invalid_request_error", "prompt is too long: invalid tokens"),
        None
    );
}