1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
/// Base URL of the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// `Anthropic-Beta` header value that enables the 1M-token context window beta.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
20
/// Prompt-caching configuration for a model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Presumably the minimum prompt size (in tokens) before caching is used —
    // TODO(review): confirm against callers.
    pub min_total_token: u64,
    pub should_speculate: bool,
    /// Maximum number of cache anchors (`cache_control` breakpoints) to place in a request.
    pub max_cache_anchors: usize,
}
28
/// The thinking mode a model runs in.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No extended thinking.
    #[default]
    Default,
    /// Extended thinking with an optional explicit token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    /// Adaptive thinking (no fixed budget); only some models support this.
    AdaptiveThinking,
}
39
40impl From<ModelMode> for AnthropicModelMode {
41 fn from(value: ModelMode) -> Self {
42 match value {
43 ModelMode::Default => AnthropicModelMode::Default,
44 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
45 }
46 }
47}
48
49impl From<AnthropicModelMode> for ModelMode {
50 fn from(value: AnthropicModelMode) -> Self {
51 match value {
52 AnthropicModelMode::Default => ModelMode::Default,
53 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
54 AnthropicModelMode::AdaptiveThinking => ModelMode::Default,
55 }
56 }
57}
58
/// The set of Anthropic models known to this crate.
///
/// The serde aliases accept the `-latest`, `-thinking`, and `-1m-context`
/// ID spellings; all aliases of a model deserialize to the same variant.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    // Unlike the 4.6-generation models, Sonnet 4.5's 1M-context flavor is a
    // distinct variant because it requires the 1M-context beta header.
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest",
        alias = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    /// The default model.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        /// The model ID sent to the API.
        name: String,
        /// Maximum context window size, in tokens.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        /// Maximum output tokens; `4_096` is used when unset.
        max_output_tokens: Option<u64>,
        /// Default sampling temperature; `1.0` is used when unset.
        default_temperature: Option<f32>,
        /// Extra values to append to the `Anthropic-Beta` request header.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        /// Thinking mode for this model.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
154
impl Model {
    /// The default "fast" model, for uses where latency matters more than capability.
    pub fn default_fast() -> Self {
        Self::ClaudeHaiku4_5
    }

    /// Resolves a model ID (including dated snapshots and alias spellings) to a
    /// known [`Model`] by prefix matching.
    ///
    /// NOTE: order matters — more specific prefixes must be tested before the
    /// shorter prefixes they contain (e.g. `claude-opus-4-6` before
    /// `claude-opus-4`, and `claude-sonnet-4-5-1m-context` before
    /// `claude-sonnet-4-5`).
    pub fn from_id(id: &str) -> Result<Self> {
        if id.starts_with("claude-opus-4-6") {
            return Ok(Self::ClaudeOpus4_6);
        }

        if id.starts_with("claude-opus-4-5") {
            return Ok(Self::ClaudeOpus4_5);
        }

        if id.starts_with("claude-opus-4-1") {
            return Ok(Self::ClaudeOpus4_1);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-6") {
            return Ok(Self::ClaudeSonnet4_6);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context") {
            return Ok(Self::ClaudeSonnet4_5_1mContext);
        }

        if id.starts_with("claude-sonnet-4-5") {
            return Ok(Self::ClaudeSonnet4_5);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-haiku-4-5") {
            return Ok(Self::ClaudeHaiku4_5);
        }

        if id.starts_with("claude-3-haiku") {
            return Ok(Self::Claude3Haiku);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }

    /// The stable identifier for this model (mostly `-latest` aliases rather
    /// than dated snapshots; compare [`Model::request_id`]).
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests
    /// (a dated snapshot where one exists). Note that the Sonnet 4.5 1M-context
    /// flavor maps to the same wire ID as plain Sonnet 4.5; the difference is
    /// carried by the beta header (see [`Model::beta_headers`]).
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6 => "claude-opus-4-6",
            Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-20250929",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// Human-readable name shown in the UI.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    /// Prompt-caching configuration for this model. All built-in models share
    /// one configuration; custom models use whatever was configured (or `None`).
    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
        }
    }

    /// Maximum context window size, in tokens.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => 200_000,
            Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_5_1mContext | Self::ClaudeSonnet4_6 => {
                1_000_000
            }
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    /// Maximum number of output tokens the model may produce.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5 => 64_000,
            Self::ClaudeOpus4_6 => 128_000,
            Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    /// Default sampling temperature (1.0 for all built-in models).
    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    /// The preferred thinking mode for this model: adaptive thinking when
    /// supported, otherwise explicit thinking with a default 4,096-token
    /// budget, otherwise the plain default mode.
    pub fn mode(&self) -> AnthropicModelMode {
        if self.supports_adaptive_thinking() {
            AnthropicModelMode::AdaptiveThinking
        } else if self.supports_thinking() {
            AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            }
        } else {
            AnthropicModelMode::Default
        }
    }

    /// Whether this model supports extended thinking.
    /// Note: custom models fall through to `false` here.
    pub fn supports_thinking(&self) -> bool {
        matches!(
            self,
            Self::ClaudeOpus4
                | Self::ClaudeOpus4_1
                | Self::ClaudeOpus4_5
                | Self::ClaudeOpus4_6
                | Self::ClaudeSonnet4
                | Self::ClaudeSonnet4_5
                | Self::ClaudeSonnet4_5_1mContext
                | Self::ClaudeSonnet4_6
                | Self::ClaudeHaiku4_5
        )
    }

    /// Whether this model supports the adaptive thinking mode
    /// (currently only the 4.6-generation models).
    pub fn supports_adaptive_thinking(&self) -> bool {
        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
    }

    /// The comma-joined value for the `Anthropic-Beta` request header, or
    /// `None` when no beta features apply. Blank entries from custom model
    /// configuration are filtered out.
    pub fn beta_headers(&self) -> Option<String> {
        let mut headers = vec![];

        match self {
            Self::ClaudeSonnet4_5_1mContext => {
                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
            }
            Self::Custom {
                extra_beta_headers, ..
            } => {
                headers.extend(
                    extra_beta_headers
                        .iter()
                        .filter(|header| !header.trim().is_empty())
                        .cloned(),
                );
            }
            _ => {}
        }

        if headers.is_empty() {
            None
        } else {
            Some(headers.join(","))
        }
    }

    /// The model ID to use for tool calls: custom models may redirect tool
    /// calls to a different model via `tool_override`; all others use their
    /// regular request ID.
    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}
399
400/// Generate completion with streaming.
401pub async fn stream_completion(
402 client: &dyn HttpClient,
403 api_url: &str,
404 api_key: &str,
405 request: Request,
406 beta_headers: Option<String>,
407) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
408 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
409 .await
410 .map(|output| output.0)
411}
412
413/// Generate completion without streaming.
414pub async fn non_streaming_completion(
415 client: &dyn HttpClient,
416 api_url: &str,
417 api_key: &str,
418 request: Request,
419 beta_headers: Option<String>,
420) -> Result<Response, AnthropicError> {
421 let (mut response, rate_limits) =
422 send_request(client, api_url, api_key, &request, beta_headers).await?;
423
424 if response.status().is_success() {
425 let mut body = String::new();
426 response
427 .body_mut()
428 .read_to_string(&mut body)
429 .await
430 .map_err(AnthropicError::ReadResponse)?;
431
432 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
433 } else {
434 Err(handle_error_response(response, rate_limits).await)
435 }
436}
437
438async fn send_request(
439 client: &dyn HttpClient,
440 api_url: &str,
441 api_key: &str,
442 request: impl Serialize,
443 beta_headers: Option<String>,
444) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
445 let uri = format!("{api_url}/v1/messages");
446
447 let mut request_builder = HttpRequest::builder()
448 .method(Method::POST)
449 .uri(uri)
450 .header("Anthropic-Version", "2023-06-01")
451 .header("X-Api-Key", api_key.trim())
452 .header("Content-Type", "application/json");
453
454 if let Some(beta_headers) = beta_headers {
455 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
456 }
457
458 let serialized_request =
459 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
460 let request = request_builder
461 .body(AsyncBody::from(serialized_request))
462 .map_err(AnthropicError::BuildRequestBody)?;
463
464 let response = client
465 .send(request)
466 .await
467 .map_err(AnthropicError::HttpSend)?;
468
469 let rate_limits = RateLimitInfo::from_headers(response.headers());
470
471 Ok((response, rate_limits))
472}
473
474async fn handle_error_response(
475 mut response: http::Response<AsyncBody>,
476 rate_limits: RateLimitInfo,
477) -> AnthropicError {
478 if response.status().as_u16() == 529 {
479 return AnthropicError::ServerOverloaded {
480 retry_after: rate_limits.retry_after,
481 };
482 }
483
484 if let Some(retry_after) = rate_limits.retry_after {
485 return AnthropicError::RateLimit { retry_after };
486 }
487
488 let mut body = String::new();
489 let read_result = response
490 .body_mut()
491 .read_to_string(&mut body)
492 .await
493 .map_err(AnthropicError::ReadResponse);
494
495 if let Err(err) = read_result {
496 return err;
497 }
498
499 match serde_json::from_str::<Event>(&body) {
500 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
501 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
502 status_code: response.status(),
503 message: body,
504 },
505 }
506}
507
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    /// Total allowance for the resource in the current window.
    pub limit: usize,
    /// Remaining allowance in the current window.
    pub remaining: usize,
    /// When the limit resets (parsed from an RFC 3339 timestamp header).
    pub reset: DateTime<Utc>,
}
515
516impl RateLimit {
517 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
518 let limit =
519 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
520 let remaining = get_header(
521 &format!("anthropic-ratelimit-{resource}-remaining"),
522 headers,
523 )?
524 .parse()?;
525 let reset = DateTime::parse_from_rfc3339(get_header(
526 &format!("anthropic-ratelimit-{resource}-reset"),
527 headers,
528 )?)?
529 .to_utc();
530
531 Ok(Self {
532 limit,
533 remaining,
534 reset,
535 })
536 }
537}
538
/// Rate-limit state parsed from response headers.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Parsed from the `retry-after` header, when present.
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
548
549impl RateLimitInfo {
550 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
551 // Check if any rate limit headers exist
552 let has_rate_limit_headers = headers
553 .keys()
554 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
555
556 if !has_rate_limit_headers {
557 return Self {
558 retry_after: None,
559 requests: None,
560 tokens: None,
561 input_tokens: None,
562 output_tokens: None,
563 };
564 }
565
566 Self {
567 retry_after: parse_retry_after(headers),
568 requests: RateLimit::from_headers("requests", headers).ok(),
569 tokens: RateLimit::from_headers("tokens", headers).ok(),
570 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
571 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
572 }
573 }
574}
575
576/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
577/// seconds). Note that other services might specify an HTTP date or some other format for this
578/// header. Returns `None` if the header is not present or cannot be parsed.
579pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
580 headers
581 .get("retry-after")
582 .and_then(|v| v.to_str().ok())
583 .and_then(|v| v.parse::<u64>().ok())
584 .map(Duration::from_secs)
585}
586
587fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
588 Ok(headers
589 .get(key)
590 .with_context(|| format!("missing header `{key}`"))?
591 .to_str()?)
592}
593
/// Generate completion with streaming, also returning rate-limit information
/// parsed from the response headers.
///
/// On success the returned stream yields one [`Event`] per server-sent-event
/// line; non-`data:` lines (e.g. `event:` lines and keep-alive blanks) are
/// silently skipped.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Same request as the non-streaming path, with `"stream": true` added.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // SSE frames are prefixed with `data:`; the space after
                        // the colon is optional, so try both spellings.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
640
/// The type of a cache-control marker (`ephemeral` is the only variant here).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}

/// Marks a content block as a prompt-cache anchor.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
652
/// A single conversation turn in a Messages API request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}

/// The author of a message.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
665
/// A content block inside a request message, tagged on the wire by `type`.
///
/// Most variants carry an optional `cache_control` so they can serve as
/// prompt-cache anchors.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A previously produced thinking block, echoed back with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Opaque redacted thinking data, echoed back verbatim.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image attachment.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation previously emitted by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of running a tool, keyed back to its `tool_use` id.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
707
/// The `content` of a tool result: either a bare string or a list of typed
/// parts (untagged on the wire — serde tries each shape in order).
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}

/// One typed part of a multipart tool result.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
721
/// A content block in an API response, tagged on the wire by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation requested by the model.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}

/// An image payload.
// NOTE(review): `source_type` presumably selects the encoding of `data`
// (e.g. base64) — confirm against callers before relying on that.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
746
/// Serde helper for `skip_serializing_if`: omit the field when it is `false`.
fn is_false(value: &bool) -> bool {
    !*value
}
750
/// A tool definition advertised to the model.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// JSON Schema describing the tool's input.
    pub input_schema: serde_json::Value,
    // Omitted from the wire when false (see `is_false`).
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}

/// How the model is allowed to choose tools.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Force the named tool.
    Tool { name: String },
    None,
}
768
/// The `thinking` request parameter: an explicit budget, or adaptive.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}

/// Output effort level, serialized as snake_case both for serde and strum.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}

/// Optional per-request output configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
790
/// A value that may be a bare string or a list of content blocks
/// (untagged on the wire — used for the `system` prompt).
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}

/// Request body for the Messages API.
///
/// Optional fields are skipped entirely when unset so the wire payload stays
/// minimal.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, either as one string or as content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
826
/// Requested inference speed tier.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}

/// A [`Request`] plus the `stream` flag; the base request is flattened so the
/// wire shape matches the non-streaming request with one extra field.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}

/// Request metadata forwarded to the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
846
/// Token usage counts reported by the API; all fields are optional because
/// different event types report different subsets.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}

/// A complete Messages API response (also embedded in `message_start` events).
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
873
/// A server-sent event in a streaming Messages response, tagged by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    /// Keep-alive event; carries no data.
    #[serde(rename = "ping")]
    Ping,
    /// Error envelope (also used for non-streaming error bodies; see
    /// `handle_error_response`).
    #[serde(rename = "error")]
    Error { error: ApiError },
}

/// An incremental update to one content block, tagged by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// A fragment of the JSON input of an in-progress tool call.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}

/// Top-level message changes delivered by a `message_delta` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
916
/// Errors produced by this crate when talking to the Anthropic API.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded (HTTP 529)
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
949
/// An error payload returned by the Anthropic API.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// The wire error code, e.g. `"invalid_request_error"`.
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}

/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
// The strum `EnumString` derive with snake_case serialization is what lets
// `ApiError::code` parse the wire `type` string into these variants.
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
980
981impl ApiError {
982 pub fn code(&self) -> Option<ApiErrorCode> {
983 ApiErrorCode::from_str(&self.error_type).ok()
984 }
985
986 pub fn is_rate_limit_error(&self) -> bool {
987 matches!(self.error_type.as_str(), "rate_limit_error")
988 }
989
990 pub fn match_window_exceeded(&self) -> Option<u64> {
991 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
992 return None;
993 };
994
995 parse_prompt_too_long(&self.message)
996 }
997}
998
/// Extracts the reported token count from a "prompt is too long" error
/// message, e.g. `"prompt is too long: 220000 tokens > 200000"`.
/// Returns `None` when the message has a different shape.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _) = rest.split_once(" tokens")?;
    token_count.parse().ok()
}
1007
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    pub model: String,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}

/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Number of input tokens the given request would consume.
    pub input_tokens: u64,
}
1029
1030/// Count the number of tokens in a message without creating it.
1031pub async fn count_tokens(
1032 client: &dyn HttpClient,
1033 api_url: &str,
1034 api_key: &str,
1035 request: CountTokensRequest,
1036) -> Result<CountTokensResponse, AnthropicError> {
1037 let uri = format!("{api_url}/v1/messages/count_tokens");
1038
1039 let request_builder = HttpRequest::builder()
1040 .method(Method::POST)
1041 .uri(uri)
1042 .header("Anthropic-Version", "2023-06-01")
1043 .header("X-Api-Key", api_key.trim())
1044 .header("Content-Type", "application/json");
1045
1046 let serialized_request =
1047 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1048 let http_request = request_builder
1049 .body(AsyncBody::from(serialized_request))
1050 .map_err(AnthropicError::BuildRequestBody)?;
1051
1052 let mut response = client
1053 .send(http_request)
1054 .await
1055 .map_err(AnthropicError::HttpSend)?;
1056
1057 let rate_limits = RateLimitInfo::from_headers(response.headers());
1058
1059 if response.status().is_success() {
1060 let mut body = String::new();
1061 response
1062 .body_mut()
1063 .read_to_string(&mut body)
1064 .await
1065 .map_err(AnthropicError::ReadResponse)?;
1066
1067 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1068 } else {
1069 Err(handle_error_response(response, rate_limits).await)
1070 }
1071}
1072
#[test]
fn test_match_window_exceeded() {
    // (error_type, message, expected token count)
    let cases: &[(&str, &str, Option<u64>)] = &[
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        // Wrong error code: the message matches but the code gates the parse.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for &(error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(
            error.match_window_exceeded(),
            expected,
            "case: {error_type}: {message}"
        );
    }
}