1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11use strum::{EnumIter, EnumString};
12use thiserror::Error;
13
14pub mod batches;
15
/// Default base URL of the Anthropic HTTP API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// `Anthropic-Beta` header value that enables the 1M-token context window.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
19
/// Prompt-caching parameters for a model (see [`Model::cache_configuration`]).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Minimum prompt size, in tokens, before caching is worth attempting.
    pub min_total_token: u64,
    // Whether cache entries should be written speculatively — presumably before a
    // reuse is known; exact consumer semantics live outside this file (TODO confirm).
    pub should_speculate: bool,
    // Maximum number of cache breakpoints ("anchors") to place in one request.
    pub max_cache_anchors: usize,
}
27
/// Extended-thinking (reasoning) mode for a model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No extended thinking.
    #[default]
    Default,
    /// Extended thinking with an optional explicit token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    /// The model chooses how much to think (see [`Model::supports_adaptive_thinking`]).
    AdaptiveThinking,
}
38
/// The Anthropic models known to this crate.
///
/// Each built-in variant deserializes from its canonical ID plus a set of
/// aliases (`-latest`, `-thinking`, `-1m-context`, and combinations thereof),
/// so the various ID spellings all map onto a single variant.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    /// Sonnet 4.5 with the 1M-context beta enabled (see [`Model::beta_headers`]).
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest",
        alias = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    /// The default model.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        // The model ID sent to the API.
        name: String,
        // Maximum context window size, in tokens.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        // Maximum output tokens; falls back to 4,096 when unset (see `max_output_tokens`).
        max_output_tokens: Option<u64>,
        // Sampling temperature to use when none is given; falls back to 1.0.
        default_temperature: Option<f32>,
        // Extra values to send in the `Anthropic-Beta` header.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        // Thinking mode for this model.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
134
impl Model {
    /// The default model for fast, low-cost requests.
    pub fn default_fast() -> Self {
        Self::ClaudeHaiku4_5
    }

    /// Resolve a model ID (including dated snapshots and `-latest`/`-thinking`
    /// aliases) to a known variant by prefix matching.
    ///
    /// More specific prefixes are deliberately checked before their prefixes
    /// (e.g. `claude-opus-4-6` before `claude-opus-4`), so the ordering of
    /// these checks is significant — do not reorder.
    pub fn from_id(id: &str) -> Result<Self> {
        if id.starts_with("claude-opus-4-6") {
            return Ok(Self::ClaudeOpus4_6);
        }

        if id.starts_with("claude-opus-4-5") {
            return Ok(Self::ClaudeOpus4_5);
        }

        if id.starts_with("claude-opus-4-1") {
            return Ok(Self::ClaudeOpus4_1);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-6") {
            return Ok(Self::ClaudeSonnet4_6);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context") {
            return Ok(Self::ClaudeSonnet4_5_1mContext);
        }

        if id.starts_with("claude-sonnet-4-5") {
            return Ok(Self::ClaudeSonnet4_5);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-haiku-4-5") {
            return Ok(Self::ClaudeHaiku4_5);
        }

        if id.starts_with("claude-3-haiku") {
            return Ok(Self::Claude3Haiku);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }

    /// The canonical identifier for this model: the `-latest` alias for most
    /// built-in models, a pinned snapshot for `Claude3Haiku`, and the
    /// configured name for `Custom`.
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests
    ///
    /// Note that `ClaudeSonnet4_5_1mContext` maps to the same wire ID as
    /// `ClaudeSonnet4_5`; the 1M context is selected via a beta header
    /// (see [`Self::beta_headers`]), not a distinct model ID.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6 => "claude-opus-4-6",
            Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
            Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-20250929",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// Human-readable name shown in the UI.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    /// Prompt-caching settings: a fixed default for every built-in model,
    /// the user-supplied configuration (possibly `None`) for `Custom`.
    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
        }
    }

    /// Maximum context window size, in tokens.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => 200_000,
            Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_5_1mContext | Self::ClaudeSonnet4_6 => {
                1_000_000
            }
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    /// Maximum number of output tokens per response.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5 => 64_000,
            Self::ClaudeOpus4_6 => 128_000,
            Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    /// Sampling temperature used when the request does not specify one.
    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    /// Preferred thinking mode: adaptive where supported, otherwise a fixed
    /// 4,096-token thinking budget, otherwise no thinking.
    pub fn mode(&self) -> AnthropicModelMode {
        if self.supports_adaptive_thinking() {
            AnthropicModelMode::AdaptiveThinking
        } else if self.supports_thinking() {
            AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            }
        } else {
            AnthropicModelMode::Default
        }
    }

    /// Whether this model supports extended thinking at all.
    /// Note: `Custom` models always return `false` here.
    pub fn supports_thinking(&self) -> bool {
        matches!(
            self,
            Self::ClaudeOpus4
                | Self::ClaudeOpus4_1
                | Self::ClaudeOpus4_5
                | Self::ClaudeOpus4_6
                | Self::ClaudeSonnet4
                | Self::ClaudeSonnet4_5
                | Self::ClaudeSonnet4_5_1mContext
                | Self::ClaudeSonnet4_6
                | Self::ClaudeHaiku4_5
        )
    }

    /// Whether the model picks its own thinking budget (4.6-generation models).
    pub fn supports_adaptive_thinking(&self) -> bool {
        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
    }

    /// Comma-joined value for the `Anthropic-Beta` request header, or `None`
    /// when no beta features apply to this model.
    ///
    /// NOTE(review): only `ClaudeSonnet4_5_1mContext` adds
    /// `CONTEXT_1M_BETA_HEADER`; the 4.6 models do not — presumably because
    /// their 1M context is native (see `max_token_count`). Confirm.
    pub fn beta_headers(&self) -> Option<String> {
        let mut headers = vec![];

        match self {
            Self::ClaudeSonnet4_5_1mContext => {
                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
            }
            Self::Custom {
                extra_beta_headers, ..
            } => {
                // Drop blank/whitespace-only entries from user configuration.
                headers.extend(
                    extra_beta_headers
                        .iter()
                        .filter(|header| !header.trim().is_empty())
                        .cloned(),
                );
            }
            _ => {}
        }

        if headers.is_empty() {
            None
        } else {
            Some(headers.join(","))
        }
    }

    /// The wire ID to use for tool calls: a custom model's `tool_override`
    /// when set, otherwise this model's own `request_id`.
    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}
379
380/// Generate completion with streaming.
381pub async fn stream_completion(
382 client: &dyn HttpClient,
383 api_url: &str,
384 api_key: &str,
385 request: Request,
386 beta_headers: Option<String>,
387) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
388 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
389 .await
390 .map(|output| output.0)
391}
392
393/// Generate completion without streaming.
394pub async fn non_streaming_completion(
395 client: &dyn HttpClient,
396 api_url: &str,
397 api_key: &str,
398 request: Request,
399 beta_headers: Option<String>,
400) -> Result<Response, AnthropicError> {
401 let (mut response, rate_limits) =
402 send_request(client, api_url, api_key, &request, beta_headers).await?;
403
404 if response.status().is_success() {
405 let mut body = String::new();
406 response
407 .body_mut()
408 .read_to_string(&mut body)
409 .await
410 .map_err(AnthropicError::ReadResponse)?;
411
412 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
413 } else {
414 Err(handle_error_response(response, rate_limits).await)
415 }
416}
417
418async fn send_request(
419 client: &dyn HttpClient,
420 api_url: &str,
421 api_key: &str,
422 request: impl Serialize,
423 beta_headers: Option<String>,
424) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
425 let uri = format!("{api_url}/v1/messages");
426
427 let mut request_builder = HttpRequest::builder()
428 .method(Method::POST)
429 .uri(uri)
430 .header("Anthropic-Version", "2023-06-01")
431 .header("X-Api-Key", api_key.trim())
432 .header("Content-Type", "application/json");
433
434 if let Some(beta_headers) = beta_headers {
435 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
436 }
437
438 let serialized_request =
439 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
440 let request = request_builder
441 .body(AsyncBody::from(serialized_request))
442 .map_err(AnthropicError::BuildRequestBody)?;
443
444 let response = client
445 .send(request)
446 .await
447 .map_err(AnthropicError::HttpSend)?;
448
449 let rate_limits = RateLimitInfo::from_headers(response.headers());
450
451 Ok((response, rate_limits))
452}
453
454async fn handle_error_response(
455 mut response: http::Response<AsyncBody>,
456 rate_limits: RateLimitInfo,
457) -> AnthropicError {
458 if response.status().as_u16() == 529 {
459 return AnthropicError::ServerOverloaded {
460 retry_after: rate_limits.retry_after,
461 };
462 }
463
464 if let Some(retry_after) = rate_limits.retry_after {
465 return AnthropicError::RateLimit { retry_after };
466 }
467
468 let mut body = String::new();
469 let read_result = response
470 .body_mut()
471 .read_to_string(&mut body)
472 .await
473 .map_err(AnthropicError::ReadResponse);
474
475 if let Err(err) = read_result {
476 return err;
477 }
478
479 match serde_json::from_str::<Event>(&body) {
480 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
481 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
482 status_code: response.status(),
483 message: body,
484 },
485 }
486}
487
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    // Total allowed for this resource in the current window.
    pub limit: usize,
    // Remaining allowance in the current window.
    pub remaining: usize,
    // When the window resets (parsed from an RFC 3339 timestamp header).
    pub reset: DateTime<Utc>,
}
495
496impl RateLimit {
497 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
498 let limit =
499 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
500 let remaining = get_header(
501 &format!("anthropic-ratelimit-{resource}-remaining"),
502 headers,
503 )?
504 .parse()?;
505 let reset = DateTime::parse_from_rfc3339(get_header(
506 &format!("anthropic-ratelimit-{resource}-reset"),
507 headers,
508 )?)?
509 .to_utc();
510
511 Ok(Self {
512 limit,
513 remaining,
514 reset,
515 })
516 }
517}
518
/// Rate-limit state reported by the API via response headers.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    // Parsed from the `Retry-After` header, when present.
    pub retry_after: Option<Duration>,
    // Per-resource limits; each is `None` if its headers were missing/unparseable.
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
528
529impl RateLimitInfo {
530 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
531 // Check if any rate limit headers exist
532 let has_rate_limit_headers = headers
533 .keys()
534 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
535
536 if !has_rate_limit_headers {
537 return Self {
538 retry_after: None,
539 requests: None,
540 tokens: None,
541 input_tokens: None,
542 output_tokens: None,
543 };
544 }
545
546 Self {
547 retry_after: parse_retry_after(headers),
548 requests: RateLimit::from_headers("requests", headers).ok(),
549 tokens: RateLimit::from_headers("tokens", headers).ok(),
550 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
551 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
552 }
553 }
554}
555
556/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
557/// seconds). Note that other services might specify an HTTP date or some other format for this
558/// header. Returns `None` if the header is not present or cannot be parsed.
559pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
560 headers
561 .get("retry-after")
562 .and_then(|v| v.to_str().ok())
563 .and_then(|v| v.parse::<u64>().ok())
564 .map(Duration::from_secs)
565}
566
567fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
568 Ok(headers
569 .get(key)
570 .with_context(|| format!("missing header `{key}`"))?
571 .to_str()?)
572}
573
/// Like [`stream_completion`], but additionally returns rate-limit info parsed
/// from the HTTP response headers.
///
/// On success, the stream yields one [`Event`] per server-sent-event payload
/// line; lines without a `data:` prefix (e.g. `event:` lines, keep-alives,
/// blanks) are silently dropped.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Wrap the request so `"stream": true` is serialized alongside its fields.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // SSE payloads arrive as `data: {...}`; also accept a
                        // bare `data:` prefix with no space. Any other line is
                        // not an event payload — returning `None` drops it.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
620
/// Cache type for a prompt-cache breakpoint; serialized lowercase.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}

/// Marks a content block as a prompt-cache breakpoint.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
632
/// A single conversation turn: a role plus its content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}

/// Who authored a message; serialized lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
645
/// A content block inside a request [`Message`], tagged by its `type` field.
///
/// Most variants carry an optional [`CacheControl`] marking a cache breakpoint,
/// omitted from the JSON when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A prior thinking block, echoed back with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Thinking content the API withheld; only opaque data is round-tripped.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image attachment.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation made by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, keyed by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
687
/// Tool-result payload: either a bare string or a list of typed parts.
/// Untagged, so deserialization tries each shape in order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}

/// One part of a multipart tool result (text or an image).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
701
/// A content block in an API [`Response`], tagged by its `type` field.
/// Mirrors [`RequestContent`] minus the request-only cache-control fields.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}

/// An image payload; `source_type` is the API's `type` discriminator
/// (e.g. base64 — exact accepted values are defined by the API, not here).
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
726
/// Serde `skip_serializing_if` helper: returns `true` when the flag is
/// `false`, so default-off booleans are omitted from the serialized output.
fn is_false(value: &bool) -> bool {
    !*value
}
730
/// A tool definition offered to the model.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    // JSON Schema describing the tool's input object.
    pub input_schema: serde_json::Value,
    // Omitted from JSON when false (see `is_false`).
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}

/// How the model may choose tools; tagged by `type`, serialized lowercase.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}
748
/// The request-level `thinking` setting; tagged by `type`, lowercase.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Extended thinking with an optional token budget.
    Enabled { budget_tokens: Option<u32> },
    /// The model decides how much to think.
    Adaptive,
}

/// Effort level for adaptive output; snake_case on the wire and for strum.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}

/// The request-level `output_config` object.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
770
/// A value that may be either a plain string or structured content blocks
/// (used for the `system` prompt). Untagged: shapes are tried in order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}

/// Body of a `POST /v1/messages` request. Optional fields and empty lists
/// are omitted from the serialized JSON.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    // Wire model ID (see `Model::request_id`).
    pub model: String,
    // Maximum output tokens for this completion.
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    // System prompt, as a string or content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}

/// Request-level `speed` setting; snake_case on the wire.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}

/// A [`Request`] with the streaming flag flattened alongside its fields.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}

/// Request metadata forwarded to the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
826
/// Token accounting reported by the API; all fields optional because
/// different events populate different subsets.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}

/// A complete (non-streaming) message response from `POST /v1/messages`.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}

/// A server-sent event in a streaming response, tagged by its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    /// Keep-alive; carries no data.
    #[serde(rename = "ping")]
    Ping,
    /// Error event; also parsed out of non-success bodies (see `handle_error_response`).
    #[serde(rename = "error")]
    Error { error: ApiError },
}

/// An incremental update to one content block in a stream.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// A fragment of the JSON being streamed into a `tool_use` input.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}

/// Message-level fields delivered at the end of a stream.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
896
/// Errors produced by this client while talking to the Anthropic API.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API that fit no more specific variant;
    /// `message` is the raw response body
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded; `retry_after` comes from the `Retry-After` header
    RateLimit { retry_after: Duration },

    /// Server overloaded (HTTP 529)
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned a structured error response
    ApiError(ApiError),
}

/// Structured error object from an API response body; `error_type` is the
/// API's `type` string (see [`ApiErrorCode`] for known values).
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
937
/// An Anthropic API error code.
///
/// Parsed from the API's snake_case `type` string via strum's `EnumString`
/// (see [`ApiError::code`]).
/// <https://docs.anthropic.com/en/api/errors#http-errors>
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
960
961impl ApiError {
962 pub fn code(&self) -> Option<ApiErrorCode> {
963 ApiErrorCode::from_str(&self.error_type).ok()
964 }
965
966 pub fn is_rate_limit_error(&self) -> bool {
967 matches!(self.error_type.as_str(), "rate_limit_error")
968 }
969
970 pub fn match_window_exceeded(&self) -> Option<u64> {
971 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
972 return None;
973 };
974
975 parse_prompt_too_long(&self.message)
976 }
977}
978
/// Extract the token count from an API message of the form
/// `"prompt is too long: <N> tokens..."`. Returns `None` if the message has a
/// different shape or `<N>` is not an unsigned integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    let (count, _suffix) = rest.split_once(" tokens")?;
    count.parse().ok()
}
987
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    pub model: String,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}

/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    // Tokens the prompt would consume, as counted by the API.
    pub input_tokens: u64,
}
1009
1010/// Count the number of tokens in a message without creating it.
1011pub async fn count_tokens(
1012 client: &dyn HttpClient,
1013 api_url: &str,
1014 api_key: &str,
1015 request: CountTokensRequest,
1016) -> Result<CountTokensResponse, AnthropicError> {
1017 let uri = format!("{api_url}/v1/messages/count_tokens");
1018
1019 let request_builder = HttpRequest::builder()
1020 .method(Method::POST)
1021 .uri(uri)
1022 .header("Anthropic-Version", "2023-06-01")
1023 .header("X-Api-Key", api_key.trim())
1024 .header("Content-Type", "application/json");
1025
1026 let serialized_request =
1027 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1028 let http_request = request_builder
1029 .body(AsyncBody::from(serialized_request))
1030 .map_err(AnthropicError::BuildRequestBody)?;
1031
1032 let mut response = client
1033 .send(http_request)
1034 .await
1035 .map_err(AnthropicError::HttpSend)?;
1036
1037 let rate_limits = RateLimitInfo::from_headers(response.headers());
1038
1039 if response.status().is_success() {
1040 let mut body = String::new();
1041 response
1042 .body_mut()
1043 .read_to_string(&mut body)
1044 .await
1045 .map_err(AnthropicError::ReadResponse)?;
1046
1047 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1048 } else {
1049 Err(handle_error_response(response, rate_limits).await)
1050 }
1051}
1052
#[test]
fn test_match_window_exceeded() {
    // (error_type, message, expected token count)
    let cases: &[(&str, &str, Option<u64>)] = &[
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1_234_953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        // Wrong error type: the message parses, but the code gate rejects it.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: (*error_type).to_string(),
            message: (*message).to_string(),
        };
        assert_eq!(error.match_window_exceeded(), *expected, "{message}");
    }
}