1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11use strum::{EnumIter, EnumString};
12use thiserror::Error;
13
/// Base URL of the Anthropic API, without a trailing slash.
///
/// NOTE(review): presumably passed as the `api_url` argument of the request functions in this
/// file, which append `/v1/messages` to it — confirm against callers.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
15
/// Prompt-caching parameters associated with a model.
///
/// Returned by `Model::cache_configuration`; built-in models that support caching use
/// `min_total_token: 2_048`, `should_speculate: true` and `max_cache_anchors: 4`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum number of prompt tokens before caching is
    // attempted — the consumer is not visible here, confirm.
    pub min_total_token: u64,
    // NOTE(review): presumably enables speculative cache warming — confirm against the consumer.
    pub should_speculate: bool,
    // NOTE(review): presumably the maximum number of cache breakpoints per request — confirm.
    pub max_cache_anchors: usize,
}
23
/// Operating mode of a model, as reported by `Model::mode`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// Regular mode; this is the `Default::default()` value.
    #[default]
    Default,
    /// Thinking mode with an optional token budget.
    ///
    /// The built-in `*Thinking` models report `Some(4_096)` from `Model::mode`.
    Thinking {
        budget_tokens: Option<u32>,
    },
}
33
/// The Anthropic models known to this file, plus a user-defined `Custom` variant.
///
/// Each built-in variant is (de)serialized under its short `rename` name; the `-latest`
/// alias is additionally accepted when deserializing. `ClaudeSonnet4` is the
/// `Default::default()` value.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[default]
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-defined model; all of its properties come from the fields below.
    #[serde(rename = "custom")]
    Custom {
        /// Model identifier; returned verbatim by `Model::id` and `Model::request_id`.
        name: String,
        /// Value reported by `Model::max_token_count`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        /// Output token limit; `Model::max_output_tokens` falls back to 4_096 when unset.
        max_output_tokens: Option<u64>,
        /// Sampling temperature; `Model::default_temperature` falls back to 1.0 when unset.
        default_temperature: Option<f32>,
        /// Additional beta header values appended by `Model::beta_headers`
        /// (blank entries are skipped there). Defaults to empty when absent from the input.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        /// Mode reported by `Model::mode`. Defaults to `AnthropicModelMode::Default`
        /// when absent from the input.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
101
102impl Model {
103 pub fn default_fast() -> Self {
104 Self::Claude3_5Haiku
105 }
106
107 pub fn from_id(id: &str) -> Result<Self> {
108 if id.starts_with("claude-opus-4-1-thinking") {
109 return Ok(Self::ClaudeOpus4_1Thinking);
110 }
111
112 if id.starts_with("claude-opus-4-thinking") {
113 return Ok(Self::ClaudeOpus4Thinking);
114 }
115
116 if id.starts_with("claude-opus-4-1") {
117 return Ok(Self::ClaudeOpus4_1);
118 }
119
120 if id.starts_with("claude-opus-4") {
121 return Ok(Self::ClaudeOpus4);
122 }
123
124 if id.starts_with("claude-sonnet-4-5-thinking") {
125 return Ok(Self::ClaudeSonnet4_5Thinking);
126 }
127
128 if id.starts_with("claude-sonnet-4-5") {
129 return Ok(Self::ClaudeSonnet4_5);
130 }
131
132 if id.starts_with("claude-sonnet-4-thinking") {
133 return Ok(Self::ClaudeSonnet4Thinking);
134 }
135
136 if id.starts_with("claude-sonnet-4") {
137 return Ok(Self::ClaudeSonnet4);
138 }
139
140 if id.starts_with("claude-3-7-sonnet-thinking") {
141 return Ok(Self::Claude3_7SonnetThinking);
142 }
143
144 if id.starts_with("claude-3-7-sonnet") {
145 return Ok(Self::Claude3_7Sonnet);
146 }
147
148 if id.starts_with("claude-3-5-sonnet") {
149 return Ok(Self::Claude3_5Sonnet);
150 }
151
152 if id.starts_with("claude-3-5-haiku") {
153 return Ok(Self::Claude3_5Haiku);
154 }
155
156 if id.starts_with("claude-3-opus") {
157 return Ok(Self::Claude3Opus);
158 }
159
160 if id.starts_with("claude-3-sonnet") {
161 return Ok(Self::Claude3Sonnet);
162 }
163
164 if id.starts_with("claude-3-haiku") {
165 return Ok(Self::Claude3Haiku);
166 }
167
168 Err(anyhow!("invalid model ID: {id}"))
169 }
170
171 pub fn id(&self) -> &str {
172 match self {
173 Self::ClaudeOpus4 => "claude-opus-4-latest",
174 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
175 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
176 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
177 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
178 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
179 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
180 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
181 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
182 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
183 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
184 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
185 Self::Claude3Opus => "claude-3-opus-latest",
186 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
187 Self::Claude3Haiku => "claude-3-haiku-20240307",
188 Self::Custom { name, .. } => name,
189 }
190 }
191
192 /// The id of the model that should be used for making API requests
193 pub fn request_id(&self) -> &str {
194 match self {
195 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
196 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
197 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
198 Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
199 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
200 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
201 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
202 Self::Claude3Opus => "claude-3-opus-latest",
203 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
204 Self::Claude3Haiku => "claude-3-haiku-20240307",
205 Self::Custom { name, .. } => name,
206 }
207 }
208
209 pub fn display_name(&self) -> &str {
210 match self {
211 Self::ClaudeOpus4 => "Claude Opus 4",
212 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
213 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
214 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
215 Self::ClaudeSonnet4 => "Claude Sonnet 4",
216 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
217 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
218 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
219 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
220 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
221 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
222 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
223 Self::Claude3Opus => "Claude 3 Opus",
224 Self::Claude3Sonnet => "Claude 3 Sonnet",
225 Self::Claude3Haiku => "Claude 3 Haiku",
226 Self::Custom {
227 name, display_name, ..
228 } => display_name.as_ref().unwrap_or(name),
229 }
230 }
231
232 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
233 match self {
234 Self::ClaudeOpus4
235 | Self::ClaudeOpus4_1
236 | Self::ClaudeOpus4Thinking
237 | Self::ClaudeOpus4_1Thinking
238 | Self::ClaudeSonnet4
239 | Self::ClaudeSonnet4Thinking
240 | Self::ClaudeSonnet4_5
241 | Self::ClaudeSonnet4_5Thinking
242 | Self::Claude3_5Sonnet
243 | Self::Claude3_5Haiku
244 | Self::Claude3_7Sonnet
245 | Self::Claude3_7SonnetThinking
246 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
247 min_total_token: 2_048,
248 should_speculate: true,
249 max_cache_anchors: 4,
250 }),
251 Self::Custom {
252 cache_configuration,
253 ..
254 } => cache_configuration.clone(),
255 _ => None,
256 }
257 }
258
259 pub fn max_token_count(&self) -> u64 {
260 match self {
261 Self::ClaudeOpus4
262 | Self::ClaudeOpus4_1
263 | Self::ClaudeOpus4Thinking
264 | Self::ClaudeOpus4_1Thinking
265 | Self::ClaudeSonnet4
266 | Self::ClaudeSonnet4Thinking
267 | Self::ClaudeSonnet4_5
268 | Self::ClaudeSonnet4_5Thinking
269 | Self::Claude3_5Sonnet
270 | Self::Claude3_5Haiku
271 | Self::Claude3_7Sonnet
272 | Self::Claude3_7SonnetThinking
273 | Self::Claude3Opus
274 | Self::Claude3Sonnet
275 | Self::Claude3Haiku => 200_000,
276 Self::Custom { max_tokens, .. } => *max_tokens,
277 }
278 }
279
280 pub fn max_output_tokens(&self) -> u64 {
281 match self {
282 Self::ClaudeOpus4
283 | Self::ClaudeOpus4_1
284 | Self::ClaudeOpus4Thinking
285 | Self::ClaudeOpus4_1Thinking
286 | Self::ClaudeSonnet4
287 | Self::ClaudeSonnet4Thinking
288 | Self::ClaudeSonnet4_5
289 | Self::ClaudeSonnet4_5Thinking
290 | Self::Claude3_5Sonnet
291 | Self::Claude3_7Sonnet
292 | Self::Claude3_7SonnetThinking
293 | Self::Claude3_5Haiku => 8_192,
294 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
295 Self::Custom {
296 max_output_tokens, ..
297 } => max_output_tokens.unwrap_or(4_096),
298 }
299 }
300
301 pub fn default_temperature(&self) -> f32 {
302 match self {
303 Self::ClaudeOpus4
304 | Self::ClaudeOpus4_1
305 | Self::ClaudeOpus4Thinking
306 | Self::ClaudeOpus4_1Thinking
307 | Self::ClaudeSonnet4
308 | Self::ClaudeSonnet4Thinking
309 | Self::ClaudeSonnet4_5
310 | Self::ClaudeSonnet4_5Thinking
311 | Self::Claude3_5Sonnet
312 | Self::Claude3_7Sonnet
313 | Self::Claude3_7SonnetThinking
314 | Self::Claude3_5Haiku
315 | Self::Claude3Opus
316 | Self::Claude3Sonnet
317 | Self::Claude3Haiku => 1.0,
318 Self::Custom {
319 default_temperature,
320 ..
321 } => default_temperature.unwrap_or(1.0),
322 }
323 }
324
325 pub fn mode(&self) -> AnthropicModelMode {
326 match self {
327 Self::ClaudeOpus4
328 | Self::ClaudeOpus4_1
329 | Self::ClaudeSonnet4
330 | Self::ClaudeSonnet4_5
331 | Self::Claude3_5Sonnet
332 | Self::Claude3_7Sonnet
333 | Self::Claude3_5Haiku
334 | Self::Claude3Opus
335 | Self::Claude3Sonnet
336 | Self::Claude3Haiku => AnthropicModelMode::Default,
337 Self::ClaudeOpus4Thinking
338 | Self::ClaudeOpus4_1Thinking
339 | Self::ClaudeSonnet4Thinking
340 | Self::ClaudeSonnet4_5Thinking
341 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
342 budget_tokens: Some(4_096),
343 },
344 Self::Custom { mode, .. } => mode.clone(),
345 }
346 }
347
348 pub const DEFAULT_BETA_HEADERS: &[&str] = &["prompt-caching-2024-07-31"];
349
350 pub fn beta_headers(&self) -> String {
351 let mut headers = Self::DEFAULT_BETA_HEADERS
352 .iter()
353 .map(|header| header.to_string())
354 .collect::<Vec<_>>();
355
356 match self {
357 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
358 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
359 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
360 headers.push("token-efficient-tools-2025-02-19".to_string());
361 }
362 Self::Custom {
363 extra_beta_headers, ..
364 } => {
365 headers.extend(
366 extra_beta_headers
367 .iter()
368 .filter(|header| !header.trim().is_empty())
369 .cloned(),
370 );
371 }
372 _ => {}
373 }
374
375 headers.join(",")
376 }
377
378 pub fn tool_model_id(&self) -> &str {
379 if let Self::Custom {
380 tool_override: Some(tool_override),
381 ..
382 } = self
383 {
384 tool_override
385 } else {
386 self.request_id()
387 }
388 }
389}
390
391pub async fn complete(
392 client: &dyn HttpClient,
393 api_url: &str,
394 api_key: &str,
395 request: Request,
396 beta_headers: String,
397) -> Result<Response, AnthropicError> {
398 let uri = format!("{api_url}/v1/messages");
399 let request_builder = HttpRequest::builder()
400 .method(Method::POST)
401 .uri(uri)
402 .header("Anthropic-Version", "2023-06-01")
403 .header("Anthropic-Beta", beta_headers)
404 .header("X-Api-Key", api_key.trim())
405 .header("Content-Type", "application/json");
406
407 let serialized_request =
408 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
409 let request = request_builder
410 .body(AsyncBody::from(serialized_request))
411 .map_err(AnthropicError::BuildRequestBody)?;
412
413 let mut response = client
414 .send(request)
415 .await
416 .map_err(AnthropicError::HttpSend)?;
417 let status_code = response.status();
418 let mut body = String::new();
419 response
420 .body_mut()
421 .read_to_string(&mut body)
422 .await
423 .map_err(AnthropicError::ReadResponse)?;
424
425 if status_code.is_success() {
426 Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
427 } else {
428 Err(AnthropicError::HttpResponseError {
429 status_code,
430 message: body,
431 })
432 }
433}
434
435pub async fn stream_completion(
436 client: &dyn HttpClient,
437 api_url: &str,
438 api_key: &str,
439 request: Request,
440 beta_headers: String,
441) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
442 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
443 .await
444 .map(|output| output.0)
445}
446
/// An individual rate limit.
///
/// Populated from the `anthropic-ratelimit-{resource}-limit`, `-remaining` and `-reset`
/// response headers by `RateLimit::from_headers`.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `-limit` header.
    pub limit: usize,
    /// Value of the `-remaining` header.
    pub remaining: usize,
    /// Value of the `-reset` header, parsed as an RFC 3339 timestamp and converted to UTC.
    pub reset: DateTime<Utc>,
}
454
455impl RateLimit {
456 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
457 let limit =
458 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
459 let remaining = get_header(
460 &format!("anthropic-ratelimit-{resource}-remaining"),
461 headers,
462 )?
463 .parse()?;
464 let reset = DateTime::parse_from_rfc3339(get_header(
465 &format!("anthropic-ratelimit-{resource}-reset"),
466 headers,
467 )?)?
468 .to_utc();
469
470 Ok(Self {
471 limit,
472 remaining,
473 reset,
474 })
475 }
476}
477
/// Rate-limit information extracted from the headers of an API response.
///
/// Every field is `None` when the corresponding header(s) are absent or cannot be parsed.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Value of the `retry-after` header, interpreted as whole seconds.
    pub retry_after: Option<Duration>,
    /// Limit read from the `anthropic-ratelimit-requests-*` headers.
    pub requests: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-tokens-*` headers.
    pub tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-input-tokens-*` headers.
    pub input_tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-output-tokens-*` headers.
    pub output_tokens: Option<RateLimit>,
}
487
488impl RateLimitInfo {
489 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
490 // Check if any rate limit headers exist
491 let has_rate_limit_headers = headers
492 .keys()
493 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
494
495 if !has_rate_limit_headers {
496 return Self {
497 retry_after: None,
498 requests: None,
499 tokens: None,
500 input_tokens: None,
501 output_tokens: None,
502 };
503 }
504
505 Self {
506 retry_after: parse_retry_after(headers),
507 requests: RateLimit::from_headers("requests", headers).ok(),
508 tokens: RateLimit::from_headers("tokens", headers).ok(),
509 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
510 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
511 }
512 }
513}
514
515/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
516/// seconds). Note that other services might specify an HTTP date or some other format for this
517/// header. Returns `None` if the header is not present or cannot be parsed.
518pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
519 headers
520 .get("retry-after")
521 .and_then(|v| v.to_str().ok())
522 .and_then(|v| v.parse::<u64>().ok())
523 .map(Duration::from_secs)
524}
525
526fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
527 Ok(headers
528 .get(key)
529 .with_context(|| format!("missing header `{key}`"))?
530 .to_str()?)
531}
532
533pub async fn stream_completion_with_rate_limit_info(
534 client: &dyn HttpClient,
535 api_url: &str,
536 api_key: &str,
537 request: Request,
538 beta_headers: String,
539) -> Result<
540 (
541 BoxStream<'static, Result<Event, AnthropicError>>,
542 Option<RateLimitInfo>,
543 ),
544 AnthropicError,
545> {
546 let request = StreamingRequest {
547 base: request,
548 stream: true,
549 };
550 let uri = format!("{api_url}/v1/messages");
551
552 let request_builder = HttpRequest::builder()
553 .method(Method::POST)
554 .uri(uri)
555 .header("Anthropic-Version", "2023-06-01")
556 .header("Anthropic-Beta", beta_headers)
557 .header("X-Api-Key", api_key.trim())
558 .header("Content-Type", "application/json");
559 let serialized_request =
560 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
561 let request = request_builder
562 .body(AsyncBody::from(serialized_request))
563 .map_err(AnthropicError::BuildRequestBody)?;
564
565 let mut response = client
566 .send(request)
567 .await
568 .map_err(AnthropicError::HttpSend)?;
569 let rate_limits = RateLimitInfo::from_headers(response.headers());
570 if response.status().is_success() {
571 let reader = BufReader::new(response.into_body());
572 let stream = reader
573 .lines()
574 .filter_map(|line| async move {
575 match line {
576 Ok(line) => {
577 let line = line.strip_prefix("data: ")?;
578 match serde_json::from_str(line) {
579 Ok(response) => Some(Ok(response)),
580 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
581 }
582 }
583 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
584 }
585 })
586 .boxed();
587 Ok((stream, Some(rate_limits)))
588 } else if response.status().as_u16() == 529 {
589 Err(AnthropicError::ServerOverloaded {
590 retry_after: rate_limits.retry_after,
591 })
592 } else if let Some(retry_after) = rate_limits.retry_after {
593 Err(AnthropicError::RateLimit { retry_after })
594 } else {
595 let mut body = String::new();
596 response
597 .body_mut()
598 .read_to_string(&mut body)
599 .await
600 .map_err(AnthropicError::ReadResponse)?;
601
602 match serde_json::from_str::<Event>(&body) {
603 Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
604 Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
605 status_code: response.status(),
606 message: body,
607 }),
608 }
609 }
610}
611
/// Kind of cache control, (de)serialized in lowercase (`Ephemeral` is `"ephemeral"`).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
617
/// Cache-control marker that can be attached to request content blocks
/// (see the `cache_control` fields of `RequestContent`).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// Appears as the `type` key on the wire.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
623
/// A single message of a request: who is speaking and the content blocks of the turn.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
629
/// Author of a message, (de)serialized in lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
636
/// A content block sent as part of a request message.
///
/// Internally tagged: the variant is identified by the `type` key. Wherever a variant has a
/// `cache_control` field, it is left out of the serialized output when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A thinking block together with its signature (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A redacted thinking block carrying opaque `data` (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image (`"type": "image"`).
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation (`"type": "tool_result"`).
    // NOTE(review): `tool_use_id` presumably refers to the `id` of a `ToolUse` block — confirm.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
678
/// Content of a tool result.
///
/// Untagged: serialized either as a bare string (`Plain`) or as an array of parts
/// (`Multipart`); on deserialization the variants are tried in declaration order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
685
/// One part of a multipart tool result.
///
/// Internally tagged by the `type` key with lowercase variant names (`"text"` / `"image"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
692
/// A content block as it appears in a response (`Response::content`) or at the start of a
/// streamed content block (`Event::ContentBlockStart`).
///
/// Internally tagged: the variant is identified by the `type` key.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text { text: String },
    /// A thinking block (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// A redacted thinking block carrying opaque `data` (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
709
/// Source of an image used in request content or tool results.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Appears as the `type` key on the wire.
    // NOTE(review): presumably "base64" for inline images — not enforced here, confirm.
    #[serde(rename = "type")]
    pub source_type: String,
    // NOTE(review): presumably a MIME type such as "image/png" — confirm against callers.
    pub media_type: String,
    // NOTE(review): presumably the encoded image payload — confirm against callers.
    pub data: String,
}
717
/// A tool definition sent with a request (see `Request::tools`).
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// Arbitrary JSON describing the tool's input.
    // NOTE(review): presumably a JSON Schema object — not validated here, confirm.
    pub input_schema: serde_json::Value,
}
724
/// Tool-choice setting of a request (see `Request::tool_choice`).
///
/// Internally tagged by the `type` key with lowercase variant names
/// (`"auto"`, `"any"`, `"tool"`, `"none"`); `Tool` additionally carries the tool `name`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}
733
/// Thinking setting of a request (see `Request::thinking`).
///
/// Internally tagged by the `type` key; `Enabled` is written as `"type": "enabled"`.
/// `budget_tokens` has no `skip_serializing_if`, so `None` is serialized as `null`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}
739
/// Either a bare string or a list of content blocks (used for `Request::system`).
///
/// Untagged: serialized as a JSON string or a JSON array respectively; on deserialization
/// the variants are tried in declaration order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
746
/// Body of a request posted to `/v1/messages` by `complete` and the streaming functions.
///
/// `model`, `max_tokens` and `messages` are always serialized. Every other field is omitted
/// from the output when empty / `None`, and defaults to empty / `None` when absent from the
/// input.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Model identifier string.
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a bare string or as content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
771
/// A `Request` with an additional `stream` flag.
///
/// `base` is flattened, so its fields appear at the top level of the JSON object next to
/// `stream`. `stream_completion_with_rate_limit_info` builds this with `stream: true`.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
778
/// Optional metadata attached to a request (see `Request::metadata`).
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// No `skip_serializing_if` is set, so `None` is serialized as `null`.
    pub user_id: Option<String>,
}
783
/// Token usage counters reported with responses and `message_delta` events.
///
/// Every counter is optional: it defaults to `None` when absent from the input and is
/// omitted from the output when `None`.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
795
/// A message returned by the API: the result of `complete`, and the payload of the
/// `message_start` streaming event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    /// Appears as the `type` key on the wire.
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    /// Defaults to `None` when absent; omitted from the output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// Defaults to `None` when absent; omitted from the output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
810
/// An event of a streaming response, decoded from each `data: ` line of the stream.
///
/// Internally tagged: the variant is identified by the `type` key. The `Error` variant is
/// also used to decode the body of a failed streaming request.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// `"type": "message_start"` — carries the initial message.
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    /// `"type": "content_block_start"` — a content block begins at `index`.
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// `"type": "content_block_delta"` — an incremental update for the block at `index`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    /// `"type": "content_block_stop"` — the block at `index` is complete.
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// `"type": "message_delta"` — message-level update with usage counters.
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    /// `"type": "message_stop"`.
    #[serde(rename = "message_stop")]
    MessageStop,
    /// `"type": "ping"`.
    #[serde(rename = "ping")]
    Ping,
    /// `"type": "error"` — carries an API error.
    #[serde(rename = "error")]
    Error { error: ApiError },
}
834
/// Incremental update carried by `Event::ContentBlockDelta`.
///
/// Internally tagged: the variant is identified by the `type` key.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    /// `"type": "text_delta"` — a fragment of text.
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    /// `"type": "thinking_delta"` — a fragment of thinking text.
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// `"type": "signature_delta"` — a signature value.
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// `"type": "input_json_delta"` — a fragment of JSON text.
    // NOTE(review): presumably fragments are concatenated by the consumer to rebuild a tool
    // input — the consumer is not visible here, confirm.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
847
/// Message-level fields carried by `Event::MessageDelta`.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
853
/// Errors produced by the request functions in this file.
///
/// Each variant corresponds to one stage of a request: building it, sending it, reading the
/// response, or interpreting a non-success reply.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API; `message` holds the raw response body
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded; `retry_after` comes from the `retry-after` response header
    RateLimit { retry_after: Duration },

    /// Server overloaded (HTTP status 529); `retry_after` is set when the header was present
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
886
/// An error object returned by the API.
///
/// Displayed as `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Error type string (the `type` key on the wire), e.g. `invalid_request_error`.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable error message.
    pub message: String,
}
894
/// An Anthropic API error code.
///
/// Parsed from its snake_case name via `FromStr` (derived by `EnumString`), e.g.
/// `"invalid_request_error"` parses to `InvalidRequestError`.
///
/// <https://docs.anthropic.com/en/api/errors#http-errors>
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
917
918impl ApiError {
919 pub fn code(&self) -> Option<ApiErrorCode> {
920 ApiErrorCode::from_str(&self.error_type).ok()
921 }
922
923 pub fn is_rate_limit_error(&self) -> bool {
924 matches!(self.error_type.as_str(), "rate_limit_error")
925 }
926
927 pub fn match_window_exceeded(&self) -> Option<u64> {
928 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
929 return None;
930 };
931
932 parse_prompt_too_long(&self.message)
933 }
934}
935
/// Extracts the token count from a `prompt is too long: <N> tokens…` message.
///
/// Returns `None` when the message does not start with `prompt is too long: `, when no
/// ` tokens` marker follows, or when the text between the two is not an unsigned integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    // Only the text before the first " tokens" is the count; anything after it is ignored.
    let (token_count, _rest) = after_prefix.split_once(" tokens")?;
    token_count.parse().ok()
}
944
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Token count followed by the limit comparison.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Token count with nothing after the marker.
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message but a different error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Count is not a number.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}