use std::io;
use std::str::FromStr;
use std::time::Duration;

use anyhow::{Context as _, Result, anyhow};
use chrono::{DateTime, Utc};
use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
use http_client::http::{self, HeaderMap, HeaderValue};
use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
use serde::{Deserialize, Serialize};
use strum::{EnumIter, EnumString};
use thiserror::Error;

pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    pub min_total_token: u64,
    pub should_speculate: bool,
    pub max_cache_anchors: usize,
}

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    #[default]
    Default,
    Thinking {
        budget_tokens: Option<u32>,
    },
}

#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[default]
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}

impl Model {
    pub fn default_fast() -> Self {
        Self::Claude3_5Haiku
    }

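    /// Returns the model whose ID matches the given string, or an error if the
    /// ID is not recognized.
    ///
    /// Thinking variants are matched before their base prefixes so that, for
    /// example, "claude-opus-4-thinking-latest" resolves to
    /// `ClaudeOpus4Thinking` rather than `ClaudeOpus4`.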
    pub fn from_id(id: &str) -> Result<Self> {
        if id.starts_with("claude-opus-4-thinking") {
            return Ok(Self::ClaudeOpus4Thinking);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-thinking") {
            return Ok(Self::ClaudeSonnet4Thinking);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-3-7-sonnet-thinking") {
            return Ok(Self::Claude3_7SonnetThinking);
        }

        if id.starts_with("claude-3-7-sonnet") {
            return Ok(Self::Claude3_7Sonnet);
        }

        if id.starts_with("claude-3-5-sonnet") {
            return Ok(Self::Claude3_5Sonnet);
        }

        if id.starts_with("claude-3-5-haiku") {
            return Ok(Self::Claude3_5Haiku);
        }

        if id.starts_with("claude-3-opus") {
            return Ok(Self::Claude3Opus);
        }

        if id.starts_with("claude-3-sonnet") {
            return Ok(Self::Claude3Sonnet);
        }

        if id.starts_with("claude-3-haiku") {
            return Ok(Self::Claude3Haiku);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }

    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
            Self::Claude3Opus => "claude-3-opus-latest",
            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// The ID of the model that should be used when making API requests.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
            Self::Claude3Opus => "claude-3-opus-latest",
            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
            Self::Claude3Opus => "Claude 3 Opus",
            Self::Claude3Sonnet => "Claude 3 Sonnet",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_5Sonnet
            | Self::Claude3_5Haiku
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
            _ => None,
        }
    }

    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_5Sonnet
            | Self::Claude3_5Haiku
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => 200_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_5Sonnet
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::Claude3_5Haiku => 8_192,
            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_5Sonnet
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::Claude3_5Haiku
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    pub fn mode(&self) -> AnthropicModelMode {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeSonnet4
            | Self::Claude3_5Sonnet
            | Self::Claude3_7Sonnet
            | Self::Claude3_5Haiku
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => AnthropicModelMode::Default,
            Self::ClaudeOpus4Thinking
            | Self::ClaudeSonnet4Thinking
            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            Self::Custom { mode, .. } => mode.clone(),
        }
    }

    pub const DEFAULT_BETA_HEADERS: &[&str] = &["prompt-caching-2024-07-31"];

    pub fn beta_headers(&self) -> String {
        let mut headers = Self::DEFAULT_BETA_HEADERS
            .iter()
            .map(|header| header.to_string())
            .collect::<Vec<_>>();

        match self {
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
                headers.push("token-efficient-tools-2025-02-19".to_string());
            }
            Self::Custom {
                extra_beta_headers, ..
            } => {
                headers.extend(
                    extra_beta_headers
                        .iter()
                        .filter(|header| !header.trim().is_empty())
                        .cloned(),
                );
            }
            _ => {}
        }

        headers.join(",")
    }

    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}

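// A small sanity check of the prefix matching in `Model::from_id` and of the
// beta header assembly; the expected values are read directly off the code
// above as written.
#[test]
fn test_model_from_id_and_beta_headers() {
    assert_eq!(
        Model::from_id("claude-opus-4-thinking-latest").unwrap(),
        Model::ClaudeOpus4Thinking
    );
    assert_eq!(
        Model::from_id("claude-sonnet-4-20250514").unwrap(),
        Model::ClaudeSonnet4
    );
    assert!(Model::from_id("gpt-4o").is_err());
    assert_eq!(
        Model::Claude3_7Sonnet.beta_headers(),
        "prompt-caching-2024-07-31,token-efficient-tools-2025-02-19"
    );
}

/// Sends a non-streaming request to the Anthropic Messages API and returns the
/// parsed [`Response`], or an [`AnthropicError`] if the request could not be
/// built, sent, or decoded.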
pub async fn complete(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
) -> Result<Response, AnthropicError> {
    let uri = format!("{api_url}/v1/messages");
    let beta_headers = Model::from_id(&request.model)
        .map(|model| model.beta_headers())
        .unwrap_or_else(|_| Model::DEFAULT_BETA_HEADERS.join(","));
    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Anthropic-Version", "2023-06-01")
        .header("Anthropic-Beta", beta_headers)
        .header("X-Api-Key", api_key)
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;
    let status_code = response.status();
    let mut body = String::new();
    response
        .body_mut()
        .read_to_string(&mut body)
        .await
        .map_err(AnthropicError::ReadResponse)?;

    if status_code.is_success() {
        Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
    } else {
        Err(AnthropicError::HttpResponseError {
            status_code,
            message: body,
        })
    }
}

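/// Sends a streaming request to the Anthropic Messages API, returning a stream
/// of [`Event`]s and discarding the rate-limit information carried in the
/// response headers.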
pub async fn stream_completion(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
    stream_completion_with_rate_limit_info(client, api_url, api_key, request)
        .await
        .map(|output| output.0)
}

/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    pub limit: usize,
    pub remaining: usize,
    pub reset: DateTime<Utc>,
}

impl RateLimit {
    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
        let limit =
            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
        let remaining = get_header(
            &format!("anthropic-ratelimit-{resource}-remaining"),
            headers,
        )?
        .parse()?;
        let reset = DateTime::parse_from_rfc3339(get_header(
            &format!("anthropic-ratelimit-{resource}-reset"),
            headers,
        )?)?
        .to_utc();

        Ok(Self {
            limit,
            remaining,
            reset,
        })
    }
}

/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}

impl RateLimitInfo {
    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
        // Check if any rate limit headers exist
        let has_rate_limit_headers = headers
            .keys()
            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));

        if !has_rate_limit_headers {
            return Self {
                retry_after: None,
                requests: None,
                tokens: None,
                input_tokens: None,
                output_tokens: None,
            };
        }

        Self {
            retry_after: parse_retry_after(headers),
            requests: RateLimit::from_headers("requests", headers).ok(),
            tokens: RateLimit::from_headers("tokens", headers).ok(),
            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
        }
    }
}
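
// A minimal check of the early return above: with no Anthropic rate-limit
// headers present, every field comes back as `None`.
#[test]
fn test_rate_limit_info_without_headers() {
    let info = RateLimitInfo::from_headers(&HeaderMap::new());
    assert!(info.retry_after.is_none());
    assert!(info.requests.is_none());
    assert!(info.tokens.is_none());
    assert!(info.input_tokens.is_none());
    assert!(info.output_tokens.is_none());
}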

/// Parses the Retry-After header value as an integer number of seconds
/// (Anthropic always uses seconds). Note that other services might specify an
/// HTTP date or some other format for this header. Returns `None` if the
/// header is not present or cannot be parsed.
pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
    headers
        .get("retry-after")
        .and_then(|v| v.to_str().ok())
        .and_then(|v| v.parse::<u64>().ok())
        .map(Duration::from_secs)
}
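
// A small check of the integer-seconds parsing above; an HTTP-date value (as
// other services may send) is deliberately not handled and yields `None`.
#[test]
fn test_parse_retry_after() {
    let mut headers = HeaderMap::new();
    headers.insert("retry-after", HeaderValue::from_static("30"));
    assert_eq!(parse_retry_after(&headers), Some(Duration::from_secs(30)));

    let mut headers = HeaderMap::new();
    headers.insert(
        "retry-after",
        HeaderValue::from_static("Wed, 21 Oct 2015 07:28:00 GMT"),
    );
    assert_eq!(parse_retry_after(&headers), None);

    assert_eq!(parse_retry_after(&HeaderMap::new()), None);
}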

fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
    Ok(headers
        .get(key)
        .with_context(|| format!("missing header `{key}`"))?
        .to_str()?)
}

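/// Like [`stream_completion`], but also returns any [`RateLimitInfo`] parsed
/// from the response headers.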
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    let request = StreamingRequest {
        base: request,
        stream: true,
    };
    let uri = format!("{api_url}/v1/messages");
    let beta_headers = Model::from_id(&request.base.model)
        .map(|model| model.beta_headers())
        .unwrap_or_else(|_| Model::DEFAULT_BETA_HEADERS.join(","));
    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Anthropic-Version", "2023-06-01")
        .header("Anthropic-Beta", beta_headers)
        .header("X-Api-Key", api_key)
        .header("Content-Type", "application/json");
    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;
    let rate_limits = RateLimitInfo::from_headers(response.headers());
    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        let line = line.strip_prefix("data: ")?;
                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else if response.status().as_u16() == 529 {
        Err(AnthropicError::ServerOverloaded {
            retry_after: rate_limits.retry_after,
        })
    } else if let Some(retry_after) = rate_limits.retry_after {
        Err(AnthropicError::RateLimit { retry_after })
    } else {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        match serde_json::from_str::<Event>(&body) {
            Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
            Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
                status_code: response.status(),
                message: body,
            }),
        }
    }
}

#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}

#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}

#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}

#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
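
// A sketch of the request body this struct produces on the wire, using the
// `claude-sonnet-4-latest` model ID from `Model::id` above; optional fields
// are omitted when unset thanks to the `skip_serializing_if` attributes.
#[test]
fn test_minimal_request_serialization() {
    let request = Request {
        model: "claude-sonnet-4-latest".to_string(),
        max_tokens: 1024,
        messages: vec![Message {
            role: Role::User,
            content: vec![RequestContent::Text {
                text: "Hello".to_string(),
                cache_control: None,
            }],
        }],
        tools: Vec::new(),
        thinking: None,
        tool_choice: None,
        system: None,
        metadata: None,
        stop_sequences: Vec::new(),
        temperature: None,
        top_k: None,
        top_p: None,
    };
    let value = serde_json::to_value(&request).unwrap();
    assert_eq!(
        value,
        serde_json::json!({
            "model": "claude-sonnet-4-latest",
            "max_tokens": 1024,
            "messages": [{
                "role": "user",
                "content": [{"type": "text", "text": "Hello"}]
            }]
        })
    );
}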

#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}

#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}

#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}

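/// A server-sent event emitted by the streaming Messages API, as decoded by
/// [`stream_completion_with_rate_limit_info`].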
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}

#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}

#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from the response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}

#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}

/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}

impl ApiError {
    pub fn code(&self) -> Option<ApiErrorCode> {
        ApiErrorCode::from_str(&self.error_type).ok()
    }

    pub fn is_rate_limit_error(&self) -> bool {
        matches!(self.error_type.as_str(), "rate_limit_error")
    }

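    /// Returns the token count reported by the API when the prompt exceeded the
    /// model's context window, i.e. when this is an `invalid_request_error`
    /// whose message matches [`parse_prompt_too_long`].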
    pub fn match_window_exceeded(&self) -> Option<u64> {
        let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
            return None;
        };

        parse_prompt_too_long(&self.message)
    }
}

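/// Extracts the reported token count from a "prompt is too long" error message,
/// e.g. `prompt is too long: 220000 tokens > 200000`.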
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    message
        .strip_prefix("prompt is too long: ")?
        .split_once(" tokens")?
        .0
        .parse()
        .ok()
}

#[test]
fn test_match_window_exceeded() {
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 220000 tokens > 200000".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(220_000));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 1234953 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(1234953));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "not a prompt length error".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    let error = ApiError {
        error_type: "rate_limit_error".to_string(),
        message: "prompt is too long: 12345 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: invalid tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);
}