1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
16
/// Prompt-caching parameters associated with a model.
///
/// Built-in models that support caching return a fixed instance of this from
/// `Model::cache_configuration`; custom models may supply their own.
// NOTE(review): how each field is consumed is not visible in this file; the field notes below
// are inferred from the names only and should be confirmed against the callers.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Presumably the minimum total token count before caching is applied — TODO confirm.
    pub min_total_token: u64,
    // Presumably whether speculative cache writes should be attempted — TODO confirm.
    pub should_speculate: bool,
    // Presumably the maximum number of cache anchors placed in one request — TODO confirm.
    pub max_cache_anchors: usize,
}
24
/// Operating mode of a model: the default mode, or thinking with an optional token budget.
///
/// Mirrors `ModelMode` from the settings crate; conversions in both directions are provided.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// Default mode; carries no extra parameters.
    #[default]
    Default,
    /// Thinking mode.
    Thinking {
        // NOTE(review): presumably forwarded as the `budget_tokens` of `Thinking::Enabled` on
        // the request — that mapping is not in this file; confirm at the call site.
        budget_tokens: Option<u32>,
    },
}
34
35impl From<ModelMode> for AnthropicModelMode {
36 fn from(value: ModelMode) -> Self {
37 match value {
38 ModelMode::Default => AnthropicModelMode::Default,
39 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
40 }
41 }
42}
43
44impl From<AnthropicModelMode> for ModelMode {
45 fn from(value: AnthropicModelMode) -> Self {
46 match value {
47 AnthropicModelMode::Default => ModelMode::Default,
48 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
49 }
50 }
51}
52
/// The Anthropic models known to this crate, plus a user-defined `Custom` escape hatch.
///
/// Each built-in variant (de)serializes under its short name (the serde `rename`) and also
/// accepts the `-latest` suffixed alias when deserializing. "Thinking" variants are separate
/// entries that share an API model with their non-thinking counterpart (see
/// `Model::request_id`) but report a thinking mode from `Model::mode`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    // The `Default` model for this enum.
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    // A user-configured model; every per-model accessor on `Model` reads its answer from these
    // fields (falling back to a fixed default where the field is optional).
    #[serde(rename = "custom")]
    Custom {
        // Returned verbatim as both the model id and the request id.
        name: String,
        // Returned by `Model::max_token_count`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        // `Model::max_output_tokens` falls back to 4096 when this is unset.
        max_output_tokens: Option<u64>,
        // `Model::default_temperature` falls back to 1.0 when this is unset.
        default_temperature: Option<f32>,
        // Extra beta header values; blank entries are skipped and the rest are joined with ","
        // by `Model::beta_headers`.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        // Defaults to `AnthropicModelMode::Default` when omitted.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
127
128impl Model {
129 pub fn default_fast() -> Self {
130 Self::Claude3_5Haiku
131 }
132
133 pub fn from_id(id: &str) -> Result<Self> {
134 if id.starts_with("claude-opus-4-1-thinking") {
135 return Ok(Self::ClaudeOpus4_1Thinking);
136 }
137
138 if id.starts_with("claude-opus-4-thinking") {
139 return Ok(Self::ClaudeOpus4Thinking);
140 }
141
142 if id.starts_with("claude-opus-4-1") {
143 return Ok(Self::ClaudeOpus4_1);
144 }
145
146 if id.starts_with("claude-opus-4") {
147 return Ok(Self::ClaudeOpus4);
148 }
149
150 if id.starts_with("claude-sonnet-4-5-thinking") {
151 return Ok(Self::ClaudeSonnet4_5Thinking);
152 }
153
154 if id.starts_with("claude-sonnet-4-5") {
155 return Ok(Self::ClaudeSonnet4_5);
156 }
157
158 if id.starts_with("claude-sonnet-4-thinking") {
159 return Ok(Self::ClaudeSonnet4Thinking);
160 }
161
162 if id.starts_with("claude-sonnet-4") {
163 return Ok(Self::ClaudeSonnet4);
164 }
165
166 if id.starts_with("claude-3-7-sonnet-thinking") {
167 return Ok(Self::Claude3_7SonnetThinking);
168 }
169
170 if id.starts_with("claude-3-7-sonnet") {
171 return Ok(Self::Claude3_7Sonnet);
172 }
173
174 if id.starts_with("claude-3-5-sonnet") {
175 return Ok(Self::Claude3_5Sonnet);
176 }
177
178 if id.starts_with("claude-haiku-4-5-thinking") {
179 return Ok(Self::ClaudeHaiku4_5Thinking);
180 }
181
182 if id.starts_with("claude-haiku-4-5") {
183 return Ok(Self::ClaudeHaiku4_5);
184 }
185
186 if id.starts_with("claude-3-5-haiku") {
187 return Ok(Self::Claude3_5Haiku);
188 }
189
190 if id.starts_with("claude-3-opus") {
191 return Ok(Self::Claude3Opus);
192 }
193
194 if id.starts_with("claude-3-sonnet") {
195 return Ok(Self::Claude3Sonnet);
196 }
197
198 if id.starts_with("claude-3-haiku") {
199 return Ok(Self::Claude3Haiku);
200 }
201
202 Err(anyhow!("invalid model ID: {id}"))
203 }
204
205 pub fn id(&self) -> &str {
206 match self {
207 Self::ClaudeOpus4 => "claude-opus-4-latest",
208 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
209 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
210 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
211 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
212 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
213 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
214 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
215 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
216 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
217 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
218 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
219 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
220 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
221 Self::Claude3Opus => "claude-3-opus-latest",
222 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
223 Self::Claude3Haiku => "claude-3-haiku-20240307",
224 Self::Custom { name, .. } => name,
225 }
226 }
227
228 /// The id of the model that should be used for making API requests
229 pub fn request_id(&self) -> &str {
230 match self {
231 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
232 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
233 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
234 Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
235 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
236 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
237 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
238 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
239 Self::Claude3Opus => "claude-3-opus-latest",
240 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
241 Self::Claude3Haiku => "claude-3-haiku-20240307",
242 Self::Custom { name, .. } => name,
243 }
244 }
245
246 pub fn display_name(&self) -> &str {
247 match self {
248 Self::ClaudeOpus4 => "Claude Opus 4",
249 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
250 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
251 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
252 Self::ClaudeSonnet4 => "Claude Sonnet 4",
253 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
254 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
255 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
256 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
257 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
258 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
259 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
260 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
261 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
262 Self::Claude3Opus => "Claude 3 Opus",
263 Self::Claude3Sonnet => "Claude 3 Sonnet",
264 Self::Claude3Haiku => "Claude 3 Haiku",
265 Self::Custom {
266 name, display_name, ..
267 } => display_name.as_ref().unwrap_or(name),
268 }
269 }
270
271 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
272 match self {
273 Self::ClaudeOpus4
274 | Self::ClaudeOpus4_1
275 | Self::ClaudeOpus4Thinking
276 | Self::ClaudeOpus4_1Thinking
277 | Self::ClaudeSonnet4
278 | Self::ClaudeSonnet4Thinking
279 | Self::ClaudeSonnet4_5
280 | Self::ClaudeSonnet4_5Thinking
281 | Self::Claude3_5Sonnet
282 | Self::ClaudeHaiku4_5
283 | Self::ClaudeHaiku4_5Thinking
284 | Self::Claude3_5Haiku
285 | Self::Claude3_7Sonnet
286 | Self::Claude3_7SonnetThinking
287 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
288 min_total_token: 2_048,
289 should_speculate: true,
290 max_cache_anchors: 4,
291 }),
292 Self::Custom {
293 cache_configuration,
294 ..
295 } => cache_configuration.clone(),
296 _ => None,
297 }
298 }
299
300 pub fn max_token_count(&self) -> u64 {
301 match self {
302 Self::ClaudeOpus4
303 | Self::ClaudeOpus4_1
304 | Self::ClaudeOpus4Thinking
305 | Self::ClaudeOpus4_1Thinking
306 | Self::ClaudeSonnet4
307 | Self::ClaudeSonnet4Thinking
308 | Self::ClaudeSonnet4_5
309 | Self::ClaudeSonnet4_5Thinking
310 | Self::Claude3_5Sonnet
311 | Self::ClaudeHaiku4_5
312 | Self::ClaudeHaiku4_5Thinking
313 | Self::Claude3_5Haiku
314 | Self::Claude3_7Sonnet
315 | Self::Claude3_7SonnetThinking
316 | Self::Claude3Opus
317 | Self::Claude3Sonnet
318 | Self::Claude3Haiku => 200_000,
319 Self::Custom { max_tokens, .. } => *max_tokens,
320 }
321 }
322
323 pub fn max_output_tokens(&self) -> u64 {
324 match self {
325 Self::ClaudeOpus4
326 | Self::ClaudeOpus4_1
327 | Self::ClaudeOpus4Thinking
328 | Self::ClaudeOpus4_1Thinking
329 | Self::ClaudeSonnet4
330 | Self::ClaudeSonnet4Thinking
331 | Self::ClaudeSonnet4_5
332 | Self::ClaudeSonnet4_5Thinking
333 | Self::Claude3_5Sonnet
334 | Self::Claude3_7Sonnet
335 | Self::Claude3_7SonnetThinking
336 | Self::Claude3_5Haiku => 8_192,
337 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
338 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
339 Self::Custom {
340 max_output_tokens, ..
341 } => max_output_tokens.unwrap_or(4_096),
342 }
343 }
344
345 pub fn default_temperature(&self) -> f32 {
346 match self {
347 Self::ClaudeOpus4
348 | Self::ClaudeOpus4_1
349 | Self::ClaudeOpus4Thinking
350 | Self::ClaudeOpus4_1Thinking
351 | Self::ClaudeSonnet4
352 | Self::ClaudeSonnet4Thinking
353 | Self::ClaudeSonnet4_5
354 | Self::ClaudeSonnet4_5Thinking
355 | Self::Claude3_5Sonnet
356 | Self::Claude3_7Sonnet
357 | Self::Claude3_7SonnetThinking
358 | Self::ClaudeHaiku4_5
359 | Self::ClaudeHaiku4_5Thinking
360 | Self::Claude3_5Haiku
361 | Self::Claude3Opus
362 | Self::Claude3Sonnet
363 | Self::Claude3Haiku => 1.0,
364 Self::Custom {
365 default_temperature,
366 ..
367 } => default_temperature.unwrap_or(1.0),
368 }
369 }
370
371 pub fn mode(&self) -> AnthropicModelMode {
372 match self {
373 Self::ClaudeOpus4
374 | Self::ClaudeOpus4_1
375 | Self::ClaudeSonnet4
376 | Self::ClaudeSonnet4_5
377 | Self::Claude3_5Sonnet
378 | Self::Claude3_7Sonnet
379 | Self::ClaudeHaiku4_5
380 | Self::Claude3_5Haiku
381 | Self::Claude3Opus
382 | Self::Claude3Sonnet
383 | Self::Claude3Haiku => AnthropicModelMode::Default,
384 Self::ClaudeOpus4Thinking
385 | Self::ClaudeOpus4_1Thinking
386 | Self::ClaudeSonnet4Thinking
387 | Self::ClaudeSonnet4_5Thinking
388 | Self::ClaudeHaiku4_5Thinking
389 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
390 budget_tokens: Some(4_096),
391 },
392 Self::Custom { mode, .. } => mode.clone(),
393 }
394 }
395
396 pub fn beta_headers(&self) -> Option<String> {
397 let mut headers = vec![];
398
399 match self {
400 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
401 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
402 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
403 headers.push("token-efficient-tools-2025-02-19".to_string());
404 }
405 Self::Custom {
406 extra_beta_headers, ..
407 } => {
408 headers.extend(
409 extra_beta_headers
410 .iter()
411 .filter(|header| !header.trim().is_empty())
412 .cloned(),
413 );
414 }
415 _ => {}
416 }
417
418 if headers.is_empty() {
419 None
420 } else {
421 Some(headers.join(","))
422 }
423 }
424
425 pub fn tool_model_id(&self) -> &str {
426 if let Self::Custom {
427 tool_override: Some(tool_override),
428 ..
429 } = self
430 {
431 tool_override
432 } else {
433 self.request_id()
434 }
435 }
436}
437
438pub async fn stream_completion(
439 client: &dyn HttpClient,
440 api_url: &str,
441 api_key: &str,
442 request: Request,
443 beta_headers: Option<String>,
444) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
445 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
446 .await
447 .map(|output| output.0)
448}
449
/// An individual rate limit.
///
/// Populated from the `anthropic-ratelimit-{resource}-*` response headers.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `anthropic-ratelimit-{resource}-limit` header.
    pub limit: usize,
    /// Value of the `anthropic-ratelimit-{resource}-remaining` header.
    pub remaining: usize,
    /// Value of the `anthropic-ratelimit-{resource}-reset` header, parsed as an RFC 3339
    /// timestamp and converted to UTC.
    pub reset: DateTime<Utc>,
}
457
458impl RateLimit {
459 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
460 let limit =
461 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
462 let remaining = get_header(
463 &format!("anthropic-ratelimit-{resource}-remaining"),
464 headers,
465 )?
466 .parse()?;
467 let reset = DateTime::parse_from_rfc3339(get_header(
468 &format!("anthropic-ratelimit-{resource}-reset"),
469 headers,
470 )?)?
471 .to_utc();
472
473 Ok(Self {
474 limit,
475 remaining,
476 reset,
477 })
478 }
479}
480
/// Rate-limit information extracted from the headers of an API response.
///
/// Each field is `None` when the corresponding header(s) are absent or cannot be parsed.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// The `retry-after` header, interpreted as a whole number of seconds.
    pub retry_after: Option<Duration>,
    /// Limit read from the `anthropic-ratelimit-requests-*` headers.
    pub requests: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-tokens-*` headers.
    pub tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-input-tokens-*` headers.
    pub input_tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-output-tokens-*` headers.
    pub output_tokens: Option<RateLimit>,
}
490
491impl RateLimitInfo {
492 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
493 // Check if any rate limit headers exist
494 let has_rate_limit_headers = headers
495 .keys()
496 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
497
498 if !has_rate_limit_headers {
499 return Self {
500 retry_after: None,
501 requests: None,
502 tokens: None,
503 input_tokens: None,
504 output_tokens: None,
505 };
506 }
507
508 Self {
509 retry_after: parse_retry_after(headers),
510 requests: RateLimit::from_headers("requests", headers).ok(),
511 tokens: RateLimit::from_headers("tokens", headers).ok(),
512 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
513 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
514 }
515 }
516}
517
518/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
519/// seconds). Note that other services might specify an HTTP date or some other format for this
520/// header. Returns `None` if the header is not present or cannot be parsed.
521pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
522 headers
523 .get("retry-after")
524 .and_then(|v| v.to_str().ok())
525 .and_then(|v| v.parse::<u64>().ok())
526 .map(Duration::from_secs)
527}
528
529fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
530 Ok(headers
531 .get(key)
532 .with_context(|| format!("missing header `{key}`"))?
533 .to_str()?)
534}
535
536pub async fn stream_completion_with_rate_limit_info(
537 client: &dyn HttpClient,
538 api_url: &str,
539 api_key: &str,
540 request: Request,
541 beta_headers: Option<String>,
542) -> Result<
543 (
544 BoxStream<'static, Result<Event, AnthropicError>>,
545 Option<RateLimitInfo>,
546 ),
547 AnthropicError,
548> {
549 let request = StreamingRequest {
550 base: request,
551 stream: true,
552 };
553 let uri = format!("{api_url}/v1/messages");
554
555 let mut request_builder = HttpRequest::builder()
556 .method(Method::POST)
557 .uri(uri)
558 .header("Anthropic-Version", "2023-06-01")
559 .header("X-Api-Key", api_key.trim())
560 .header("Content-Type", "application/json");
561
562 if let Some(beta_headers) = beta_headers {
563 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
564 }
565
566 let serialized_request =
567 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
568 let request = request_builder
569 .body(AsyncBody::from(serialized_request))
570 .map_err(AnthropicError::BuildRequestBody)?;
571
572 let mut response = client
573 .send(request)
574 .await
575 .map_err(AnthropicError::HttpSend)?;
576 let rate_limits = RateLimitInfo::from_headers(response.headers());
577 if response.status().is_success() {
578 let reader = BufReader::new(response.into_body());
579 let stream = reader
580 .lines()
581 .filter_map(|line| async move {
582 match line {
583 Ok(line) => {
584 let line = line.strip_prefix("data: ")?;
585 match serde_json::from_str(line) {
586 Ok(response) => Some(Ok(response)),
587 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
588 }
589 }
590 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
591 }
592 })
593 .boxed();
594 Ok((stream, Some(rate_limits)))
595 } else if response.status().as_u16() == 529 {
596 Err(AnthropicError::ServerOverloaded {
597 retry_after: rate_limits.retry_after,
598 })
599 } else if let Some(retry_after) = rate_limits.retry_after {
600 Err(AnthropicError::RateLimit { retry_after })
601 } else {
602 let mut body = String::new();
603 response
604 .body_mut()
605 .read_to_string(&mut body)
606 .await
607 .map_err(AnthropicError::ReadResponse)?;
608
609 match serde_json::from_str::<Event>(&body) {
610 Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
611 Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
612 status_code: response.status(),
613 message: body,
614 }),
615 }
616 }
617}
618
/// Kind of cache control attached to a content block.
///
/// Variant names are (de)serialized in lowercase, so `Ephemeral` is `"ephemeral"` on the wire.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
624
/// Cache-control marker that can be attached to request content blocks.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// The cache-control kind; (de)serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
630
/// A single message in a request: who authored it and its content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    /// Author of the message.
    pub role: Role,
    /// Content blocks of the message, in order.
    pub content: Vec<RequestContent>,
}
636
/// Author of a message; (de)serialized in lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
643
/// A content block sent as part of a request message.
///
/// Internally tagged: the JSON `type` field selects the variant. Where a variant has a
/// `cache_control` field, it is left out of the serialized output when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A thinking block together with its signature (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A redacted thinking block carried as opaque data (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image (`"type": "image"`).
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation (`"type": "tool_result"`).
    #[serde(rename = "tool_result")]
    ToolResult {
        // Presumably the `id` of the `ToolUse` block this result answers — confirm with callers.
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
685
/// Content of a tool result: either a plain string or a list of parts.
///
/// Untagged: a JSON string deserializes as `Plain`, a JSON array as `Multipart`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
692
/// One part of a multipart tool result.
///
/// Internally tagged by the JSON `type` field, with lowercase variant names
/// (`"text"` / `"image"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
699
/// A content block received in a response.
///
/// Internally tagged: the JSON `type` field selects the variant.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text { text: String },
    /// A thinking block (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// A redacted thinking block carried as opaque data (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
716
/// Describes the source of an image content block.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Kind of source; (de)serialized under the JSON key `type`.
    // NOTE(review): the accepted values are not visible in this file (presumably "base64") —
    // confirm against the code that builds this struct.
    #[serde(rename = "type")]
    pub source_type: String,
    // Presumably a MIME type such as "image/png" — TODO confirm.
    pub media_type: String,
    // Presumably the encoded image payload — TODO confirm.
    pub data: String,
}
724
/// Definition of a tool included in a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    /// Name of the tool.
    pub name: String,
    /// Description of the tool.
    pub description: String,
    /// Arbitrary JSON describing the tool's input.
    // NOTE(review): presumably a JSON Schema object; nothing in this file validates it.
    pub input_schema: serde_json::Value,
}
731
/// Tool-choice setting for a request.
///
/// Internally tagged by the JSON `type` field with lowercase variant names
/// (`"auto"`, `"any"`, `"tool"`, `"none"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Names a specific tool.
    Tool { name: String },
    None,
}
740
/// Thinking configuration for a request.
///
/// Internally tagged by the JSON `type` field with lowercase variant names, so `Enabled` is
/// `"type": "enabled"` on the wire.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Thinking is enabled, with an optional token budget.
    Enabled { budget_tokens: Option<u32> },
}
746
/// Either a plain string or a list of content blocks.
///
/// Untagged: a JSON string deserializes as `String`, a JSON array as `Content`. Used for the
/// `system` field of [`Request`].
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
753
/// Body of a request to the `/v1/messages` endpoint.
///
/// Every optional field is omitted from the serialized JSON when it is `None` (or, for the
/// vectors, empty) and takes its default value when missing during deserialization.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Identifier of the model to use.
    pub model: String,
    // Presumably the cap on generated tokens for this request — TODO confirm.
    pub max_tokens: u64,
    /// The conversation messages, in order.
    pub messages: Vec<Message>,
    /// Tools made available for this request.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Thinking configuration, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Tool-choice setting, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a plain string or as content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Request metadata, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// Stop sequences for this request.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    /// Sampling temperature, if set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Top-k sampling parameter, if set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Top-p sampling parameter, if set.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
778
/// A [`Request`] with an additional top-level `stream` flag.
///
/// The fields of `base` are flattened into the same JSON object as `stream`.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    /// The underlying request; its fields are serialized inline.
    #[serde(flatten)]
    pub base: Request,
    /// Serialized as the `stream` field of the request body.
    pub stream: bool,
}
785
/// Metadata attached to a [`Request`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// Optional user identifier. Unlike the optional fields of [`Request`], this is
    /// serialized even when `None` (as `null`).
    pub user_id: Option<String>,
}
790
/// Token usage counters reported with a response.
///
/// Every counter is optional: missing fields deserialize to `None`, and `None` fields are
/// omitted when serializing.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    /// Number of input tokens, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    /// Number of output tokens, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    /// Number of input tokens attributed to cache creation, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    /// Number of input tokens attributed to cache reads, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
802
/// A message object returned by the API (carried by `Event::MessageStart`).
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    /// Identifier of the response message.
    pub id: String,
    /// The object's type; (de)serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    /// Author of the message.
    pub role: Role,
    /// Content blocks of the message.
    pub content: Vec<ResponseContent>,
    /// Identifier of the model that produced the message.
    pub model: String,
    /// Why generation stopped, if reported; omitted from serialization when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// The stop sequence that was hit, if reported; omitted from serialization when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    /// Token usage for the message.
    pub usage: Usage,
}
817
/// An event decoded from a `data: ` line of the streaming response.
///
/// Internally tagged: the JSON `type` field selects the variant. The same type is used to
/// parse the body of a failed (non-streaming) response, where only `Error` is expected.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// `"type": "message_start"` — carries the initial message object.
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    /// `"type": "content_block_start"` — a content block begins at `index`.
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// `"type": "content_block_delta"` — an incremental update to the block at `index`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    /// `"type": "content_block_stop"` — the block at `index` is finished.
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// `"type": "message_delta"` — message-level changes plus usage counters.
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    /// `"type": "message_stop"`.
    #[serde(rename = "message_stop")]
    MessageStop,
    /// `"type": "ping"`.
    #[serde(rename = "ping")]
    Ping,
    /// `"type": "error"` — an error reported by the API.
    #[serde(rename = "error")]
    Error { error: ApiError },
}
841
/// Incremental payload of an `Event::ContentBlockDelta`.
///
/// Internally tagged: the JSON `type` field selects the variant.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    /// `"type": "text_delta"` — a fragment of text.
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    /// `"type": "thinking_delta"` — a fragment of thinking text.
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// `"type": "signature_delta"` — a signature value.
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// `"type": "input_json_delta"` — a fragment of JSON text.
    // NOTE(review): presumably a piece of a tool-use `input` to be concatenated by the
    // consumer — the assembly logic is not in this file.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
854
/// Message-level fields carried by an `Event::MessageDelta`.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    /// Why generation stopped, if reported.
    pub stop_reason: Option<String>,
    /// The stop sequence that was hit, if reported.
    pub stop_sequence: Option<String>,
}
860
/// Errors produced while sending a request to the API or reading its response.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    // `message` holds the raw response body; used when the body is not an `error` event.
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    // Produced for a non-success response (other than 529) that carries a parseable
    // `retry-after` header.
    RateLimit { retry_after: Duration },

    /// Server overloaded
    // Produced for HTTP status 529; `retry_after` is set only if the header was present.
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
893
/// An error object returned by the API.
///
/// Displays as `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// The error type string (for example `rate_limit_error`); (de)serialized under the JSON
    /// key `type`. `ApiError::code` parses it into an `ApiErrorCode`.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable error message.
    pub message: String,
}
901
/// An Anthropic API error code.
///
/// Parsed from the snake_case error type string (for example `"rate_limit_error"` parses to
/// `RateLimitError`).
///
/// <https://docs.anthropic.com/en/api/errors#http-errors>
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
924
925impl ApiError {
926 pub fn code(&self) -> Option<ApiErrorCode> {
927 ApiErrorCode::from_str(&self.error_type).ok()
928 }
929
930 pub fn is_rate_limit_error(&self) -> bool {
931 matches!(self.error_type.as_str(), "rate_limit_error")
932 }
933
934 pub fn match_window_exceeded(&self) -> Option<u64> {
935 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
936 return None;
937 };
938
939 parse_prompt_too_long(&self.message)
940 }
941}
942
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens…`.
///
/// Returns `None` when the message does not start with that prefix, when no ` tokens` marker
/// follows it, or when the text in between is not an unsigned integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    // Only the text before the first " tokens" is considered.
    let (count, _tail) = rest.split_once(" tokens")?;
    count.parse().ok()
}
951
/// Checks `ApiError::match_window_exceeded` against a table of
/// (error type, message, expected result) cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Over-long prompt with the limit appended after the count.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Over-long prompt with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message but the wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Count is not a number.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}