1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
17pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
18
19#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
20#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
21pub struct AnthropicModelCacheConfiguration {
22 pub min_total_token: u64,
23 pub should_speculate: bool,
24 pub max_cache_anchors: usize,
25}
26
27#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
28#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
29pub enum AnthropicModelMode {
30 #[default]
31 Default,
32 Thinking {
33 budget_tokens: Option<u32>,
34 },
35}
36
37impl From<ModelMode> for AnthropicModelMode {
38 fn from(value: ModelMode) -> Self {
39 match value {
40 ModelMode::Default => AnthropicModelMode::Default,
41 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
42 }
43 }
44}
45
46impl From<AnthropicModelMode> for ModelMode {
47 fn from(value: AnthropicModelMode) -> Self {
48 match value {
49 AnthropicModelMode::Default => ModelMode::Default,
50 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
51 }
52 }
53}
54
55#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
56#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
57pub enum Model {
58 #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
59 ClaudeOpus4,
60 #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
61 ClaudeOpus4_1,
62 #[serde(
63 rename = "claude-opus-4-thinking",
64 alias = "claude-opus-4-thinking-latest"
65 )]
66 ClaudeOpus4Thinking,
67 #[serde(
68 rename = "claude-opus-4-1-thinking",
69 alias = "claude-opus-4-1-thinking-latest"
70 )]
71 ClaudeOpus4_1Thinking,
72 #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
73 ClaudeOpus4_5,
74 #[serde(
75 rename = "claude-opus-4-5-thinking",
76 alias = "claude-opus-4-5-thinking-latest"
77 )]
78 ClaudeOpus4_5Thinking,
79 #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
80 ClaudeSonnet4,
81 #[serde(
82 rename = "claude-sonnet-4-thinking",
83 alias = "claude-sonnet-4-thinking-latest"
84 )]
85 ClaudeSonnet4Thinking,
86 #[default]
87 #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
88 ClaudeSonnet4_5,
89 #[serde(
90 rename = "claude-sonnet-4-5-thinking",
91 alias = "claude-sonnet-4-5-thinking-latest"
92 )]
93 ClaudeSonnet4_5Thinking,
94 #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
95 Claude3_7Sonnet,
96 #[serde(
97 rename = "claude-3-7-sonnet-thinking",
98 alias = "claude-3-7-sonnet-thinking-latest"
99 )]
100 Claude3_7SonnetThinking,
101 #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
102 Claude3_5Sonnet,
103 #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
104 ClaudeHaiku4_5,
105 #[serde(
106 rename = "claude-haiku-4-5-thinking",
107 alias = "claude-haiku-4-5-thinking-latest"
108 )]
109 ClaudeHaiku4_5Thinking,
110 #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
111 Claude3_5Haiku,
112 #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
113 Claude3Opus,
114 #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
115 Claude3Sonnet,
116 #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
117 Claude3Haiku,
118 #[serde(rename = "custom")]
119 Custom {
120 name: String,
121 max_tokens: u64,
122 /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
123 display_name: Option<String>,
124 /// Override this model with a different Anthropic model for tool calls.
125 tool_override: Option<String>,
126 /// Indicates whether this custom model supports caching.
127 cache_configuration: Option<AnthropicModelCacheConfiguration>,
128 max_output_tokens: Option<u64>,
129 default_temperature: Option<f32>,
130 #[serde(default)]
131 extra_beta_headers: Vec<String>,
132 #[serde(default)]
133 mode: AnthropicModelMode,
134 },
135}
136
137impl Model {
138 pub fn default_fast() -> Self {
139 Self::Claude3_5Haiku
140 }
141
142 pub fn from_id(id: &str) -> Result<Self> {
143 if id.starts_with("claude-opus-4-5-thinking") {
144 return Ok(Self::ClaudeOpus4_5Thinking);
145 }
146
147 if id.starts_with("claude-opus-4-5") {
148 return Ok(Self::ClaudeOpus4_5);
149 }
150
151 if id.starts_with("claude-opus-4-1-thinking") {
152 return Ok(Self::ClaudeOpus4_1Thinking);
153 }
154
155 if id.starts_with("claude-opus-4-thinking") {
156 return Ok(Self::ClaudeOpus4Thinking);
157 }
158
159 if id.starts_with("claude-opus-4-1") {
160 return Ok(Self::ClaudeOpus4_1);
161 }
162
163 if id.starts_with("claude-opus-4") {
164 return Ok(Self::ClaudeOpus4);
165 }
166
167 if id.starts_with("claude-sonnet-4-5-thinking") {
168 return Ok(Self::ClaudeSonnet4_5Thinking);
169 }
170
171 if id.starts_with("claude-sonnet-4-5") {
172 return Ok(Self::ClaudeSonnet4_5);
173 }
174
175 if id.starts_with("claude-sonnet-4-thinking") {
176 return Ok(Self::ClaudeSonnet4Thinking);
177 }
178
179 if id.starts_with("claude-sonnet-4") {
180 return Ok(Self::ClaudeSonnet4);
181 }
182
183 if id.starts_with("claude-3-7-sonnet-thinking") {
184 return Ok(Self::Claude3_7SonnetThinking);
185 }
186
187 if id.starts_with("claude-3-7-sonnet") {
188 return Ok(Self::Claude3_7Sonnet);
189 }
190
191 if id.starts_with("claude-3-5-sonnet") {
192 return Ok(Self::Claude3_5Sonnet);
193 }
194
195 if id.starts_with("claude-haiku-4-5-thinking") {
196 return Ok(Self::ClaudeHaiku4_5Thinking);
197 }
198
199 if id.starts_with("claude-haiku-4-5") {
200 return Ok(Self::ClaudeHaiku4_5);
201 }
202
203 if id.starts_with("claude-3-5-haiku") {
204 return Ok(Self::Claude3_5Haiku);
205 }
206
207 if id.starts_with("claude-3-opus") {
208 return Ok(Self::Claude3Opus);
209 }
210
211 if id.starts_with("claude-3-sonnet") {
212 return Ok(Self::Claude3Sonnet);
213 }
214
215 if id.starts_with("claude-3-haiku") {
216 return Ok(Self::Claude3Haiku);
217 }
218
219 Err(anyhow!("invalid model ID: {id}"))
220 }
221
222 pub fn id(&self) -> &str {
223 match self {
224 Self::ClaudeOpus4 => "claude-opus-4-latest",
225 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
226 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
227 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
228 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
229 Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
230 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
231 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
232 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
233 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
234 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
235 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
236 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
237 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
238 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
239 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
240 Self::Claude3Opus => "claude-3-opus-latest",
241 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
242 Self::Claude3Haiku => "claude-3-haiku-20240307",
243 Self::Custom { name, .. } => name,
244 }
245 }
246
247 /// The id of the model that should be used for making API requests
248 pub fn request_id(&self) -> &str {
249 match self {
250 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
251 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
252 Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
253 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
254 Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
255 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
256 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
257 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
258 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
259 Self::Claude3Opus => "claude-3-opus-latest",
260 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
261 Self::Claude3Haiku => "claude-3-haiku-20240307",
262 Self::Custom { name, .. } => name,
263 }
264 }
265
266 pub fn display_name(&self) -> &str {
267 match self {
268 Self::ClaudeOpus4 => "Claude Opus 4",
269 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
270 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
271 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
272 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
273 Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
274 Self::ClaudeSonnet4 => "Claude Sonnet 4",
275 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
276 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
277 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
278 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
279 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
280 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
281 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
282 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
283 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
284 Self::Claude3Opus => "Claude 3 Opus",
285 Self::Claude3Sonnet => "Claude 3 Sonnet",
286 Self::Claude3Haiku => "Claude 3 Haiku",
287 Self::Custom {
288 name, display_name, ..
289 } => display_name.as_ref().unwrap_or(name),
290 }
291 }
292
293 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
294 match self {
295 Self::ClaudeOpus4
296 | Self::ClaudeOpus4_1
297 | Self::ClaudeOpus4Thinking
298 | Self::ClaudeOpus4_1Thinking
299 | Self::ClaudeOpus4_5
300 | Self::ClaudeOpus4_5Thinking
301 | Self::ClaudeSonnet4
302 | Self::ClaudeSonnet4Thinking
303 | Self::ClaudeSonnet4_5
304 | Self::ClaudeSonnet4_5Thinking
305 | Self::Claude3_5Sonnet
306 | Self::ClaudeHaiku4_5
307 | Self::ClaudeHaiku4_5Thinking
308 | Self::Claude3_5Haiku
309 | Self::Claude3_7Sonnet
310 | Self::Claude3_7SonnetThinking
311 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
312 min_total_token: 2_048,
313 should_speculate: true,
314 max_cache_anchors: 4,
315 }),
316 Self::Custom {
317 cache_configuration,
318 ..
319 } => cache_configuration.clone(),
320 _ => None,
321 }
322 }
323
324 pub fn max_token_count(&self) -> u64 {
325 match self {
326 Self::ClaudeOpus4
327 | Self::ClaudeOpus4_1
328 | Self::ClaudeOpus4Thinking
329 | Self::ClaudeOpus4_1Thinking
330 | Self::ClaudeOpus4_5
331 | Self::ClaudeOpus4_5Thinking
332 | Self::ClaudeSonnet4
333 | Self::ClaudeSonnet4Thinking
334 | Self::ClaudeSonnet4_5
335 | Self::ClaudeSonnet4_5Thinking
336 | Self::Claude3_5Sonnet
337 | Self::ClaudeHaiku4_5
338 | Self::ClaudeHaiku4_5Thinking
339 | Self::Claude3_5Haiku
340 | Self::Claude3_7Sonnet
341 | Self::Claude3_7SonnetThinking
342 | Self::Claude3Opus
343 | Self::Claude3Sonnet
344 | Self::Claude3Haiku => 200_000,
345 Self::Custom { max_tokens, .. } => *max_tokens,
346 }
347 }
348
349 pub fn max_output_tokens(&self) -> u64 {
350 match self {
351 Self::ClaudeOpus4
352 | Self::ClaudeOpus4_1
353 | Self::ClaudeOpus4Thinking
354 | Self::ClaudeOpus4_1Thinking
355 | Self::ClaudeOpus4_5
356 | Self::ClaudeOpus4_5Thinking
357 | Self::ClaudeSonnet4
358 | Self::ClaudeSonnet4Thinking
359 | Self::ClaudeSonnet4_5
360 | Self::ClaudeSonnet4_5Thinking
361 | Self::Claude3_5Sonnet
362 | Self::Claude3_7Sonnet
363 | Self::Claude3_7SonnetThinking
364 | Self::Claude3_5Haiku => 8_192,
365 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
366 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
367 Self::Custom {
368 max_output_tokens, ..
369 } => max_output_tokens.unwrap_or(4_096),
370 }
371 }
372
373 pub fn default_temperature(&self) -> f32 {
374 match self {
375 Self::ClaudeOpus4
376 | Self::ClaudeOpus4_1
377 | Self::ClaudeOpus4Thinking
378 | Self::ClaudeOpus4_1Thinking
379 | Self::ClaudeOpus4_5
380 | Self::ClaudeOpus4_5Thinking
381 | Self::ClaudeSonnet4
382 | Self::ClaudeSonnet4Thinking
383 | Self::ClaudeSonnet4_5
384 | Self::ClaudeSonnet4_5Thinking
385 | Self::Claude3_5Sonnet
386 | Self::Claude3_7Sonnet
387 | Self::Claude3_7SonnetThinking
388 | Self::ClaudeHaiku4_5
389 | Self::ClaudeHaiku4_5Thinking
390 | Self::Claude3_5Haiku
391 | Self::Claude3Opus
392 | Self::Claude3Sonnet
393 | Self::Claude3Haiku => 1.0,
394 Self::Custom {
395 default_temperature,
396 ..
397 } => default_temperature.unwrap_or(1.0),
398 }
399 }
400
401 pub fn mode(&self) -> AnthropicModelMode {
402 match self {
403 Self::ClaudeOpus4
404 | Self::ClaudeOpus4_1
405 | Self::ClaudeOpus4_5
406 | Self::ClaudeSonnet4
407 | Self::ClaudeSonnet4_5
408 | Self::Claude3_5Sonnet
409 | Self::Claude3_7Sonnet
410 | Self::ClaudeHaiku4_5
411 | Self::Claude3_5Haiku
412 | Self::Claude3Opus
413 | Self::Claude3Sonnet
414 | Self::Claude3Haiku => AnthropicModelMode::Default,
415 Self::ClaudeOpus4Thinking
416 | Self::ClaudeOpus4_1Thinking
417 | Self::ClaudeOpus4_5Thinking
418 | Self::ClaudeSonnet4Thinking
419 | Self::ClaudeSonnet4_5Thinking
420 | Self::ClaudeHaiku4_5Thinking
421 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
422 budget_tokens: Some(4_096),
423 },
424 Self::Custom { mode, .. } => mode.clone(),
425 }
426 }
427
428 pub fn beta_headers(&self) -> Option<String> {
429 let mut headers = vec![];
430
431 match self {
432 Self::ClaudeOpus4
433 | Self::ClaudeOpus4_1
434 | Self::ClaudeOpus4_5
435 | Self::ClaudeSonnet4
436 | Self::ClaudeSonnet4_5
437 | Self::ClaudeOpus4Thinking
438 | Self::ClaudeOpus4_1Thinking
439 | Self::ClaudeOpus4_5Thinking
440 | Self::ClaudeSonnet4Thinking
441 | Self::ClaudeSonnet4_5Thinking => {
442 // Fine-grained tool streaming for newer models
443 headers.push("fine-grained-tool-streaming-2025-05-14".to_string());
444 }
445 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
446 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
447 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
448 headers.push("token-efficient-tools-2025-02-19".to_string());
449 headers.push("fine-grained-tool-streaming-2025-05-14".to_string());
450 }
451 Self::Custom {
452 extra_beta_headers, ..
453 } => {
454 headers.extend(
455 extra_beta_headers
456 .iter()
457 .filter(|header| !header.trim().is_empty())
458 .cloned(),
459 );
460 }
461 _ => {}
462 }
463
464 if headers.is_empty() {
465 None
466 } else {
467 Some(headers.join(","))
468 }
469 }
470
471 pub fn tool_model_id(&self) -> &str {
472 if let Self::Custom {
473 tool_override: Some(tool_override),
474 ..
475 } = self
476 {
477 tool_override
478 } else {
479 self.request_id()
480 }
481 }
482}
483
484/// Generate completion with streaming.
485pub async fn stream_completion(
486 client: &dyn HttpClient,
487 api_url: &str,
488 api_key: &str,
489 request: Request,
490 beta_headers: Option<String>,
491) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
492 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
493 .await
494 .map(|output| output.0)
495}
496
497/// Generate completion without streaming.
498pub async fn non_streaming_completion(
499 client: &dyn HttpClient,
500 api_url: &str,
501 api_key: &str,
502 request: Request,
503 beta_headers: Option<String>,
504) -> Result<Response, AnthropicError> {
505 let (mut response, rate_limits) =
506 send_request(client, api_url, api_key, &request, beta_headers).await?;
507
508 if response.status().is_success() {
509 let mut body = String::new();
510 response
511 .body_mut()
512 .read_to_string(&mut body)
513 .await
514 .map_err(AnthropicError::ReadResponse)?;
515
516 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
517 } else {
518 Err(handle_error_response(response, rate_limits).await)
519 }
520}
521
522async fn send_request(
523 client: &dyn HttpClient,
524 api_url: &str,
525 api_key: &str,
526 request: impl Serialize,
527 beta_headers: Option<String>,
528) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
529 let uri = format!("{api_url}/v1/messages");
530
531 let mut request_builder = HttpRequest::builder()
532 .method(Method::POST)
533 .uri(uri)
534 .header("Anthropic-Version", "2023-06-01")
535 .header("X-Api-Key", api_key.trim())
536 .header("Content-Type", "application/json");
537
538 if let Some(beta_headers) = beta_headers {
539 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
540 }
541
542 let serialized_request =
543 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
544 let request = request_builder
545 .body(AsyncBody::from(serialized_request))
546 .map_err(AnthropicError::BuildRequestBody)?;
547
548 let response = client
549 .send(request)
550 .await
551 .map_err(AnthropicError::HttpSend)?;
552
553 let rate_limits = RateLimitInfo::from_headers(response.headers());
554
555 Ok((response, rate_limits))
556}
557
558async fn handle_error_response(
559 mut response: http::Response<AsyncBody>,
560 rate_limits: RateLimitInfo,
561) -> AnthropicError {
562 if response.status().as_u16() == 529 {
563 return AnthropicError::ServerOverloaded {
564 retry_after: rate_limits.retry_after,
565 };
566 }
567
568 if let Some(retry_after) = rate_limits.retry_after {
569 return AnthropicError::RateLimit { retry_after };
570 }
571
572 let mut body = String::new();
573 let read_result = response
574 .body_mut()
575 .read_to_string(&mut body)
576 .await
577 .map_err(AnthropicError::ReadResponse);
578
579 if let Err(err) = read_result {
580 return err;
581 }
582
583 match serde_json::from_str::<Event>(&body) {
584 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
585 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
586 status_code: response.status(),
587 message: body,
588 },
589 }
590}
591
592/// An individual rate limit.
593#[derive(Debug)]
594pub struct RateLimit {
595 pub limit: usize,
596 pub remaining: usize,
597 pub reset: DateTime<Utc>,
598}
599
600impl RateLimit {
601 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
602 let limit =
603 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
604 let remaining = get_header(
605 &format!("anthropic-ratelimit-{resource}-remaining"),
606 headers,
607 )?
608 .parse()?;
609 let reset = DateTime::parse_from_rfc3339(get_header(
610 &format!("anthropic-ratelimit-{resource}-reset"),
611 headers,
612 )?)?
613 .to_utc();
614
615 Ok(Self {
616 limit,
617 remaining,
618 reset,
619 })
620 }
621}
622
623/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
624#[derive(Debug)]
625pub struct RateLimitInfo {
626 pub retry_after: Option<Duration>,
627 pub requests: Option<RateLimit>,
628 pub tokens: Option<RateLimit>,
629 pub input_tokens: Option<RateLimit>,
630 pub output_tokens: Option<RateLimit>,
631}
632
633impl RateLimitInfo {
634 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
635 // Check if any rate limit headers exist
636 let has_rate_limit_headers = headers
637 .keys()
638 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
639
640 if !has_rate_limit_headers {
641 return Self {
642 retry_after: None,
643 requests: None,
644 tokens: None,
645 input_tokens: None,
646 output_tokens: None,
647 };
648 }
649
650 Self {
651 retry_after: parse_retry_after(headers),
652 requests: RateLimit::from_headers("requests", headers).ok(),
653 tokens: RateLimit::from_headers("tokens", headers).ok(),
654 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
655 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
656 }
657 }
658}
659
660/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
661/// seconds). Note that other services might specify an HTTP date or some other format for this
662/// header. Returns `None` if the header is not present or cannot be parsed.
663pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
664 headers
665 .get("retry-after")
666 .and_then(|v| v.to_str().ok())
667 .and_then(|v| v.parse::<u64>().ok())
668 .map(Duration::from_secs)
669}
670
671fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
672 Ok(headers
673 .get(key)
674 .with_context(|| format!("missing header `{key}`"))?
675 .to_str()?)
676}
677
678pub async fn stream_completion_with_rate_limit_info(
679 client: &dyn HttpClient,
680 api_url: &str,
681 api_key: &str,
682 request: Request,
683 beta_headers: Option<String>,
684) -> Result<
685 (
686 BoxStream<'static, Result<Event, AnthropicError>>,
687 Option<RateLimitInfo>,
688 ),
689 AnthropicError,
690> {
691 let request = StreamingRequest {
692 base: request,
693 stream: true,
694 };
695
696 let (response, rate_limits) =
697 send_request(client, api_url, api_key, &request, beta_headers).await?;
698
699 if response.status().is_success() {
700 let reader = BufReader::new(response.into_body());
701 let stream = reader
702 .lines()
703 .filter_map(|line| async move {
704 match line {
705 Ok(line) => {
706 let line = line.strip_prefix("data: ")?;
707 match serde_json::from_str(line) {
708 Ok(response) => Some(Ok(response)),
709 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
710 }
711 }
712 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
713 }
714 })
715 .boxed();
716 Ok((stream, Some(rate_limits)))
717 } else {
718 Err(handle_error_response(response, rate_limits).await)
719 }
720}
721
722#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
723#[serde(rename_all = "lowercase")]
724pub enum CacheControlType {
725 Ephemeral,
726}
727
728#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
729pub struct CacheControl {
730 #[serde(rename = "type")]
731 pub cache_type: CacheControlType,
732}
733
734#[derive(Debug, Serialize, Deserialize)]
735pub struct Message {
736 pub role: Role,
737 pub content: Vec<RequestContent>,
738}
739
740#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
741#[serde(rename_all = "lowercase")]
742pub enum Role {
743 User,
744 Assistant,
745}
746
747#[derive(Debug, Serialize, Deserialize)]
748#[serde(tag = "type")]
749pub enum RequestContent {
750 #[serde(rename = "text")]
751 Text {
752 text: String,
753 #[serde(skip_serializing_if = "Option::is_none")]
754 cache_control: Option<CacheControl>,
755 },
756 #[serde(rename = "thinking")]
757 Thinking {
758 thinking: String,
759 signature: String,
760 #[serde(skip_serializing_if = "Option::is_none")]
761 cache_control: Option<CacheControl>,
762 },
763 #[serde(rename = "redacted_thinking")]
764 RedactedThinking { data: String },
765 #[serde(rename = "image")]
766 Image {
767 source: ImageSource,
768 #[serde(skip_serializing_if = "Option::is_none")]
769 cache_control: Option<CacheControl>,
770 },
771 #[serde(rename = "tool_use")]
772 ToolUse {
773 id: String,
774 name: String,
775 input: serde_json::Value,
776 #[serde(skip_serializing_if = "Option::is_none")]
777 cache_control: Option<CacheControl>,
778 },
779 #[serde(rename = "tool_result")]
780 ToolResult {
781 tool_use_id: String,
782 is_error: bool,
783 content: ToolResultContent,
784 #[serde(skip_serializing_if = "Option::is_none")]
785 cache_control: Option<CacheControl>,
786 },
787}
788
789#[derive(Debug, Serialize, Deserialize)]
790#[serde(untagged)]
791pub enum ToolResultContent {
792 Plain(String),
793 Multipart(Vec<ToolResultPart>),
794}
795
796#[derive(Debug, Serialize, Deserialize)]
797#[serde(tag = "type", rename_all = "lowercase")]
798pub enum ToolResultPart {
799 Text { text: String },
800 Image { source: ImageSource },
801}
802
803#[derive(Debug, Serialize, Deserialize)]
804#[serde(tag = "type")]
805pub enum ResponseContent {
806 #[serde(rename = "text")]
807 Text { text: String },
808 #[serde(rename = "thinking")]
809 Thinking { thinking: String },
810 #[serde(rename = "redacted_thinking")]
811 RedactedThinking { data: String },
812 #[serde(rename = "tool_use")]
813 ToolUse {
814 id: String,
815 name: String,
816 input: serde_json::Value,
817 },
818}
819
820#[derive(Debug, Serialize, Deserialize)]
821pub struct ImageSource {
822 #[serde(rename = "type")]
823 pub source_type: String,
824 pub media_type: String,
825 pub data: String,
826}
827
828#[derive(Debug, Serialize, Deserialize)]
829pub struct Tool {
830 pub name: String,
831 pub description: String,
832 pub input_schema: serde_json::Value,
833}
834
835#[derive(Debug, Serialize, Deserialize)]
836#[serde(tag = "type", rename_all = "lowercase")]
837pub enum ToolChoice {
838 Auto,
839 Any,
840 Tool { name: String },
841 None,
842}
843
844#[derive(Debug, Serialize, Deserialize)]
845#[serde(tag = "type", rename_all = "lowercase")]
846pub enum Thinking {
847 Enabled { budget_tokens: Option<u32> },
848}
849
850#[derive(Debug, Serialize, Deserialize)]
851#[serde(untagged)]
852pub enum StringOrContents {
853 String(String),
854 Content(Vec<RequestContent>),
855}
856
857#[derive(Debug, Serialize, Deserialize)]
858pub struct Request {
859 pub model: String,
860 pub max_tokens: u64,
861 pub messages: Vec<Message>,
862 #[serde(default, skip_serializing_if = "Vec::is_empty")]
863 pub tools: Vec<Tool>,
864 #[serde(default, skip_serializing_if = "Option::is_none")]
865 pub thinking: Option<Thinking>,
866 #[serde(default, skip_serializing_if = "Option::is_none")]
867 pub tool_choice: Option<ToolChoice>,
868 #[serde(default, skip_serializing_if = "Option::is_none")]
869 pub system: Option<StringOrContents>,
870 #[serde(default, skip_serializing_if = "Option::is_none")]
871 pub metadata: Option<Metadata>,
872 #[serde(default, skip_serializing_if = "Vec::is_empty")]
873 pub stop_sequences: Vec<String>,
874 #[serde(default, skip_serializing_if = "Option::is_none")]
875 pub temperature: Option<f32>,
876 #[serde(default, skip_serializing_if = "Option::is_none")]
877 pub top_k: Option<u32>,
878 #[serde(default, skip_serializing_if = "Option::is_none")]
879 pub top_p: Option<f32>,
880}
881
882#[derive(Debug, Serialize, Deserialize)]
883struct StreamingRequest {
884 #[serde(flatten)]
885 pub base: Request,
886 pub stream: bool,
887}
888
889#[derive(Debug, Serialize, Deserialize)]
890pub struct Metadata {
891 pub user_id: Option<String>,
892}
893
894#[derive(Debug, Serialize, Deserialize, Default)]
895pub struct Usage {
896 #[serde(default, skip_serializing_if = "Option::is_none")]
897 pub input_tokens: Option<u64>,
898 #[serde(default, skip_serializing_if = "Option::is_none")]
899 pub output_tokens: Option<u64>,
900 #[serde(default, skip_serializing_if = "Option::is_none")]
901 pub cache_creation_input_tokens: Option<u64>,
902 #[serde(default, skip_serializing_if = "Option::is_none")]
903 pub cache_read_input_tokens: Option<u64>,
904}
905
906#[derive(Debug, Serialize, Deserialize)]
907pub struct Response {
908 pub id: String,
909 #[serde(rename = "type")]
910 pub response_type: String,
911 pub role: Role,
912 pub content: Vec<ResponseContent>,
913 pub model: String,
914 #[serde(default, skip_serializing_if = "Option::is_none")]
915 pub stop_reason: Option<String>,
916 #[serde(default, skip_serializing_if = "Option::is_none")]
917 pub stop_sequence: Option<String>,
918 pub usage: Usage,
919}
920
921#[derive(Debug, Serialize, Deserialize)]
922#[serde(tag = "type")]
923pub enum Event {
924 #[serde(rename = "message_start")]
925 MessageStart { message: Response },
926 #[serde(rename = "content_block_start")]
927 ContentBlockStart {
928 index: usize,
929 content_block: ResponseContent,
930 },
931 #[serde(rename = "content_block_delta")]
932 ContentBlockDelta { index: usize, delta: ContentDelta },
933 #[serde(rename = "content_block_stop")]
934 ContentBlockStop { index: usize },
935 #[serde(rename = "message_delta")]
936 MessageDelta { delta: MessageDelta, usage: Usage },
937 #[serde(rename = "message_stop")]
938 MessageStop,
939 #[serde(rename = "ping")]
940 Ping,
941 #[serde(rename = "error")]
942 Error { error: ApiError },
943}
944
945#[derive(Debug, Serialize, Deserialize)]
946#[serde(tag = "type")]
947pub enum ContentDelta {
948 #[serde(rename = "text_delta")]
949 TextDelta { text: String },
950 #[serde(rename = "thinking_delta")]
951 ThinkingDelta { thinking: String },
952 #[serde(rename = "signature_delta")]
953 SignatureDelta { signature: String },
954 #[serde(rename = "input_json_delta")]
955 InputJsonDelta { partial_json: String },
956}
957
958#[derive(Debug, Serialize, Deserialize)]
959pub struct MessageDelta {
960 pub stop_reason: Option<String>,
961 pub stop_sequence: Option<String>,
962}
963
964#[derive(Debug)]
965pub enum AnthropicError {
966 /// Failed to serialize the HTTP request body to JSON
967 SerializeRequest(serde_json::Error),
968
969 /// Failed to construct the HTTP request body
970 BuildRequestBody(http::Error),
971
972 /// Failed to send the HTTP request
973 HttpSend(anyhow::Error),
974
975 /// Failed to deserialize the response from JSON
976 DeserializeResponse(serde_json::Error),
977
978 /// Failed to read from response stream
979 ReadResponse(io::Error),
980
981 /// HTTP error response from the API
982 HttpResponseError {
983 status_code: StatusCode,
984 message: String,
985 },
986
987 /// Rate limit exceeded
988 RateLimit { retry_after: Duration },
989
990 /// Server overloaded
991 ServerOverloaded { retry_after: Option<Duration> },
992
993 /// API returned an error response
994 ApiError(ApiError),
995}
996
997#[derive(Debug, Serialize, Deserialize, Error)]
998#[error("Anthropic API Error: {error_type}: {message}")]
999pub struct ApiError {
1000 #[serde(rename = "type")]
1001 pub error_type: String,
1002 pub message: String,
1003}
1004
1005/// An Anthropic API error code.
1006/// <https://docs.anthropic.com/en/api/errors#http-errors>
1007#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
1008#[strum(serialize_all = "snake_case")]
1009pub enum ApiErrorCode {
1010 /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
1011 InvalidRequestError,
1012 /// 401 - `authentication_error`: There's an issue with your API key.
1013 AuthenticationError,
1014 /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
1015 PermissionError,
1016 /// 404 - `not_found_error`: The requested resource was not found.
1017 NotFoundError,
1018 /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
1019 RequestTooLarge,
1020 /// 429 - `rate_limit_error`: Your account has hit a rate limit.
1021 RateLimitError,
1022 /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
1023 ApiError,
1024 /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
1025 OverloadedError,
1026}
1027
1028impl ApiError {
1029 pub fn code(&self) -> Option<ApiErrorCode> {
1030 ApiErrorCode::from_str(&self.error_type).ok()
1031 }
1032
1033 pub fn is_rate_limit_error(&self) -> bool {
1034 matches!(self.error_type.as_str(), "rate_limit_error")
1035 }
1036
1037 pub fn match_window_exceeded(&self) -> Option<u64> {
1038 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1039 return None;
1040 };
1041
1042 parse_prompt_too_long(&self.message)
1043 }
1044}
1045
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens...`.
///
/// Returns `None` when the message lacks that prefix, lacks the ` tokens`
/// marker, or when `<N>` is not a valid `u64`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (count, _rest) = after_prefix.split_once(" tokens")?;
    count.parse().ok()
}
1054
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Token count followed by the limit.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Token count with nothing after ` tokens`.
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Right shape, but the count is not a number.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}