1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
/// Base URL of the public Anthropic API, written without a trailing slash.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
18
/// Cache-related settings associated with a model.
///
/// Built-in models that support caching report `min_total_token: 2_048`,
/// `should_speculate: true` and `max_cache_anchors: 4` (see
/// `Model::cache_configuration`); custom models carry their own optional value.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum prompt size (in tokens) before caching
    // is attempted; the consumer is not visible in this file, so confirm.
    pub min_total_token: u64,
    // NOTE(review): semantics are defined by the consumer of this struct; confirm.
    pub should_speculate: bool,
    // NOTE(review): presumably an upper bound on cache breakpoints per request; confirm.
    pub max_cache_anchors: usize,
}
26
/// Operating mode of a model: plain, or with thinking enabled.
///
/// Mirrors the two variants of `ModelMode`; conversions exist in both directions.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No thinking; this is the `Default::default()` value.
    #[default]
    Default,
    /// Thinking enabled, with an optional token budget.
    ///
    /// The built-in thinking models use `Some(4_096)` (see `Model::mode`).
    Thinking {
        budget_tokens: Option<u32>,
    },
}
36
37impl From<ModelMode> for AnthropicModelMode {
38 fn from(value: ModelMode) -> Self {
39 match value {
40 ModelMode::Default => AnthropicModelMode::Default,
41 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
42 }
43 }
44}
45
46impl From<AnthropicModelMode> for ModelMode {
47 fn from(value: AnthropicModelMode) -> Self {
48 match value {
49 AnthropicModelMode::Default => ModelMode::Default,
50 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
51 }
52 }
53}
54
/// The Anthropic models known to this crate, plus a user-defined [`Model::Custom`].
///
/// Each built-in variant (de)serializes under the name in its `rename` attribute
/// and additionally accepts the `-latest` alias when deserializing. Variants
/// ending in `Thinking` report a thinking mode from `Model::mode` and share their
/// request id with the non-thinking sibling (see `Model::request_id`).
/// `ClaudeSonnet4_5` is the `Default::default()` value.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    // A user-defined model; every per-model accessor on `Model` reads its answer
    // from the fields below instead of a built-in table.
    #[serde(rename = "custom")]
    Custom {
        // Returned verbatim by `id()` and `request_id()`.
        name: String,
        // Returned by `max_token_count()`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        // `max_output_tokens()` falls back to 4_096 when this is `None`.
        max_output_tokens: Option<u64>,
        // `default_temperature()` falls back to 1.0 when this is `None`.
        default_temperature: Option<f32>,
        // Non-blank entries are comma-joined by `beta_headers()`; defaults to empty
        // when absent from the serialized form.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        // Returned (cloned) by `mode()`; defaults to `AnthropicModelMode::Default`
        // when absent from the serialized form.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
136
137impl Model {
138 pub fn default_fast() -> Self {
139 Self::Claude3_5Haiku
140 }
141
142 pub fn from_id(id: &str) -> Result<Self> {
143 if id.starts_with("claude-opus-4-5-thinking") {
144 return Ok(Self::ClaudeOpus4_5Thinking);
145 }
146
147 if id.starts_with("claude-opus-4-5") {
148 return Ok(Self::ClaudeOpus4_5);
149 }
150
151 if id.starts_with("claude-opus-4-1-thinking") {
152 return Ok(Self::ClaudeOpus4_1Thinking);
153 }
154
155 if id.starts_with("claude-opus-4-thinking") {
156 return Ok(Self::ClaudeOpus4Thinking);
157 }
158
159 if id.starts_with("claude-opus-4-1") {
160 return Ok(Self::ClaudeOpus4_1);
161 }
162
163 if id.starts_with("claude-opus-4") {
164 return Ok(Self::ClaudeOpus4);
165 }
166
167 if id.starts_with("claude-sonnet-4-5-thinking") {
168 return Ok(Self::ClaudeSonnet4_5Thinking);
169 }
170
171 if id.starts_with("claude-sonnet-4-5") {
172 return Ok(Self::ClaudeSonnet4_5);
173 }
174
175 if id.starts_with("claude-sonnet-4-thinking") {
176 return Ok(Self::ClaudeSonnet4Thinking);
177 }
178
179 if id.starts_with("claude-sonnet-4") {
180 return Ok(Self::ClaudeSonnet4);
181 }
182
183 if id.starts_with("claude-3-7-sonnet-thinking") {
184 return Ok(Self::Claude3_7SonnetThinking);
185 }
186
187 if id.starts_with("claude-3-7-sonnet") {
188 return Ok(Self::Claude3_7Sonnet);
189 }
190
191 if id.starts_with("claude-3-5-sonnet") {
192 return Ok(Self::Claude3_5Sonnet);
193 }
194
195 if id.starts_with("claude-haiku-4-5-thinking") {
196 return Ok(Self::ClaudeHaiku4_5Thinking);
197 }
198
199 if id.starts_with("claude-haiku-4-5") {
200 return Ok(Self::ClaudeHaiku4_5);
201 }
202
203 if id.starts_with("claude-3-5-haiku") {
204 return Ok(Self::Claude3_5Haiku);
205 }
206
207 if id.starts_with("claude-3-opus") {
208 return Ok(Self::Claude3Opus);
209 }
210
211 if id.starts_with("claude-3-sonnet") {
212 return Ok(Self::Claude3Sonnet);
213 }
214
215 if id.starts_with("claude-3-haiku") {
216 return Ok(Self::Claude3Haiku);
217 }
218
219 Err(anyhow!("invalid model ID: {id}"))
220 }
221
222 pub fn id(&self) -> &str {
223 match self {
224 Self::ClaudeOpus4 => "claude-opus-4-latest",
225 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
226 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
227 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
228 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
229 Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
230 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
231 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
232 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
233 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
234 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
235 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
236 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
237 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
238 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
239 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
240 Self::Claude3Opus => "claude-3-opus-latest",
241 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
242 Self::Claude3Haiku => "claude-3-haiku-20240307",
243 Self::Custom { name, .. } => name,
244 }
245 }
246
247 /// The id of the model that should be used for making API requests
248 pub fn request_id(&self) -> &str {
249 match self {
250 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
251 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
252 Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
253 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
254 Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
255 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
256 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
257 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
258 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
259 Self::Claude3Opus => "claude-3-opus-latest",
260 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
261 Self::Claude3Haiku => "claude-3-haiku-20240307",
262 Self::Custom { name, .. } => name,
263 }
264 }
265
266 pub fn display_name(&self) -> &str {
267 match self {
268 Self::ClaudeOpus4 => "Claude Opus 4",
269 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
270 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
271 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
272 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
273 Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
274 Self::ClaudeSonnet4 => "Claude Sonnet 4",
275 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
276 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
277 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
278 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
279 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
280 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
281 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
282 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
283 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
284 Self::Claude3Opus => "Claude 3 Opus",
285 Self::Claude3Sonnet => "Claude 3 Sonnet",
286 Self::Claude3Haiku => "Claude 3 Haiku",
287 Self::Custom {
288 name, display_name, ..
289 } => display_name.as_ref().unwrap_or(name),
290 }
291 }
292
293 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
294 match self {
295 Self::ClaudeOpus4
296 | Self::ClaudeOpus4_1
297 | Self::ClaudeOpus4Thinking
298 | Self::ClaudeOpus4_1Thinking
299 | Self::ClaudeOpus4_5
300 | Self::ClaudeOpus4_5Thinking
301 | Self::ClaudeSonnet4
302 | Self::ClaudeSonnet4Thinking
303 | Self::ClaudeSonnet4_5
304 | Self::ClaudeSonnet4_5Thinking
305 | Self::Claude3_5Sonnet
306 | Self::ClaudeHaiku4_5
307 | Self::ClaudeHaiku4_5Thinking
308 | Self::Claude3_5Haiku
309 | Self::Claude3_7Sonnet
310 | Self::Claude3_7SonnetThinking
311 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
312 min_total_token: 2_048,
313 should_speculate: true,
314 max_cache_anchors: 4,
315 }),
316 Self::Custom {
317 cache_configuration,
318 ..
319 } => cache_configuration.clone(),
320 _ => None,
321 }
322 }
323
324 pub fn max_token_count(&self) -> u64 {
325 match self {
326 Self::ClaudeOpus4
327 | Self::ClaudeOpus4_1
328 | Self::ClaudeOpus4Thinking
329 | Self::ClaudeOpus4_1Thinking
330 | Self::ClaudeOpus4_5
331 | Self::ClaudeOpus4_5Thinking
332 | Self::ClaudeSonnet4
333 | Self::ClaudeSonnet4Thinking
334 | Self::ClaudeSonnet4_5
335 | Self::ClaudeSonnet4_5Thinking
336 | Self::Claude3_5Sonnet
337 | Self::ClaudeHaiku4_5
338 | Self::ClaudeHaiku4_5Thinking
339 | Self::Claude3_5Haiku
340 | Self::Claude3_7Sonnet
341 | Self::Claude3_7SonnetThinking
342 | Self::Claude3Opus
343 | Self::Claude3Sonnet
344 | Self::Claude3Haiku => 200_000,
345 Self::Custom { max_tokens, .. } => *max_tokens,
346 }
347 }
348
349 pub fn max_output_tokens(&self) -> u64 {
350 match self {
351 Self::ClaudeOpus4
352 | Self::ClaudeOpus4_1
353 | Self::ClaudeOpus4Thinking
354 | Self::ClaudeOpus4_1Thinking
355 | Self::ClaudeOpus4_5
356 | Self::ClaudeOpus4_5Thinking
357 | Self::ClaudeSonnet4
358 | Self::ClaudeSonnet4Thinking
359 | Self::ClaudeSonnet4_5
360 | Self::ClaudeSonnet4_5Thinking
361 | Self::Claude3_5Sonnet
362 | Self::Claude3_7Sonnet
363 | Self::Claude3_7SonnetThinking
364 | Self::Claude3_5Haiku => 8_192,
365 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
366 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
367 Self::Custom {
368 max_output_tokens, ..
369 } => max_output_tokens.unwrap_or(4_096),
370 }
371 }
372
373 pub fn default_temperature(&self) -> f32 {
374 match self {
375 Self::ClaudeOpus4
376 | Self::ClaudeOpus4_1
377 | Self::ClaudeOpus4Thinking
378 | Self::ClaudeOpus4_1Thinking
379 | Self::ClaudeOpus4_5
380 | Self::ClaudeOpus4_5Thinking
381 | Self::ClaudeSonnet4
382 | Self::ClaudeSonnet4Thinking
383 | Self::ClaudeSonnet4_5
384 | Self::ClaudeSonnet4_5Thinking
385 | Self::Claude3_5Sonnet
386 | Self::Claude3_7Sonnet
387 | Self::Claude3_7SonnetThinking
388 | Self::ClaudeHaiku4_5
389 | Self::ClaudeHaiku4_5Thinking
390 | Self::Claude3_5Haiku
391 | Self::Claude3Opus
392 | Self::Claude3Sonnet
393 | Self::Claude3Haiku => 1.0,
394 Self::Custom {
395 default_temperature,
396 ..
397 } => default_temperature.unwrap_or(1.0),
398 }
399 }
400
401 pub fn mode(&self) -> AnthropicModelMode {
402 match self {
403 Self::ClaudeOpus4
404 | Self::ClaudeOpus4_1
405 | Self::ClaudeOpus4_5
406 | Self::ClaudeSonnet4
407 | Self::ClaudeSonnet4_5
408 | Self::Claude3_5Sonnet
409 | Self::Claude3_7Sonnet
410 | Self::ClaudeHaiku4_5
411 | Self::Claude3_5Haiku
412 | Self::Claude3Opus
413 | Self::Claude3Sonnet
414 | Self::Claude3Haiku => AnthropicModelMode::Default,
415 Self::ClaudeOpus4Thinking
416 | Self::ClaudeOpus4_1Thinking
417 | Self::ClaudeOpus4_5Thinking
418 | Self::ClaudeSonnet4Thinking
419 | Self::ClaudeSonnet4_5Thinking
420 | Self::ClaudeHaiku4_5Thinking
421 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
422 budget_tokens: Some(4_096),
423 },
424 Self::Custom { mode, .. } => mode.clone(),
425 }
426 }
427
428 pub fn beta_headers(&self) -> Option<String> {
429 let mut headers = vec![];
430
431 match self {
432 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
433 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
434 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
435 headers.push("token-efficient-tools-2025-02-19".to_string());
436 }
437 Self::Custom {
438 extra_beta_headers, ..
439 } => {
440 headers.extend(
441 extra_beta_headers
442 .iter()
443 .filter(|header| !header.trim().is_empty())
444 .cloned(),
445 );
446 }
447 _ => {}
448 }
449
450 if headers.is_empty() {
451 None
452 } else {
453 Some(headers.join(","))
454 }
455 }
456
457 pub fn tool_model_id(&self) -> &str {
458 if let Self::Custom {
459 tool_override: Some(tool_override),
460 ..
461 } = self
462 {
463 tool_override
464 } else {
465 self.request_id()
466 }
467 }
468}
469
470/// Generate completion with streaming.
471pub async fn stream_completion(
472 client: &dyn HttpClient,
473 api_url: &str,
474 api_key: &str,
475 request: Request,
476 beta_headers: Option<String>,
477) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
478 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
479 .await
480 .map(|output| output.0)
481}
482
483/// Generate completion without streaming.
484pub async fn non_streaming_completion(
485 client: &dyn HttpClient,
486 api_url: &str,
487 api_key: &str,
488 request: Request,
489 beta_headers: Option<String>,
490) -> Result<Response, AnthropicError> {
491 let (mut response, rate_limits) =
492 send_request(client, api_url, api_key, &request, beta_headers).await?;
493
494 if response.status().is_success() {
495 let mut body = String::new();
496 response
497 .body_mut()
498 .read_to_string(&mut body)
499 .await
500 .map_err(AnthropicError::ReadResponse)?;
501
502 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
503 } else {
504 Err(handle_error_response(response, rate_limits).await)
505 }
506}
507
508async fn send_request(
509 client: &dyn HttpClient,
510 api_url: &str,
511 api_key: &str,
512 request: impl Serialize,
513 beta_headers: Option<String>,
514) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
515 let uri = format!("{api_url}/v1/messages");
516
517 let mut request_builder = HttpRequest::builder()
518 .method(Method::POST)
519 .uri(uri)
520 .header("Anthropic-Version", "2023-06-01")
521 .header("X-Api-Key", api_key.trim())
522 .header("Content-Type", "application/json");
523
524 if let Some(beta_headers) = beta_headers {
525 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
526 }
527
528 let serialized_request =
529 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
530 let request = request_builder
531 .body(AsyncBody::from(serialized_request))
532 .map_err(AnthropicError::BuildRequestBody)?;
533
534 let response = client
535 .send(request)
536 .await
537 .map_err(AnthropicError::HttpSend)?;
538
539 let rate_limits = RateLimitInfo::from_headers(response.headers());
540
541 Ok((response, rate_limits))
542}
543
544async fn handle_error_response(
545 mut response: http::Response<AsyncBody>,
546 rate_limits: RateLimitInfo,
547) -> AnthropicError {
548 if response.status().as_u16() == 529 {
549 return AnthropicError::ServerOverloaded {
550 retry_after: rate_limits.retry_after,
551 };
552 }
553
554 if let Some(retry_after) = rate_limits.retry_after {
555 return AnthropicError::RateLimit { retry_after };
556 }
557
558 let mut body = String::new();
559 let read_result = response
560 .body_mut()
561 .read_to_string(&mut body)
562 .await
563 .map_err(AnthropicError::ReadResponse);
564
565 if let Err(err) = read_result {
566 return err;
567 }
568
569 match serde_json::from_str::<Event>(&body) {
570 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
571 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
572 status_code: response.status(),
573 message: body,
574 },
575 }
576}
577
/// An individual rate limit.
///
/// Populated from the `anthropic-ratelimit-{resource}-limit`, `-remaining` and
/// `-reset` response headers for one resource.
#[derive(Debug)]
pub struct RateLimit {
    // Parsed from the `-limit` header.
    pub limit: usize,
    // Parsed from the `-remaining` header.
    pub remaining: usize,
    // Parsed from the `-reset` header as an RFC 3339 timestamp, converted to UTC.
    pub reset: DateTime<Utc>,
}
585
586impl RateLimit {
587 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
588 let limit =
589 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
590 let remaining = get_header(
591 &format!("anthropic-ratelimit-{resource}-remaining"),
592 headers,
593 )?
594 .parse()?;
595 let reset = DateTime::parse_from_rfc3339(get_header(
596 &format!("anthropic-ratelimit-{resource}-reset"),
597 headers,
598 )?)?
599 .to_utc();
600
601 Ok(Self {
602 limit,
603 remaining,
604 reset,
605 })
606 }
607}
608
/// Rate-limit information extracted from a response's headers.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    // Value of the `retry-after` header, read as whole seconds; `None` when the
    // header is absent or unparseable.
    pub retry_after: Option<Duration>,
    // Each field below is `None` when its `anthropic-ratelimit-*` headers are
    // missing or fail to parse.
    // From the `anthropic-ratelimit-requests-*` headers.
    pub requests: Option<RateLimit>,
    // From the `anthropic-ratelimit-tokens-*` headers.
    pub tokens: Option<RateLimit>,
    // From the `anthropic-ratelimit-input-tokens-*` headers.
    pub input_tokens: Option<RateLimit>,
    // From the `anthropic-ratelimit-output-tokens-*` headers.
    pub output_tokens: Option<RateLimit>,
}
618
619impl RateLimitInfo {
620 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
621 // Check if any rate limit headers exist
622 let has_rate_limit_headers = headers
623 .keys()
624 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
625
626 if !has_rate_limit_headers {
627 return Self {
628 retry_after: None,
629 requests: None,
630 tokens: None,
631 input_tokens: None,
632 output_tokens: None,
633 };
634 }
635
636 Self {
637 retry_after: parse_retry_after(headers),
638 requests: RateLimit::from_headers("requests", headers).ok(),
639 tokens: RateLimit::from_headers("tokens", headers).ok(),
640 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
641 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
642 }
643 }
644}
645
646/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
647/// seconds). Note that other services might specify an HTTP date or some other format for this
648/// header. Returns `None` if the header is not present or cannot be parsed.
649pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
650 headers
651 .get("retry-after")
652 .and_then(|v| v.to_str().ok())
653 .and_then(|v| v.parse::<u64>().ok())
654 .map(Duration::from_secs)
655}
656
657fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
658 Ok(headers
659 .get(key)
660 .with_context(|| format!("missing header `{key}`"))?
661 .to_str()?)
662}
663
664pub async fn stream_completion_with_rate_limit_info(
665 client: &dyn HttpClient,
666 api_url: &str,
667 api_key: &str,
668 request: Request,
669 beta_headers: Option<String>,
670) -> Result<
671 (
672 BoxStream<'static, Result<Event, AnthropicError>>,
673 Option<RateLimitInfo>,
674 ),
675 AnthropicError,
676> {
677 let request = StreamingRequest {
678 base: request,
679 stream: true,
680 };
681
682 let (response, rate_limits) =
683 send_request(client, api_url, api_key, &request, beta_headers).await?;
684
685 if response.status().is_success() {
686 let reader = BufReader::new(response.into_body());
687 let stream = reader
688 .lines()
689 .filter_map(|line| async move {
690 match line {
691 Ok(line) => {
692 let line = line.strip_prefix("data: ")?;
693 match serde_json::from_str(line) {
694 Ok(response) => Some(Ok(response)),
695 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
696 }
697 }
698 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
699 }
700 })
701 .boxed();
702 Ok((stream, Some(rate_limits)))
703 } else {
704 Err(handle_error_response(response, rate_limits).await)
705 }
706}
707
/// Kind of cache control; serialized in lowercase (`"ephemeral"`).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
713
/// Cache-control marker that can be attached to a [`RequestContent`] block.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    // Serialized under the key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
719
/// One message of a [`Request`]: a role plus its ordered content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
725
/// Author of a message; serialized in lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
732
/// A content block sent as part of a request message.
///
/// Internally tagged: the variant is selected by the `type` field, whose value
/// is the `rename` string on each variant. Wherever a `cache_control` field is
/// present it is left out of the serialized form when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A thinking block together with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A redacted thinking block, carried as an opaque `data` string.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation: its id, the tool name and the JSON input.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, linked to it by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
774
/// Payload of a tool result: either a bare string or a list of parts.
///
/// Untagged, so it serializes as a JSON string or a JSON array respectively.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
781
/// One part of a multipart tool result.
///
/// Internally tagged by `type`, with lowercase variant names (`"text"`, `"image"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
788
/// A content block received in a [`Response`] or at the start of a streamed
/// content block.
///
/// Internally tagged: the variant is selected by the `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text { text: String },
    /// A thinking block.
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// A redacted thinking block, carried as an opaque `data` string.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation: its id, the tool name and the JSON input.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
805
/// Source of an image content block.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    // Serialized under the key `type`.
    // NOTE(review): accepted values (e.g. an encoding name) are not constrained here; confirm against callers.
    #[serde(rename = "type")]
    pub source_type: String,
    // NOTE(review): presumably a MIME type such as an image format; confirm against callers.
    pub media_type: String,
    // The image payload, as a string in whatever form `source_type` denotes.
    pub data: String,
}
813
/// A tool definition supplied in [`Request::tools`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    // Arbitrary JSON describing the tool's input.
    // NOTE(review): presumably a JSON Schema object; not validated here.
    pub input_schema: serde_json::Value,
}
820
/// Tool-selection setting for a [`Request`].
///
/// Internally tagged by `type`, with lowercase variant names (`"auto"`, `"any"`,
/// `"tool"`, `"none"`); the `Tool` variant additionally carries the tool `name`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}
829
/// Thinking configuration for a [`Request`].
///
/// Internally tagged by `type`; the only variant serializes as `"enabled"` and
/// carries an optional token budget.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}
835
/// Either a bare string or a list of content blocks (used for [`Request::system`]).
///
/// Untagged, so it serializes as a JSON string or a JSON array respectively.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
842
/// Body of a request that `send_request` POSTs to `/v1/messages`.
///
/// `model`, `max_tokens` and `messages` are always serialized. Every other
/// field is omitted from the JSON when it is `None` or empty, and defaults to
/// `None`/empty when absent during deserialization.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
867
/// A [`Request`] with an additional `stream` flag.
///
/// `base` is flattened, so its fields appear at the top level of the JSON next
/// to `stream`.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
874
/// Optional metadata attached to a [`Request`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
879
/// Token usage counters reported with a response or a message delta.
///
/// Every counter is optional: a missing field deserializes to `None`, and
/// `None` fields are omitted when serializing.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
891
/// A complete message returned by `non_streaming_completion`, also carried by
/// the `message_start` streaming event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    // Serialized under the key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    // `stop_reason` and `stop_sequence` default to `None` when absent and are
    // omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
906
/// An event decoded from the `data` lines of a streaming response.
///
/// Internally tagged: the variant is selected by the `type` field. The same
/// type is used by `handle_error_response` to recognise an `error` payload in
/// the body of a failed request.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// Carries the initial [`Response`] for the message.
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    /// Opens the content block at `index`.
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// An incremental update to the content block at `index`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    /// Closes the content block at `index`.
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// Message-level update: stop information plus usage counters.
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    /// Marks the end of the message.
    #[serde(rename = "message_stop")]
    MessageStop,
    /// A ping event with no payload.
    #[serde(rename = "ping")]
    Ping,
    /// An error reported by the API.
    #[serde(rename = "error")]
    Error { error: ApiError },
}
930
/// Incremental payload of a `content_block_delta` event.
///
/// Internally tagged: the variant is selected by the `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    /// A fragment of text.
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    /// A fragment of thinking text.
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// A signature string for a thinking block.
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// A fragment of JSON text; the field name indicates it may be incomplete on its own.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
943
/// Stop information carried by a `message_delta` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
949
/// Everything that can go wrong while talking to the Anthropic API: failures
/// building or sending the request, reading or decoding the response, and
/// error responses returned by the server.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
982
/// An error object returned by the API.
///
/// Displays as `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    // Serialized under the key `type`; `ApiError::code` parses it into an `ApiErrorCode`.
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
990
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from a string by the strum-derived `FromStr`, using the snake_case
/// form of each variant name (for example `invalid_request_error`).
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
1013
1014impl ApiError {
1015 pub fn code(&self) -> Option<ApiErrorCode> {
1016 ApiErrorCode::from_str(&self.error_type).ok()
1017 }
1018
1019 pub fn is_rate_limit_error(&self) -> bool {
1020 matches!(self.error_type.as_str(), "rate_limit_error")
1021 }
1022
1023 pub fn match_window_exceeded(&self) -> Option<u64> {
1024 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1025 return None;
1026 };
1027
1028 parse_prompt_too_long(&self.message)
1029 }
1030}
1031
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens...`.
///
/// Returns `None` when the message does not start with that prefix, has no
/// ` tokens` marker after it, or the text between the two is not an unsigned
/// integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _rest) = after_prefix.split_once(" tokens")?;
    token_count.parse::<u64>().ok()
}
1040
/// Checks `ApiError::match_window_exceeded` against (error type, message,
/// expected token count) triples.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Over-long prompt with a trailing comparison.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Over-long prompt with nothing after the marker.
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Non-numeric token count.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}