1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
/// Base URL of Anthropic's hosted API (no trailing slash).
///
/// The request helpers in this file build endpoint URIs as `{api_url}/v1/messages`.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
16
// Prompt-caching parameters for a model, as returned by `Model::cache_configuration`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum prompt size (in tokens) before caching is
    // attempted — confirm against the consumer of this value.
    pub min_total_token: u64,
    // NOTE(review): presumably enables speculative cache writes — confirm with the consumer.
    pub should_speculate: bool,
    // NOTE(review): presumably the maximum number of cache breakpoints per request — confirm.
    pub max_cache_anchors: usize,
}
24
// Operating mode of a model; converts to and from `ModelMode` through the `From` impls
// in this file. `Default` is the `Default::default()` value.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    // Regular mode without a thinking budget.
    #[default]
    Default,
    // Thinking mode, optionally carrying a token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
}
34
35impl From<ModelMode> for AnthropicModelMode {
36 fn from(value: ModelMode) -> Self {
37 match value {
38 ModelMode::Default => AnthropicModelMode::Default,
39 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
40 }
41 }
42}
43
44impl From<AnthropicModelMode> for ModelMode {
45 fn from(value: AnthropicModelMode) -> Self {
46 match value {
47 AnthropicModelMode::Default => ModelMode::Default,
48 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
49 }
50 }
51}
52
// Anthropic models known to this file, plus a user-configured `Custom` entry.
//
// Each built-in variant (de)serializes under its `rename` string and is additionally
// accepted under the `-latest` alias when deserializing. `ClaudeSonnet4_5` is the
// `Default::default()` value. `EnumIter` allows iterating over the variants.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    // The default model.
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    // A user-defined model; every property the built-in variants hard-code in the
    // `impl Model` accessors is read from these fields instead.
    #[serde(rename = "custom")]
    Custom {
        // Model id; returned by both `id()` and `request_id()`.
        name: String,
        // Returned by `max_token_count()`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        // `max_output_tokens()` falls back to 4_096 when this is `None`.
        max_output_tokens: Option<u64>,
        // `default_temperature()` falls back to 1.0 when this is `None`.
        default_temperature: Option<f32>,
        // Joined with commas by `beta_headers()`; blank entries are skipped there.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        // Defaults to `AnthropicModelMode::Default` when absent from the input.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
127
128impl Model {
129 pub fn default_fast() -> Self {
130 Self::Claude3_5Haiku
131 }
132
133 pub fn from_id(id: &str) -> Result<Self> {
134 if id.starts_with("claude-opus-4-1-thinking") {
135 return Ok(Self::ClaudeOpus4_1Thinking);
136 }
137
138 if id.starts_with("claude-opus-4-thinking") {
139 return Ok(Self::ClaudeOpus4Thinking);
140 }
141
142 if id.starts_with("claude-opus-4-1") {
143 return Ok(Self::ClaudeOpus4_1);
144 }
145
146 if id.starts_with("claude-opus-4") {
147 return Ok(Self::ClaudeOpus4);
148 }
149
150 if id.starts_with("claude-sonnet-4-5-thinking") {
151 return Ok(Self::ClaudeSonnet4_5Thinking);
152 }
153
154 if id.starts_with("claude-sonnet-4-5") {
155 return Ok(Self::ClaudeSonnet4_5);
156 }
157
158 if id.starts_with("claude-sonnet-4-thinking") {
159 return Ok(Self::ClaudeSonnet4Thinking);
160 }
161
162 if id.starts_with("claude-sonnet-4") {
163 return Ok(Self::ClaudeSonnet4);
164 }
165
166 if id.starts_with("claude-3-7-sonnet-thinking") {
167 return Ok(Self::Claude3_7SonnetThinking);
168 }
169
170 if id.starts_with("claude-3-7-sonnet") {
171 return Ok(Self::Claude3_7Sonnet);
172 }
173
174 if id.starts_with("claude-3-5-sonnet") {
175 return Ok(Self::Claude3_5Sonnet);
176 }
177
178 if id.starts_with("claude-haiku-4-5-thinking") {
179 return Ok(Self::ClaudeHaiku4_5Thinking);
180 }
181
182 if id.starts_with("claude-haiku-4-5") {
183 return Ok(Self::ClaudeHaiku4_5);
184 }
185
186 if id.starts_with("claude-3-5-haiku") {
187 return Ok(Self::Claude3_5Haiku);
188 }
189
190 if id.starts_with("claude-3-opus") {
191 return Ok(Self::Claude3Opus);
192 }
193
194 if id.starts_with("claude-3-sonnet") {
195 return Ok(Self::Claude3Sonnet);
196 }
197
198 if id.starts_with("claude-3-haiku") {
199 return Ok(Self::Claude3Haiku);
200 }
201
202 Err(anyhow!("invalid model ID: {id}"))
203 }
204
205 pub fn id(&self) -> &str {
206 match self {
207 Self::ClaudeOpus4 => "claude-opus-4-latest",
208 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
209 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
210 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
211 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
212 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
213 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
214 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
215 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
216 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
217 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
218 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
219 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
220 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
221 Self::Claude3Opus => "claude-3-opus-latest",
222 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
223 Self::Claude3Haiku => "claude-3-haiku-20240307",
224 Self::Custom { name, .. } => name,
225 }
226 }
227
228 /// The id of the model that should be used for making API requests
229 pub fn request_id(&self) -> &str {
230 match self {
231 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
232 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
233 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
234 Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
235 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
236 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
237 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
238 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
239 Self::Claude3Opus => "claude-3-opus-latest",
240 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
241 Self::Claude3Haiku => "claude-3-haiku-20240307",
242 Self::Custom { name, .. } => name,
243 }
244 }
245
246 pub fn display_name(&self) -> &str {
247 match self {
248 Self::ClaudeOpus4 => "Claude Opus 4",
249 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
250 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
251 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
252 Self::ClaudeSonnet4 => "Claude Sonnet 4",
253 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
254 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
255 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
256 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
257 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
258 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
259 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
260 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
261 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
262 Self::Claude3Opus => "Claude 3 Opus",
263 Self::Claude3Sonnet => "Claude 3 Sonnet",
264 Self::Claude3Haiku => "Claude 3 Haiku",
265 Self::Custom {
266 name, display_name, ..
267 } => display_name.as_ref().unwrap_or(name),
268 }
269 }
270
271 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
272 match self {
273 Self::ClaudeOpus4
274 | Self::ClaudeOpus4_1
275 | Self::ClaudeOpus4Thinking
276 | Self::ClaudeOpus4_1Thinking
277 | Self::ClaudeSonnet4
278 | Self::ClaudeSonnet4Thinking
279 | Self::ClaudeSonnet4_5
280 | Self::ClaudeSonnet4_5Thinking
281 | Self::Claude3_5Sonnet
282 | Self::ClaudeHaiku4_5
283 | Self::ClaudeHaiku4_5Thinking
284 | Self::Claude3_5Haiku
285 | Self::Claude3_7Sonnet
286 | Self::Claude3_7SonnetThinking
287 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
288 min_total_token: 2_048,
289 should_speculate: true,
290 max_cache_anchors: 4,
291 }),
292 Self::Custom {
293 cache_configuration,
294 ..
295 } => cache_configuration.clone(),
296 _ => None,
297 }
298 }
299
300 pub fn max_token_count(&self) -> u64 {
301 match self {
302 Self::ClaudeOpus4
303 | Self::ClaudeOpus4_1
304 | Self::ClaudeOpus4Thinking
305 | Self::ClaudeOpus4_1Thinking
306 | Self::ClaudeSonnet4
307 | Self::ClaudeSonnet4Thinking
308 | Self::ClaudeSonnet4_5
309 | Self::ClaudeSonnet4_5Thinking
310 | Self::Claude3_5Sonnet
311 | Self::ClaudeHaiku4_5
312 | Self::ClaudeHaiku4_5Thinking
313 | Self::Claude3_5Haiku
314 | Self::Claude3_7Sonnet
315 | Self::Claude3_7SonnetThinking
316 | Self::Claude3Opus
317 | Self::Claude3Sonnet
318 | Self::Claude3Haiku => 200_000,
319 Self::Custom { max_tokens, .. } => *max_tokens,
320 }
321 }
322
323 pub fn max_output_tokens(&self) -> u64 {
324 match self {
325 Self::ClaudeOpus4
326 | Self::ClaudeOpus4_1
327 | Self::ClaudeOpus4Thinking
328 | Self::ClaudeOpus4_1Thinking
329 | Self::ClaudeSonnet4
330 | Self::ClaudeSonnet4Thinking
331 | Self::ClaudeSonnet4_5
332 | Self::ClaudeSonnet4_5Thinking
333 | Self::Claude3_5Sonnet
334 | Self::Claude3_7Sonnet
335 | Self::Claude3_7SonnetThinking
336 | Self::Claude3_5Haiku => 8_192,
337 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
338 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
339 Self::Custom {
340 max_output_tokens, ..
341 } => max_output_tokens.unwrap_or(4_096),
342 }
343 }
344
345 pub fn default_temperature(&self) -> f32 {
346 match self {
347 Self::ClaudeOpus4
348 | Self::ClaudeOpus4_1
349 | Self::ClaudeOpus4Thinking
350 | Self::ClaudeOpus4_1Thinking
351 | Self::ClaudeSonnet4
352 | Self::ClaudeSonnet4Thinking
353 | Self::ClaudeSonnet4_5
354 | Self::ClaudeSonnet4_5Thinking
355 | Self::Claude3_5Sonnet
356 | Self::Claude3_7Sonnet
357 | Self::Claude3_7SonnetThinking
358 | Self::ClaudeHaiku4_5
359 | Self::ClaudeHaiku4_5Thinking
360 | Self::Claude3_5Haiku
361 | Self::Claude3Opus
362 | Self::Claude3Sonnet
363 | Self::Claude3Haiku => 1.0,
364 Self::Custom {
365 default_temperature,
366 ..
367 } => default_temperature.unwrap_or(1.0),
368 }
369 }
370
371 pub fn mode(&self) -> AnthropicModelMode {
372 match self {
373 Self::ClaudeOpus4
374 | Self::ClaudeOpus4_1
375 | Self::ClaudeSonnet4
376 | Self::ClaudeSonnet4_5
377 | Self::Claude3_5Sonnet
378 | Self::Claude3_7Sonnet
379 | Self::ClaudeHaiku4_5
380 | Self::Claude3_5Haiku
381 | Self::Claude3Opus
382 | Self::Claude3Sonnet
383 | Self::Claude3Haiku => AnthropicModelMode::Default,
384 Self::ClaudeOpus4Thinking
385 | Self::ClaudeOpus4_1Thinking
386 | Self::ClaudeSonnet4Thinking
387 | Self::ClaudeSonnet4_5Thinking
388 | Self::ClaudeHaiku4_5Thinking
389 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
390 budget_tokens: Some(4_096),
391 },
392 Self::Custom { mode, .. } => mode.clone(),
393 }
394 }
395
396 pub fn beta_headers(&self) -> String {
397 let mut headers = vec![];
398
399 match self {
400 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
401 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
402 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
403 headers.push("token-efficient-tools-2025-02-19".to_string());
404 }
405 Self::Custom {
406 extra_beta_headers, ..
407 } => {
408 headers.extend(
409 extra_beta_headers
410 .iter()
411 .filter(|header| !header.trim().is_empty())
412 .cloned(),
413 );
414 }
415 _ => {}
416 }
417
418 headers.join(",")
419 }
420
421 pub fn tool_model_id(&self) -> &str {
422 if let Self::Custom {
423 tool_override: Some(tool_override),
424 ..
425 } = self
426 {
427 tool_override
428 } else {
429 self.request_id()
430 }
431 }
432}
433
434pub async fn complete(
435 client: &dyn HttpClient,
436 api_url: &str,
437 api_key: &str,
438 request: Request,
439 beta_headers: String,
440) -> Result<Response, AnthropicError> {
441 let uri = format!("{api_url}/v1/messages");
442 let request_builder = HttpRequest::builder()
443 .method(Method::POST)
444 .uri(uri)
445 .header("Anthropic-Version", "2023-06-01")
446 .header("Anthropic-Beta", beta_headers)
447 .header("X-Api-Key", api_key.trim())
448 .header("Content-Type", "application/json");
449
450 let serialized_request =
451 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
452 let request = request_builder
453 .body(AsyncBody::from(serialized_request))
454 .map_err(AnthropicError::BuildRequestBody)?;
455
456 let mut response = client
457 .send(request)
458 .await
459 .map_err(AnthropicError::HttpSend)?;
460 let status_code = response.status();
461 let mut body = String::new();
462 response
463 .body_mut()
464 .read_to_string(&mut body)
465 .await
466 .map_err(AnthropicError::ReadResponse)?;
467
468 if status_code.is_success() {
469 Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
470 } else {
471 Err(AnthropicError::HttpResponseError {
472 status_code,
473 message: body,
474 })
475 }
476}
477
478pub async fn stream_completion(
479 client: &dyn HttpClient,
480 api_url: &str,
481 api_key: &str,
482 request: Request,
483 beta_headers: String,
484) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
485 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
486 .await
487 .map(|output| output.0)
488}
489
/// An individual rate limit.
///
/// Populated from the `anthropic-ratelimit-{resource}-limit`, `-remaining` and `-reset`
/// response headers.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `-limit` header.
    pub limit: usize,
    /// Value of the `-remaining` header.
    pub remaining: usize,
    /// Value of the `-reset` header, parsed as an RFC 3339 timestamp and converted to UTC.
    pub reset: DateTime<Utc>,
}
497
498impl RateLimit {
499 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
500 let limit =
501 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
502 let remaining = get_header(
503 &format!("anthropic-ratelimit-{resource}-remaining"),
504 headers,
505 )?
506 .parse()?;
507 let reset = DateTime::parse_from_rfc3339(get_header(
508 &format!("anthropic-ratelimit-{resource}-reset"),
509 headers,
510 )?)?
511 .to_utc();
512
513 Ok(Self {
514 limit,
515 remaining,
516 reset,
517 })
518 }
519}
520
/// Rate-limit information extracted from response headers.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Parsed `retry-after` header (whole seconds), if present and valid.
    pub retry_after: Option<Duration>,
    /// Limit for the `requests` resource; `None` if its headers are missing or invalid.
    pub requests: Option<RateLimit>,
    /// Limit for the `tokens` resource; `None` if its headers are missing or invalid.
    pub tokens: Option<RateLimit>,
    /// Limit for the `input-tokens` resource; `None` if its headers are missing or invalid.
    pub input_tokens: Option<RateLimit>,
    /// Limit for the `output-tokens` resource; `None` if its headers are missing or invalid.
    pub output_tokens: Option<RateLimit>,
}
530
531impl RateLimitInfo {
532 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
533 // Check if any rate limit headers exist
534 let has_rate_limit_headers = headers
535 .keys()
536 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
537
538 if !has_rate_limit_headers {
539 return Self {
540 retry_after: None,
541 requests: None,
542 tokens: None,
543 input_tokens: None,
544 output_tokens: None,
545 };
546 }
547
548 Self {
549 retry_after: parse_retry_after(headers),
550 requests: RateLimit::from_headers("requests", headers).ok(),
551 tokens: RateLimit::from_headers("tokens", headers).ok(),
552 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
553 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
554 }
555 }
556}
557
558/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
559/// seconds). Note that other services might specify an HTTP date or some other format for this
560/// header. Returns `None` if the header is not present or cannot be parsed.
561pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
562 headers
563 .get("retry-after")
564 .and_then(|v| v.to_str().ok())
565 .and_then(|v| v.parse::<u64>().ok())
566 .map(Duration::from_secs)
567}
568
569fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
570 Ok(headers
571 .get(key)
572 .with_context(|| format!("missing header `{key}`"))?
573 .to_str()?)
574}
575
576pub async fn stream_completion_with_rate_limit_info(
577 client: &dyn HttpClient,
578 api_url: &str,
579 api_key: &str,
580 request: Request,
581 beta_headers: String,
582) -> Result<
583 (
584 BoxStream<'static, Result<Event, AnthropicError>>,
585 Option<RateLimitInfo>,
586 ),
587 AnthropicError,
588> {
589 let request = StreamingRequest {
590 base: request,
591 stream: true,
592 };
593 let uri = format!("{api_url}/v1/messages");
594
595 let request_builder = HttpRequest::builder()
596 .method(Method::POST)
597 .uri(uri)
598 .header("Anthropic-Version", "2023-06-01")
599 .header("Anthropic-Beta", beta_headers)
600 .header("X-Api-Key", api_key.trim())
601 .header("Content-Type", "application/json");
602 let serialized_request =
603 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
604 let request = request_builder
605 .body(AsyncBody::from(serialized_request))
606 .map_err(AnthropicError::BuildRequestBody)?;
607
608 let mut response = client
609 .send(request)
610 .await
611 .map_err(AnthropicError::HttpSend)?;
612 let rate_limits = RateLimitInfo::from_headers(response.headers());
613 if response.status().is_success() {
614 let reader = BufReader::new(response.into_body());
615 let stream = reader
616 .lines()
617 .filter_map(|line| async move {
618 match line {
619 Ok(line) => {
620 let line = line.strip_prefix("data: ")?;
621 match serde_json::from_str(line) {
622 Ok(response) => Some(Ok(response)),
623 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
624 }
625 }
626 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
627 }
628 })
629 .boxed();
630 Ok((stream, Some(rate_limits)))
631 } else if response.status().as_u16() == 529 {
632 Err(AnthropicError::ServerOverloaded {
633 retry_after: rate_limits.retry_after,
634 })
635 } else if let Some(retry_after) = rate_limits.retry_after {
636 Err(AnthropicError::RateLimit { retry_after })
637 } else {
638 let mut body = String::new();
639 response
640 .body_mut()
641 .read_to_string(&mut body)
642 .await
643 .map_err(AnthropicError::ReadResponse)?;
644
645 match serde_json::from_str::<Event>(&body) {
646 Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
647 Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
648 status_code: response.status(),
649 message: body,
650 }),
651 }
652 }
653}
654
/// The kind of cache control attached to a content block; serialized in lowercase
/// (`"ephemeral"`).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
660
/// Cache-control marker that can be attached to request content blocks.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
666
/// A single message of a [`Request`]: who is speaking and the content blocks sent.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    /// The author of the message.
    pub role: Role,
    /// The content blocks making up the message, in order.
    pub content: Vec<RequestContent>,
}
672
/// The author of a message; serialized in lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
679
/// A content block inside a request message, internally tagged by its `type` field.
///
/// Every `cache_control` field is left out of the serialized JSON when it is `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A thinking block together with its signature (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A redacted thinking block carrying opaque `data` (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image (`"type": "image"`).
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, referring to it by `tool_use_id`
    /// (`"type": "tool_result"`).
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
721
/// The payload of a tool result.
///
/// Untagged: serialized either as a bare JSON string or as an array of parts.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    /// A plain string result.
    Plain(String),
    /// A result made of several text and/or image parts.
    Multipart(Vec<ToolResultPart>),
}
728
/// One part of a multipart tool result, tagged by a lowercase `type` field
/// (`"text"` / `"image"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
735
/// A content block inside a response, internally tagged by its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// Plain text (`"type": "text"`).
    #[serde(rename = "text")]
    Text { text: String },
    /// A thinking block (`"type": "thinking"`).
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// A redacted thinking block carrying opaque `data` (`"type": "redacted_thinking"`).
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
752
/// Describes the source of an image content block.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Serialized under the JSON key `type`.
    // NOTE(review): presumably the encoding kind (e.g. base64) — confirm against callers.
    #[serde(rename = "type")]
    pub source_type: String,
    // NOTE(review): presumably a MIME type such as `image/png` — confirm against callers.
    pub media_type: String,
    /// The image payload as a string.
    pub data: String,
}
760
/// A tool definition sent in a [`Request`]'s `tools` list.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    /// The tool's name.
    pub name: String,
    /// A description of the tool.
    pub description: String,
    /// Arbitrary JSON describing the tool's input.
    // NOTE(review): presumably a JSON Schema object — confirm against the API docs.
    pub input_schema: serde_json::Value,
}
767
/// Tool-selection setting of a [`Request`], tagged by a lowercase `type` field
/// (`"auto"`, `"any"`, `"tool"`, `"none"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Selects the tool with the given `name`.
    Tool { name: String },
    None,
}
776
/// Thinking setting of a [`Request`], tagged by a lowercase `type` field (`"enabled"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Thinking is enabled, with an optional token budget. Note that a `None` budget is
    /// still serialized (as `null`) because the field has no `skip_serializing_if`.
    Enabled { budget_tokens: Option<u32> },
}
782
/// Either a bare string or a list of content blocks; used for [`Request::system`].
///
/// Untagged: serialized as a JSON string or a JSON array respectively.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
789
/// The body of a `/v1/messages` request.
///
/// `model`, `max_tokens` and `messages` are always serialized. Every other field is
/// optional: it defaults when missing on deserialization and is left out of the
/// serialized JSON when empty (`Vec`) or `None` (`Option`).
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// The model id to send the request to.
    pub model: String,
    /// The `max_tokens` value sent with the request.
    pub max_tokens: u64,
    /// The conversation messages, in order.
    pub messages: Vec<Message>,
    /// Tool definitions; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Thinking setting; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Tool-selection setting; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt as a string or as content blocks; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Request metadata; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// Stop sequences; omitted when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    /// Sampling temperature; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Top-k sampling parameter; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Top-p sampling parameter; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
814
/// A [`Request`] with an additional `stream` flag.
///
/// `base` is flattened, so its fields appear at the top level of the JSON object
/// next to `stream`.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
821
/// Metadata attached to a [`Request`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// Optional user identifier; serialized as `null` when `None`.
    pub user_id: Option<String>,
}
826
/// Token usage counters reported with a response.
///
/// Every counter is optional: it is `None` when missing from the input and is left
/// out of the serialized JSON when `None`.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
838
/// A message returned by the API: the result of [`complete`] and the payload of a
/// `message_start` stream event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    /// Identifier of the response message.
    pub id: String,
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    /// The author of the message.
    pub role: Role,
    /// The content blocks of the message, in order.
    pub content: Vec<ResponseContent>,
    /// The model id reported in the response.
    pub model: String,
    /// Why generation stopped, if reported; omitted from serialization when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// The stop sequence that was hit, if reported; omitted from serialization when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    /// Token usage counters for this response.
    pub usage: Usage,
}
853
/// A streaming event, internally tagged by its `type` field.
///
/// Parsed from the `data: ` lines of a streaming response, and also used to detect
/// structured `error` bodies on failed requests.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// `message_start`: carries the initial [`Response`] message.
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    /// `content_block_start`: a content block at `index` begins.
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// `content_block_delta`: an incremental update for the content block at `index`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    /// `content_block_stop`: the content block at `index` ends.
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// `message_delta`: message-level update plus usage counters.
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    /// `message_stop`: carries no payload.
    #[serde(rename = "message_stop")]
    MessageStop,
    /// `ping`: carries no payload.
    #[serde(rename = "ping")]
    Ping,
    /// `error`: carries an [`ApiError`].
    #[serde(rename = "error")]
    Error { error: ApiError },
}
877
/// The payload of a `content_block_delta` event, internally tagged by its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    /// `text_delta`: a piece of text.
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    /// `thinking_delta`: a piece of thinking text.
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// `signature_delta`: a signature string.
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// `input_json_delta`: a fragment of JSON text.
    // NOTE(review): presumably fragments must be concatenated before parsing — confirm
    // against the stream consumer.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
890
/// The payload of a `message_delta` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    /// Why generation stopped, if reported.
    pub stop_reason: Option<String>,
    /// The stop sequence that was hit, if reported.
    pub stop_sequence: Option<String>,
}
896
/// Errors produced by the request functions in this file ([`complete`] and the
/// streaming variants).
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API; `message` holds the raw response body
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded; returned by the streaming request for a non-success response
    /// (other than 529) that carries a parseable `retry-after` header
    RateLimit { retry_after: Duration },

    /// Server overloaded; returned by the streaming request for a 529 status
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
929
/// A structured error object returned by the API.
///
/// Displays as `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// The error's `type` string, e.g. `rate_limit_error`; see [`ApiErrorCode`] for the
    /// values this file recognizes.
    #[serde(rename = "type")]
    pub error_type: String,
    /// The error message text.
    pub message: String,
}
937
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from the snake_case form of the variant name (for example
/// `invalid_request_error`) through the derived `FromStr` implementation.
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
960
961impl ApiError {
962 pub fn code(&self) -> Option<ApiErrorCode> {
963 ApiErrorCode::from_str(&self.error_type).ok()
964 }
965
966 pub fn is_rate_limit_error(&self) -> bool {
967 matches!(self.error_type.as_str(), "rate_limit_error")
968 }
969
970 pub fn match_window_exceeded(&self) -> Option<u64> {
971 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
972 return None;
973 };
974
975 parse_prompt_too_long(&self.message)
976 }
977}
978
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens…`.
///
/// Returns `None` when the message does not start with `prompt is too long: `, does
/// not contain ` tokens` after that prefix, or when the text between the two is not
/// a valid `u64`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _rest) = after_prefix.split_once(" tokens")?;
    token_count.parse().ok()
}
987
/// Checks `ApiError::match_window_exceeded` against over-long-prompt messages,
/// unrelated messages, non-matching error types, and unparseable token counts.
#[test]
fn test_match_window_exceeded() {
    // (error_type, message, expected token count)
    let cases: [(&str, &str, Option<u64>); 5] = [
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        // The message matches, but the error type is not `invalid_request_error`.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(
            error.match_window_exceeded(),
            expected,
            "error_type={error_type:?}, message={message:?}"
        );
    }
}