1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
16
/// Prompt-caching settings attached to a model.
///
/// Built-in models receive a fixed value from `Model::cache_configuration`, while
/// `Model::Custom` carries its own optional copy.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum prompt size, in tokens, before caching is
    // worthwhile — confirm against the code that consumes this value.
    pub min_total_token: u64,
    // NOTE(review): the exact meaning of "speculate" is not visible in this file — verify
    // against the caller.
    pub should_speculate: bool,
    // NOTE(review): presumably an upper bound on cache markers per request — confirm.
    pub max_cache_anchors: usize,
}
24
/// Mode a model runs in: plain completion or thinking.
///
/// Converts losslessly to and from the settings-level `ModelMode` via the `From` impls
/// in this file.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// Regular mode; this is the enum's `Default` value.
    #[default]
    Default,
    /// Thinking mode, carrying an optional token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
}
34
35impl From<ModelMode> for AnthropicModelMode {
36 fn from(value: ModelMode) -> Self {
37 match value {
38 ModelMode::Default => AnthropicModelMode::Default,
39 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
40 }
41 }
42}
43
44impl From<AnthropicModelMode> for ModelMode {
45 fn from(value: AnthropicModelMode) -> Self {
46 match value {
47 AnthropicModelMode::Default => ModelMode::Default,
48 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
49 }
50 }
51}
52
/// The Anthropic models known to this crate, plus a user-defined `Custom` entry.
///
/// Each built-in variant (de)serializes under the short name given by its `rename`
/// attribute and additionally accepts the `-latest` alias when deserializing.
/// `ClaudeSonnet4_5` is the `Default` model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-defined model; every per-model property is supplied by its fields.
    #[serde(rename = "custom")]
    Custom {
        /// Identifier returned by both `Model::id` and `Model::request_id`.
        name: String,
        /// Value returned by `Model::max_token_count`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        /// Output-token cap; `Model::max_output_tokens` falls back to 4096 when absent.
        max_output_tokens: Option<u64>,
        /// Sampling temperature; `Model::default_temperature` falls back to 1.0 when absent.
        default_temperature: Option<f32>,
        /// Extra entries appended to the beta header list built by `Model::beta_headers`.
        /// Defaults to an empty list when missing from the input.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        /// Mode of the custom model; defaults to `AnthropicModelMode::Default` when missing.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
127
128impl Model {
129 pub fn default_fast() -> Self {
130 Self::Claude3_5Haiku
131 }
132
133 pub fn from_id(id: &str) -> Result<Self> {
134 if id.starts_with("claude-opus-4-1-thinking") {
135 return Ok(Self::ClaudeOpus4_1Thinking);
136 }
137
138 if id.starts_with("claude-opus-4-thinking") {
139 return Ok(Self::ClaudeOpus4Thinking);
140 }
141
142 if id.starts_with("claude-opus-4-1") {
143 return Ok(Self::ClaudeOpus4_1);
144 }
145
146 if id.starts_with("claude-opus-4") {
147 return Ok(Self::ClaudeOpus4);
148 }
149
150 if id.starts_with("claude-sonnet-4-5-thinking") {
151 return Ok(Self::ClaudeSonnet4_5Thinking);
152 }
153
154 if id.starts_with("claude-sonnet-4-5") {
155 return Ok(Self::ClaudeSonnet4_5);
156 }
157
158 if id.starts_with("claude-sonnet-4-thinking") {
159 return Ok(Self::ClaudeSonnet4Thinking);
160 }
161
162 if id.starts_with("claude-sonnet-4") {
163 return Ok(Self::ClaudeSonnet4);
164 }
165
166 if id.starts_with("claude-3-7-sonnet-thinking") {
167 return Ok(Self::Claude3_7SonnetThinking);
168 }
169
170 if id.starts_with("claude-3-7-sonnet") {
171 return Ok(Self::Claude3_7Sonnet);
172 }
173
174 if id.starts_with("claude-3-5-sonnet") {
175 return Ok(Self::Claude3_5Sonnet);
176 }
177
178 if id.starts_with("claude-haiku-4-5-thinking") {
179 return Ok(Self::ClaudeHaiku4_5Thinking);
180 }
181
182 if id.starts_with("claude-haiku-4-5") {
183 return Ok(Self::ClaudeHaiku4_5);
184 }
185
186 if id.starts_with("claude-3-5-haiku") {
187 return Ok(Self::Claude3_5Haiku);
188 }
189
190 if id.starts_with("claude-3-opus") {
191 return Ok(Self::Claude3Opus);
192 }
193
194 if id.starts_with("claude-3-sonnet") {
195 return Ok(Self::Claude3Sonnet);
196 }
197
198 if id.starts_with("claude-3-haiku") {
199 return Ok(Self::Claude3Haiku);
200 }
201
202 Err(anyhow!("invalid model ID: {id}"))
203 }
204
205 pub fn id(&self) -> &str {
206 match self {
207 Self::ClaudeOpus4 => "claude-opus-4-latest",
208 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
209 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
210 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
211 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
212 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
213 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
214 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
215 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
216 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
217 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
218 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
219 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
220 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
221 Self::Claude3Opus => "claude-3-opus-latest",
222 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
223 Self::Claude3Haiku => "claude-3-haiku-20240307",
224 Self::Custom { name, .. } => name,
225 }
226 }
227
228 /// The id of the model that should be used for making API requests
229 pub fn request_id(&self) -> &str {
230 match self {
231 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
232 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
233 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
234 Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
235 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
236 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
237 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
238 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
239 Self::Claude3Opus => "claude-3-opus-latest",
240 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
241 Self::Claude3Haiku => "claude-3-haiku-20240307",
242 Self::Custom { name, .. } => name,
243 }
244 }
245
246 pub fn display_name(&self) -> &str {
247 match self {
248 Self::ClaudeOpus4 => "Claude Opus 4",
249 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
250 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
251 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
252 Self::ClaudeSonnet4 => "Claude Sonnet 4",
253 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
254 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
255 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
256 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
257 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
258 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
259 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
260 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
261 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
262 Self::Claude3Opus => "Claude 3 Opus",
263 Self::Claude3Sonnet => "Claude 3 Sonnet",
264 Self::Claude3Haiku => "Claude 3 Haiku",
265 Self::Custom {
266 name, display_name, ..
267 } => display_name.as_ref().unwrap_or(name),
268 }
269 }
270
271 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
272 match self {
273 Self::ClaudeOpus4
274 | Self::ClaudeOpus4_1
275 | Self::ClaudeOpus4Thinking
276 | Self::ClaudeOpus4_1Thinking
277 | Self::ClaudeSonnet4
278 | Self::ClaudeSonnet4Thinking
279 | Self::ClaudeSonnet4_5
280 | Self::ClaudeSonnet4_5Thinking
281 | Self::Claude3_5Sonnet
282 | Self::ClaudeHaiku4_5
283 | Self::ClaudeHaiku4_5Thinking
284 | Self::Claude3_5Haiku
285 | Self::Claude3_7Sonnet
286 | Self::Claude3_7SonnetThinking
287 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
288 min_total_token: 2_048,
289 should_speculate: true,
290 max_cache_anchors: 4,
291 }),
292 Self::Custom {
293 cache_configuration,
294 ..
295 } => cache_configuration.clone(),
296 _ => None,
297 }
298 }
299
300 pub fn max_token_count(&self) -> u64 {
301 match self {
302 Self::ClaudeOpus4
303 | Self::ClaudeOpus4_1
304 | Self::ClaudeOpus4Thinking
305 | Self::ClaudeOpus4_1Thinking
306 | Self::ClaudeSonnet4
307 | Self::ClaudeSonnet4Thinking
308 | Self::ClaudeSonnet4_5
309 | Self::ClaudeSonnet4_5Thinking
310 | Self::Claude3_5Sonnet
311 | Self::ClaudeHaiku4_5
312 | Self::ClaudeHaiku4_5Thinking
313 | Self::Claude3_5Haiku
314 | Self::Claude3_7Sonnet
315 | Self::Claude3_7SonnetThinking
316 | Self::Claude3Opus
317 | Self::Claude3Sonnet
318 | Self::Claude3Haiku => 200_000,
319 Self::Custom { max_tokens, .. } => *max_tokens,
320 }
321 }
322
323 pub fn max_output_tokens(&self) -> u64 {
324 match self {
325 Self::ClaudeOpus4
326 | Self::ClaudeOpus4_1
327 | Self::ClaudeOpus4Thinking
328 | Self::ClaudeOpus4_1Thinking
329 | Self::ClaudeSonnet4
330 | Self::ClaudeSonnet4Thinking
331 | Self::ClaudeSonnet4_5
332 | Self::ClaudeSonnet4_5Thinking
333 | Self::Claude3_5Sonnet
334 | Self::Claude3_7Sonnet
335 | Self::Claude3_7SonnetThinking
336 | Self::Claude3_5Haiku => 8_192,
337 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
338 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
339 Self::Custom {
340 max_output_tokens, ..
341 } => max_output_tokens.unwrap_or(4_096),
342 }
343 }
344
345 pub fn default_temperature(&self) -> f32 {
346 match self {
347 Self::ClaudeOpus4
348 | Self::ClaudeOpus4_1
349 | Self::ClaudeOpus4Thinking
350 | Self::ClaudeOpus4_1Thinking
351 | Self::ClaudeSonnet4
352 | Self::ClaudeSonnet4Thinking
353 | Self::ClaudeSonnet4_5
354 | Self::ClaudeSonnet4_5Thinking
355 | Self::Claude3_5Sonnet
356 | Self::Claude3_7Sonnet
357 | Self::Claude3_7SonnetThinking
358 | Self::ClaudeHaiku4_5
359 | Self::ClaudeHaiku4_5Thinking
360 | Self::Claude3_5Haiku
361 | Self::Claude3Opus
362 | Self::Claude3Sonnet
363 | Self::Claude3Haiku => 1.0,
364 Self::Custom {
365 default_temperature,
366 ..
367 } => default_temperature.unwrap_or(1.0),
368 }
369 }
370
371 pub fn mode(&self) -> AnthropicModelMode {
372 match self {
373 Self::ClaudeOpus4
374 | Self::ClaudeOpus4_1
375 | Self::ClaudeSonnet4
376 | Self::ClaudeSonnet4_5
377 | Self::Claude3_5Sonnet
378 | Self::Claude3_7Sonnet
379 | Self::ClaudeHaiku4_5
380 | Self::Claude3_5Haiku
381 | Self::Claude3Opus
382 | Self::Claude3Sonnet
383 | Self::Claude3Haiku => AnthropicModelMode::Default,
384 Self::ClaudeOpus4Thinking
385 | Self::ClaudeOpus4_1Thinking
386 | Self::ClaudeSonnet4Thinking
387 | Self::ClaudeSonnet4_5Thinking
388 | Self::ClaudeHaiku4_5Thinking
389 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
390 budget_tokens: Some(4_096),
391 },
392 Self::Custom { mode, .. } => mode.clone(),
393 }
394 }
395
396 pub const DEFAULT_BETA_HEADERS: &[&str] = &["prompt-caching-2024-07-31"];
397
398 pub fn beta_headers(&self) -> String {
399 let mut headers = Self::DEFAULT_BETA_HEADERS
400 .iter()
401 .map(|header| header.to_string())
402 .collect::<Vec<_>>();
403
404 match self {
405 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
406 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
407 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
408 headers.push("token-efficient-tools-2025-02-19".to_string());
409 }
410 Self::Custom {
411 extra_beta_headers, ..
412 } => {
413 headers.extend(
414 extra_beta_headers
415 .iter()
416 .filter(|header| !header.trim().is_empty())
417 .cloned(),
418 );
419 }
420 _ => {}
421 }
422
423 headers.join(",")
424 }
425
426 pub fn tool_model_id(&self) -> &str {
427 if let Self::Custom {
428 tool_override: Some(tool_override),
429 ..
430 } = self
431 {
432 tool_override
433 } else {
434 self.request_id()
435 }
436 }
437}
438
439pub async fn complete(
440 client: &dyn HttpClient,
441 api_url: &str,
442 api_key: &str,
443 request: Request,
444 beta_headers: String,
445) -> Result<Response, AnthropicError> {
446 let uri = format!("{api_url}/v1/messages");
447 let request_builder = HttpRequest::builder()
448 .method(Method::POST)
449 .uri(uri)
450 .header("Anthropic-Version", "2023-06-01")
451 .header("Anthropic-Beta", beta_headers)
452 .header("X-Api-Key", api_key.trim())
453 .header("Content-Type", "application/json");
454
455 let serialized_request =
456 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
457 let request = request_builder
458 .body(AsyncBody::from(serialized_request))
459 .map_err(AnthropicError::BuildRequestBody)?;
460
461 let mut response = client
462 .send(request)
463 .await
464 .map_err(AnthropicError::HttpSend)?;
465 let status_code = response.status();
466 let mut body = String::new();
467 response
468 .body_mut()
469 .read_to_string(&mut body)
470 .await
471 .map_err(AnthropicError::ReadResponse)?;
472
473 if status_code.is_success() {
474 Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
475 } else {
476 Err(AnthropicError::HttpResponseError {
477 status_code,
478 message: body,
479 })
480 }
481}
482
483pub async fn stream_completion(
484 client: &dyn HttpClient,
485 api_url: &str,
486 api_key: &str,
487 request: Request,
488 beta_headers: String,
489) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
490 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
491 .await
492 .map(|output| output.0)
493}
494
/// An individual rate limit.
///
/// Populated from the three `anthropic-ratelimit-{resource}-*` response headers.
#[derive(Debug)]
pub struct RateLimit {
    /// Parsed from the `…-limit` header.
    pub limit: usize,
    /// Parsed from the `…-remaining` header.
    pub remaining: usize,
    /// Parsed from the RFC 3339 timestamp in the `…-reset` header, converted to UTC.
    pub reset: DateTime<Utc>,
}
502
503impl RateLimit {
504 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
505 let limit =
506 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
507 let remaining = get_header(
508 &format!("anthropic-ratelimit-{resource}-remaining"),
509 headers,
510 )?
511 .parse()?;
512 let reset = DateTime::parse_from_rfc3339(get_header(
513 &format!("anthropic-ratelimit-{resource}-reset"),
514 headers,
515 )?)?
516 .to_utc();
517
518 Ok(Self {
519 limit,
520 remaining,
521 reset,
522 })
523 }
524}
525
/// Rate-limit information extracted from response headers.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
///
/// Every field is optional: a field is `None` when its headers are absent or fail to parse.
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Value of the `retry-after` header, interpreted as whole seconds.
    pub retry_after: Option<Duration>,
    /// Limit read from the `anthropic-ratelimit-requests-*` headers.
    pub requests: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-tokens-*` headers.
    pub tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-input-tokens-*` headers.
    pub input_tokens: Option<RateLimit>,
    /// Limit read from the `anthropic-ratelimit-output-tokens-*` headers.
    pub output_tokens: Option<RateLimit>,
}
535
536impl RateLimitInfo {
537 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
538 // Check if any rate limit headers exist
539 let has_rate_limit_headers = headers
540 .keys()
541 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
542
543 if !has_rate_limit_headers {
544 return Self {
545 retry_after: None,
546 requests: None,
547 tokens: None,
548 input_tokens: None,
549 output_tokens: None,
550 };
551 }
552
553 Self {
554 retry_after: parse_retry_after(headers),
555 requests: RateLimit::from_headers("requests", headers).ok(),
556 tokens: RateLimit::from_headers("tokens", headers).ok(),
557 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
558 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
559 }
560 }
561}
562
563/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
564/// seconds). Note that other services might specify an HTTP date or some other format for this
565/// header. Returns `None` if the header is not present or cannot be parsed.
566pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
567 headers
568 .get("retry-after")
569 .and_then(|v| v.to_str().ok())
570 .and_then(|v| v.parse::<u64>().ok())
571 .map(Duration::from_secs)
572}
573
574fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
575 Ok(headers
576 .get(key)
577 .with_context(|| format!("missing header `{key}`"))?
578 .to_str()?)
579}
580
581pub async fn stream_completion_with_rate_limit_info(
582 client: &dyn HttpClient,
583 api_url: &str,
584 api_key: &str,
585 request: Request,
586 beta_headers: String,
587) -> Result<
588 (
589 BoxStream<'static, Result<Event, AnthropicError>>,
590 Option<RateLimitInfo>,
591 ),
592 AnthropicError,
593> {
594 let request = StreamingRequest {
595 base: request,
596 stream: true,
597 };
598 let uri = format!("{api_url}/v1/messages");
599
600 let request_builder = HttpRequest::builder()
601 .method(Method::POST)
602 .uri(uri)
603 .header("Anthropic-Version", "2023-06-01")
604 .header("Anthropic-Beta", beta_headers)
605 .header("X-Api-Key", api_key.trim())
606 .header("Content-Type", "application/json");
607 let serialized_request =
608 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
609 let request = request_builder
610 .body(AsyncBody::from(serialized_request))
611 .map_err(AnthropicError::BuildRequestBody)?;
612
613 let mut response = client
614 .send(request)
615 .await
616 .map_err(AnthropicError::HttpSend)?;
617 let rate_limits = RateLimitInfo::from_headers(response.headers());
618 if response.status().is_success() {
619 let reader = BufReader::new(response.into_body());
620 let stream = reader
621 .lines()
622 .filter_map(|line| async move {
623 match line {
624 Ok(line) => {
625 let line = line.strip_prefix("data: ")?;
626 match serde_json::from_str(line) {
627 Ok(response) => Some(Ok(response)),
628 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
629 }
630 }
631 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
632 }
633 })
634 .boxed();
635 Ok((stream, Some(rate_limits)))
636 } else if response.status().as_u16() == 529 {
637 Err(AnthropicError::ServerOverloaded {
638 retry_after: rate_limits.retry_after,
639 })
640 } else if let Some(retry_after) = rate_limits.retry_after {
641 Err(AnthropicError::RateLimit { retry_after })
642 } else {
643 let mut body = String::new();
644 response
645 .body_mut()
646 .read_to_string(&mut body)
647 .await
648 .map_err(AnthropicError::ReadResponse)?;
649
650 match serde_json::from_str::<Event>(&body) {
651 Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
652 Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
653 status_code: response.status(),
654 message: body,
655 }),
656 }
657 }
658}
659
/// Kind of cache control; serialized in lowercase (`ephemeral`).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
665
/// Cache-control marker that can be attached to request content blocks.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
671
/// One message of a request: who is speaking and the content blocks they sent.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
677
/// Author of a message; serialized in lowercase (`user` / `assistant`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
684
/// A content block inside a request message.
///
/// Internally tagged: the variant is selected by the JSON field `type`. Where a variant
/// has a `cache_control` field, it is left out of the serialized output when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// `type: "text"`
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// `type: "thinking"`
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// `type: "redacted_thinking"`
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// `type: "image"`
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// `type: "tool_use"`
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// `type: "tool_result"`
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
726
/// Payload of a tool result: either a bare string or a list of parts.
///
/// Untagged: serialized as the inner value itself, and deserialized by trying the
/// variants in declaration order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
733
/// One part of a multipart tool result; tagged by `type` with lowercase variant names
/// (`text` / `image`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
740
/// A content block received in a response; the variant is selected by the JSON field
/// `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// `type: "text"`
    #[serde(rename = "text")]
    Text { text: String },
    /// `type: "thinking"`
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// `type: "redacted_thinking"`
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// `type: "tool_use"`
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
757
/// Source of an image content block.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    // NOTE(review): the encoding of `data` is not established in this file — confirm
    // with the code that builds image sources.
    pub data: String,
}
765
/// A tool definition sent with a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// Arbitrary JSON value describing the tool's input.
    pub input_schema: serde_json::Value,
}
772
/// Tool-choice setting of a request; tagged by `type` with lowercase variant names
/// (`auto`, `any`, `tool`, `none`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Names one specific tool.
    Tool { name: String },
    None,
}
781
/// Thinking configuration of a request; tagged by `type` with lowercase variant names
/// (`enabled`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}
787
/// Either a plain string or a list of content blocks (used for `Request::system`).
///
/// Untagged: serialized as the inner value itself, and deserialized by trying the
/// variants in declaration order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
794
/// Body of a request to the `/v1/messages` endpoint.
///
/// Empty vectors and `None` options are left out of the serialized JSON, and default to
/// empty / `None` when missing on deserialization.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
819
/// A [`Request`] plus a `stream` flag, used for the streaming endpoint call.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    /// Flattened: the request's fields appear at the top level of the JSON object.
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
826
/// Optional metadata attached to a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
831
/// Token usage counters reported by the API.
///
/// Every counter is optional: missing fields deserialize to `None`, and `None` fields
/// are omitted when serializing.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
843
/// A message returned by the API: the result of `complete`, and the payload of the
/// `message_start` streaming event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    /// Omitted from serialized output when `None`; defaults to `None` when missing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// Omitted from serialized output when `None`; defaults to `None` when missing.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
858
/// An event of the streaming response.
///
/// The streaming code in this file parses one `Event` from each `data: ` line; the
/// variant is selected by the JSON field `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// `type: "message_start"`
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    /// `type: "content_block_start"`
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// `type: "content_block_delta"`
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    /// `type: "content_block_stop"`
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// `type: "message_delta"`
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    /// `type: "message_stop"`
    #[serde(rename = "message_stop")]
    MessageStop,
    /// `type: "ping"`
    #[serde(rename = "ping")]
    Ping,
    /// `type: "error"`; also used to decode the body of a failed streaming request.
    #[serde(rename = "error")]
    Error { error: ApiError },
}
882
/// Incremental update carried by a `content_block_delta` event; the variant is selected
/// by the JSON field `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    /// `type: "text_delta"`
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    /// `type: "thinking_delta"`
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// `type: "signature_delta"`
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// `type: "input_json_delta"`
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
895
/// Payload of a `message_delta` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
901
/// Everything that can go wrong while talking to the API from this file, with one
/// variant per failure stage or response class.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        /// The raw response body.
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
934
/// Structured error object returned by the API.
///
/// Displays as `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Serialized under the JSON key `type`; `ApiError::code` parses it into an
    /// [`ApiErrorCode`] when it is a known value.
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
942
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from its snake_case string form (e.g. `invalid_request_error`) through the
/// derived `FromStr` implementation.
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
965
966impl ApiError {
967 pub fn code(&self) -> Option<ApiErrorCode> {
968 ApiErrorCode::from_str(&self.error_type).ok()
969 }
970
971 pub fn is_rate_limit_error(&self) -> bool {
972 matches!(self.error_type.as_str(), "rate_limit_error")
973 }
974
975 pub fn match_window_exceeded(&self) -> Option<u64> {
976 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
977 return None;
978 };
979
980 parse_prompt_too_long(&self.message)
981 }
982}
983
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens…`.
///
/// Returns `None` when the prefix is missing, when ` tokens` does not follow, or when
/// the text in between is not an unsigned integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (count_text, _remainder) = after_prefix.split_once(" tokens")?;
    count_text.parse().ok()
}
992
/// Checks `ApiError::match_window_exceeded` against a table of
/// (error type, message, expected result) cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Over-long prompt with a trailing comparison.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Over-long prompt with nothing after the unit.
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Matching shape, non-numeric count.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}