1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
/// Base URL of the Anthropic HTTP API; request helpers in this file append
/// `/v1/messages` to whatever `api_url` they are handed.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
16
17#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
18#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
19pub struct AnthropicModelCacheConfiguration {
20 pub min_total_token: u64,
21 pub should_speculate: bool,
22 pub max_cache_anchors: usize,
23}
24
25#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
26#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
27pub enum AnthropicModelMode {
28 #[default]
29 Default,
30 Thinking {
31 budget_tokens: Option<u32>,
32 },
33}
34
35impl From<ModelMode> for AnthropicModelMode {
36 fn from(value: ModelMode) -> Self {
37 match value {
38 ModelMode::Default => AnthropicModelMode::Default,
39 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
40 }
41 }
42}
43
44impl From<AnthropicModelMode> for ModelMode {
45 fn from(value: AnthropicModelMode) -> Self {
46 match value {
47 AnthropicModelMode::Default => ModelMode::Default,
48 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
49 }
50 }
51}
52
53#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
54#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
55pub enum Model {
56 #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
57 ClaudeOpus4,
58 #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
59 ClaudeOpus4_1,
60 #[serde(
61 rename = "claude-opus-4-thinking",
62 alias = "claude-opus-4-thinking-latest"
63 )]
64 ClaudeOpus4Thinking,
65 #[serde(
66 rename = "claude-opus-4-1-thinking",
67 alias = "claude-opus-4-1-thinking-latest"
68 )]
69 ClaudeOpus4_1Thinking,
70 #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
71 ClaudeOpus4_5,
72 #[serde(
73 rename = "claude-opus-4-5-thinking",
74 alias = "claude-opus-4-5-thinking-latest"
75 )]
76 ClaudeOpus4_5Thinking,
77 #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
78 ClaudeSonnet4,
79 #[serde(
80 rename = "claude-sonnet-4-thinking",
81 alias = "claude-sonnet-4-thinking-latest"
82 )]
83 ClaudeSonnet4Thinking,
84 #[default]
85 #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
86 ClaudeSonnet4_5,
87 #[serde(
88 rename = "claude-sonnet-4-5-thinking",
89 alias = "claude-sonnet-4-5-thinking-latest"
90 )]
91 ClaudeSonnet4_5Thinking,
92 #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
93 Claude3_7Sonnet,
94 #[serde(
95 rename = "claude-3-7-sonnet-thinking",
96 alias = "claude-3-7-sonnet-thinking-latest"
97 )]
98 Claude3_7SonnetThinking,
99 #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
100 Claude3_5Sonnet,
101 #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
102 ClaudeHaiku4_5,
103 #[serde(
104 rename = "claude-haiku-4-5-thinking",
105 alias = "claude-haiku-4-5-thinking-latest"
106 )]
107 ClaudeHaiku4_5Thinking,
108 #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
109 Claude3_5Haiku,
110 #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
111 Claude3Opus,
112 #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
113 Claude3Sonnet,
114 #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
115 Claude3Haiku,
116 #[serde(rename = "custom")]
117 Custom {
118 name: String,
119 max_tokens: u64,
120 /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
121 display_name: Option<String>,
122 /// Override this model with a different Anthropic model for tool calls.
123 tool_override: Option<String>,
124 /// Indicates whether this custom model supports caching.
125 cache_configuration: Option<AnthropicModelCacheConfiguration>,
126 max_output_tokens: Option<u64>,
127 default_temperature: Option<f32>,
128 #[serde(default)]
129 extra_beta_headers: Vec<String>,
130 #[serde(default)]
131 mode: AnthropicModelMode,
132 },
133}
134
135impl Model {
136 pub fn default_fast() -> Self {
137 Self::Claude3_5Haiku
138 }
139
140 pub fn from_id(id: &str) -> Result<Self> {
141 if id.starts_with("claude-opus-4-5-thinking") {
142 return Ok(Self::ClaudeOpus4_5Thinking);
143 }
144
145 if id.starts_with("claude-opus-4-5") {
146 return Ok(Self::ClaudeOpus4_5);
147 }
148
149 if id.starts_with("claude-opus-4-1-thinking") {
150 return Ok(Self::ClaudeOpus4_1Thinking);
151 }
152
153 if id.starts_with("claude-opus-4-thinking") {
154 return Ok(Self::ClaudeOpus4Thinking);
155 }
156
157 if id.starts_with("claude-opus-4-1") {
158 return Ok(Self::ClaudeOpus4_1);
159 }
160
161 if id.starts_with("claude-opus-4") {
162 return Ok(Self::ClaudeOpus4);
163 }
164
165 if id.starts_with("claude-sonnet-4-5-thinking") {
166 return Ok(Self::ClaudeSonnet4_5Thinking);
167 }
168
169 if id.starts_with("claude-sonnet-4-5") {
170 return Ok(Self::ClaudeSonnet4_5);
171 }
172
173 if id.starts_with("claude-sonnet-4-thinking") {
174 return Ok(Self::ClaudeSonnet4Thinking);
175 }
176
177 if id.starts_with("claude-sonnet-4") {
178 return Ok(Self::ClaudeSonnet4);
179 }
180
181 if id.starts_with("claude-3-7-sonnet-thinking") {
182 return Ok(Self::Claude3_7SonnetThinking);
183 }
184
185 if id.starts_with("claude-3-7-sonnet") {
186 return Ok(Self::Claude3_7Sonnet);
187 }
188
189 if id.starts_with("claude-3-5-sonnet") {
190 return Ok(Self::Claude3_5Sonnet);
191 }
192
193 if id.starts_with("claude-haiku-4-5-thinking") {
194 return Ok(Self::ClaudeHaiku4_5Thinking);
195 }
196
197 if id.starts_with("claude-haiku-4-5") {
198 return Ok(Self::ClaudeHaiku4_5);
199 }
200
201 if id.starts_with("claude-3-5-haiku") {
202 return Ok(Self::Claude3_5Haiku);
203 }
204
205 if id.starts_with("claude-3-opus") {
206 return Ok(Self::Claude3Opus);
207 }
208
209 if id.starts_with("claude-3-sonnet") {
210 return Ok(Self::Claude3Sonnet);
211 }
212
213 if id.starts_with("claude-3-haiku") {
214 return Ok(Self::Claude3Haiku);
215 }
216
217 Err(anyhow!("invalid model ID: {id}"))
218 }
219
220 pub fn id(&self) -> &str {
221 match self {
222 Self::ClaudeOpus4 => "claude-opus-4-latest",
223 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
224 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
225 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
226 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
227 Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
228 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
229 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
230 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
231 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
232 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
233 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
234 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
235 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
236 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
237 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
238 Self::Claude3Opus => "claude-3-opus-latest",
239 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
240 Self::Claude3Haiku => "claude-3-haiku-20240307",
241 Self::Custom { name, .. } => name,
242 }
243 }
244
245 /// The id of the model that should be used for making API requests
246 pub fn request_id(&self) -> &str {
247 match self {
248 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
249 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
250 Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
251 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
252 Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-20250929",
253 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
254 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
255 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
256 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
257 Self::Claude3Opus => "claude-3-opus-latest",
258 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
259 Self::Claude3Haiku => "claude-3-haiku-20240307",
260 Self::Custom { name, .. } => name,
261 }
262 }
263
264 pub fn display_name(&self) -> &str {
265 match self {
266 Self::ClaudeOpus4 => "Claude Opus 4",
267 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
268 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
269 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
270 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
271 Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
272 Self::ClaudeSonnet4 => "Claude Sonnet 4",
273 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
274 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
275 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
276 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
277 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
278 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
279 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
280 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
281 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
282 Self::Claude3Opus => "Claude 3 Opus",
283 Self::Claude3Sonnet => "Claude 3 Sonnet",
284 Self::Claude3Haiku => "Claude 3 Haiku",
285 Self::Custom {
286 name, display_name, ..
287 } => display_name.as_ref().unwrap_or(name),
288 }
289 }
290
291 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
292 match self {
293 Self::ClaudeOpus4
294 | Self::ClaudeOpus4_1
295 | Self::ClaudeOpus4Thinking
296 | Self::ClaudeOpus4_1Thinking
297 | Self::ClaudeOpus4_5
298 | Self::ClaudeOpus4_5Thinking
299 | Self::ClaudeSonnet4
300 | Self::ClaudeSonnet4Thinking
301 | Self::ClaudeSonnet4_5
302 | Self::ClaudeSonnet4_5Thinking
303 | Self::Claude3_5Sonnet
304 | Self::ClaudeHaiku4_5
305 | Self::ClaudeHaiku4_5Thinking
306 | Self::Claude3_5Haiku
307 | Self::Claude3_7Sonnet
308 | Self::Claude3_7SonnetThinking
309 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
310 min_total_token: 2_048,
311 should_speculate: true,
312 max_cache_anchors: 4,
313 }),
314 Self::Custom {
315 cache_configuration,
316 ..
317 } => cache_configuration.clone(),
318 _ => None,
319 }
320 }
321
322 pub fn max_token_count(&self) -> u64 {
323 match self {
324 Self::ClaudeOpus4
325 | Self::ClaudeOpus4_1
326 | Self::ClaudeOpus4Thinking
327 | Self::ClaudeOpus4_1Thinking
328 | Self::ClaudeOpus4_5
329 | Self::ClaudeOpus4_5Thinking
330 | Self::ClaudeSonnet4
331 | Self::ClaudeSonnet4Thinking
332 | Self::ClaudeSonnet4_5
333 | Self::ClaudeSonnet4_5Thinking
334 | Self::Claude3_5Sonnet
335 | Self::ClaudeHaiku4_5
336 | Self::ClaudeHaiku4_5Thinking
337 | Self::Claude3_5Haiku
338 | Self::Claude3_7Sonnet
339 | Self::Claude3_7SonnetThinking
340 | Self::Claude3Opus
341 | Self::Claude3Sonnet
342 | Self::Claude3Haiku => 200_000,
343 Self::Custom { max_tokens, .. } => *max_tokens,
344 }
345 }
346
347 pub fn max_output_tokens(&self) -> u64 {
348 match self {
349 Self::ClaudeOpus4
350 | Self::ClaudeOpus4_1
351 | Self::ClaudeOpus4Thinking
352 | Self::ClaudeOpus4_1Thinking
353 | Self::ClaudeOpus4_5
354 | Self::ClaudeOpus4_5Thinking
355 | Self::ClaudeSonnet4
356 | Self::ClaudeSonnet4Thinking
357 | Self::ClaudeSonnet4_5
358 | Self::ClaudeSonnet4_5Thinking
359 | Self::Claude3_5Sonnet
360 | Self::Claude3_7Sonnet
361 | Self::Claude3_7SonnetThinking
362 | Self::Claude3_5Haiku => 8_192,
363 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => 64_000,
364 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
365 Self::Custom {
366 max_output_tokens, ..
367 } => max_output_tokens.unwrap_or(4_096),
368 }
369 }
370
371 pub fn default_temperature(&self) -> f32 {
372 match self {
373 Self::ClaudeOpus4
374 | Self::ClaudeOpus4_1
375 | Self::ClaudeOpus4Thinking
376 | Self::ClaudeOpus4_1Thinking
377 | Self::ClaudeOpus4_5
378 | Self::ClaudeOpus4_5Thinking
379 | Self::ClaudeSonnet4
380 | Self::ClaudeSonnet4Thinking
381 | Self::ClaudeSonnet4_5
382 | Self::ClaudeSonnet4_5Thinking
383 | Self::Claude3_5Sonnet
384 | Self::Claude3_7Sonnet
385 | Self::Claude3_7SonnetThinking
386 | Self::ClaudeHaiku4_5
387 | Self::ClaudeHaiku4_5Thinking
388 | Self::Claude3_5Haiku
389 | Self::Claude3Opus
390 | Self::Claude3Sonnet
391 | Self::Claude3Haiku => 1.0,
392 Self::Custom {
393 default_temperature,
394 ..
395 } => default_temperature.unwrap_or(1.0),
396 }
397 }
398
399 pub fn mode(&self) -> AnthropicModelMode {
400 match self {
401 Self::ClaudeOpus4
402 | Self::ClaudeOpus4_1
403 | Self::ClaudeOpus4_5
404 | Self::ClaudeSonnet4
405 | Self::ClaudeSonnet4_5
406 | Self::Claude3_5Sonnet
407 | Self::Claude3_7Sonnet
408 | Self::ClaudeHaiku4_5
409 | Self::Claude3_5Haiku
410 | Self::Claude3Opus
411 | Self::Claude3Sonnet
412 | Self::Claude3Haiku => AnthropicModelMode::Default,
413 Self::ClaudeOpus4Thinking
414 | Self::ClaudeOpus4_1Thinking
415 | Self::ClaudeOpus4_5Thinking
416 | Self::ClaudeSonnet4Thinking
417 | Self::ClaudeSonnet4_5Thinking
418 | Self::ClaudeHaiku4_5Thinking
419 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
420 budget_tokens: Some(4_096),
421 },
422 Self::Custom { mode, .. } => mode.clone(),
423 }
424 }
425
426 pub const DEFAULT_BETA_HEADERS: &[&str] = &["prompt-caching-2024-07-31"];
427
428 pub fn beta_headers(&self) -> String {
429 let mut headers = Self::DEFAULT_BETA_HEADERS
430 .iter()
431 .map(|header| header.to_string())
432 .collect::<Vec<_>>();
433
434 match self {
435 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
436 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
437 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
438 headers.push("token-efficient-tools-2025-02-19".to_string());
439 }
440 Self::Custom {
441 extra_beta_headers, ..
442 } => {
443 headers.extend(
444 extra_beta_headers
445 .iter()
446 .filter(|header| !header.trim().is_empty())
447 .cloned(),
448 );
449 }
450 _ => {}
451 }
452
453 headers.join(",")
454 }
455
456 pub fn tool_model_id(&self) -> &str {
457 if let Self::Custom {
458 tool_override: Some(tool_override),
459 ..
460 } = self
461 {
462 tool_override
463 } else {
464 self.request_id()
465 }
466 }
467}
468
469pub async fn complete(
470 client: &dyn HttpClient,
471 api_url: &str,
472 api_key: &str,
473 request: Request,
474 beta_headers: String,
475) -> Result<Response, AnthropicError> {
476 let uri = format!("{api_url}/v1/messages");
477 let request_builder = HttpRequest::builder()
478 .method(Method::POST)
479 .uri(uri)
480 .header("Anthropic-Version", "2023-06-01")
481 .header("Anthropic-Beta", beta_headers)
482 .header("X-Api-Key", api_key.trim())
483 .header("Content-Type", "application/json");
484
485 let serialized_request =
486 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
487 let request = request_builder
488 .body(AsyncBody::from(serialized_request))
489 .map_err(AnthropicError::BuildRequestBody)?;
490
491 let mut response = client
492 .send(request)
493 .await
494 .map_err(AnthropicError::HttpSend)?;
495 let status_code = response.status();
496 let mut body = String::new();
497 response
498 .body_mut()
499 .read_to_string(&mut body)
500 .await
501 .map_err(AnthropicError::ReadResponse)?;
502
503 if status_code.is_success() {
504 Ok(serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)?)
505 } else {
506 Err(AnthropicError::HttpResponseError {
507 status_code,
508 message: body,
509 })
510 }
511}
512
513pub async fn stream_completion(
514 client: &dyn HttpClient,
515 api_url: &str,
516 api_key: &str,
517 request: Request,
518 beta_headers: String,
519) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
520 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
521 .await
522 .map(|output| output.0)
523}
524
525/// An individual rate limit.
526#[derive(Debug)]
527pub struct RateLimit {
528 pub limit: usize,
529 pub remaining: usize,
530 pub reset: DateTime<Utc>,
531}
532
533impl RateLimit {
534 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
535 let limit =
536 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
537 let remaining = get_header(
538 &format!("anthropic-ratelimit-{resource}-remaining"),
539 headers,
540 )?
541 .parse()?;
542 let reset = DateTime::parse_from_rfc3339(get_header(
543 &format!("anthropic-ratelimit-{resource}-reset"),
544 headers,
545 )?)?
546 .to_utc();
547
548 Ok(Self {
549 limit,
550 remaining,
551 reset,
552 })
553 }
554}
555
556/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
557#[derive(Debug)]
558pub struct RateLimitInfo {
559 pub retry_after: Option<Duration>,
560 pub requests: Option<RateLimit>,
561 pub tokens: Option<RateLimit>,
562 pub input_tokens: Option<RateLimit>,
563 pub output_tokens: Option<RateLimit>,
564}
565
566impl RateLimitInfo {
567 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
568 // Check if any rate limit headers exist
569 let has_rate_limit_headers = headers
570 .keys()
571 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
572
573 if !has_rate_limit_headers {
574 return Self {
575 retry_after: None,
576 requests: None,
577 tokens: None,
578 input_tokens: None,
579 output_tokens: None,
580 };
581 }
582
583 Self {
584 retry_after: parse_retry_after(headers),
585 requests: RateLimit::from_headers("requests", headers).ok(),
586 tokens: RateLimit::from_headers("tokens", headers).ok(),
587 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
588 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
589 }
590 }
591}
592
593/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
594/// seconds). Note that other services might specify an HTTP date or some other format for this
595/// header. Returns `None` if the header is not present or cannot be parsed.
596pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
597 headers
598 .get("retry-after")
599 .and_then(|v| v.to_str().ok())
600 .and_then(|v| v.parse::<u64>().ok())
601 .map(Duration::from_secs)
602}
603
604fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
605 Ok(headers
606 .get(key)
607 .with_context(|| format!("missing header `{key}`"))?
608 .to_str()?)
609}
610
611pub async fn stream_completion_with_rate_limit_info(
612 client: &dyn HttpClient,
613 api_url: &str,
614 api_key: &str,
615 request: Request,
616 beta_headers: String,
617) -> Result<
618 (
619 BoxStream<'static, Result<Event, AnthropicError>>,
620 Option<RateLimitInfo>,
621 ),
622 AnthropicError,
623> {
624 let request = StreamingRequest {
625 base: request,
626 stream: true,
627 };
628 let uri = format!("{api_url}/v1/messages");
629
630 let request_builder = HttpRequest::builder()
631 .method(Method::POST)
632 .uri(uri)
633 .header("Anthropic-Version", "2023-06-01")
634 .header("Anthropic-Beta", beta_headers)
635 .header("X-Api-Key", api_key.trim())
636 .header("Content-Type", "application/json");
637 let serialized_request =
638 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
639 let request = request_builder
640 .body(AsyncBody::from(serialized_request))
641 .map_err(AnthropicError::BuildRequestBody)?;
642
643 let mut response = client
644 .send(request)
645 .await
646 .map_err(AnthropicError::HttpSend)?;
647 let rate_limits = RateLimitInfo::from_headers(response.headers());
648 if response.status().is_success() {
649 let reader = BufReader::new(response.into_body());
650 let stream = reader
651 .lines()
652 .filter_map(|line| async move {
653 match line {
654 Ok(line) => {
655 let line = line.strip_prefix("data: ")?;
656 match serde_json::from_str(line) {
657 Ok(response) => Some(Ok(response)),
658 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
659 }
660 }
661 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
662 }
663 })
664 .boxed();
665 Ok((stream, Some(rate_limits)))
666 } else if response.status().as_u16() == 529 {
667 Err(AnthropicError::ServerOverloaded {
668 retry_after: rate_limits.retry_after,
669 })
670 } else if let Some(retry_after) = rate_limits.retry_after {
671 Err(AnthropicError::RateLimit { retry_after })
672 } else {
673 let mut body = String::new();
674 response
675 .body_mut()
676 .read_to_string(&mut body)
677 .await
678 .map_err(AnthropicError::ReadResponse)?;
679
680 match serde_json::from_str::<Event>(&body) {
681 Ok(Event::Error { error }) => Err(AnthropicError::ApiError(error)),
682 Ok(_) | Err(_) => Err(AnthropicError::HttpResponseError {
683 status_code: response.status(),
684 message: body,
685 }),
686 }
687 }
688}
689
690#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
691#[serde(rename_all = "lowercase")]
692pub enum CacheControlType {
693 Ephemeral,
694}
695
696#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
697pub struct CacheControl {
698 #[serde(rename = "type")]
699 pub cache_type: CacheControlType,
700}
701
702#[derive(Debug, Serialize, Deserialize)]
703pub struct Message {
704 pub role: Role,
705 pub content: Vec<RequestContent>,
706}
707
708#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
709#[serde(rename_all = "lowercase")]
710pub enum Role {
711 User,
712 Assistant,
713}
714
715#[derive(Debug, Serialize, Deserialize)]
716#[serde(tag = "type")]
717pub enum RequestContent {
718 #[serde(rename = "text")]
719 Text {
720 text: String,
721 #[serde(skip_serializing_if = "Option::is_none")]
722 cache_control: Option<CacheControl>,
723 },
724 #[serde(rename = "thinking")]
725 Thinking {
726 thinking: String,
727 signature: String,
728 #[serde(skip_serializing_if = "Option::is_none")]
729 cache_control: Option<CacheControl>,
730 },
731 #[serde(rename = "redacted_thinking")]
732 RedactedThinking { data: String },
733 #[serde(rename = "image")]
734 Image {
735 source: ImageSource,
736 #[serde(skip_serializing_if = "Option::is_none")]
737 cache_control: Option<CacheControl>,
738 },
739 #[serde(rename = "tool_use")]
740 ToolUse {
741 id: String,
742 name: String,
743 input: serde_json::Value,
744 #[serde(skip_serializing_if = "Option::is_none")]
745 cache_control: Option<CacheControl>,
746 },
747 #[serde(rename = "tool_result")]
748 ToolResult {
749 tool_use_id: String,
750 is_error: bool,
751 content: ToolResultContent,
752 #[serde(skip_serializing_if = "Option::is_none")]
753 cache_control: Option<CacheControl>,
754 },
755}
756
757#[derive(Debug, Serialize, Deserialize)]
758#[serde(untagged)]
759pub enum ToolResultContent {
760 Plain(String),
761 Multipart(Vec<ToolResultPart>),
762}
763
764#[derive(Debug, Serialize, Deserialize)]
765#[serde(tag = "type", rename_all = "lowercase")]
766pub enum ToolResultPart {
767 Text { text: String },
768 Image { source: ImageSource },
769}
770
771#[derive(Debug, Serialize, Deserialize)]
772#[serde(tag = "type")]
773pub enum ResponseContent {
774 #[serde(rename = "text")]
775 Text { text: String },
776 #[serde(rename = "thinking")]
777 Thinking { thinking: String },
778 #[serde(rename = "redacted_thinking")]
779 RedactedThinking { data: String },
780 #[serde(rename = "tool_use")]
781 ToolUse {
782 id: String,
783 name: String,
784 input: serde_json::Value,
785 },
786}
787
788#[derive(Debug, Serialize, Deserialize)]
789pub struct ImageSource {
790 #[serde(rename = "type")]
791 pub source_type: String,
792 pub media_type: String,
793 pub data: String,
794}
795
796#[derive(Debug, Serialize, Deserialize)]
797pub struct Tool {
798 pub name: String,
799 pub description: String,
800 pub input_schema: serde_json::Value,
801}
802
803#[derive(Debug, Serialize, Deserialize)]
804#[serde(tag = "type", rename_all = "lowercase")]
805pub enum ToolChoice {
806 Auto,
807 Any,
808 Tool { name: String },
809 None,
810}
811
812#[derive(Debug, Serialize, Deserialize)]
813#[serde(tag = "type", rename_all = "lowercase")]
814pub enum Thinking {
815 Enabled { budget_tokens: Option<u32> },
816}
817
818#[derive(Debug, Serialize, Deserialize)]
819#[serde(untagged)]
820pub enum StringOrContents {
821 String(String),
822 Content(Vec<RequestContent>),
823}
824
825#[derive(Debug, Serialize, Deserialize)]
826pub struct Request {
827 pub model: String,
828 pub max_tokens: u64,
829 pub messages: Vec<Message>,
830 #[serde(default, skip_serializing_if = "Vec::is_empty")]
831 pub tools: Vec<Tool>,
832 #[serde(default, skip_serializing_if = "Option::is_none")]
833 pub thinking: Option<Thinking>,
834 #[serde(default, skip_serializing_if = "Option::is_none")]
835 pub tool_choice: Option<ToolChoice>,
836 #[serde(default, skip_serializing_if = "Option::is_none")]
837 pub system: Option<StringOrContents>,
838 #[serde(default, skip_serializing_if = "Option::is_none")]
839 pub metadata: Option<Metadata>,
840 #[serde(default, skip_serializing_if = "Vec::is_empty")]
841 pub stop_sequences: Vec<String>,
842 #[serde(default, skip_serializing_if = "Option::is_none")]
843 pub temperature: Option<f32>,
844 #[serde(default, skip_serializing_if = "Option::is_none")]
845 pub top_k: Option<u32>,
846 #[serde(default, skip_serializing_if = "Option::is_none")]
847 pub top_p: Option<f32>,
848}
849
850#[derive(Debug, Serialize, Deserialize)]
851struct StreamingRequest {
852 #[serde(flatten)]
853 pub base: Request,
854 pub stream: bool,
855}
856
857#[derive(Debug, Serialize, Deserialize)]
858pub struct Metadata {
859 pub user_id: Option<String>,
860}
861
862#[derive(Debug, Serialize, Deserialize, Default)]
863pub struct Usage {
864 #[serde(default, skip_serializing_if = "Option::is_none")]
865 pub input_tokens: Option<u64>,
866 #[serde(default, skip_serializing_if = "Option::is_none")]
867 pub output_tokens: Option<u64>,
868 #[serde(default, skip_serializing_if = "Option::is_none")]
869 pub cache_creation_input_tokens: Option<u64>,
870 #[serde(default, skip_serializing_if = "Option::is_none")]
871 pub cache_read_input_tokens: Option<u64>,
872}
873
874#[derive(Debug, Serialize, Deserialize)]
875pub struct Response {
876 pub id: String,
877 #[serde(rename = "type")]
878 pub response_type: String,
879 pub role: Role,
880 pub content: Vec<ResponseContent>,
881 pub model: String,
882 #[serde(default, skip_serializing_if = "Option::is_none")]
883 pub stop_reason: Option<String>,
884 #[serde(default, skip_serializing_if = "Option::is_none")]
885 pub stop_sequence: Option<String>,
886 pub usage: Usage,
887}
888
889#[derive(Debug, Serialize, Deserialize)]
890#[serde(tag = "type")]
891pub enum Event {
892 #[serde(rename = "message_start")]
893 MessageStart { message: Response },
894 #[serde(rename = "content_block_start")]
895 ContentBlockStart {
896 index: usize,
897 content_block: ResponseContent,
898 },
899 #[serde(rename = "content_block_delta")]
900 ContentBlockDelta { index: usize, delta: ContentDelta },
901 #[serde(rename = "content_block_stop")]
902 ContentBlockStop { index: usize },
903 #[serde(rename = "message_delta")]
904 MessageDelta { delta: MessageDelta, usage: Usage },
905 #[serde(rename = "message_stop")]
906 MessageStop,
907 #[serde(rename = "ping")]
908 Ping,
909 #[serde(rename = "error")]
910 Error { error: ApiError },
911}
912
913#[derive(Debug, Serialize, Deserialize)]
914#[serde(tag = "type")]
915pub enum ContentDelta {
916 #[serde(rename = "text_delta")]
917 TextDelta { text: String },
918 #[serde(rename = "thinking_delta")]
919 ThinkingDelta { thinking: String },
920 #[serde(rename = "signature_delta")]
921 SignatureDelta { signature: String },
922 #[serde(rename = "input_json_delta")]
923 InputJsonDelta { partial_json: String },
924}
925
926#[derive(Debug, Serialize, Deserialize)]
927pub struct MessageDelta {
928 pub stop_reason: Option<String>,
929 pub stop_sequence: Option<String>,
930}
931
932#[derive(Debug)]
933pub enum AnthropicError {
934 /// Failed to serialize the HTTP request body to JSON
935 SerializeRequest(serde_json::Error),
936
937 /// Failed to construct the HTTP request body
938 BuildRequestBody(http::Error),
939
940 /// Failed to send the HTTP request
941 HttpSend(anyhow::Error),
942
943 /// Failed to deserialize the response from JSON
944 DeserializeResponse(serde_json::Error),
945
946 /// Failed to read from response stream
947 ReadResponse(io::Error),
948
949 /// HTTP error response from the API
950 HttpResponseError {
951 status_code: StatusCode,
952 message: String,
953 },
954
955 /// Rate limit exceeded
956 RateLimit { retry_after: Duration },
957
958 /// Server overloaded
959 ServerOverloaded { retry_after: Option<Duration> },
960
961 /// API returned an error response
962 ApiError(ApiError),
963}
964
965#[derive(Debug, Serialize, Deserialize, Error)]
966#[error("Anthropic API Error: {error_type}: {message}")]
967pub struct ApiError {
968 #[serde(rename = "type")]
969 pub error_type: String,
970 pub message: String,
971}
972
973/// An Anthropic API error code.
974/// <https://docs.anthropic.com/en/api/errors#http-errors>
975#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
976#[strum(serialize_all = "snake_case")]
977pub enum ApiErrorCode {
978 /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
979 InvalidRequestError,
980 /// 401 - `authentication_error`: There's an issue with your API key.
981 AuthenticationError,
982 /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
983 PermissionError,
984 /// 404 - `not_found_error`: The requested resource was not found.
985 NotFoundError,
986 /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
987 RequestTooLarge,
988 /// 429 - `rate_limit_error`: Your account has hit a rate limit.
989 RateLimitError,
990 /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
991 ApiError,
992 /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
993 OverloadedError,
994}
995
996impl ApiError {
997 pub fn code(&self) -> Option<ApiErrorCode> {
998 ApiErrorCode::from_str(&self.error_type).ok()
999 }
1000
1001 pub fn is_rate_limit_error(&self) -> bool {
1002 matches!(self.error_type.as_str(), "rate_limit_error")
1003 }
1004
1005 pub fn match_window_exceeded(&self) -> Option<u64> {
1006 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1007 return None;
1008 };
1009
1010 parse_prompt_too_long(&self.message)
1011 }
1012}
1013
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens…`.
///
/// Returns `None` when the message does not start with that prefix, contains no
/// ` tokens` marker, or the text in between is not an unsigned integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _rest) = after_prefix.split_once(" tokens")?;
    token_count.parse().ok()
}
1022
/// Checks `ApiError::match_window_exceeded` against a table of
/// (error type, message, expected token count) cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Over-long prompt with the limit appended after the count.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Over-long prompt with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Matching shape, but the count is not a number.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}