1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
/// Default base URL for the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// Beta flag sent via the `Anthropic-Beta` request header to enable the
/// 1M-token context window (see `Model::beta_headers`).
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
20
/// Prompt-caching parameters for a model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Minimum total token count before caching kicks in — presumably; confirm against callers.
    pub min_total_token: u64,
    // Whether the cache should be populated speculatively.
    pub should_speculate: bool,
    // Maximum number of cache breakpoints ("anchors") to place in a request.
    pub max_cache_anchors: usize,
}
28
/// Whether a model runs normally or with extended thinking enabled.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    #[default]
    Default,
    /// Extended thinking; `budget_tokens` optionally caps the thinking budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
}
38
/// Converts the settings-level [`ModelMode`] into the API-level mode.
impl From<ModelMode> for AnthropicModelMode {
    fn from(value: ModelMode) -> Self {
        match value {
            ModelMode::Default => AnthropicModelMode::Default,
            ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
        }
    }
}
47
/// Converts the API-level mode back into the settings-level [`ModelMode`].
impl From<AnthropicModelMode> for ModelMode {
    fn from(value: AnthropicModelMode) -> Self {
        match value {
            AnthropicModelMode::Default => ModelMode::Default,
            AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
        }
    }
}
56
/// The set of Anthropic models known to this crate, plus a `Custom` escape
/// hatch for anything else.
///
/// The serde `rename`/`alias` attributes map user-facing model IDs (including
/// their `-latest` and `-1m-context` aliases) onto these variants.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    // Opus 4.6 accepts the 1m-context IDs as aliases rather than separate
    // variants (its context window is 1M by default; see `max_token_count`).
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6Thinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    // Sonnet 4.5 needs distinct 1m-context variants because the larger window
    // is gated behind a beta header (see `beta_headers`).
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContextThinking,
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6Thinking,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        // The raw model name sent to the API (see `request_id`).
        name: String,
        // Context window size in tokens (see `max_token_count`).
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
161
162impl Model {
    /// The default model for fast/cheap requests.
    pub fn default_fast() -> Self {
        Self::ClaudeHaiku4_5
    }
166
167 pub fn from_id(id: &str) -> Result<Self> {
168 if id.starts_with("claude-opus-4-6-1m-context-thinking") {
169 return Ok(Self::ClaudeOpus4_6Thinking);
170 }
171
172 if id.starts_with("claude-opus-4-6-1m-context") {
173 return Ok(Self::ClaudeOpus4_6);
174 }
175
176 if id.starts_with("claude-opus-4-6-thinking") {
177 return Ok(Self::ClaudeOpus4_6Thinking);
178 }
179
180 if id.starts_with("claude-opus-4-6") {
181 return Ok(Self::ClaudeOpus4_6);
182 }
183
184 if id.starts_with("claude-opus-4-5-thinking") {
185 return Ok(Self::ClaudeOpus4_5Thinking);
186 }
187
188 if id.starts_with("claude-opus-4-5") {
189 return Ok(Self::ClaudeOpus4_5);
190 }
191
192 if id.starts_with("claude-opus-4-1-thinking") {
193 return Ok(Self::ClaudeOpus4_1Thinking);
194 }
195
196 if id.starts_with("claude-opus-4-thinking") {
197 return Ok(Self::ClaudeOpus4Thinking);
198 }
199
200 if id.starts_with("claude-opus-4-1") {
201 return Ok(Self::ClaudeOpus4_1);
202 }
203
204 if id.starts_with("claude-opus-4") {
205 return Ok(Self::ClaudeOpus4);
206 }
207
208 if id.starts_with("claude-sonnet-4-6-1m-context-thinking") {
209 return Ok(Self::ClaudeSonnet4_6Thinking);
210 }
211
212 if id.starts_with("claude-sonnet-4-6-1m-context") {
213 return Ok(Self::ClaudeSonnet4_6);
214 }
215
216 if id.starts_with("claude-sonnet-4-6-thinking") {
217 return Ok(Self::ClaudeSonnet4_6Thinking);
218 }
219
220 if id.starts_with("claude-sonnet-4-6") {
221 return Ok(Self::ClaudeSonnet4_6);
222 }
223
224 if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
225 return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
226 }
227
228 if id.starts_with("claude-sonnet-4-5-1m-context") {
229 return Ok(Self::ClaudeSonnet4_5_1mContext);
230 }
231
232 if id.starts_with("claude-sonnet-4-5-thinking") {
233 return Ok(Self::ClaudeSonnet4_5Thinking);
234 }
235
236 if id.starts_with("claude-sonnet-4-5") {
237 return Ok(Self::ClaudeSonnet4_5);
238 }
239
240 if id.starts_with("claude-sonnet-4-thinking") {
241 return Ok(Self::ClaudeSonnet4Thinking);
242 }
243
244 if id.starts_with("claude-sonnet-4") {
245 return Ok(Self::ClaudeSonnet4);
246 }
247
248 if id.starts_with("claude-haiku-4-5-thinking") {
249 return Ok(Self::ClaudeHaiku4_5Thinking);
250 }
251
252 if id.starts_with("claude-haiku-4-5") {
253 return Ok(Self::ClaudeHaiku4_5);
254 }
255
256 if id.starts_with("claude-3-haiku") {
257 return Ok(Self::Claude3Haiku);
258 }
259
260 Err(anyhow!("invalid model ID: {id}"))
261 }
262
    /// The stable, user-facing ID for this model, as accepted by
    /// [`Model::from_id`] and serde deserialization (generally the `-latest`
    /// alias; `Claude3Haiku` uses its dated snapshot).
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
            Self::ClaudeSonnet4_5_1mContextThinking => {
                "claude-sonnet-4-5-1m-context-thinking-latest"
            }
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
            Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking-latest",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }
289
    /// The id of the model that should be used for making API requests
    /// (a pinned, dated snapshot where one exists). Thinking variants share
    /// the request id of their base model; the mode is sent separately.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => "claude-opus-4-6",
            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
            Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
            Self::ClaudeSonnet4_6 | Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6",
            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }
308
    /// Human-readable name shown in the UI. Custom models fall back to their
    /// raw `name` when no `display_name` is configured.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
            Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }
335
    /// Prompt-cache settings for this model. Every built-in model shares the
    /// same configuration; custom models supply their own (or `None`).
    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
        }
    }
367
    /// Total context window size in tokens: 200K for most built-in models,
    /// 1M for the 4.6 generation and the explicit `1m-context` variants.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3Haiku => 200_000,
            Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking => 1_000_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }
392
    /// Maximum number of output tokens the model may generate per request.
    /// Custom models without an explicit limit fall back to 4,096.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_1Thinking => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking => 64_000,
            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => 128_000,
            Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }
418
    /// Sampling temperature used when the caller doesn't specify one
    /// (1.0 for all built-in models, and the fallback for custom models).
    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3Haiku => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }
446
    /// The thinking mode for this model. Built-in `*Thinking` variants get a
    /// fixed 4,096-token thinking budget; custom models use their configured
    /// mode.
    pub fn mode(&self) -> AnthropicModelMode {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => AnthropicModelMode::Default,
            Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeSonnet4_6Thinking
            | Self::ClaudeHaiku4_5Thinking => AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            Self::Custom { mode, .. } => mode.clone(),
        }
    }
473
474 pub fn beta_headers(&self) -> Option<String> {
475 let mut headers = vec![];
476
477 match self {
478 Self::ClaudeSonnet4_5_1mContext | Self::ClaudeSonnet4_5_1mContextThinking => {
479 headers.push(CONTEXT_1M_BETA_HEADER.to_string());
480 }
481 Self::Custom {
482 extra_beta_headers, ..
483 } => {
484 headers.extend(
485 extra_beta_headers
486 .iter()
487 .filter(|header| !header.trim().is_empty())
488 .cloned(),
489 );
490 }
491 _ => {}
492 }
493
494 if headers.is_empty() {
495 None
496 } else {
497 Some(headers.join(","))
498 }
499 }
500
501 pub fn tool_model_id(&self) -> &str {
502 if let Self::Custom {
503 tool_override: Some(tool_override),
504 ..
505 } = self
506 {
507 tool_override
508 } else {
509 self.request_id()
510 }
511 }
512}
513
514/// Generate completion with streaming.
515pub async fn stream_completion(
516 client: &dyn HttpClient,
517 api_url: &str,
518 api_key: &str,
519 request: Request,
520 beta_headers: Option<String>,
521) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
522 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
523 .await
524 .map(|output| output.0)
525}
526
/// Generate completion without streaming.
///
/// On success the whole response body is read and deserialized into a
/// [`Response`]; on a non-success status the failure is translated into an
/// [`AnthropicError`] (rate limit, overload, or API error) via
/// `handle_error_response`.
pub async fn non_streaming_completion(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<Response, AnthropicError> {
    let (mut response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
551
/// Builds and sends a `POST {api_url}/v1/messages` request, returning the raw
/// HTTP response together with any rate-limit info parsed from its headers.
/// The status code is NOT checked here; callers decide how to handle errors.
async fn send_request(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: impl Serialize,
    beta_headers: Option<String>,
) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
    let uri = format!("{api_url}/v1/messages");

    // The API key is trimmed to tolerate stray whitespace from configuration.
    let mut request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Anthropic-Version", "2023-06-01")
        .header("X-Api-Key", api_key.trim())
        .header("Content-Type", "application/json");

    if let Some(beta_headers) = beta_headers {
        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
    }

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;

    // Rate-limit headers are parsed even for error responses so callers can back off.
    let rate_limits = RateLimitInfo::from_headers(response.headers());

    Ok((response, rate_limits))
}
587
588async fn handle_error_response(
589 mut response: http::Response<AsyncBody>,
590 rate_limits: RateLimitInfo,
591) -> AnthropicError {
592 if response.status().as_u16() == 529 {
593 return AnthropicError::ServerOverloaded {
594 retry_after: rate_limits.retry_after,
595 };
596 }
597
598 if let Some(retry_after) = rate_limits.retry_after {
599 return AnthropicError::RateLimit { retry_after };
600 }
601
602 let mut body = String::new();
603 let read_result = response
604 .body_mut()
605 .read_to_string(&mut body)
606 .await
607 .map_err(AnthropicError::ReadResponse);
608
609 if let Err(err) = read_result {
610 return err;
611 }
612
613 match serde_json::from_str::<Event>(&body) {
614 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
615 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
616 status_code: response.status(),
617 message: body,
618 },
619 }
620}
621
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    // Total allowance for this resource.
    pub limit: usize,
    // Remaining allowance in the current window.
    pub remaining: usize,
    // When the allowance resets (UTC).
    pub reset: DateTime<Utc>,
}
629
impl RateLimit {
    /// Builds a [`RateLimit`] from the three
    /// `anthropic-ratelimit-{resource}-{limit,remaining,reset}` headers for a
    /// given resource (e.g. `requests`, `tokens`). Fails if any header is
    /// missing or unparsable; `reset` must be an RFC 3339 timestamp.
    fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
        let limit =
            get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
        let remaining = get_header(
            &format!("anthropic-ratelimit-{resource}-remaining"),
            headers,
        )?
        .parse()?;
        let reset = DateTime::parse_from_rfc3339(get_header(
            &format!("anthropic-ratelimit-{resource}-reset"),
            headers,
        )?)?
        .to_utc();

        Ok(Self {
            limit,
            remaining,
            reset,
        })
    }
}
652
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    // Parsed from the `retry-after` header, when present.
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
662
impl RateLimitInfo {
    /// Extracts rate-limit data from response headers. Individual limits that
    /// are missing or malformed become `None` rather than errors.
    fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
        // Check if any rate limit headers exist
        let has_rate_limit_headers = headers
            .keys()
            .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));

        // Fast path: skip the per-resource parsing entirely when no relevant
        // headers are present.
        if !has_rate_limit_headers {
            return Self {
                retry_after: None,
                requests: None,
                tokens: None,
                input_tokens: None,
                output_tokens: None,
            };
        }

        Self {
            retry_after: parse_retry_after(headers),
            requests: RateLimit::from_headers("requests", headers).ok(),
            tokens: RateLimit::from_headers("tokens", headers).ok(),
            input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
            output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
        }
    }
}
689
690/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
691/// seconds). Note that other services might specify an HTTP date or some other format for this
692/// header. Returns `None` if the header is not present or cannot be parsed.
693pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
694 headers
695 .get("retry-after")
696 .and_then(|v| v.to_str().ok())
697 .and_then(|v| v.parse::<u64>().ok())
698 .map(Duration::from_secs)
699}
700
701fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
702 Ok(headers
703 .get(key)
704 .with_context(|| format!("missing header `{key}`"))?
705 .to_str()?)
706}
707
/// Like [`stream_completion`], but also returns the rate-limit info parsed
/// from the response headers.
///
/// On success the response body is interpreted as a server-sent-event stream:
/// each `data: <json>` line is deserialized into an [`Event`]; other SSE
/// lines (event names, comments, blank keep-alives) are silently dropped.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // Accept both "data: {...}" and "data:{...}"; any line
                        // without the prefix is filtered out by the `?`.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
754
/// Cache-control type; `ephemeral` is the only variant defined here.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}

/// Marks a content block as a prompt-cache breakpoint.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}

/// A single chat message: a role plus one or more content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}

/// Message author.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}

/// Content blocks that can be sent in a request, tagged by `type` in JSON.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}

/// A tool result may be a plain string or a list of typed parts.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}

/// One part of a multipart tool result.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
835
/// Content blocks returned by the API, tagged by `type` in JSON.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}

/// Image payload described by source type, media type, and raw data.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}

// serde helper: used with `skip_serializing_if` to omit `false` bool fields.
fn is_false(value: &bool) -> bool {
    !value
}

/// A tool the model may call, described by a JSON schema for its input.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
    // Omitted from the serialized request when false.
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}

/// How the model should choose among the provided tools.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}

/// The `thinking` request parameter.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}

/// Effort level for the `output_config` request parameter.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}

/// The `output_config` request parameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}

/// A value that may be either a bare string or structured content blocks
/// (used for the `system` prompt).
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
911
/// Body of a `POST /v1/messages` request. Optional fields are omitted from
/// the serialized JSON when unset.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}

/// The `speed` request parameter.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}

/// A [`Request`] with the extra `stream` flag flattened alongside its fields.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}

/// The `metadata` request parameter.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
960
/// Token accounting reported by the API.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}

/// A complete message response (also carried by the `message_start` event).
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}

/// Server-sent events emitted by the streaming messages API, tagged by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}

/// Incremental updates to a content block during streaming.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}

/// Top-level message fields updated by a `message_delta` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
1030
/// Errors that can occur while talking to the Anthropic API.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}

/// A structured error object returned in an API error response or stream
/// `error` event.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
1071
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from [`ApiError::error_type`] via the snake_case `EnumString`
/// impl (e.g. `"rate_limit_error"` -> [`ApiErrorCode::RateLimitError`]).
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
1094
1095impl ApiError {
1096 pub fn code(&self) -> Option<ApiErrorCode> {
1097 ApiErrorCode::from_str(&self.error_type).ok()
1098 }
1099
1100 pub fn is_rate_limit_error(&self) -> bool {
1101 matches!(self.error_type.as_str(), "rate_limit_error")
1102 }
1103
1104 pub fn match_window_exceeded(&self) -> Option<u64> {
1105 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1106 return None;
1107 };
1108
1109 parse_prompt_too_long(&self.message)
1110 }
1111}
1112
/// Extracts the reported token count from a "prompt is too long" error
/// message, e.g. `"prompt is too long: 220000 tokens > 200000"` -> `220000`.
///
/// Returns `None` when the message doesn't have that shape or the count
/// doesn't parse as an integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let remainder = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _) = remainder.split_once(" tokens")?;
    token_count.parse().ok()
}
1121
1122/// Request body for the token counting API.
1123/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
1124#[derive(Debug, Serialize)]
1125pub struct CountTokensRequest {
1126 pub model: String,
1127 pub messages: Vec<Message>,
1128 #[serde(default, skip_serializing_if = "Option::is_none")]
1129 pub system: Option<StringOrContents>,
1130 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1131 pub tools: Vec<Tool>,
1132 #[serde(default, skip_serializing_if = "Option::is_none")]
1133 pub thinking: Option<Thinking>,
1134 #[serde(default, skip_serializing_if = "Option::is_none")]
1135 pub tool_choice: Option<ToolChoice>,
1136}
1137
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    // Number of tokens the supplied request's input would consume.
    pub input_tokens: u64,
}
1143
1144/// Count the number of tokens in a message without creating it.
1145pub async fn count_tokens(
1146 client: &dyn HttpClient,
1147 api_url: &str,
1148 api_key: &str,
1149 request: CountTokensRequest,
1150) -> Result<CountTokensResponse, AnthropicError> {
1151 let uri = format!("{api_url}/v1/messages/count_tokens");
1152
1153 let request_builder = HttpRequest::builder()
1154 .method(Method::POST)
1155 .uri(uri)
1156 .header("Anthropic-Version", "2023-06-01")
1157 .header("X-Api-Key", api_key.trim())
1158 .header("Content-Type", "application/json");
1159
1160 let serialized_request =
1161 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1162 let http_request = request_builder
1163 .body(AsyncBody::from(serialized_request))
1164 .map_err(AnthropicError::BuildRequestBody)?;
1165
1166 let mut response = client
1167 .send(http_request)
1168 .await
1169 .map_err(AnthropicError::HttpSend)?;
1170
1171 let rate_limits = RateLimitInfo::from_headers(response.headers());
1172
1173 if response.status().is_success() {
1174 let mut body = String::new();
1175 response
1176 .body_mut()
1177 .read_to_string(&mut body)
1178 .await
1179 .map_err(AnthropicError::ReadResponse)?;
1180
1181 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1182 } else {
1183 Err(handle_error_response(response, rate_limits).await)
1184 }
1185}
1186
#[test]
fn test_match_window_exceeded() {
    // (error_type, message, expected token count)
    let cases: &[(&str, &str, Option<u64>)] = &[
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        // Right message shape but wrong error type: the code gate rejects it.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Right error type but the count doesn't parse.
        ("invalid_request_error", "prompt is too long: invalid tokens", None),
    ];

    for &(error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected, "case: {message}");
    }
}