1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
/// Default base URL for the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// Beta header value that opts a request into the 1M-token context window
/// (sent via `Anthropic-Beta`; see [`Model::beta_headers`]).
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
20
/// Prompt-caching configuration for a model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Minimum total token count before caching is worthwhile.
    pub min_total_token: u64,
    // Whether speculative cache writes should be attempted.
    pub should_speculate: bool,
    // Maximum number of cache breakpoints ("anchors") to place in a request.
    pub max_cache_anchors: usize,
}
28
/// Whether a model runs normally or with extended thinking enabled.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    #[default]
    Default,
    /// Extended-thinking mode with an optional token budget for the
    /// thinking phase.
    Thinking {
        budget_tokens: Option<u32>,
    },
}
38
39impl From<ModelMode> for AnthropicModelMode {
40 fn from(value: ModelMode) -> Self {
41 match value {
42 ModelMode::Default => AnthropicModelMode::Default,
43 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
44 }
45 }
46}
47
48impl From<AnthropicModelMode> for ModelMode {
49 fn from(value: AnthropicModelMode) -> Self {
50 match value {
51 AnthropicModelMode::Default => ModelMode::Default,
52 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
53 }
54 }
55}
56
/// The Anthropic models known to this crate.
///
/// Each built-in variant carries a serde rename plus a `-latest` alias so both
/// spellings deserialize to the same variant. "Thinking" variants enable
/// extended thinking (see [`Model::mode`]); "1m-context" variants opt into the
/// 1M-token context beta (see [`Model::beta_headers`]).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest"
    )]
    ClaudeOpus4_6Thinking,
    #[serde(
        rename = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest"
    )]
    ClaudeOpus4_6_1mContext,
    #[serde(
        rename = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6_1mContextThinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    // The crate-wide default model.
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContextThinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        // Extra values to send in the `Anthropic-Beta` header.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
165
impl Model {
    /// Returns the default fast model, for callers that prefer low latency
    /// over maximum capability.
    pub fn default_fast() -> Self {
        Self::Claude3_5Haiku
    }

    /// Resolves a `Model` from a model-id string via prefix matching.
    ///
    /// NOTE: the order of checks is load-bearing — longer, more specific
    /// prefixes must be tested before their shorter counterparts (e.g.
    /// `claude-opus-4-6-1m-context-thinking` before `claude-opus-4-6-1m-context`
    /// before `claude-opus-4-6`), otherwise the shorter prefix would shadow
    /// the longer one.
    pub fn from_id(id: &str) -> Result<Self> {
        if id.starts_with("claude-opus-4-6-1m-context-thinking") {
            return Ok(Self::ClaudeOpus4_6_1mContextThinking);
        }

        if id.starts_with("claude-opus-4-6-1m-context") {
            return Ok(Self::ClaudeOpus4_6_1mContext);
        }

        if id.starts_with("claude-opus-4-6-thinking") {
            return Ok(Self::ClaudeOpus4_6Thinking);
        }

        if id.starts_with("claude-opus-4-6") {
            return Ok(Self::ClaudeOpus4_6);
        }

        if id.starts_with("claude-opus-4-5-thinking") {
            return Ok(Self::ClaudeOpus4_5Thinking);
        }

        if id.starts_with("claude-opus-4-5") {
            return Ok(Self::ClaudeOpus4_5);
        }

        if id.starts_with("claude-opus-4-1-thinking") {
            return Ok(Self::ClaudeOpus4_1Thinking);
        }

        if id.starts_with("claude-opus-4-thinking") {
            return Ok(Self::ClaudeOpus4Thinking);
        }

        if id.starts_with("claude-opus-4-1") {
            return Ok(Self::ClaudeOpus4_1);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
            return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context") {
            return Ok(Self::ClaudeSonnet4_5_1mContext);
        }

        if id.starts_with("claude-sonnet-4-5-thinking") {
            return Ok(Self::ClaudeSonnet4_5Thinking);
        }

        if id.starts_with("claude-sonnet-4-5") {
            return Ok(Self::ClaudeSonnet4_5);
        }

        if id.starts_with("claude-sonnet-4-thinking") {
            return Ok(Self::ClaudeSonnet4Thinking);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-3-7-sonnet-thinking") {
            return Ok(Self::Claude3_7SonnetThinking);
        }

        if id.starts_with("claude-3-7-sonnet") {
            return Ok(Self::Claude3_7Sonnet);
        }

        if id.starts_with("claude-3-5-sonnet") {
            return Ok(Self::Claude3_5Sonnet);
        }

        if id.starts_with("claude-haiku-4-5-thinking") {
            return Ok(Self::ClaudeHaiku4_5Thinking);
        }

        if id.starts_with("claude-haiku-4-5") {
            return Ok(Self::ClaudeHaiku4_5);
        }

        if id.starts_with("claude-3-5-haiku") {
            return Ok(Self::Claude3_5Haiku);
        }

        if id.starts_with("claude-3-opus") {
            return Ok(Self::Claude3Opus);
        }

        if id.starts_with("claude-3-sonnet") {
            return Ok(Self::Claude3Sonnet);
        }

        if id.starts_with("claude-3-haiku") {
            return Ok(Self::Claude3Haiku);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }

    /// Returns the canonical id for this model, generally the `-latest`
    /// alias rather than a dated snapshot (Claude 3 Sonnet/Haiku are the
    /// exceptions and return dated ids).
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
            Self::ClaudeOpus4_6_1mContext => "claude-opus-4-6-1m-context-latest",
            Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6-1m-context-thinking-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
            Self::ClaudeSonnet4_5_1mContextThinking => {
                "claude-sonnet-4-5-1m-context-thinking-latest"
            }
            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
            Self::Claude3Opus => "claude-3-opus-latest",
            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests
    ///
    /// Unlike [`Model::id`], thinking and 1m-context variants collapse to the
    /// same underlying API model id; the distinction is carried via request
    /// parameters and beta headers instead.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6",
            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
            Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
            Self::Claude3Opus => "claude-3-opus-latest",
            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// Human-readable model name for display in the UI.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
            Self::ClaudeOpus4_6_1mContext => "Claude Opus 4.6 (1M context)",
            Self::ClaudeOpus4_6_1mContextThinking => "Claude Opus 4.6 Thinking (1M context)",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
            Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
            Self::Claude3Opus => "Claude 3 Opus",
            Self::Claude3Sonnet => "Claude 3 Sonnet",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    /// Prompt-caching configuration for this model, or `None` when the model
    /// does not (or is not configured to) support caching.
    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::Claude3_5Sonnet
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3_5Haiku
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
            // Claude3Opus and Claude3Sonnet fall through here — presumably
            // caching is intentionally not enabled for them (confirm).
            _ => None,
        }
    }

    /// Maximum context window size, in tokens.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::Claude3_5Sonnet
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3_5Haiku
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => 200_000,
            // The 1m-context variants get the larger window enabled by the
            // beta header (see `beta_headers`).
            Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking => 1_000_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    /// Maximum number of tokens the model may produce in one response.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::Claude3_5Sonnet | Self::Claude3_5Haiku => 8_192,
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_1Thinking => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking => 64_000,
            Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking => 128_000,
            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    /// Default sampling temperature for this model (1.0 for all built-ins;
    /// custom models may override it).
    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::Claude3_5Sonnet
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3_5Haiku
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    /// Returns the mode this model runs in: `Default`, or `Thinking` with a
    /// fixed 4,096-token budget for the built-in "Thinking" variants.
    pub fn mode(&self) -> AnthropicModelMode {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::Claude3_5Sonnet
            | Self::Claude3_7Sonnet
            | Self::ClaudeHaiku4_5
            | Self::Claude3_5Haiku
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => AnthropicModelMode::Default,
            Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            Self::Custom { mode, .. } => mode.clone(),
        }
    }

    /// Comma-joined value for the `Anthropic-Beta` request header, or `None`
    /// when no beta features apply to this model.
    pub fn beta_headers(&self) -> Option<String> {
        let mut headers = vec![];

        match self {
            Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking => {
                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
            }
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
                headers.push("token-efficient-tools-2025-02-19".to_string());
            }
            Self::Custom {
                extra_beta_headers, ..
            } => {
                // Forward user-supplied beta headers, skipping blank entries.
                headers.extend(
                    extra_beta_headers
                        .iter()
                        .filter(|header| !header.trim().is_empty())
                        .cloned(),
                );
            }
            _ => {}
        }

        if headers.is_empty() {
            None
        } else {
            Some(headers.join(","))
        }
    }

    /// Model id to use for tool calls. Custom models may route tool calls to
    /// a different model via `tool_override`; everything else uses
    /// [`Model::request_id`].
    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}
579
580/// Generate completion with streaming.
581pub async fn stream_completion(
582 client: &dyn HttpClient,
583 api_url: &str,
584 api_key: &str,
585 request: Request,
586 beta_headers: Option<String>,
587) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
588 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
589 .await
590 .map(|output| output.0)
591}
592
593/// Generate completion without streaming.
594pub async fn non_streaming_completion(
595 client: &dyn HttpClient,
596 api_url: &str,
597 api_key: &str,
598 request: Request,
599 beta_headers: Option<String>,
600) -> Result<Response, AnthropicError> {
601 let (mut response, rate_limits) =
602 send_request(client, api_url, api_key, &request, beta_headers).await?;
603
604 if response.status().is_success() {
605 let mut body = String::new();
606 response
607 .body_mut()
608 .read_to_string(&mut body)
609 .await
610 .map_err(AnthropicError::ReadResponse)?;
611
612 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
613 } else {
614 Err(handle_error_response(response, rate_limits).await)
615 }
616}
617
618async fn send_request(
619 client: &dyn HttpClient,
620 api_url: &str,
621 api_key: &str,
622 request: impl Serialize,
623 beta_headers: Option<String>,
624) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
625 let uri = format!("{api_url}/v1/messages");
626
627 let mut request_builder = HttpRequest::builder()
628 .method(Method::POST)
629 .uri(uri)
630 .header("Anthropic-Version", "2023-06-01")
631 .header("X-Api-Key", api_key.trim())
632 .header("Content-Type", "application/json");
633
634 if let Some(beta_headers) = beta_headers {
635 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
636 }
637
638 let serialized_request =
639 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
640 let request = request_builder
641 .body(AsyncBody::from(serialized_request))
642 .map_err(AnthropicError::BuildRequestBody)?;
643
644 let response = client
645 .send(request)
646 .await
647 .map_err(AnthropicError::HttpSend)?;
648
649 let rate_limits = RateLimitInfo::from_headers(response.headers());
650
651 Ok((response, rate_limits))
652}
653
654async fn handle_error_response(
655 mut response: http::Response<AsyncBody>,
656 rate_limits: RateLimitInfo,
657) -> AnthropicError {
658 if response.status().as_u16() == 529 {
659 return AnthropicError::ServerOverloaded {
660 retry_after: rate_limits.retry_after,
661 };
662 }
663
664 if let Some(retry_after) = rate_limits.retry_after {
665 return AnthropicError::RateLimit { retry_after };
666 }
667
668 let mut body = String::new();
669 let read_result = response
670 .body_mut()
671 .read_to_string(&mut body)
672 .await
673 .map_err(AnthropicError::ReadResponse);
674
675 if let Err(err) = read_result {
676 return err;
677 }
678
679 match serde_json::from_str::<Event>(&body) {
680 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
681 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
682 status_code: response.status(),
683 message: body,
684 },
685 }
686}
687
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    // Total allowed for this resource in the current window.
    pub limit: usize,
    // How much of the limit remains.
    pub remaining: usize,
    // When the limit resets (UTC).
    pub reset: DateTime<Utc>,
}
695
696impl RateLimit {
697 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
698 let limit =
699 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
700 let remaining = get_header(
701 &format!("anthropic-ratelimit-{resource}-remaining"),
702 headers,
703 )?
704 .parse()?;
705 let reset = DateTime::parse_from_rfc3339(get_header(
706 &format!("anthropic-ratelimit-{resource}-reset"),
707 headers,
708 )?)?
709 .to_utc();
710
711 Ok(Self {
712 limit,
713 remaining,
714 reset,
715 })
716 }
717}
718
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    // Parsed from the Retry-After header, when present.
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
728
729impl RateLimitInfo {
730 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
731 // Check if any rate limit headers exist
732 let has_rate_limit_headers = headers
733 .keys()
734 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
735
736 if !has_rate_limit_headers {
737 return Self {
738 retry_after: None,
739 requests: None,
740 tokens: None,
741 input_tokens: None,
742 output_tokens: None,
743 };
744 }
745
746 Self {
747 retry_after: parse_retry_after(headers),
748 requests: RateLimit::from_headers("requests", headers).ok(),
749 tokens: RateLimit::from_headers("tokens", headers).ok(),
750 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
751 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
752 }
753 }
754}
755
756/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
757/// seconds). Note that other services might specify an HTTP date or some other format for this
758/// header. Returns `None` if the header is not present or cannot be parsed.
759pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
760 headers
761 .get("retry-after")
762 .and_then(|v| v.to_str().ok())
763 .and_then(|v| v.parse::<u64>().ok())
764 .map(Duration::from_secs)
765}
766
767fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
768 Ok(headers
769 .get(key)
770 .with_context(|| format!("missing header `{key}`"))?
771 .to_str()?)
772}
773
/// Generate completion with streaming, also returning rate-limit info parsed
/// from the response headers.
///
/// On success, yields a stream of [`Event`]s decoded from the response's
/// server-sent-event lines; on a non-success status, the response is
/// converted into an [`AnthropicError`].
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Same payload as the non-streaming path, with `"stream": true` added.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // SSE framing: only `data: …` lines carry events;
                        // everything else (blank lines, `event:` lines) is
                        // dropped by the `?` on strip_prefix.
                        let line = line.strip_prefix("data: ")?;
                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
817
/// Kind of cache control applicable to a content block.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
823
/// Cache-control marker attached to request content blocks (serialized as
/// `{"type": "..."}`).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
829
/// A single conversation turn: a role plus one or more content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
835
/// Author of a message (serialized lowercase: `user` / `assistant`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
842
/// A content block sent in a request message, tagged on the wire by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text content.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A previously produced thinking block being replayed to the model,
    /// with its integrity signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Opaque, redacted thinking data.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image attachment.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation issued by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, keyed back by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
884
/// A tool result body: either a plain string or a list of typed parts
/// (untagged — serde picks whichever shape matches).
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
891
/// One part of a multipart tool result (text or image).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
898
/// A content block returned by the model, tagged on the wire by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation requested by the model.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
915
/// Source payload for an image content block.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    // e.g. the encoding scheme of `data` — serialized as `type`.
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
923
/// A tool definition advertised to the model.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    // JSON Schema describing the tool's expected input.
    pub input_schema: serde_json::Value,
}
930
/// How the model should choose tools, tagged by `type` on the wire.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Force the named tool.
    Tool { name: String },
    None,
}
939
/// Request-level extended-thinking setting.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}
946
/// Output effort level (serialized snake_case via both serde and strum).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
956
/// Output configuration attached to a request (currently just effort).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
961
/// A value that may be a bare string or a list of content blocks
/// (untagged — used for the `system` prompt field).
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
968
/// Request body posted to `/v1/messages` (see `send_request`). Optional
/// fields are omitted from the JSON when unset/empty.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
995
/// A [`Request`] with the `stream` flag flattened in alongside its fields;
/// used by the streaming completion path.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
1002
/// Request metadata forwarded to the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
1007
/// Token usage counters reported by the API; absent counters stay `None`.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
1019
/// A complete (non-streamed) message response from the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
1034
/// A server-sent event from the streaming Messages API, tagged by `type`.
/// Content-block events carry the `index` of the block they refer to.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    /// Keep-alive; carries no data.
    #[serde(rename = "ping")]
    Ping,
    /// Error delivered in-stream (also reused when decoding error bodies —
    /// see `handle_error_response`).
    #[serde(rename = "error")]
    Error { error: ApiError },
}
1058
1059#[derive(Debug, Serialize, Deserialize)]
1060#[serde(tag = "type")]
1061pub enum ContentDelta {
1062 #[serde(rename = "text_delta")]
1063 TextDelta { text: String },
1064 #[serde(rename = "thinking_delta")]
1065 ThinkingDelta { thinking: String },
1066 #[serde(rename = "signature_delta")]
1067 SignatureDelta { signature: String },
1068 #[serde(rename = "input_json_delta")]
1069 InputJsonDelta { partial_json: String },
1070}
1071
/// Top-level message fields updated by a `message_delta` stream event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    /// Why generation stopped, once known.
    pub stop_reason: Option<String>,
    /// The custom stop sequence that ended generation, if one did.
    pub stop_sequence: Option<String>,
}
1077
/// Errors that can occur while talking to the Anthropic API, from request
/// construction through response parsing.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded; retry after the given duration
    RateLimit { retry_after: Duration },

    /// Server overloaded; retry after the given duration, if one was provided
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned a structured error response (see [`ApiError`])
    ApiError(ApiError),
}
1110
/// A structured error payload returned by the Anthropic API.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Machine-readable error category, serialized as `"type"`;
    /// known values are listed in [`ApiErrorCode`].
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable description of the error.
    pub message: String,
}
1118
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from the wire `type` string via the `FromStr` impl derived by
/// strum's `EnumString` (snake_case, per the `serialize_all` attribute).
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
1141
1142impl ApiError {
1143 pub fn code(&self) -> Option<ApiErrorCode> {
1144 ApiErrorCode::from_str(&self.error_type).ok()
1145 }
1146
1147 pub fn is_rate_limit_error(&self) -> bool {
1148 matches!(self.error_type.as_str(), "rate_limit_error")
1149 }
1150
1151 pub fn match_window_exceeded(&self) -> Option<u64> {
1152 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1153 return None;
1154 };
1155
1156 parse_prompt_too_long(&self.message)
1157 }
1158}
1159
/// Extracts the token count from an Anthropic "prompt is too long" message.
///
/// Messages look like `"prompt is too long: 220000 tokens > 200000"` or
/// `"prompt is too long: 1234953 tokens"`. Returns the leading token count,
/// or `None` when the message doesn't match that shape or the count fails
/// to parse as a `u64`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _rest) = after_prefix.split_once(" tokens")?;
    token_count.parse().ok()
}
1168
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
// NOTE(review): the `serde(default, ...)` attributes are inert on the `default`
// half — this struct only derives `Serialize`, and `default` applies to
// deserialization. Only `skip_serializing_if` has an effect here.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    /// Model identifier to count tokens against.
    pub model: String,
    /// The conversation messages whose tokens should be counted.
    pub messages: Vec<Message>,
    /// Optional system prompt, counted along with the messages.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Tool definitions, which contribute to the token count when present.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Extended-thinking configuration, if the request would use it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Tool-choice configuration, if the request would use it.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}
1184
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Total number of input tokens the supplied request would consume.
    pub input_tokens: u64,
}
1190
1191/// Count the number of tokens in a message without creating it.
1192pub async fn count_tokens(
1193 client: &dyn HttpClient,
1194 api_url: &str,
1195 api_key: &str,
1196 request: CountTokensRequest,
1197) -> Result<CountTokensResponse, AnthropicError> {
1198 let uri = format!("{api_url}/v1/messages/count_tokens");
1199
1200 let request_builder = HttpRequest::builder()
1201 .method(Method::POST)
1202 .uri(uri)
1203 .header("Anthropic-Version", "2023-06-01")
1204 .header("X-Api-Key", api_key.trim())
1205 .header("Content-Type", "application/json");
1206
1207 let serialized_request =
1208 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1209 let http_request = request_builder
1210 .body(AsyncBody::from(serialized_request))
1211 .map_err(AnthropicError::BuildRequestBody)?;
1212
1213 let mut response = client
1214 .send(http_request)
1215 .await
1216 .map_err(AnthropicError::HttpSend)?;
1217
1218 let rate_limits = RateLimitInfo::from_headers(response.headers());
1219
1220 if response.status().is_success() {
1221 let mut body = String::new();
1222 response
1223 .body_mut()
1224 .read_to_string(&mut body)
1225 .await
1226 .map_err(AnthropicError::ReadResponse)?;
1227
1228 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1229 } else {
1230 Err(handle_error_response(response, rate_limits).await)
1231 }
1232}
1233
#[test]
fn test_match_window_exceeded() {
    // (error_type, message, expected token count) — table-driven over the
    // same cases: both message shapes, a non-matching message, a matching
    // message under the wrong error type, and a non-numeric count.
    let cases: &[(&str, &str, Option<u64>)] = &[
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        ("invalid_request_error", "prompt is too long: invalid tokens", None),
    ];

    for &(error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected, "case: {message}");
    }
}