1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
/// Base URL of the Anthropic API, written without a trailing slash.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// Beta feature identifier that `Model::beta_headers` returns for the built-in 1M-context
/// model variants.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
20
/// Prompt-caching parameters associated with a model (see `Model::cache_configuration`).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum total token count before caching applies — confirm
    // against the consumer of this struct.
    pub min_total_token: u64,
    // NOTE(review): presumably enables speculative cache writes — confirm against the consumer.
    pub should_speculate: bool,
    // NOTE(review): presumably the maximum number of cache anchors per request — confirm.
    pub max_cache_anchors: usize,
}
28
/// Operating mode of a model: plain completion (the default) or thinking with an optional
/// token budget.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// Regular mode without thinking; this is the `Default` value.
    #[default]
    Default,
    /// Thinking mode.
    Thinking {
        /// Optional token budget for thinking; `None` leaves it unspecified.
        budget_tokens: Option<u32>,
    },
}
38
39impl From<ModelMode> for AnthropicModelMode {
40 fn from(value: ModelMode) -> Self {
41 match value {
42 ModelMode::Default => AnthropicModelMode::Default,
43 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
44 }
45 }
46}
47
48impl From<AnthropicModelMode> for ModelMode {
49 fn from(value: AnthropicModelMode) -> Self {
50 match value {
51 AnthropicModelMode::Default => ModelMode::Default,
52 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
53 }
54 }
55}
56
/// The Anthropic models known to this crate, plus a user-defined `Custom` escape hatch.
///
/// Each built-in variant (de)serializes under its `rename` string and additionally accepts the
/// `-latest` alias when deserializing. Variants ending in `Thinking` report a thinking mode from
/// `Model::mode`; variants containing `1mContext` report a 1M-token context window and request
/// the corresponding beta header.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest"
    )]
    ClaudeOpus4_6Thinking,
    #[serde(
        rename = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest"
    )]
    ClaudeOpus4_6_1mContext,
    #[serde(
        rename = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6_1mContextThinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContextThinking,
    /// The `Default` model.
    #[default]
    #[serde(rename = "claude-sonnet-4-6", alias = "claude-sonnet-4-6-latest")]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest"
    )]
    ClaudeSonnet4_6Thinking,
    #[serde(
        rename = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest"
    )]
    ClaudeSonnet4_6_1mContext,
    #[serde(
        rename = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6_1mContextThinking,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-defined model whose properties are supplied explicitly rather than built in.
    #[serde(rename = "custom")]
    Custom {
        /// Model name; returned as both the model id and the API request id.
        name: String,
        /// Context window size; returned by `Model::max_token_count`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        /// Output token limit; `Model::max_output_tokens` falls back to 4096 when absent.
        max_output_tokens: Option<u64>,
        /// Default sampling temperature; `Model::default_temperature` falls back to 1.0 when
        /// absent.
        default_temperature: Option<f32>,
        /// Extra beta feature identifiers returned by `Model::beta_headers`; blank entries are
        /// dropped there. Defaults to empty when missing from the serialized form.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        /// Operating mode; defaults to `AnthropicModelMode::Default` when missing from the
        /// serialized form.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
167
168impl Model {
169 pub fn default_fast() -> Self {
170 Self::ClaudeHaiku4_5
171 }
172
173 pub fn from_id(id: &str) -> Result<Self> {
174 if id.starts_with("claude-opus-4-6-1m-context-thinking") {
175 return Ok(Self::ClaudeOpus4_6_1mContextThinking);
176 }
177
178 if id.starts_with("claude-opus-4-6-1m-context") {
179 return Ok(Self::ClaudeOpus4_6_1mContext);
180 }
181
182 if id.starts_with("claude-opus-4-6-thinking") {
183 return Ok(Self::ClaudeOpus4_6Thinking);
184 }
185
186 if id.starts_with("claude-opus-4-6") {
187 return Ok(Self::ClaudeOpus4_6);
188 }
189
190 if id.starts_with("claude-opus-4-5-thinking") {
191 return Ok(Self::ClaudeOpus4_5Thinking);
192 }
193
194 if id.starts_with("claude-opus-4-5") {
195 return Ok(Self::ClaudeOpus4_5);
196 }
197
198 if id.starts_with("claude-opus-4-1-thinking") {
199 return Ok(Self::ClaudeOpus4_1Thinking);
200 }
201
202 if id.starts_with("claude-opus-4-thinking") {
203 return Ok(Self::ClaudeOpus4Thinking);
204 }
205
206 if id.starts_with("claude-opus-4-1") {
207 return Ok(Self::ClaudeOpus4_1);
208 }
209
210 if id.starts_with("claude-opus-4") {
211 return Ok(Self::ClaudeOpus4);
212 }
213
214 if id.starts_with("claude-sonnet-4-6-1m-context-thinking") {
215 return Ok(Self::ClaudeSonnet4_6_1mContextThinking);
216 }
217
218 if id.starts_with("claude-sonnet-4-6-1m-context") {
219 return Ok(Self::ClaudeSonnet4_6_1mContext);
220 }
221
222 if id.starts_with("claude-sonnet-4-6-thinking") {
223 return Ok(Self::ClaudeSonnet4_6Thinking);
224 }
225
226 if id.starts_with("claude-sonnet-4-6") {
227 return Ok(Self::ClaudeSonnet4_6);
228 }
229
230 if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
231 return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
232 }
233
234 if id.starts_with("claude-sonnet-4-5-1m-context") {
235 return Ok(Self::ClaudeSonnet4_5_1mContext);
236 }
237
238 if id.starts_with("claude-sonnet-4-5-thinking") {
239 return Ok(Self::ClaudeSonnet4_5Thinking);
240 }
241
242 if id.starts_with("claude-sonnet-4-5") {
243 return Ok(Self::ClaudeSonnet4_5);
244 }
245
246 if id.starts_with("claude-sonnet-4-thinking") {
247 return Ok(Self::ClaudeSonnet4Thinking);
248 }
249
250 if id.starts_with("claude-sonnet-4") {
251 return Ok(Self::ClaudeSonnet4);
252 }
253
254 if id.starts_with("claude-haiku-4-5-thinking") {
255 return Ok(Self::ClaudeHaiku4_5Thinking);
256 }
257
258 if id.starts_with("claude-haiku-4-5") {
259 return Ok(Self::ClaudeHaiku4_5);
260 }
261
262 if id.starts_with("claude-3-haiku") {
263 return Ok(Self::Claude3Haiku);
264 }
265
266 Err(anyhow!("invalid model ID: {id}"))
267 }
268
269 pub fn id(&self) -> &str {
270 match self {
271 Self::ClaudeOpus4 => "claude-opus-4-latest",
272 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
273 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
274 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
275 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
276 Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
277 Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
278 Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
279 Self::ClaudeOpus4_6_1mContext => "claude-opus-4-6-1m-context-latest",
280 Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6-1m-context-thinking-latest",
281 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
282 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
283 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
284 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
285 Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
286 Self::ClaudeSonnet4_5_1mContextThinking => {
287 "claude-sonnet-4-5-1m-context-thinking-latest"
288 }
289 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
290 Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking-latest",
291 Self::ClaudeSonnet4_6_1mContext => "claude-sonnet-4-6-1m-context-latest",
292 Self::ClaudeSonnet4_6_1mContextThinking => {
293 "claude-sonnet-4-6-1m-context-thinking-latest"
294 }
295 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
296 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
297 Self::Claude3Haiku => "claude-3-haiku-20240307",
298 Self::Custom { name, .. } => name,
299 }
300 }
301
302 /// The id of the model that should be used for making API requests
303 pub fn request_id(&self) -> &str {
304 match self {
305 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
306 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
307 Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
308 Self::ClaudeOpus4_6
309 | Self::ClaudeOpus4_6Thinking
310 | Self::ClaudeOpus4_6_1mContext
311 | Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6",
312 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
313 Self::ClaudeSonnet4_5
314 | Self::ClaudeSonnet4_5Thinking
315 | Self::ClaudeSonnet4_5_1mContext
316 | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
317 Self::ClaudeSonnet4_6
318 | Self::ClaudeSonnet4_6Thinking
319 | Self::ClaudeSonnet4_6_1mContext
320 | Self::ClaudeSonnet4_6_1mContextThinking => "claude-sonnet-4-6",
321 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
322 Self::Claude3Haiku => "claude-3-haiku-20240307",
323 Self::Custom { name, .. } => name,
324 }
325 }
326
327 pub fn display_name(&self) -> &str {
328 match self {
329 Self::ClaudeOpus4 => "Claude Opus 4",
330 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
331 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
332 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
333 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
334 Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
335 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
336 Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
337 Self::ClaudeOpus4_6_1mContext => "Claude Opus 4.6 (1M context)",
338 Self::ClaudeOpus4_6_1mContextThinking => "Claude Opus 4.6 Thinking (1M context)",
339 Self::ClaudeSonnet4 => "Claude Sonnet 4",
340 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
341 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
342 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
343 Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
344 Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
345 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
346 Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
347 Self::ClaudeSonnet4_6_1mContext => "Claude Sonnet 4.6 (1M context)",
348 Self::ClaudeSonnet4_6_1mContextThinking => "Claude Sonnet 4.6 Thinking (1M context)",
349 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
350 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
351 Self::Claude3Haiku => "Claude 3 Haiku",
352 Self::Custom {
353 name, display_name, ..
354 } => display_name.as_ref().unwrap_or(name),
355 }
356 }
357
358 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
359 match self {
360 Self::ClaudeOpus4
361 | Self::ClaudeOpus4_1
362 | Self::ClaudeOpus4Thinking
363 | Self::ClaudeOpus4_1Thinking
364 | Self::ClaudeOpus4_5
365 | Self::ClaudeOpus4_5Thinking
366 | Self::ClaudeOpus4_6
367 | Self::ClaudeOpus4_6Thinking
368 | Self::ClaudeOpus4_6_1mContext
369 | Self::ClaudeOpus4_6_1mContextThinking
370 | Self::ClaudeSonnet4
371 | Self::ClaudeSonnet4Thinking
372 | Self::ClaudeSonnet4_5
373 | Self::ClaudeSonnet4_5Thinking
374 | Self::ClaudeSonnet4_5_1mContext
375 | Self::ClaudeSonnet4_5_1mContextThinking
376 | Self::ClaudeSonnet4_6
377 | Self::ClaudeSonnet4_6Thinking
378 | Self::ClaudeSonnet4_6_1mContext
379 | Self::ClaudeSonnet4_6_1mContextThinking
380 | Self::ClaudeHaiku4_5
381 | Self::ClaudeHaiku4_5Thinking
382 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
383 min_total_token: 2_048,
384 should_speculate: true,
385 max_cache_anchors: 4,
386 }),
387 Self::Custom {
388 cache_configuration,
389 ..
390 } => cache_configuration.clone(),
391 }
392 }
393
394 pub fn max_token_count(&self) -> u64 {
395 match self {
396 Self::ClaudeOpus4
397 | Self::ClaudeOpus4_1
398 | Self::ClaudeOpus4Thinking
399 | Self::ClaudeOpus4_1Thinking
400 | Self::ClaudeOpus4_5
401 | Self::ClaudeOpus4_5Thinking
402 | Self::ClaudeOpus4_6
403 | Self::ClaudeOpus4_6Thinking
404 | Self::ClaudeSonnet4
405 | Self::ClaudeSonnet4Thinking
406 | Self::ClaudeSonnet4_5
407 | Self::ClaudeSonnet4_5Thinking
408 | Self::ClaudeSonnet4_6
409 | Self::ClaudeSonnet4_6Thinking
410 | Self::ClaudeHaiku4_5
411 | Self::ClaudeHaiku4_5Thinking
412 | Self::Claude3Haiku => 200_000,
413 Self::ClaudeOpus4_6_1mContext
414 | Self::ClaudeOpus4_6_1mContextThinking
415 | Self::ClaudeSonnet4_5_1mContext
416 | Self::ClaudeSonnet4_5_1mContextThinking
417 | Self::ClaudeSonnet4_6_1mContext
418 | Self::ClaudeSonnet4_6_1mContextThinking => 1_000_000,
419 Self::Custom { max_tokens, .. } => *max_tokens,
420 }
421 }
422
423 pub fn max_output_tokens(&self) -> u64 {
424 match self {
425 Self::ClaudeOpus4
426 | Self::ClaudeOpus4Thinking
427 | Self::ClaudeOpus4_1
428 | Self::ClaudeOpus4_1Thinking => 32_000,
429 Self::ClaudeOpus4_5
430 | Self::ClaudeOpus4_5Thinking
431 | Self::ClaudeSonnet4
432 | Self::ClaudeSonnet4Thinking
433 | Self::ClaudeSonnet4_5
434 | Self::ClaudeSonnet4_5Thinking
435 | Self::ClaudeSonnet4_5_1mContext
436 | Self::ClaudeSonnet4_5_1mContextThinking
437 | Self::ClaudeSonnet4_6
438 | Self::ClaudeSonnet4_6Thinking
439 | Self::ClaudeSonnet4_6_1mContext
440 | Self::ClaudeSonnet4_6_1mContextThinking
441 | Self::ClaudeHaiku4_5
442 | Self::ClaudeHaiku4_5Thinking => 64_000,
443 Self::ClaudeOpus4_6
444 | Self::ClaudeOpus4_6Thinking
445 | Self::ClaudeOpus4_6_1mContext
446 | Self::ClaudeOpus4_6_1mContextThinking => 128_000,
447 Self::Claude3Haiku => 4_096,
448 Self::Custom {
449 max_output_tokens, ..
450 } => max_output_tokens.unwrap_or(4_096),
451 }
452 }
453
454 pub fn default_temperature(&self) -> f32 {
455 match self {
456 Self::ClaudeOpus4
457 | Self::ClaudeOpus4_1
458 | Self::ClaudeOpus4Thinking
459 | Self::ClaudeOpus4_1Thinking
460 | Self::ClaudeOpus4_5
461 | Self::ClaudeOpus4_5Thinking
462 | Self::ClaudeOpus4_6
463 | Self::ClaudeOpus4_6Thinking
464 | Self::ClaudeOpus4_6_1mContext
465 | Self::ClaudeOpus4_6_1mContextThinking
466 | Self::ClaudeSonnet4
467 | Self::ClaudeSonnet4Thinking
468 | Self::ClaudeSonnet4_5
469 | Self::ClaudeSonnet4_5Thinking
470 | Self::ClaudeSonnet4_5_1mContext
471 | Self::ClaudeSonnet4_5_1mContextThinking
472 | Self::ClaudeSonnet4_6
473 | Self::ClaudeSonnet4_6Thinking
474 | Self::ClaudeSonnet4_6_1mContext
475 | Self::ClaudeSonnet4_6_1mContextThinking
476 | Self::ClaudeHaiku4_5
477 | Self::ClaudeHaiku4_5Thinking
478 | Self::Claude3Haiku => 1.0,
479 Self::Custom {
480 default_temperature,
481 ..
482 } => default_temperature.unwrap_or(1.0),
483 }
484 }
485
486 pub fn mode(&self) -> AnthropicModelMode {
487 match self {
488 Self::ClaudeOpus4
489 | Self::ClaudeOpus4_1
490 | Self::ClaudeOpus4_5
491 | Self::ClaudeOpus4_6
492 | Self::ClaudeOpus4_6_1mContext
493 | Self::ClaudeSonnet4
494 | Self::ClaudeSonnet4_5
495 | Self::ClaudeSonnet4_5_1mContext
496 | Self::ClaudeSonnet4_6
497 | Self::ClaudeSonnet4_6_1mContext
498 | Self::ClaudeHaiku4_5
499 | Self::Claude3Haiku => AnthropicModelMode::Default,
500 Self::ClaudeOpus4Thinking
501 | Self::ClaudeOpus4_1Thinking
502 | Self::ClaudeOpus4_5Thinking
503 | Self::ClaudeOpus4_6Thinking
504 | Self::ClaudeOpus4_6_1mContextThinking
505 | Self::ClaudeSonnet4Thinking
506 | Self::ClaudeSonnet4_5Thinking
507 | Self::ClaudeSonnet4_5_1mContextThinking
508 | Self::ClaudeSonnet4_6Thinking
509 | Self::ClaudeSonnet4_6_1mContextThinking
510 | Self::ClaudeHaiku4_5Thinking => AnthropicModelMode::Thinking {
511 budget_tokens: Some(4_096),
512 },
513 Self::Custom { mode, .. } => mode.clone(),
514 }
515 }
516
517 pub fn beta_headers(&self) -> Option<String> {
518 let mut headers = vec![];
519
520 match self {
521 Self::ClaudeOpus4_6_1mContext
522 | Self::ClaudeOpus4_6_1mContextThinking
523 | Self::ClaudeSonnet4_5_1mContext
524 | Self::ClaudeSonnet4_5_1mContextThinking
525 | Self::ClaudeSonnet4_6_1mContext
526 | Self::ClaudeSonnet4_6_1mContextThinking => {
527 headers.push(CONTEXT_1M_BETA_HEADER.to_string());
528 }
529 Self::Custom {
530 extra_beta_headers, ..
531 } => {
532 headers.extend(
533 extra_beta_headers
534 .iter()
535 .filter(|header| !header.trim().is_empty())
536 .cloned(),
537 );
538 }
539 _ => {}
540 }
541
542 if headers.is_empty() {
543 None
544 } else {
545 Some(headers.join(","))
546 }
547 }
548
549 pub fn tool_model_id(&self) -> &str {
550 if let Self::Custom {
551 tool_override: Some(tool_override),
552 ..
553 } = self
554 {
555 tool_override
556 } else {
557 self.request_id()
558 }
559 }
560}
561
562/// Generate completion with streaming.
563pub async fn stream_completion(
564 client: &dyn HttpClient,
565 api_url: &str,
566 api_key: &str,
567 request: Request,
568 beta_headers: Option<String>,
569) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
570 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
571 .await
572 .map(|output| output.0)
573}
574
575/// Generate completion without streaming.
576pub async fn non_streaming_completion(
577 client: &dyn HttpClient,
578 api_url: &str,
579 api_key: &str,
580 request: Request,
581 beta_headers: Option<String>,
582) -> Result<Response, AnthropicError> {
583 let (mut response, rate_limits) =
584 send_request(client, api_url, api_key, &request, beta_headers).await?;
585
586 if response.status().is_success() {
587 let mut body = String::new();
588 response
589 .body_mut()
590 .read_to_string(&mut body)
591 .await
592 .map_err(AnthropicError::ReadResponse)?;
593
594 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
595 } else {
596 Err(handle_error_response(response, rate_limits).await)
597 }
598}
599
600async fn send_request(
601 client: &dyn HttpClient,
602 api_url: &str,
603 api_key: &str,
604 request: impl Serialize,
605 beta_headers: Option<String>,
606) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
607 let uri = format!("{api_url}/v1/messages");
608
609 let mut request_builder = HttpRequest::builder()
610 .method(Method::POST)
611 .uri(uri)
612 .header("Anthropic-Version", "2023-06-01")
613 .header("X-Api-Key", api_key.trim())
614 .header("Content-Type", "application/json");
615
616 if let Some(beta_headers) = beta_headers {
617 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
618 }
619
620 let serialized_request =
621 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
622 let request = request_builder
623 .body(AsyncBody::from(serialized_request))
624 .map_err(AnthropicError::BuildRequestBody)?;
625
626 let response = client
627 .send(request)
628 .await
629 .map_err(AnthropicError::HttpSend)?;
630
631 let rate_limits = RateLimitInfo::from_headers(response.headers());
632
633 Ok((response, rate_limits))
634}
635
636async fn handle_error_response(
637 mut response: http::Response<AsyncBody>,
638 rate_limits: RateLimitInfo,
639) -> AnthropicError {
640 if response.status().as_u16() == 529 {
641 return AnthropicError::ServerOverloaded {
642 retry_after: rate_limits.retry_after,
643 };
644 }
645
646 if let Some(retry_after) = rate_limits.retry_after {
647 return AnthropicError::RateLimit { retry_after };
648 }
649
650 let mut body = String::new();
651 let read_result = response
652 .body_mut()
653 .read_to_string(&mut body)
654 .await
655 .map_err(AnthropicError::ReadResponse);
656
657 if let Err(err) = read_result {
658 return err;
659 }
660
661 match serde_json::from_str::<Event>(&body) {
662 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
663 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
664 status_code: response.status(),
665 message: body,
666 },
667 }
668}
669
/// An individual rate limit, parsed from the `anthropic-ratelimit-{resource}-*` response
/// headers.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `-limit` header.
    pub limit: usize,
    /// Value of the `-remaining` header.
    pub remaining: usize,
    /// Value of the `-reset` header (an RFC 3339 timestamp), converted to UTC.
    pub reset: DateTime<Utc>,
}
677
678impl RateLimit {
679 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
680 let limit =
681 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
682 let remaining = get_header(
683 &format!("anthropic-ratelimit-{resource}-remaining"),
684 headers,
685 )?
686 .parse()?;
687 let reset = DateTime::parse_from_rfc3339(get_header(
688 &format!("anthropic-ratelimit-{resource}-reset"),
689 headers,
690 )?)?
691 .to_utc();
692
693 Ok(Self {
694 limit,
695 remaining,
696 reset,
697 })
698 }
699}
700
/// Rate-limit information extracted from a response's headers.
///
/// Each field is `None` when the corresponding header(s) are missing or cannot be parsed.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Parsed `retry-after` header, in whole seconds.
    pub retry_after: Option<Duration>,
    /// Limit for the `requests` resource.
    pub requests: Option<RateLimit>,
    /// Limit for the `tokens` resource.
    pub tokens: Option<RateLimit>,
    /// Limit for the `input-tokens` resource.
    pub input_tokens: Option<RateLimit>,
    /// Limit for the `output-tokens` resource.
    pub output_tokens: Option<RateLimit>,
}
710
711impl RateLimitInfo {
712 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
713 // Check if any rate limit headers exist
714 let has_rate_limit_headers = headers
715 .keys()
716 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
717
718 if !has_rate_limit_headers {
719 return Self {
720 retry_after: None,
721 requests: None,
722 tokens: None,
723 input_tokens: None,
724 output_tokens: None,
725 };
726 }
727
728 Self {
729 retry_after: parse_retry_after(headers),
730 requests: RateLimit::from_headers("requests", headers).ok(),
731 tokens: RateLimit::from_headers("tokens", headers).ok(),
732 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
733 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
734 }
735 }
736}
737
738/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
739/// seconds). Note that other services might specify an HTTP date or some other format for this
740/// header. Returns `None` if the header is not present or cannot be parsed.
741pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
742 headers
743 .get("retry-after")
744 .and_then(|v| v.to_str().ok())
745 .and_then(|v| v.parse::<u64>().ok())
746 .map(Duration::from_secs)
747}
748
749fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
750 Ok(headers
751 .get(key)
752 .with_context(|| format!("missing header `{key}`"))?
753 .to_str()?)
754}
755
756pub async fn stream_completion_with_rate_limit_info(
757 client: &dyn HttpClient,
758 api_url: &str,
759 api_key: &str,
760 request: Request,
761 beta_headers: Option<String>,
762) -> Result<
763 (
764 BoxStream<'static, Result<Event, AnthropicError>>,
765 Option<RateLimitInfo>,
766 ),
767 AnthropicError,
768> {
769 let request = StreamingRequest {
770 base: request,
771 stream: true,
772 };
773
774 let (response, rate_limits) =
775 send_request(client, api_url, api_key, &request, beta_headers).await?;
776
777 if response.status().is_success() {
778 let reader = BufReader::new(response.into_body());
779 let stream = reader
780 .lines()
781 .filter_map(|line| async move {
782 match line {
783 Ok(line) => {
784 let line = line
785 .strip_prefix("data: ")
786 .or_else(|| line.strip_prefix("data:"))?;
787
788 match serde_json::from_str(line) {
789 Ok(response) => Some(Ok(response)),
790 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
791 }
792 }
793 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
794 }
795 })
796 .boxed();
797 Ok((stream, Some(rate_limits)))
798 } else {
799 Err(handle_error_response(response, rate_limits).await)
800 }
801}
802
/// The kind of cache control; serialized in lowercase (`"ephemeral"`).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
808
/// Cache-control marker that can be attached to request content blocks.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
814
/// A single message in a request: the author's role and the message's content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
820
/// The author of a message; serialized in lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
827
828#[derive(Debug, Serialize, Deserialize)]
829#[serde(tag = "type")]
830pub enum RequestContent {
831 #[serde(rename = "text")]
832 Text {
833 text: String,
834 #[serde(skip_serializing_if = "Option::is_none")]
835 cache_control: Option<CacheControl>,
836 },
837 #[serde(rename = "thinking")]
838 Thinking {
839 thinking: String,
840 signature: String,
841 #[serde(skip_serializing_if = "Option::is_none")]
842 cache_control: Option<CacheControl>,
843 },
844 #[serde(rename = "redacted_thinking")]
845 RedactedThinking { data: String },
846 #[serde(rename = "image")]
847 Image {
848 source: ImageSource,
849 #[serde(skip_serializing_if = "Option::is_none")]
850 cache_control: Option<CacheControl>,
851 },
852 #[serde(rename = "tool_use")]
853 ToolUse {
854 id: String,
855 name: String,
856 input: serde_json::Value,
857 #[serde(skip_serializing_if = "Option::is_none")]
858 cache_control: Option<CacheControl>,
859 },
860 #[serde(rename = "tool_result")]
861 ToolResult {
862 tool_use_id: String,
863 is_error: bool,
864 content: ToolResultContent,
865 #[serde(skip_serializing_if = "Option::is_none")]
866 cache_control: Option<CacheControl>,
867 },
868}
869
/// The content of a tool result: either a bare string or a list of parts.
///
/// Untagged, so it serializes as a JSON string or a JSON array with no wrapper.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
876
/// One part of a multipart tool result, tagged by a lowercase `type` field (`text` / `image`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
883
884#[derive(Debug, Serialize, Deserialize)]
885#[serde(tag = "type")]
886pub enum ResponseContent {
887 #[serde(rename = "text")]
888 Text { text: String },
889 #[serde(rename = "thinking")]
890 Thinking { thinking: String },
891 #[serde(rename = "redacted_thinking")]
892 RedactedThinking { data: String },
893 #[serde(rename = "tool_use")]
894 ToolUse {
895 id: String,
896 name: String,
897 input: serde_json::Value,
898 },
899}
900
/// The source of an image content block.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Serialized under the JSON key `type`.
    // NOTE(review): presumably an encoding such as "base64" — confirm against callers.
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
908
/// Returns `true` when `value` is `false`.
///
/// Takes a reference because it is used as a serde `skip_serializing_if` predicate.
fn is_false(value: &bool) -> bool {
    let flag = *value;
    !flag
}
912
/// A tool definition included in a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// Schema describing the tool's input, as arbitrary JSON.
    pub input_schema: serde_json::Value,
    /// Defaults to `false` when deserializing and is omitted from the output when `false`.
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
921
/// Tool-choice setting for a request, tagged by a lowercase `type` field (`auto`, `any`,
/// `tool`, `none`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Selects the tool with the given name.
    Tool { name: String },
    None,
}
930
/// Thinking configuration for a request, tagged by a lowercase `type` field (`enabled` /
/// `adaptive`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Thinking enabled with an optional token budget.
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}
937
/// Effort level used in `OutputConfig`.
///
/// Uses snake_case names (`low`, `medium`, `high`, `max`) both for serde and for string
/// parsing via strum's `EnumString`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
947
/// Output configuration for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    /// Optional effort level.
    pub effort: Option<Effort>,
}
952
/// Either a plain string or a list of content blocks; used for the request's `system` field.
///
/// Untagged, so it serializes as a JSON string or a JSON array with no wrapper.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
959
/// The request body sent to the `/v1/messages` endpoint.
///
/// Every optional or list field defaults when missing during deserialization, and is omitted
/// from the serialized output when `None` or empty.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Identifier of the model to use.
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a plain string or as content blocks.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
988
/// Speed setting for a request; serialized in snake_case (`standard` / `fast`).
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    /// The `Default` value.
    #[default]
    Standard,
    Fast,
}
996
/// A `Request` with an added `stream` flag.
///
/// The base request's fields are flattened, so they serialize at the same JSON level as
/// `stream`.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
1003
/// Optional metadata attached to a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
1008
/// Token usage counters reported by the API.
///
/// Every counter is optional: missing ones deserialize to `None`, and `None` is omitted when
/// serializing.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
1020
/// A complete message returned by the API.
///
/// This is the deserialized body of a non-streaming completion, and also the payload of the
/// `Event::MessageStart` stream event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
1035
1036#[derive(Debug, Serialize, Deserialize)]
1037#[serde(tag = "type")]
1038pub enum Event {
1039 #[serde(rename = "message_start")]
1040 MessageStart { message: Response },
1041 #[serde(rename = "content_block_start")]
1042 ContentBlockStart {
1043 index: usize,
1044 content_block: ResponseContent,
1045 },
1046 #[serde(rename = "content_block_delta")]
1047 ContentBlockDelta { index: usize, delta: ContentDelta },
1048 #[serde(rename = "content_block_stop")]
1049 ContentBlockStop { index: usize },
1050 #[serde(rename = "message_delta")]
1051 MessageDelta { delta: MessageDelta, usage: Usage },
1052 #[serde(rename = "message_stop")]
1053 MessageStop,
1054 #[serde(rename = "ping")]
1055 Ping,
1056 #[serde(rename = "error")]
1057 Error { error: ApiError },
1058}
1059
1060#[derive(Debug, Serialize, Deserialize)]
1061#[serde(tag = "type")]
1062pub enum ContentDelta {
1063 #[serde(rename = "text_delta")]
1064 TextDelta { text: String },
1065 #[serde(rename = "thinking_delta")]
1066 ThinkingDelta { thinking: String },
1067 #[serde(rename = "signature_delta")]
1068 SignatureDelta { signature: String },
1069 #[serde(rename = "input_json_delta")]
1070 InputJsonDelta { partial_json: String },
1071}
1072
/// Payload of the `delta` field of [`Event::MessageDelta`].
///
/// Neither field has `skip_serializing_if`, so `None` values are serialized as `null`.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    /// Optional stop reason.
    pub stop_reason: Option<String>,
    /// Optional stop sequence.
    pub stop_sequence: Option<String>,
}
1078
/// Errors returned by the client functions in this module (for example [`count_tokens`]).
///
/// Each variant corresponds to one stage of a request/response round trip. Only `Debug` is
/// derived on this declaration.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        /// Status code of the failed response.
        status_code: StatusCode,
        /// Message associated with the failed response.
        message: String,
    },

    /// Rate limit exceeded
    RateLimit {
        /// NOTE(review): presumably how long to wait before retrying; confirm at the construction site.
        retry_after: Duration,
    },

    /// Server overloaded
    ServerOverloaded {
        /// Optional counterpart of `RateLimit::retry_after`; may be absent.
        retry_after: Option<Duration>,
    },

    /// API returned an error response
    ApiError(ApiError),
}
1111
/// A structured error object with a `type` and a `message`.
///
/// Its `Display` output is `Anthropic API Error: {error_type}: {message}`. It appears as the
/// payload of [`Event::Error`] and of [`AnthropicError::ApiError`].
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Value of the JSON `type` key (renamed because `type` is a Rust keyword), e.g.
    /// `invalid_request_error`.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable error message.
    pub message: String,
}
1119
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// The `EnumString` derive with `serialize_all = "snake_case"` provides a `FromStr`
/// implementation that parses the snake_case spelling of each variant, e.g.
/// `"invalid_request_error"` parses to [`ApiErrorCode::InvalidRequestError`].
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
1142
1143impl ApiError {
1144 pub fn code(&self) -> Option<ApiErrorCode> {
1145 ApiErrorCode::from_str(&self.error_type).ok()
1146 }
1147
1148 pub fn is_rate_limit_error(&self) -> bool {
1149 matches!(self.error_type.as_str(), "rate_limit_error")
1150 }
1151
1152 pub fn match_window_exceeded(&self) -> Option<u64> {
1153 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1154 return None;
1155 };
1156
1157 parse_prompt_too_long(&self.message)
1158 }
1159}
1160
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens...`.
///
/// Returns `None` when the message does not start with the expected prefix, when the
/// ` tokens` marker is missing, or when the text between them is not a valid `u64`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    // Only the text before the first " tokens" is the count; anything after it is ignored.
    let (count, _rest) = after_prefix.split_once(" tokens")?;
    count.parse().ok()
}
1169
1170/// Request body for the token counting API.
1171/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
1172#[derive(Debug, Serialize)]
1173pub struct CountTokensRequest {
1174 pub model: String,
1175 pub messages: Vec<Message>,
1176 #[serde(default, skip_serializing_if = "Option::is_none")]
1177 pub system: Option<StringOrContents>,
1178 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1179 pub tools: Vec<Tool>,
1180 #[serde(default, skip_serializing_if = "Option::is_none")]
1181 pub thinking: Option<Thinking>,
1182 #[serde(default, skip_serializing_if = "Option::is_none")]
1183 pub tool_choice: Option<ToolChoice>,
1184}
1185
/// Response from the token counting API.
///
/// Deserialize-only; [`count_tokens`] parses the successful response body into this type.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Value of the required `input_tokens` key in the response body.
    pub input_tokens: u64,
}
1191
1192/// Count the number of tokens in a message without creating it.
1193pub async fn count_tokens(
1194 client: &dyn HttpClient,
1195 api_url: &str,
1196 api_key: &str,
1197 request: CountTokensRequest,
1198) -> Result<CountTokensResponse, AnthropicError> {
1199 let uri = format!("{api_url}/v1/messages/count_tokens");
1200
1201 let request_builder = HttpRequest::builder()
1202 .method(Method::POST)
1203 .uri(uri)
1204 .header("Anthropic-Version", "2023-06-01")
1205 .header("X-Api-Key", api_key.trim())
1206 .header("Content-Type", "application/json");
1207
1208 let serialized_request =
1209 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1210 let http_request = request_builder
1211 .body(AsyncBody::from(serialized_request))
1212 .map_err(AnthropicError::BuildRequestBody)?;
1213
1214 let mut response = client
1215 .send(http_request)
1216 .await
1217 .map_err(AnthropicError::HttpSend)?;
1218
1219 let rate_limits = RateLimitInfo::from_headers(response.headers());
1220
1221 if response.status().is_success() {
1222 let mut body = String::new();
1223 response
1224 .body_mut()
1225 .read_to_string(&mut body)
1226 .await
1227 .map_err(AnthropicError::ReadResponse)?;
1228
1229 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1230 } else {
1231 Err(handle_error_response(response, rate_limits).await)
1232 }
1233}
1234
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Well-formed message with a trailing limit.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Well-formed message with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message but the wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Right shape, but the count is not a number.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(
            error.match_window_exceeded(),
            expected,
            "error_type={error_type:?}, message={message:?}"
        );
    }
}