1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
/// Base URL of the Anthropic API. The request helpers in this file take the base URL as an
/// explicit `api_url` argument and append `/v1/messages` to it.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// `Anthropic-Beta` value that `Model::beta_headers` adds for the models listed there.
pub const FINE_GRAINED_TOOL_STREAMING_BETA_HEADER: &str = "fine-grained-tool-streaming-2025-05-14";
/// `Anthropic-Beta` value that `Model::beta_headers` adds for the `*_1mContext` model variants.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
21
22#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
23#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
24pub struct AnthropicModelCacheConfiguration {
25 pub min_total_token: u64,
26 pub should_speculate: bool,
27 pub max_cache_anchors: usize,
28}
29
30#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
31#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
32pub enum AnthropicModelMode {
33 #[default]
34 Default,
35 Thinking {
36 budget_tokens: Option<u32>,
37 },
38}
39
40impl From<ModelMode> for AnthropicModelMode {
41 fn from(value: ModelMode) -> Self {
42 match value {
43 ModelMode::Default => AnthropicModelMode::Default,
44 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
45 }
46 }
47}
48
49impl From<AnthropicModelMode> for ModelMode {
50 fn from(value: AnthropicModelMode) -> Self {
51 match value {
52 AnthropicModelMode::Default => ModelMode::Default,
53 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
54 }
55 }
56}
57
58#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
59#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
60pub enum Model {
61 #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
62 ClaudeOpus4,
63 #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
64 ClaudeOpus4_1,
65 #[serde(
66 rename = "claude-opus-4-thinking",
67 alias = "claude-opus-4-thinking-latest"
68 )]
69 ClaudeOpus4Thinking,
70 #[serde(
71 rename = "claude-opus-4-1-thinking",
72 alias = "claude-opus-4-1-thinking-latest"
73 )]
74 ClaudeOpus4_1Thinking,
75 #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
76 ClaudeOpus4_5,
77 #[serde(
78 rename = "claude-opus-4-5-thinking",
79 alias = "claude-opus-4-5-thinking-latest"
80 )]
81 ClaudeOpus4_5Thinking,
82 #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
83 ClaudeOpus4_6,
84 #[serde(
85 rename = "claude-opus-4-6-thinking",
86 alias = "claude-opus-4-6-thinking-latest"
87 )]
88 ClaudeOpus4_6Thinking,
89 #[serde(
90 rename = "claude-opus-4-6-1m-context",
91 alias = "claude-opus-4-6-1m-context-latest"
92 )]
93 ClaudeOpus4_6_1mContext,
94 #[serde(
95 rename = "claude-opus-4-6-1m-context-thinking",
96 alias = "claude-opus-4-6-1m-context-thinking-latest"
97 )]
98 ClaudeOpus4_6_1mContextThinking,
99 #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
100 ClaudeSonnet4,
101 #[serde(
102 rename = "claude-sonnet-4-thinking",
103 alias = "claude-sonnet-4-thinking-latest"
104 )]
105 ClaudeSonnet4Thinking,
106 #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
107 ClaudeSonnet4_5,
108 #[serde(
109 rename = "claude-sonnet-4-5-thinking",
110 alias = "claude-sonnet-4-5-thinking-latest"
111 )]
112 ClaudeSonnet4_5Thinking,
113 #[serde(
114 rename = "claude-sonnet-4-5-1m-context",
115 alias = "claude-sonnet-4-5-1m-context-latest"
116 )]
117 ClaudeSonnet4_5_1mContext,
118 #[serde(
119 rename = "claude-sonnet-4-5-1m-context-thinking",
120 alias = "claude-sonnet-4-5-1m-context-thinking-latest"
121 )]
122 ClaudeSonnet4_5_1mContextThinking,
123 #[default]
124 #[serde(rename = "claude-sonnet-4-6", alias = "claude-sonnet-4-6-latest")]
125 ClaudeSonnet4_6,
126 #[serde(
127 rename = "claude-sonnet-4-6-thinking",
128 alias = "claude-sonnet-4-6-thinking-latest"
129 )]
130 ClaudeSonnet4_6Thinking,
131 #[serde(
132 rename = "claude-sonnet-4-6-1m-context",
133 alias = "claude-sonnet-4-6-1m-context-latest"
134 )]
135 ClaudeSonnet4_6_1mContext,
136 #[serde(
137 rename = "claude-sonnet-4-6-1m-context-thinking",
138 alias = "claude-sonnet-4-6-1m-context-thinking-latest"
139 )]
140 ClaudeSonnet4_6_1mContextThinking,
141 #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
142 Claude3_7Sonnet,
143 #[serde(
144 rename = "claude-3-7-sonnet-thinking",
145 alias = "claude-3-7-sonnet-thinking-latest"
146 )]
147 Claude3_7SonnetThinking,
148 #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
149 Claude3_5Sonnet,
150 #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
151 ClaudeHaiku4_5,
152 #[serde(
153 rename = "claude-haiku-4-5-thinking",
154 alias = "claude-haiku-4-5-thinking-latest"
155 )]
156 ClaudeHaiku4_5Thinking,
157 #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
158 Claude3_5Haiku,
159 #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
160 Claude3Opus,
161 #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
162 Claude3Sonnet,
163 #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
164 Claude3Haiku,
165 #[serde(rename = "custom")]
166 Custom {
167 name: String,
168 max_tokens: u64,
169 /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
170 display_name: Option<String>,
171 /// Override this model with a different Anthropic model for tool calls.
172 tool_override: Option<String>,
173 /// Indicates whether this custom model supports caching.
174 cache_configuration: Option<AnthropicModelCacheConfiguration>,
175 max_output_tokens: Option<u64>,
176 default_temperature: Option<f32>,
177 #[serde(default)]
178 extra_beta_headers: Vec<String>,
179 #[serde(default)]
180 mode: AnthropicModelMode,
181 },
182}
183
184impl Model {
185 pub fn default_fast() -> Self {
186 Self::Claude3_5Haiku
187 }
188
189 pub fn from_id(id: &str) -> Result<Self> {
190 if id.starts_with("claude-opus-4-6-1m-context-thinking") {
191 return Ok(Self::ClaudeOpus4_6_1mContextThinking);
192 }
193
194 if id.starts_with("claude-opus-4-6-1m-context") {
195 return Ok(Self::ClaudeOpus4_6_1mContext);
196 }
197
198 if id.starts_with("claude-opus-4-6-thinking") {
199 return Ok(Self::ClaudeOpus4_6Thinking);
200 }
201
202 if id.starts_with("claude-opus-4-6") {
203 return Ok(Self::ClaudeOpus4_6);
204 }
205
206 if id.starts_with("claude-opus-4-5-thinking") {
207 return Ok(Self::ClaudeOpus4_5Thinking);
208 }
209
210 if id.starts_with("claude-opus-4-5") {
211 return Ok(Self::ClaudeOpus4_5);
212 }
213
214 if id.starts_with("claude-opus-4-1-thinking") {
215 return Ok(Self::ClaudeOpus4_1Thinking);
216 }
217
218 if id.starts_with("claude-opus-4-thinking") {
219 return Ok(Self::ClaudeOpus4Thinking);
220 }
221
222 if id.starts_with("claude-opus-4-1") {
223 return Ok(Self::ClaudeOpus4_1);
224 }
225
226 if id.starts_with("claude-opus-4") {
227 return Ok(Self::ClaudeOpus4);
228 }
229
230 if id.starts_with("claude-sonnet-4-6-1m-context-thinking") {
231 return Ok(Self::ClaudeSonnet4_6_1mContextThinking);
232 }
233
234 if id.starts_with("claude-sonnet-4-6-1m-context") {
235 return Ok(Self::ClaudeSonnet4_6_1mContext);
236 }
237
238 if id.starts_with("claude-sonnet-4-6-thinking") {
239 return Ok(Self::ClaudeSonnet4_6Thinking);
240 }
241
242 if id.starts_with("claude-sonnet-4-6") {
243 return Ok(Self::ClaudeSonnet4_6);
244 }
245
246 if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
247 return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
248 }
249
250 if id.starts_with("claude-sonnet-4-5-1m-context") {
251 return Ok(Self::ClaudeSonnet4_5_1mContext);
252 }
253
254 if id.starts_with("claude-sonnet-4-5-thinking") {
255 return Ok(Self::ClaudeSonnet4_5Thinking);
256 }
257
258 if id.starts_with("claude-sonnet-4-5") {
259 return Ok(Self::ClaudeSonnet4_5);
260 }
261
262 if id.starts_with("claude-sonnet-4-thinking") {
263 return Ok(Self::ClaudeSonnet4Thinking);
264 }
265
266 if id.starts_with("claude-sonnet-4") {
267 return Ok(Self::ClaudeSonnet4);
268 }
269
270 if id.starts_with("claude-3-7-sonnet-thinking") {
271 return Ok(Self::Claude3_7SonnetThinking);
272 }
273
274 if id.starts_with("claude-3-7-sonnet") {
275 return Ok(Self::Claude3_7Sonnet);
276 }
277
278 if id.starts_with("claude-3-5-sonnet") {
279 return Ok(Self::Claude3_5Sonnet);
280 }
281
282 if id.starts_with("claude-haiku-4-5-thinking") {
283 return Ok(Self::ClaudeHaiku4_5Thinking);
284 }
285
286 if id.starts_with("claude-haiku-4-5") {
287 return Ok(Self::ClaudeHaiku4_5);
288 }
289
290 if id.starts_with("claude-3-5-haiku") {
291 return Ok(Self::Claude3_5Haiku);
292 }
293
294 if id.starts_with("claude-3-opus") {
295 return Ok(Self::Claude3Opus);
296 }
297
298 if id.starts_with("claude-3-sonnet") {
299 return Ok(Self::Claude3Sonnet);
300 }
301
302 if id.starts_with("claude-3-haiku") {
303 return Ok(Self::Claude3Haiku);
304 }
305
306 Err(anyhow!("invalid model ID: {id}"))
307 }
308
309 pub fn id(&self) -> &str {
310 match self {
311 Self::ClaudeOpus4 => "claude-opus-4-latest",
312 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
313 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
314 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
315 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
316 Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
317 Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
318 Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
319 Self::ClaudeOpus4_6_1mContext => "claude-opus-4-6-1m-context-latest",
320 Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6-1m-context-thinking-latest",
321 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
322 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
323 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
324 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
325 Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
326 Self::ClaudeSonnet4_5_1mContextThinking => {
327 "claude-sonnet-4-5-1m-context-thinking-latest"
328 }
329 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
330 Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking-latest",
331 Self::ClaudeSonnet4_6_1mContext => "claude-sonnet-4-6-1m-context-latest",
332 Self::ClaudeSonnet4_6_1mContextThinking => {
333 "claude-sonnet-4-6-1m-context-thinking-latest"
334 }
335 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
336 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
337 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
338 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
339 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
340 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
341 Self::Claude3Opus => "claude-3-opus-latest",
342 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
343 Self::Claude3Haiku => "claude-3-haiku-20240307",
344 Self::Custom { name, .. } => name,
345 }
346 }
347
348 /// The id of the model that should be used for making API requests
349 pub fn request_id(&self) -> &str {
350 match self {
351 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
352 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
353 Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
354 Self::ClaudeOpus4_6
355 | Self::ClaudeOpus4_6Thinking
356 | Self::ClaudeOpus4_6_1mContext
357 | Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6",
358 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
359 Self::ClaudeSonnet4_5
360 | Self::ClaudeSonnet4_5Thinking
361 | Self::ClaudeSonnet4_5_1mContext
362 | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
363 Self::ClaudeSonnet4_6
364 | Self::ClaudeSonnet4_6Thinking
365 | Self::ClaudeSonnet4_6_1mContext
366 | Self::ClaudeSonnet4_6_1mContextThinking => "claude-sonnet-4-6",
367 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
368 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
369 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
370 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
371 Self::Claude3Opus => "claude-3-opus-latest",
372 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
373 Self::Claude3Haiku => "claude-3-haiku-20240307",
374 Self::Custom { name, .. } => name,
375 }
376 }
377
378 pub fn display_name(&self) -> &str {
379 match self {
380 Self::ClaudeOpus4 => "Claude Opus 4",
381 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
382 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
383 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
384 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
385 Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
386 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
387 Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
388 Self::ClaudeOpus4_6_1mContext => "Claude Opus 4.6 (1M context)",
389 Self::ClaudeOpus4_6_1mContextThinking => "Claude Opus 4.6 Thinking (1M context)",
390 Self::ClaudeSonnet4 => "Claude Sonnet 4",
391 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
392 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
393 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
394 Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
395 Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
396 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
397 Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
398 Self::ClaudeSonnet4_6_1mContext => "Claude Sonnet 4.6 (1M context)",
399 Self::ClaudeSonnet4_6_1mContextThinking => "Claude Sonnet 4.6 Thinking (1M context)",
400 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
401 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
402 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
403 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
404 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
405 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
406 Self::Claude3Opus => "Claude 3 Opus",
407 Self::Claude3Sonnet => "Claude 3 Sonnet",
408 Self::Claude3Haiku => "Claude 3 Haiku",
409 Self::Custom {
410 name, display_name, ..
411 } => display_name.as_ref().unwrap_or(name),
412 }
413 }
414
415 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
416 match self {
417 Self::ClaudeOpus4
418 | Self::ClaudeOpus4_1
419 | Self::ClaudeOpus4Thinking
420 | Self::ClaudeOpus4_1Thinking
421 | Self::ClaudeOpus4_5
422 | Self::ClaudeOpus4_5Thinking
423 | Self::ClaudeOpus4_6
424 | Self::ClaudeOpus4_6Thinking
425 | Self::ClaudeOpus4_6_1mContext
426 | Self::ClaudeOpus4_6_1mContextThinking
427 | Self::ClaudeSonnet4
428 | Self::ClaudeSonnet4Thinking
429 | Self::ClaudeSonnet4_5
430 | Self::ClaudeSonnet4_5Thinking
431 | Self::ClaudeSonnet4_5_1mContext
432 | Self::ClaudeSonnet4_5_1mContextThinking
433 | Self::ClaudeSonnet4_6
434 | Self::ClaudeSonnet4_6Thinking
435 | Self::ClaudeSonnet4_6_1mContext
436 | Self::ClaudeSonnet4_6_1mContextThinking
437 | Self::Claude3_5Sonnet
438 | Self::ClaudeHaiku4_5
439 | Self::ClaudeHaiku4_5Thinking
440 | Self::Claude3_5Haiku
441 | Self::Claude3_7Sonnet
442 | Self::Claude3_7SonnetThinking
443 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
444 min_total_token: 2_048,
445 should_speculate: true,
446 max_cache_anchors: 4,
447 }),
448 Self::Custom {
449 cache_configuration,
450 ..
451 } => cache_configuration.clone(),
452 _ => None,
453 }
454 }
455
456 pub fn max_token_count(&self) -> u64 {
457 match self {
458 Self::ClaudeOpus4
459 | Self::ClaudeOpus4_1
460 | Self::ClaudeOpus4Thinking
461 | Self::ClaudeOpus4_1Thinking
462 | Self::ClaudeOpus4_5
463 | Self::ClaudeOpus4_5Thinking
464 | Self::ClaudeOpus4_6
465 | Self::ClaudeOpus4_6Thinking
466 | Self::ClaudeSonnet4
467 | Self::ClaudeSonnet4Thinking
468 | Self::ClaudeSonnet4_5
469 | Self::ClaudeSonnet4_5Thinking
470 | Self::ClaudeSonnet4_6
471 | Self::ClaudeSonnet4_6Thinking
472 | Self::Claude3_5Sonnet
473 | Self::ClaudeHaiku4_5
474 | Self::ClaudeHaiku4_5Thinking
475 | Self::Claude3_5Haiku
476 | Self::Claude3_7Sonnet
477 | Self::Claude3_7SonnetThinking
478 | Self::Claude3Opus
479 | Self::Claude3Sonnet
480 | Self::Claude3Haiku => 200_000,
481 Self::ClaudeOpus4_6_1mContext
482 | Self::ClaudeOpus4_6_1mContextThinking
483 | Self::ClaudeSonnet4_5_1mContext
484 | Self::ClaudeSonnet4_5_1mContextThinking
485 | Self::ClaudeSonnet4_6_1mContext
486 | Self::ClaudeSonnet4_6_1mContextThinking => 1_000_000,
487 Self::Custom { max_tokens, .. } => *max_tokens,
488 }
489 }
490
491 pub fn max_output_tokens(&self) -> u64 {
492 match self {
493 Self::Claude3_5Sonnet | Self::Claude3_5Haiku => 8_192,
494 Self::ClaudeOpus4
495 | Self::ClaudeOpus4Thinking
496 | Self::ClaudeOpus4_1
497 | Self::ClaudeOpus4_1Thinking => 32_000,
498 Self::ClaudeOpus4_5
499 | Self::ClaudeOpus4_5Thinking
500 | Self::ClaudeSonnet4
501 | Self::ClaudeSonnet4Thinking
502 | Self::ClaudeSonnet4_5
503 | Self::ClaudeSonnet4_5Thinking
504 | Self::ClaudeSonnet4_5_1mContext
505 | Self::ClaudeSonnet4_5_1mContextThinking
506 | Self::ClaudeSonnet4_6
507 | Self::ClaudeSonnet4_6Thinking
508 | Self::ClaudeSonnet4_6_1mContext
509 | Self::ClaudeSonnet4_6_1mContextThinking
510 | Self::Claude3_7Sonnet
511 | Self::Claude3_7SonnetThinking
512 | Self::ClaudeHaiku4_5
513 | Self::ClaudeHaiku4_5Thinking => 64_000,
514 Self::ClaudeOpus4_6
515 | Self::ClaudeOpus4_6Thinking
516 | Self::ClaudeOpus4_6_1mContext
517 | Self::ClaudeOpus4_6_1mContextThinking => 128_000,
518 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
519 Self::Custom {
520 max_output_tokens, ..
521 } => max_output_tokens.unwrap_or(4_096),
522 }
523 }
524
525 pub fn default_temperature(&self) -> f32 {
526 match self {
527 Self::ClaudeOpus4
528 | Self::ClaudeOpus4_1
529 | Self::ClaudeOpus4Thinking
530 | Self::ClaudeOpus4_1Thinking
531 | Self::ClaudeOpus4_5
532 | Self::ClaudeOpus4_5Thinking
533 | Self::ClaudeOpus4_6
534 | Self::ClaudeOpus4_6Thinking
535 | Self::ClaudeOpus4_6_1mContext
536 | Self::ClaudeOpus4_6_1mContextThinking
537 | Self::ClaudeSonnet4
538 | Self::ClaudeSonnet4Thinking
539 | Self::ClaudeSonnet4_5
540 | Self::ClaudeSonnet4_5Thinking
541 | Self::ClaudeSonnet4_5_1mContext
542 | Self::ClaudeSonnet4_5_1mContextThinking
543 | Self::ClaudeSonnet4_6
544 | Self::ClaudeSonnet4_6Thinking
545 | Self::ClaudeSonnet4_6_1mContext
546 | Self::ClaudeSonnet4_6_1mContextThinking
547 | Self::Claude3_5Sonnet
548 | Self::Claude3_7Sonnet
549 | Self::Claude3_7SonnetThinking
550 | Self::ClaudeHaiku4_5
551 | Self::ClaudeHaiku4_5Thinking
552 | Self::Claude3_5Haiku
553 | Self::Claude3Opus
554 | Self::Claude3Sonnet
555 | Self::Claude3Haiku => 1.0,
556 Self::Custom {
557 default_temperature,
558 ..
559 } => default_temperature.unwrap_or(1.0),
560 }
561 }
562
563 pub fn mode(&self) -> AnthropicModelMode {
564 match self {
565 Self::ClaudeOpus4
566 | Self::ClaudeOpus4_1
567 | Self::ClaudeOpus4_5
568 | Self::ClaudeOpus4_6
569 | Self::ClaudeOpus4_6_1mContext
570 | Self::ClaudeSonnet4
571 | Self::ClaudeSonnet4_5
572 | Self::ClaudeSonnet4_5_1mContext
573 | Self::ClaudeSonnet4_6
574 | Self::ClaudeSonnet4_6_1mContext
575 | Self::Claude3_5Sonnet
576 | Self::Claude3_7Sonnet
577 | Self::ClaudeHaiku4_5
578 | Self::Claude3_5Haiku
579 | Self::Claude3Opus
580 | Self::Claude3Sonnet
581 | Self::Claude3Haiku => AnthropicModelMode::Default,
582 Self::ClaudeOpus4Thinking
583 | Self::ClaudeOpus4_1Thinking
584 | Self::ClaudeOpus4_5Thinking
585 | Self::ClaudeOpus4_6Thinking
586 | Self::ClaudeOpus4_6_1mContextThinking
587 | Self::ClaudeSonnet4Thinking
588 | Self::ClaudeSonnet4_5Thinking
589 | Self::ClaudeSonnet4_5_1mContextThinking
590 | Self::ClaudeSonnet4_6Thinking
591 | Self::ClaudeSonnet4_6_1mContextThinking
592 | Self::ClaudeHaiku4_5Thinking
593 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
594 budget_tokens: Some(4_096),
595 },
596 Self::Custom { mode, .. } => mode.clone(),
597 }
598 }
599
600 pub fn beta_headers(&self) -> Option<String> {
601 let mut headers = vec![];
602
603 match self {
604 Self::ClaudeOpus4
605 | Self::ClaudeOpus4_1
606 | Self::ClaudeOpus4_5
607 | Self::ClaudeOpus4_6
608 | Self::ClaudeSonnet4
609 | Self::ClaudeSonnet4_5
610 | Self::ClaudeOpus4Thinking
611 | Self::ClaudeOpus4_1Thinking
612 | Self::ClaudeOpus4_5Thinking
613 | Self::ClaudeOpus4_6Thinking
614 | Self::ClaudeSonnet4Thinking
615 | Self::ClaudeSonnet4_5Thinking => {
616 headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
617 }
618 Self::ClaudeOpus4_6_1mContext
619 | Self::ClaudeOpus4_6_1mContextThinking
620 | Self::ClaudeSonnet4_5_1mContext
621 | Self::ClaudeSonnet4_5_1mContextThinking
622 | Self::ClaudeSonnet4_6_1mContext
623 | Self::ClaudeSonnet4_6_1mContextThinking => {
624 headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
625 headers.push(CONTEXT_1M_BETA_HEADER.to_string());
626 }
627 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
628 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
629 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
630 headers.push("token-efficient-tools-2025-02-19".to_string());
631 headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
632 }
633 Self::Custom {
634 extra_beta_headers, ..
635 } => {
636 headers.extend(
637 extra_beta_headers
638 .iter()
639 .filter(|header| !header.trim().is_empty())
640 .cloned(),
641 );
642 }
643 _ => {}
644 }
645
646 if headers.is_empty() {
647 None
648 } else {
649 Some(headers.join(","))
650 }
651 }
652
653 pub fn tool_model_id(&self) -> &str {
654 if let Self::Custom {
655 tool_override: Some(tool_override),
656 ..
657 } = self
658 {
659 tool_override
660 } else {
661 self.request_id()
662 }
663 }
664}
665
666/// Generate completion with streaming.
667pub async fn stream_completion(
668 client: &dyn HttpClient,
669 api_url: &str,
670 api_key: &str,
671 request: Request,
672 beta_headers: Option<String>,
673) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
674 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
675 .await
676 .map(|output| output.0)
677}
678
679/// Generate completion without streaming.
680pub async fn non_streaming_completion(
681 client: &dyn HttpClient,
682 api_url: &str,
683 api_key: &str,
684 request: Request,
685 beta_headers: Option<String>,
686) -> Result<Response, AnthropicError> {
687 let (mut response, rate_limits) =
688 send_request(client, api_url, api_key, &request, beta_headers).await?;
689
690 if response.status().is_success() {
691 let mut body = String::new();
692 response
693 .body_mut()
694 .read_to_string(&mut body)
695 .await
696 .map_err(AnthropicError::ReadResponse)?;
697
698 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
699 } else {
700 Err(handle_error_response(response, rate_limits).await)
701 }
702}
703
704async fn send_request(
705 client: &dyn HttpClient,
706 api_url: &str,
707 api_key: &str,
708 request: impl Serialize,
709 beta_headers: Option<String>,
710) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
711 let uri = format!("{api_url}/v1/messages");
712
713 let mut request_builder = HttpRequest::builder()
714 .method(Method::POST)
715 .uri(uri)
716 .header("Anthropic-Version", "2023-06-01")
717 .header("X-Api-Key", api_key.trim())
718 .header("Content-Type", "application/json");
719
720 if let Some(beta_headers) = beta_headers {
721 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
722 }
723
724 let serialized_request =
725 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
726 let request = request_builder
727 .body(AsyncBody::from(serialized_request))
728 .map_err(AnthropicError::BuildRequestBody)?;
729
730 let response = client
731 .send(request)
732 .await
733 .map_err(AnthropicError::HttpSend)?;
734
735 let rate_limits = RateLimitInfo::from_headers(response.headers());
736
737 Ok((response, rate_limits))
738}
739
740async fn handle_error_response(
741 mut response: http::Response<AsyncBody>,
742 rate_limits: RateLimitInfo,
743) -> AnthropicError {
744 if response.status().as_u16() == 529 {
745 return AnthropicError::ServerOverloaded {
746 retry_after: rate_limits.retry_after,
747 };
748 }
749
750 if let Some(retry_after) = rate_limits.retry_after {
751 return AnthropicError::RateLimit { retry_after };
752 }
753
754 let mut body = String::new();
755 let read_result = response
756 .body_mut()
757 .read_to_string(&mut body)
758 .await
759 .map_err(AnthropicError::ReadResponse);
760
761 if let Err(err) = read_result {
762 return err;
763 }
764
765 match serde_json::from_str::<Event>(&body) {
766 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
767 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
768 status_code: response.status(),
769 message: body,
770 },
771 }
772}
773
774/// An individual rate limit.
775#[derive(Debug)]
776pub struct RateLimit {
777 pub limit: usize,
778 pub remaining: usize,
779 pub reset: DateTime<Utc>,
780}
781
782impl RateLimit {
783 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
784 let limit =
785 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
786 let remaining = get_header(
787 &format!("anthropic-ratelimit-{resource}-remaining"),
788 headers,
789 )?
790 .parse()?;
791 let reset = DateTime::parse_from_rfc3339(get_header(
792 &format!("anthropic-ratelimit-{resource}-reset"),
793 headers,
794 )?)?
795 .to_utc();
796
797 Ok(Self {
798 limit,
799 remaining,
800 reset,
801 })
802 }
803}
804
805/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
806#[derive(Debug)]
807pub struct RateLimitInfo {
808 pub retry_after: Option<Duration>,
809 pub requests: Option<RateLimit>,
810 pub tokens: Option<RateLimit>,
811 pub input_tokens: Option<RateLimit>,
812 pub output_tokens: Option<RateLimit>,
813}
814
815impl RateLimitInfo {
816 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
817 // Check if any rate limit headers exist
818 let has_rate_limit_headers = headers
819 .keys()
820 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
821
822 if !has_rate_limit_headers {
823 return Self {
824 retry_after: None,
825 requests: None,
826 tokens: None,
827 input_tokens: None,
828 output_tokens: None,
829 };
830 }
831
832 Self {
833 retry_after: parse_retry_after(headers),
834 requests: RateLimit::from_headers("requests", headers).ok(),
835 tokens: RateLimit::from_headers("tokens", headers).ok(),
836 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
837 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
838 }
839 }
840}
841
842/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
843/// seconds). Note that other services might specify an HTTP date or some other format for this
844/// header. Returns `None` if the header is not present or cannot be parsed.
845pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
846 headers
847 .get("retry-after")
848 .and_then(|v| v.to_str().ok())
849 .and_then(|v| v.parse::<u64>().ok())
850 .map(Duration::from_secs)
851}
852
853fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
854 Ok(headers
855 .get(key)
856 .with_context(|| format!("missing header `{key}`"))?
857 .to_str()?)
858}
859
860pub async fn stream_completion_with_rate_limit_info(
861 client: &dyn HttpClient,
862 api_url: &str,
863 api_key: &str,
864 request: Request,
865 beta_headers: Option<String>,
866) -> Result<
867 (
868 BoxStream<'static, Result<Event, AnthropicError>>,
869 Option<RateLimitInfo>,
870 ),
871 AnthropicError,
872> {
873 let request = StreamingRequest {
874 base: request,
875 stream: true,
876 };
877
878 let (response, rate_limits) =
879 send_request(client, api_url, api_key, &request, beta_headers).await?;
880
881 if response.status().is_success() {
882 let reader = BufReader::new(response.into_body());
883 let stream = reader
884 .lines()
885 .filter_map(|line| async move {
886 match line {
887 Ok(line) => {
888 let line = line.strip_prefix("data: ")?;
889 match serde_json::from_str(line) {
890 Ok(response) => Some(Ok(response)),
891 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
892 }
893 }
894 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
895 }
896 })
897 .boxed();
898 Ok((stream, Some(rate_limits)))
899 } else {
900 Err(handle_error_response(response, rate_limits).await)
901 }
902}
903
904#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
905#[serde(rename_all = "lowercase")]
906pub enum CacheControlType {
907 Ephemeral,
908}
909
910#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
911pub struct CacheControl {
912 #[serde(rename = "type")]
913 pub cache_type: CacheControlType,
914}
915
916#[derive(Debug, Serialize, Deserialize)]
917pub struct Message {
918 pub role: Role,
919 pub content: Vec<RequestContent>,
920}
921
922#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
923#[serde(rename_all = "lowercase")]
924pub enum Role {
925 User,
926 Assistant,
927}
928
929#[derive(Debug, Serialize, Deserialize)]
930#[serde(tag = "type")]
931pub enum RequestContent {
932 #[serde(rename = "text")]
933 Text {
934 text: String,
935 #[serde(skip_serializing_if = "Option::is_none")]
936 cache_control: Option<CacheControl>,
937 },
938 #[serde(rename = "thinking")]
939 Thinking {
940 thinking: String,
941 signature: String,
942 #[serde(skip_serializing_if = "Option::is_none")]
943 cache_control: Option<CacheControl>,
944 },
945 #[serde(rename = "redacted_thinking")]
946 RedactedThinking { data: String },
947 #[serde(rename = "image")]
948 Image {
949 source: ImageSource,
950 #[serde(skip_serializing_if = "Option::is_none")]
951 cache_control: Option<CacheControl>,
952 },
953 #[serde(rename = "tool_use")]
954 ToolUse {
955 id: String,
956 name: String,
957 input: serde_json::Value,
958 #[serde(skip_serializing_if = "Option::is_none")]
959 cache_control: Option<CacheControl>,
960 },
961 #[serde(rename = "tool_result")]
962 ToolResult {
963 tool_use_id: String,
964 is_error: bool,
965 content: ToolResultContent,
966 #[serde(skip_serializing_if = "Option::is_none")]
967 cache_control: Option<CacheControl>,
968 },
969}
970
971#[derive(Debug, Serialize, Deserialize)]
972#[serde(untagged)]
973pub enum ToolResultContent {
974 Plain(String),
975 Multipart(Vec<ToolResultPart>),
976}
977
978#[derive(Debug, Serialize, Deserialize)]
979#[serde(tag = "type", rename_all = "lowercase")]
980pub enum ToolResultPart {
981 Text { text: String },
982 Image { source: ImageSource },
983}
984
985#[derive(Debug, Serialize, Deserialize)]
986#[serde(tag = "type")]
987pub enum ResponseContent {
988 #[serde(rename = "text")]
989 Text { text: String },
990 #[serde(rename = "thinking")]
991 Thinking { thinking: String },
992 #[serde(rename = "redacted_thinking")]
993 RedactedThinking { data: String },
994 #[serde(rename = "tool_use")]
995 ToolUse {
996 id: String,
997 name: String,
998 input: serde_json::Value,
999 },
1000}
1001
1002#[derive(Debug, Serialize, Deserialize)]
1003pub struct ImageSource {
1004 #[serde(rename = "type")]
1005 pub source_type: String,
1006 pub media_type: String,
1007 pub data: String,
1008}
1009
/// A tool definition, as listed in [`Request::tools`] and
/// [`CountTokensRequest::tools`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    /// Name of the tool.
    pub name: String,
    /// Free-form description of the tool.
    pub description: String,
    /// Arbitrary JSON value describing the tool's input.
    // NOTE(review): presumably a JSON Schema object — confirm against API docs.
    pub input_schema: serde_json::Value,
}
1016
1017#[derive(Debug, Serialize, Deserialize)]
1018#[serde(tag = "type", rename_all = "lowercase")]
1019pub enum ToolChoice {
1020 Auto,
1021 Any,
1022 Tool { name: String },
1023 None,
1024}
1025
/// Value of the optional `thinking` request field.
///
/// Internally tagged: the JSON `type` field holds the lowercase variant name.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Tagged `"enabled"`. `budget_tokens` has no `skip_serializing_if`, so a
    /// `None` budget is serialized as `null` rather than omitted.
    Enabled { budget_tokens: Option<u32> },
}
1031
/// Either a plain string or a list of content blocks; used for the `system`
/// field of [`Request`] and [`CountTokensRequest`].
///
/// Serialized untagged: `String` is a bare JSON string, `Content` a JSON array.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    /// A single string.
    String(String),
    /// A list of structured content blocks.
    Content(Vec<RequestContent>),
}
1038
/// Request payload for creating a message.
///
/// Every field after `messages` is optional on the wire: `None` values and
/// empty lists are omitted when serializing, and missing keys deserialize to
/// `None` / an empty list.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Identifier of the model to use.
    pub model: String,
    /// Value of the `max_tokens` field (always serialized).
    pub max_tokens: u64,
    /// The conversation messages (always serialized, even when empty).
    pub messages: Vec<Message>,
    /// Tool definitions; omitted from the JSON when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    /// Thinking configuration; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    /// Tool selection; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a string or a list of content blocks; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    /// Request metadata; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    /// Stop sequences; omitted from the JSON when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    /// Sampling temperature; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    /// Top-k sampling parameter; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    /// Top-p sampling parameter; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
1063
/// A [`Request`] extended with a `stream` flag.
///
/// `base` is flattened, so its fields appear at the top level of the JSON
/// object next to `stream` instead of under a nested `base` key.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    /// The underlying request; its fields are inlined into this object.
    #[serde(flatten)]
    pub base: Request,
    /// Value of the `stream` field.
    pub stream: bool,
}
1070
/// Metadata attached to a [`Request`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// Optional user identifier. There is no `skip_serializing_if` here, so
    /// `None` is serialized as `null` rather than omitted.
    pub user_id: Option<String>,
}
1075
/// Token usage counters.
///
/// Every counter is optional: missing keys deserialize to `None`, and `None`
/// values are omitted when serializing. `Usage::default()` has all fields `None`.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    /// Input token count, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    /// Output token count, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    /// Cache-creation input token count, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    /// Cache-read input token count, if reported.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
1087
/// A message object returned by the API; also the payload of
/// [`Event::MessageStart`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    /// Identifier of the message.
    pub id: String,
    /// Object type string; read from and written to the JSON key `type`.
    #[serde(rename = "type")]
    pub response_type: String,
    /// Role associated with the message.
    pub role: Role,
    /// Content blocks of the message.
    pub content: Vec<ResponseContent>,
    /// Model identifier string.
    pub model: String,
    /// Stop reason, if any; omitted from the JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// Stop sequence, if any; omitted from the JSON when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    /// Token usage. The key itself is required when deserializing, although
    /// every counter inside it is optional.
    pub usage: Usage,
}
1102
1103#[derive(Debug, Serialize, Deserialize)]
1104#[serde(tag = "type")]
1105pub enum Event {
1106 #[serde(rename = "message_start")]
1107 MessageStart { message: Response },
1108 #[serde(rename = "content_block_start")]
1109 ContentBlockStart {
1110 index: usize,
1111 content_block: ResponseContent,
1112 },
1113 #[serde(rename = "content_block_delta")]
1114 ContentBlockDelta { index: usize, delta: ContentDelta },
1115 #[serde(rename = "content_block_stop")]
1116 ContentBlockStop { index: usize },
1117 #[serde(rename = "message_delta")]
1118 MessageDelta { delta: MessageDelta, usage: Usage },
1119 #[serde(rename = "message_stop")]
1120 MessageStop,
1121 #[serde(rename = "ping")]
1122 Ping,
1123 #[serde(rename = "error")]
1124 Error { error: ApiError },
1125}
1126
1127#[derive(Debug, Serialize, Deserialize)]
1128#[serde(tag = "type")]
1129pub enum ContentDelta {
1130 #[serde(rename = "text_delta")]
1131 TextDelta { text: String },
1132 #[serde(rename = "thinking_delta")]
1133 ThinkingDelta { thinking: String },
1134 #[serde(rename = "signature_delta")]
1135 SignatureDelta { signature: String },
1136 #[serde(rename = "input_json_delta")]
1137 InputJsonDelta { partial_json: String },
1138}
1139
/// Payload of the `delta` field of [`Event::MessageDelta`].
///
/// Neither field has `skip_serializing_if`, so `None` values are serialized
/// as `null`.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    /// Stop reason, if any.
    pub stop_reason: Option<String>,
    /// Stop sequence, if any.
    pub stop_sequence: Option<String>,
}
1145
/// Errors produced while talking to the API (for example by `count_tokens`).
///
/// The first five variants wrap the underlying error from one stage of a
/// request: serializing the body, building the HTTP request, sending it,
/// reading the response, and decoding the response JSON. The remaining
/// variants describe error responses.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
1178
/// Error object reported by the API: a type string plus a message.
///
/// Its `Display` output is `Anthropic API Error: {error_type}: {message}`.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Error type string; read from and written to the JSON key `type`.
    /// See [`ApiErrorCode`] for the values this crate recognizes.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Message text accompanying the error.
    pub message: String,
}
1186
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from the error `type` string using the snake_case form of each
/// variant name (for example `"rate_limit_error"` parses to `RateLimitError`).
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
1209
1210impl ApiError {
1211 pub fn code(&self) -> Option<ApiErrorCode> {
1212 ApiErrorCode::from_str(&self.error_type).ok()
1213 }
1214
1215 pub fn is_rate_limit_error(&self) -> bool {
1216 matches!(self.error_type.as_str(), "rate_limit_error")
1217 }
1218
1219 pub fn match_window_exceeded(&self) -> Option<u64> {
1220 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1221 return None;
1222 };
1223
1224 parse_prompt_too_long(&self.message)
1225 }
1226}
1227
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens…`.
///
/// The message must start with `prompt is too long: `, and the text between
/// that prefix and the first following ` tokens` must parse as a `u64`.
/// Returns `None` in every other case.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    const PREFIX: &str = "prompt is too long: ";
    const UNIT: &str = " tokens";

    let after_prefix = message.strip_prefix(PREFIX)?;
    let (count, _rest) = after_prefix.split_once(UNIT)?;
    count.parse::<u64>().ok()
}
1236
1237/// Request body for the token counting API.
1238/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
1239#[derive(Debug, Serialize)]
1240pub struct CountTokensRequest {
1241 pub model: String,
1242 pub messages: Vec<Message>,
1243 #[serde(default, skip_serializing_if = "Option::is_none")]
1244 pub system: Option<StringOrContents>,
1245 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1246 pub tools: Vec<Tool>,
1247 #[serde(default, skip_serializing_if = "Option::is_none")]
1248 pub thinking: Option<Thinking>,
1249 #[serde(default, skip_serializing_if = "Option::is_none")]
1250 pub tool_choice: Option<ToolChoice>,
1251}
1252
/// Response from the token counting API.
///
/// Deserialize-only.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Value of the `input_tokens` field in the response body.
    pub input_tokens: u64,
}
1258
1259/// Count the number of tokens in a message without creating it.
1260pub async fn count_tokens(
1261 client: &dyn HttpClient,
1262 api_url: &str,
1263 api_key: &str,
1264 request: CountTokensRequest,
1265) -> Result<CountTokensResponse, AnthropicError> {
1266 let uri = format!("{api_url}/v1/messages/count_tokens");
1267
1268 let request_builder = HttpRequest::builder()
1269 .method(Method::POST)
1270 .uri(uri)
1271 .header("Anthropic-Version", "2023-06-01")
1272 .header("X-Api-Key", api_key.trim())
1273 .header("Content-Type", "application/json");
1274
1275 let serialized_request =
1276 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1277 let http_request = request_builder
1278 .body(AsyncBody::from(serialized_request))
1279 .map_err(AnthropicError::BuildRequestBody)?;
1280
1281 let mut response = client
1282 .send(http_request)
1283 .await
1284 .map_err(AnthropicError::HttpSend)?;
1285
1286 let rate_limits = RateLimitInfo::from_headers(response.headers());
1287
1288 if response.status().is_success() {
1289 let mut body = String::new();
1290 response
1291 .body_mut()
1292 .read_to_string(&mut body)
1293 .await
1294 .map_err(AnthropicError::ReadResponse)?;
1295
1296 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1297 } else {
1298 Err(handle_error_response(response, rate_limits).await)
1299 }
1300}
1301
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Token count followed by the limit.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Token count with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Non-numeric token count.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(
            error.match_window_exceeded(),
            expected,
            "error_type={error_type:?}, message={message:?}"
        );
    }
}