1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
/// Base URL of Anthropic's hosted API (no trailing slash).
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// Beta header value that [`Model::beta_headers`] returns for the 1M-context model variants.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
20
/// Prompt-caching parameters attached to a model (see [`Model::cache_configuration`]).
// NOTE(review): the per-field notes below are inferred from the field names only; confirm them
// against the code that consumes this configuration.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Presumably the minimum total token count before caching is worthwhile -- TODO confirm.
    pub min_total_token: u64,
    // Presumably whether speculative cache warm-up is allowed -- TODO confirm.
    pub should_speculate: bool,
    // Presumably the maximum number of cache anchors per request -- TODO confirm.
    pub max_cache_anchors: usize,
}
28
/// The mode a model runs in: the default mode, or thinking with an optional token budget.
///
/// Converts to and from [`ModelMode`] through the `From` impls in this file.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    // This variant is also the `Default::default()` value.
    #[default]
    Default,
    // Thinking mode; `budget_tokens` may be left unset.
    Thinking {
        budget_tokens: Option<u32>,
    },
}
38
39impl From<ModelMode> for AnthropicModelMode {
40 fn from(value: ModelMode) -> Self {
41 match value {
42 ModelMode::Default => AnthropicModelMode::Default,
43 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
44 }
45 }
46}
47
48impl From<AnthropicModelMode> for ModelMode {
49 fn from(value: AnthropicModelMode) -> Self {
50 match value {
51 AnthropicModelMode::Default => ModelMode::Default,
52 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
53 }
54 }
55}
56
/// The Anthropic models known to this crate, plus a user-configurable [`Model::Custom`] variant.
///
/// Each built-in variant (de)serializes under its `rename` string and additionally accepts the
/// `-latest` alias when deserializing. `ClaudeSonnet4_6` is the `Default` value.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest"
    )]
    ClaudeOpus4_6Thinking,
    #[serde(
        rename = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest"
    )]
    ClaudeOpus4_6_1mContext,
    #[serde(
        rename = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6_1mContextThinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContextThinking,
    #[default]
    #[serde(rename = "claude-sonnet-4-6", alias = "claude-sonnet-4-6-latest")]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest"
    )]
    ClaudeSonnet4_6Thinking,
    #[serde(
        rename = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest"
    )]
    ClaudeSonnet4_6_1mContext,
    #[serde(
        rename = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6_1mContextThinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    #[serde(rename = "custom")]
    Custom {
        // Returned verbatim by `id()` and `request_id()`.
        name: String,
        // Returned by `max_token_count()`.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        // `max_output_tokens()` falls back to 4_096 when this is unset.
        max_output_tokens: Option<u64>,
        // `default_temperature()` falls back to 1.0 when this is unset.
        default_temperature: Option<f32>,
        // Non-blank entries are comma-joined by `beta_headers()`; defaults to empty.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        // Returned by `mode()`; defaults to `AnthropicModelMode::Default`.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
182
183impl Model {
184 pub fn default_fast() -> Self {
185 Self::Claude3_5Haiku
186 }
187
188 pub fn from_id(id: &str) -> Result<Self> {
189 if id.starts_with("claude-opus-4-6-1m-context-thinking") {
190 return Ok(Self::ClaudeOpus4_6_1mContextThinking);
191 }
192
193 if id.starts_with("claude-opus-4-6-1m-context") {
194 return Ok(Self::ClaudeOpus4_6_1mContext);
195 }
196
197 if id.starts_with("claude-opus-4-6-thinking") {
198 return Ok(Self::ClaudeOpus4_6Thinking);
199 }
200
201 if id.starts_with("claude-opus-4-6") {
202 return Ok(Self::ClaudeOpus4_6);
203 }
204
205 if id.starts_with("claude-opus-4-5-thinking") {
206 return Ok(Self::ClaudeOpus4_5Thinking);
207 }
208
209 if id.starts_with("claude-opus-4-5") {
210 return Ok(Self::ClaudeOpus4_5);
211 }
212
213 if id.starts_with("claude-opus-4-1-thinking") {
214 return Ok(Self::ClaudeOpus4_1Thinking);
215 }
216
217 if id.starts_with("claude-opus-4-thinking") {
218 return Ok(Self::ClaudeOpus4Thinking);
219 }
220
221 if id.starts_with("claude-opus-4-1") {
222 return Ok(Self::ClaudeOpus4_1);
223 }
224
225 if id.starts_with("claude-opus-4") {
226 return Ok(Self::ClaudeOpus4);
227 }
228
229 if id.starts_with("claude-sonnet-4-6-1m-context-thinking") {
230 return Ok(Self::ClaudeSonnet4_6_1mContextThinking);
231 }
232
233 if id.starts_with("claude-sonnet-4-6-1m-context") {
234 return Ok(Self::ClaudeSonnet4_6_1mContext);
235 }
236
237 if id.starts_with("claude-sonnet-4-6-thinking") {
238 return Ok(Self::ClaudeSonnet4_6Thinking);
239 }
240
241 if id.starts_with("claude-sonnet-4-6") {
242 return Ok(Self::ClaudeSonnet4_6);
243 }
244
245 if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
246 return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
247 }
248
249 if id.starts_with("claude-sonnet-4-5-1m-context") {
250 return Ok(Self::ClaudeSonnet4_5_1mContext);
251 }
252
253 if id.starts_with("claude-sonnet-4-5-thinking") {
254 return Ok(Self::ClaudeSonnet4_5Thinking);
255 }
256
257 if id.starts_with("claude-sonnet-4-5") {
258 return Ok(Self::ClaudeSonnet4_5);
259 }
260
261 if id.starts_with("claude-sonnet-4-thinking") {
262 return Ok(Self::ClaudeSonnet4Thinking);
263 }
264
265 if id.starts_with("claude-sonnet-4") {
266 return Ok(Self::ClaudeSonnet4);
267 }
268
269 if id.starts_with("claude-3-7-sonnet-thinking") {
270 return Ok(Self::Claude3_7SonnetThinking);
271 }
272
273 if id.starts_with("claude-3-7-sonnet") {
274 return Ok(Self::Claude3_7Sonnet);
275 }
276
277 if id.starts_with("claude-3-5-sonnet") {
278 return Ok(Self::Claude3_5Sonnet);
279 }
280
281 if id.starts_with("claude-haiku-4-5-thinking") {
282 return Ok(Self::ClaudeHaiku4_5Thinking);
283 }
284
285 if id.starts_with("claude-haiku-4-5") {
286 return Ok(Self::ClaudeHaiku4_5);
287 }
288
289 if id.starts_with("claude-3-5-haiku") {
290 return Ok(Self::Claude3_5Haiku);
291 }
292
293 if id.starts_with("claude-3-opus") {
294 return Ok(Self::Claude3Opus);
295 }
296
297 if id.starts_with("claude-3-sonnet") {
298 return Ok(Self::Claude3Sonnet);
299 }
300
301 if id.starts_with("claude-3-haiku") {
302 return Ok(Self::Claude3Haiku);
303 }
304
305 Err(anyhow!("invalid model ID: {id}"))
306 }
307
308 pub fn id(&self) -> &str {
309 match self {
310 Self::ClaudeOpus4 => "claude-opus-4-latest",
311 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
312 Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
313 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
314 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
315 Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
316 Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
317 Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
318 Self::ClaudeOpus4_6_1mContext => "claude-opus-4-6-1m-context-latest",
319 Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6-1m-context-thinking-latest",
320 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
321 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
322 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
323 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
324 Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
325 Self::ClaudeSonnet4_5_1mContextThinking => {
326 "claude-sonnet-4-5-1m-context-thinking-latest"
327 }
328 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
329 Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking-latest",
330 Self::ClaudeSonnet4_6_1mContext => "claude-sonnet-4-6-1m-context-latest",
331 Self::ClaudeSonnet4_6_1mContextThinking => {
332 "claude-sonnet-4-6-1m-context-thinking-latest"
333 }
334 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
335 Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
336 Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
337 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
338 Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
339 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
340 Self::Claude3Opus => "claude-3-opus-latest",
341 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
342 Self::Claude3Haiku => "claude-3-haiku-20240307",
343 Self::Custom { name, .. } => name,
344 }
345 }
346
347 /// The id of the model that should be used for making API requests
348 pub fn request_id(&self) -> &str {
349 match self {
350 Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
351 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
352 Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
353 Self::ClaudeOpus4_6
354 | Self::ClaudeOpus4_6Thinking
355 | Self::ClaudeOpus4_6_1mContext
356 | Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6",
357 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
358 Self::ClaudeSonnet4_5
359 | Self::ClaudeSonnet4_5Thinking
360 | Self::ClaudeSonnet4_5_1mContext
361 | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
362 Self::ClaudeSonnet4_6
363 | Self::ClaudeSonnet4_6Thinking
364 | Self::ClaudeSonnet4_6_1mContext
365 | Self::ClaudeSonnet4_6_1mContextThinking => "claude-sonnet-4-6",
366 Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
367 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
368 Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
369 Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
370 Self::Claude3Opus => "claude-3-opus-latest",
371 Self::Claude3Sonnet => "claude-3-sonnet-20240229",
372 Self::Claude3Haiku => "claude-3-haiku-20240307",
373 Self::Custom { name, .. } => name,
374 }
375 }
376
377 pub fn display_name(&self) -> &str {
378 match self {
379 Self::ClaudeOpus4 => "Claude Opus 4",
380 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
381 Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
382 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
383 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
384 Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
385 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
386 Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
387 Self::ClaudeOpus4_6_1mContext => "Claude Opus 4.6 (1M context)",
388 Self::ClaudeOpus4_6_1mContextThinking => "Claude Opus 4.6 Thinking (1M context)",
389 Self::ClaudeSonnet4 => "Claude Sonnet 4",
390 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
391 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
392 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
393 Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
394 Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
395 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
396 Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
397 Self::ClaudeSonnet4_6_1mContext => "Claude Sonnet 4.6 (1M context)",
398 Self::ClaudeSonnet4_6_1mContextThinking => "Claude Sonnet 4.6 Thinking (1M context)",
399 Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
400 Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
401 Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
402 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
403 Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
404 Self::Claude3_5Haiku => "Claude 3.5 Haiku",
405 Self::Claude3Opus => "Claude 3 Opus",
406 Self::Claude3Sonnet => "Claude 3 Sonnet",
407 Self::Claude3Haiku => "Claude 3 Haiku",
408 Self::Custom {
409 name, display_name, ..
410 } => display_name.as_ref().unwrap_or(name),
411 }
412 }
413
414 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
415 match self {
416 Self::ClaudeOpus4
417 | Self::ClaudeOpus4_1
418 | Self::ClaudeOpus4Thinking
419 | Self::ClaudeOpus4_1Thinking
420 | Self::ClaudeOpus4_5
421 | Self::ClaudeOpus4_5Thinking
422 | Self::ClaudeOpus4_6
423 | Self::ClaudeOpus4_6Thinking
424 | Self::ClaudeOpus4_6_1mContext
425 | Self::ClaudeOpus4_6_1mContextThinking
426 | Self::ClaudeSonnet4
427 | Self::ClaudeSonnet4Thinking
428 | Self::ClaudeSonnet4_5
429 | Self::ClaudeSonnet4_5Thinking
430 | Self::ClaudeSonnet4_5_1mContext
431 | Self::ClaudeSonnet4_5_1mContextThinking
432 | Self::ClaudeSonnet4_6
433 | Self::ClaudeSonnet4_6Thinking
434 | Self::ClaudeSonnet4_6_1mContext
435 | Self::ClaudeSonnet4_6_1mContextThinking
436 | Self::Claude3_5Sonnet
437 | Self::ClaudeHaiku4_5
438 | Self::ClaudeHaiku4_5Thinking
439 | Self::Claude3_5Haiku
440 | Self::Claude3_7Sonnet
441 | Self::Claude3_7SonnetThinking
442 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
443 min_total_token: 2_048,
444 should_speculate: true,
445 max_cache_anchors: 4,
446 }),
447 Self::Custom {
448 cache_configuration,
449 ..
450 } => cache_configuration.clone(),
451 _ => None,
452 }
453 }
454
455 pub fn max_token_count(&self) -> u64 {
456 match self {
457 Self::ClaudeOpus4
458 | Self::ClaudeOpus4_1
459 | Self::ClaudeOpus4Thinking
460 | Self::ClaudeOpus4_1Thinking
461 | Self::ClaudeOpus4_5
462 | Self::ClaudeOpus4_5Thinking
463 | Self::ClaudeOpus4_6
464 | Self::ClaudeOpus4_6Thinking
465 | Self::ClaudeSonnet4
466 | Self::ClaudeSonnet4Thinking
467 | Self::ClaudeSonnet4_5
468 | Self::ClaudeSonnet4_5Thinking
469 | Self::ClaudeSonnet4_6
470 | Self::ClaudeSonnet4_6Thinking
471 | Self::Claude3_5Sonnet
472 | Self::ClaudeHaiku4_5
473 | Self::ClaudeHaiku4_5Thinking
474 | Self::Claude3_5Haiku
475 | Self::Claude3_7Sonnet
476 | Self::Claude3_7SonnetThinking
477 | Self::Claude3Opus
478 | Self::Claude3Sonnet
479 | Self::Claude3Haiku => 200_000,
480 Self::ClaudeOpus4_6_1mContext
481 | Self::ClaudeOpus4_6_1mContextThinking
482 | Self::ClaudeSonnet4_5_1mContext
483 | Self::ClaudeSonnet4_5_1mContextThinking
484 | Self::ClaudeSonnet4_6_1mContext
485 | Self::ClaudeSonnet4_6_1mContextThinking => 1_000_000,
486 Self::Custom { max_tokens, .. } => *max_tokens,
487 }
488 }
489
490 pub fn max_output_tokens(&self) -> u64 {
491 match self {
492 Self::Claude3_5Sonnet | Self::Claude3_5Haiku => 8_192,
493 Self::ClaudeOpus4
494 | Self::ClaudeOpus4Thinking
495 | Self::ClaudeOpus4_1
496 | Self::ClaudeOpus4_1Thinking => 32_000,
497 Self::ClaudeOpus4_5
498 | Self::ClaudeOpus4_5Thinking
499 | Self::ClaudeSonnet4
500 | Self::ClaudeSonnet4Thinking
501 | Self::ClaudeSonnet4_5
502 | Self::ClaudeSonnet4_5Thinking
503 | Self::ClaudeSonnet4_5_1mContext
504 | Self::ClaudeSonnet4_5_1mContextThinking
505 | Self::ClaudeSonnet4_6
506 | Self::ClaudeSonnet4_6Thinking
507 | Self::ClaudeSonnet4_6_1mContext
508 | Self::ClaudeSonnet4_6_1mContextThinking
509 | Self::Claude3_7Sonnet
510 | Self::Claude3_7SonnetThinking
511 | Self::ClaudeHaiku4_5
512 | Self::ClaudeHaiku4_5Thinking => 64_000,
513 Self::ClaudeOpus4_6
514 | Self::ClaudeOpus4_6Thinking
515 | Self::ClaudeOpus4_6_1mContext
516 | Self::ClaudeOpus4_6_1mContextThinking => 128_000,
517 Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
518 Self::Custom {
519 max_output_tokens, ..
520 } => max_output_tokens.unwrap_or(4_096),
521 }
522 }
523
524 pub fn default_temperature(&self) -> f32 {
525 match self {
526 Self::ClaudeOpus4
527 | Self::ClaudeOpus4_1
528 | Self::ClaudeOpus4Thinking
529 | Self::ClaudeOpus4_1Thinking
530 | Self::ClaudeOpus4_5
531 | Self::ClaudeOpus4_5Thinking
532 | Self::ClaudeOpus4_6
533 | Self::ClaudeOpus4_6Thinking
534 | Self::ClaudeOpus4_6_1mContext
535 | Self::ClaudeOpus4_6_1mContextThinking
536 | Self::ClaudeSonnet4
537 | Self::ClaudeSonnet4Thinking
538 | Self::ClaudeSonnet4_5
539 | Self::ClaudeSonnet4_5Thinking
540 | Self::ClaudeSonnet4_5_1mContext
541 | Self::ClaudeSonnet4_5_1mContextThinking
542 | Self::ClaudeSonnet4_6
543 | Self::ClaudeSonnet4_6Thinking
544 | Self::ClaudeSonnet4_6_1mContext
545 | Self::ClaudeSonnet4_6_1mContextThinking
546 | Self::Claude3_5Sonnet
547 | Self::Claude3_7Sonnet
548 | Self::Claude3_7SonnetThinking
549 | Self::ClaudeHaiku4_5
550 | Self::ClaudeHaiku4_5Thinking
551 | Self::Claude3_5Haiku
552 | Self::Claude3Opus
553 | Self::Claude3Sonnet
554 | Self::Claude3Haiku => 1.0,
555 Self::Custom {
556 default_temperature,
557 ..
558 } => default_temperature.unwrap_or(1.0),
559 }
560 }
561
562 pub fn mode(&self) -> AnthropicModelMode {
563 match self {
564 Self::ClaudeOpus4
565 | Self::ClaudeOpus4_1
566 | Self::ClaudeOpus4_5
567 | Self::ClaudeOpus4_6
568 | Self::ClaudeOpus4_6_1mContext
569 | Self::ClaudeSonnet4
570 | Self::ClaudeSonnet4_5
571 | Self::ClaudeSonnet4_5_1mContext
572 | Self::ClaudeSonnet4_6
573 | Self::ClaudeSonnet4_6_1mContext
574 | Self::Claude3_5Sonnet
575 | Self::Claude3_7Sonnet
576 | Self::ClaudeHaiku4_5
577 | Self::Claude3_5Haiku
578 | Self::Claude3Opus
579 | Self::Claude3Sonnet
580 | Self::Claude3Haiku => AnthropicModelMode::Default,
581 Self::ClaudeOpus4Thinking
582 | Self::ClaudeOpus4_1Thinking
583 | Self::ClaudeOpus4_5Thinking
584 | Self::ClaudeOpus4_6Thinking
585 | Self::ClaudeOpus4_6_1mContextThinking
586 | Self::ClaudeSonnet4Thinking
587 | Self::ClaudeSonnet4_5Thinking
588 | Self::ClaudeSonnet4_5_1mContextThinking
589 | Self::ClaudeSonnet4_6Thinking
590 | Self::ClaudeSonnet4_6_1mContextThinking
591 | Self::ClaudeHaiku4_5Thinking
592 | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
593 budget_tokens: Some(4_096),
594 },
595 Self::Custom { mode, .. } => mode.clone(),
596 }
597 }
598
599 pub fn beta_headers(&self) -> Option<String> {
600 let mut headers = vec![];
601
602 match self {
603 Self::ClaudeOpus4_6_1mContext
604 | Self::ClaudeOpus4_6_1mContextThinking
605 | Self::ClaudeSonnet4_5_1mContext
606 | Self::ClaudeSonnet4_5_1mContextThinking
607 | Self::ClaudeSonnet4_6_1mContext
608 | Self::ClaudeSonnet4_6_1mContextThinking => {
609 headers.push(CONTEXT_1M_BETA_HEADER.to_string());
610 }
611 Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
612 // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
613 // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
614 headers.push("token-efficient-tools-2025-02-19".to_string());
615 }
616 Self::Custom {
617 extra_beta_headers, ..
618 } => {
619 headers.extend(
620 extra_beta_headers
621 .iter()
622 .filter(|header| !header.trim().is_empty())
623 .cloned(),
624 );
625 }
626 _ => {}
627 }
628
629 if headers.is_empty() {
630 None
631 } else {
632 Some(headers.join(","))
633 }
634 }
635
636 pub fn tool_model_id(&self) -> &str {
637 if let Self::Custom {
638 tool_override: Some(tool_override),
639 ..
640 } = self
641 {
642 tool_override
643 } else {
644 self.request_id()
645 }
646 }
647}
648
649/// Generate completion with streaming.
650pub async fn stream_completion(
651 client: &dyn HttpClient,
652 api_url: &str,
653 api_key: &str,
654 request: Request,
655 beta_headers: Option<String>,
656) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
657 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
658 .await
659 .map(|output| output.0)
660}
661
662/// Generate completion without streaming.
663pub async fn non_streaming_completion(
664 client: &dyn HttpClient,
665 api_url: &str,
666 api_key: &str,
667 request: Request,
668 beta_headers: Option<String>,
669) -> Result<Response, AnthropicError> {
670 let (mut response, rate_limits) =
671 send_request(client, api_url, api_key, &request, beta_headers).await?;
672
673 if response.status().is_success() {
674 let mut body = String::new();
675 response
676 .body_mut()
677 .read_to_string(&mut body)
678 .await
679 .map_err(AnthropicError::ReadResponse)?;
680
681 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
682 } else {
683 Err(handle_error_response(response, rate_limits).await)
684 }
685}
686
687async fn send_request(
688 client: &dyn HttpClient,
689 api_url: &str,
690 api_key: &str,
691 request: impl Serialize,
692 beta_headers: Option<String>,
693) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
694 let uri = format!("{api_url}/v1/messages");
695
696 let mut request_builder = HttpRequest::builder()
697 .method(Method::POST)
698 .uri(uri)
699 .header("Anthropic-Version", "2023-06-01")
700 .header("X-Api-Key", api_key.trim())
701 .header("Content-Type", "application/json");
702
703 if let Some(beta_headers) = beta_headers {
704 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
705 }
706
707 let serialized_request =
708 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
709 let request = request_builder
710 .body(AsyncBody::from(serialized_request))
711 .map_err(AnthropicError::BuildRequestBody)?;
712
713 let response = client
714 .send(request)
715 .await
716 .map_err(AnthropicError::HttpSend)?;
717
718 let rate_limits = RateLimitInfo::from_headers(response.headers());
719
720 Ok((response, rate_limits))
721}
722
723async fn handle_error_response(
724 mut response: http::Response<AsyncBody>,
725 rate_limits: RateLimitInfo,
726) -> AnthropicError {
727 if response.status().as_u16() == 529 {
728 return AnthropicError::ServerOverloaded {
729 retry_after: rate_limits.retry_after,
730 };
731 }
732
733 if let Some(retry_after) = rate_limits.retry_after {
734 return AnthropicError::RateLimit { retry_after };
735 }
736
737 let mut body = String::new();
738 let read_result = response
739 .body_mut()
740 .read_to_string(&mut body)
741 .await
742 .map_err(AnthropicError::ReadResponse);
743
744 if let Err(err) = read_result {
745 return err;
746 }
747
748 match serde_json::from_str::<Event>(&body) {
749 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
750 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
751 status_code: response.status(),
752 message: body,
753 },
754 }
755}
756
/// An individual rate limit.
///
/// Built from the `anthropic-ratelimit-{resource}-limit`, `-remaining` and `-reset` response
/// headers.
#[derive(Debug)]
pub struct RateLimit {
    /// Value of the `-limit` header.
    pub limit: usize,
    /// Value of the `-remaining` header.
    pub remaining: usize,
    /// Value of the `-reset` header, parsed as RFC 3339 and converted to UTC.
    pub reset: DateTime<Utc>,
}
764
765impl RateLimit {
766 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
767 let limit =
768 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
769 let remaining = get_header(
770 &format!("anthropic-ratelimit-{resource}-remaining"),
771 headers,
772 )?
773 .parse()?;
774 let reset = DateTime::parse_from_rfc3339(get_header(
775 &format!("anthropic-ratelimit-{resource}-reset"),
776 headers,
777 )?)?
778 .to_utc();
779
780 Ok(Self {
781 limit,
782 remaining,
783 reset,
784 })
785 }
786}
787
/// Rate-limit information extracted from a response's headers.
///
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// The `retry-after` header as a whole number of seconds, if present and parseable.
    pub retry_after: Option<Duration>,
    /// Limit for the `requests` resource; `None` if its headers are missing or malformed.
    pub requests: Option<RateLimit>,
    /// Limit for the `tokens` resource; `None` if its headers are missing or malformed.
    pub tokens: Option<RateLimit>,
    /// Limit for the `input-tokens` resource; `None` if its headers are missing or malformed.
    pub input_tokens: Option<RateLimit>,
    /// Limit for the `output-tokens` resource; `None` if its headers are missing or malformed.
    pub output_tokens: Option<RateLimit>,
}
797
798impl RateLimitInfo {
799 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
800 // Check if any rate limit headers exist
801 let has_rate_limit_headers = headers
802 .keys()
803 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
804
805 if !has_rate_limit_headers {
806 return Self {
807 retry_after: None,
808 requests: None,
809 tokens: None,
810 input_tokens: None,
811 output_tokens: None,
812 };
813 }
814
815 Self {
816 retry_after: parse_retry_after(headers),
817 requests: RateLimit::from_headers("requests", headers).ok(),
818 tokens: RateLimit::from_headers("tokens", headers).ok(),
819 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
820 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
821 }
822 }
823}
824
825/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
826/// seconds). Note that other services might specify an HTTP date or some other format for this
827/// header. Returns `None` if the header is not present or cannot be parsed.
828pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
829 headers
830 .get("retry-after")
831 .and_then(|v| v.to_str().ok())
832 .and_then(|v| v.parse::<u64>().ok())
833 .map(Duration::from_secs)
834}
835
836fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
837 Ok(headers
838 .get(key)
839 .with_context(|| format!("missing header `{key}`"))?
840 .to_str()?)
841}
842
843pub async fn stream_completion_with_rate_limit_info(
844 client: &dyn HttpClient,
845 api_url: &str,
846 api_key: &str,
847 request: Request,
848 beta_headers: Option<String>,
849) -> Result<
850 (
851 BoxStream<'static, Result<Event, AnthropicError>>,
852 Option<RateLimitInfo>,
853 ),
854 AnthropicError,
855> {
856 let request = StreamingRequest {
857 base: request,
858 stream: true,
859 };
860
861 let (response, rate_limits) =
862 send_request(client, api_url, api_key, &request, beta_headers).await?;
863
864 if response.status().is_success() {
865 let reader = BufReader::new(response.into_body());
866 let stream = reader
867 .lines()
868 .filter_map(|line| async move {
869 match line {
870 Ok(line) => {
871 let line = line.strip_prefix("data: ")?;
872 match serde_json::from_str(line) {
873 Ok(response) => Some(Ok(response)),
874 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
875 }
876 }
877 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
878 }
879 })
880 .boxed();
881 Ok((stream, Some(rate_limits)))
882 } else {
883 Err(handle_error_response(response, rate_limits).await)
884 }
885}
886
/// The kind of cache control applied to a content block; serialized in lowercase
/// (`"ephemeral"`).
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
892
/// Cache-control marker that can be attached to request content blocks.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
898
/// A single message in a request: who it is from and its content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    /// The author of the message.
    pub role: Role,
    /// The content blocks that make up the message.
    pub content: Vec<RequestContent>,
}
904
/// The author of a [`Message`]; serialized in lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
911
/// A content block inside a request [`Message`].
///
/// Internally tagged: the JSON key `type` selects the variant. Where a variant has a
/// `cache_control` field, it is omitted from the serialized form when `None`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A thinking block together with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A redacted thinking block, carried as opaque `data`.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation with its JSON input.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, referring to it by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
953
/// The content of a tool result.
///
/// Untagged: serialized either as a bare JSON string or as an array of [`ToolResultPart`]s.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
960
/// One part of a multipart tool result.
///
/// Internally tagged by the JSON key `type`, with lowercase variant names (`"text"` /
/// `"image"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
967
/// A content block in a response.
///
/// Internally tagged: the JSON key `type` selects the variant.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text { text: String },
    /// A thinking block.
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    /// A redacted thinking block, carried as opaque `data`.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// A tool invocation with its JSON input.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
984
/// The source of an image content block.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    /// Serialized under the JSON key `type`.
    // NOTE(review): presumably "base64" in practice -- confirm against callers.
    #[serde(rename = "type")]
    pub source_type: String,
    /// The media type of the image.
    pub media_type: String,
    /// The image data as a string.
    pub data: String,
}
992
/// A tool definition included in a [`Request`].
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    /// The tool's name.
    pub name: String,
    /// A description of the tool.
    pub description: String,
    /// The schema of the tool's input, as arbitrary JSON.
    pub input_schema: serde_json::Value,
}
999
/// The tool-choice setting of a [`Request`].
///
/// Internally tagged by the JSON key `type`, with lowercase variant names (`"auto"`, `"any"`,
/// `"tool"`, `"none"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Names one specific tool.
    Tool { name: String },
    None,
}
1008
/// The thinking configuration of a [`Request`].
///
/// Internally tagged by the JSON key `type`, with lowercase variant names (`"enabled"` /
/// `"adaptive"`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Thinking enabled, with an optional token budget.
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}
1015
/// Effort level carried by [`OutputConfig`].
///
/// Uses snake_case names both for serde and for the strum-derived string parsing (`"low"`,
/// `"medium"`, `"high"`, `"max"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
1025
/// Output configuration of a [`Request`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    /// The requested effort level, if any.
    pub effort: Option<Effort>,
}
1030
1031#[derive(Debug, Serialize, Deserialize)]
1032#[serde(untagged)]
1033pub enum StringOrContents {
1034 String(String),
1035 Content(Vec<RequestContent>),
1036}
1037
1038#[derive(Debug, Serialize, Deserialize)]
1039pub struct Request {
1040 pub model: String,
1041 pub max_tokens: u64,
1042 pub messages: Vec<Message>,
1043 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1044 pub tools: Vec<Tool>,
1045 #[serde(default, skip_serializing_if = "Option::is_none")]
1046 pub thinking: Option<Thinking>,
1047 #[serde(default, skip_serializing_if = "Option::is_none")]
1048 pub tool_choice: Option<ToolChoice>,
1049 #[serde(default, skip_serializing_if = "Option::is_none")]
1050 pub system: Option<StringOrContents>,
1051 #[serde(default, skip_serializing_if = "Option::is_none")]
1052 pub metadata: Option<Metadata>,
1053 #[serde(default, skip_serializing_if = "Option::is_none")]
1054 pub output_config: Option<OutputConfig>,
1055 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1056 pub stop_sequences: Vec<String>,
1057 #[serde(default, skip_serializing_if = "Option::is_none")]
1058 pub temperature: Option<f32>,
1059 #[serde(default, skip_serializing_if = "Option::is_none")]
1060 pub top_k: Option<u32>,
1061 #[serde(default, skip_serializing_if = "Option::is_none")]
1062 pub top_p: Option<f32>,
1063}
1064
1065#[derive(Debug, Serialize, Deserialize)]
1066struct StreamingRequest {
1067 #[serde(flatten)]
1068 pub base: Request,
1069 pub stream: bool,
1070}
1071
1072#[derive(Debug, Serialize, Deserialize)]
1073pub struct Metadata {
1074 pub user_id: Option<String>,
1075}
1076
1077#[derive(Debug, Serialize, Deserialize, Default)]
1078pub struct Usage {
1079 #[serde(default, skip_serializing_if = "Option::is_none")]
1080 pub input_tokens: Option<u64>,
1081 #[serde(default, skip_serializing_if = "Option::is_none")]
1082 pub output_tokens: Option<u64>,
1083 #[serde(default, skip_serializing_if = "Option::is_none")]
1084 pub cache_creation_input_tokens: Option<u64>,
1085 #[serde(default, skip_serializing_if = "Option::is_none")]
1086 pub cache_read_input_tokens: Option<u64>,
1087}
1088
1089#[derive(Debug, Serialize, Deserialize)]
1090pub struct Response {
1091 pub id: String,
1092 #[serde(rename = "type")]
1093 pub response_type: String,
1094 pub role: Role,
1095 pub content: Vec<ResponseContent>,
1096 pub model: String,
1097 #[serde(default, skip_serializing_if = "Option::is_none")]
1098 pub stop_reason: Option<String>,
1099 #[serde(default, skip_serializing_if = "Option::is_none")]
1100 pub stop_sequence: Option<String>,
1101 pub usage: Usage,
1102}
1103
1104#[derive(Debug, Serialize, Deserialize)]
1105#[serde(tag = "type")]
1106pub enum Event {
1107 #[serde(rename = "message_start")]
1108 MessageStart { message: Response },
1109 #[serde(rename = "content_block_start")]
1110 ContentBlockStart {
1111 index: usize,
1112 content_block: ResponseContent,
1113 },
1114 #[serde(rename = "content_block_delta")]
1115 ContentBlockDelta { index: usize, delta: ContentDelta },
1116 #[serde(rename = "content_block_stop")]
1117 ContentBlockStop { index: usize },
1118 #[serde(rename = "message_delta")]
1119 MessageDelta { delta: MessageDelta, usage: Usage },
1120 #[serde(rename = "message_stop")]
1121 MessageStop,
1122 #[serde(rename = "ping")]
1123 Ping,
1124 #[serde(rename = "error")]
1125 Error { error: ApiError },
1126}
1127
1128#[derive(Debug, Serialize, Deserialize)]
1129#[serde(tag = "type")]
1130pub enum ContentDelta {
1131 #[serde(rename = "text_delta")]
1132 TextDelta { text: String },
1133 #[serde(rename = "thinking_delta")]
1134 ThinkingDelta { thinking: String },
1135 #[serde(rename = "signature_delta")]
1136 SignatureDelta { signature: String },
1137 #[serde(rename = "input_json_delta")]
1138 InputJsonDelta { partial_json: String },
1139}
1140
1141#[derive(Debug, Serialize, Deserialize)]
1142pub struct MessageDelta {
1143 pub stop_reason: Option<String>,
1144 pub stop_sequence: Option<String>,
1145}
1146
1147#[derive(Debug)]
1148pub enum AnthropicError {
1149 /// Failed to serialize the HTTP request body to JSON
1150 SerializeRequest(serde_json::Error),
1151
1152 /// Failed to construct the HTTP request body
1153 BuildRequestBody(http::Error),
1154
1155 /// Failed to send the HTTP request
1156 HttpSend(anyhow::Error),
1157
1158 /// Failed to deserialize the response from JSON
1159 DeserializeResponse(serde_json::Error),
1160
1161 /// Failed to read from response stream
1162 ReadResponse(io::Error),
1163
1164 /// HTTP error response from the API
1165 HttpResponseError {
1166 status_code: StatusCode,
1167 message: String,
1168 },
1169
1170 /// Rate limit exceeded
1171 RateLimit { retry_after: Duration },
1172
1173 /// Server overloaded
1174 ServerOverloaded { retry_after: Option<Duration> },
1175
1176 /// API returned an error response
1177 ApiError(ApiError),
1178}
1179
1180#[derive(Debug, Serialize, Deserialize, Error)]
1181#[error("Anthropic API Error: {error_type}: {message}")]
1182pub struct ApiError {
1183 #[serde(rename = "type")]
1184 pub error_type: String,
1185 pub message: String,
1186}
1187
1188/// An Anthropic API error code.
1189/// <https://docs.anthropic.com/en/api/errors#http-errors>
1190#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
1191#[strum(serialize_all = "snake_case")]
1192pub enum ApiErrorCode {
1193 /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
1194 InvalidRequestError,
1195 /// 401 - `authentication_error`: There's an issue with your API key.
1196 AuthenticationError,
1197 /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
1198 PermissionError,
1199 /// 404 - `not_found_error`: The requested resource was not found.
1200 NotFoundError,
1201 /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
1202 RequestTooLarge,
1203 /// 429 - `rate_limit_error`: Your account has hit a rate limit.
1204 RateLimitError,
1205 /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
1206 ApiError,
1207 /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
1208 OverloadedError,
1209}
1210
1211impl ApiError {
1212 pub fn code(&self) -> Option<ApiErrorCode> {
1213 ApiErrorCode::from_str(&self.error_type).ok()
1214 }
1215
1216 pub fn is_rate_limit_error(&self) -> bool {
1217 matches!(self.error_type.as_str(), "rate_limit_error")
1218 }
1219
1220 pub fn match_window_exceeded(&self) -> Option<u64> {
1221 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1222 return None;
1223 };
1224
1225 parse_prompt_too_long(&self.message)
1226 }
1227}
1228
/// Pulls the token count out of a message of the form
/// `prompt is too long: <N> tokens…`.
///
/// Returns `None` when the prefix is missing, when no `" tokens"` follows it,
/// or when the text in between does not parse as a `u64`.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    // Only the text before the first " tokens" is the count; anything after
    // it (e.g. " > 200000") is ignored.
    let (token_count, _) = after_prefix.split_once(" tokens")?;
    token_count.parse().ok()
}
1237
1238/// Request body for the token counting API.
1239/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
1240#[derive(Debug, Serialize)]
1241pub struct CountTokensRequest {
1242 pub model: String,
1243 pub messages: Vec<Message>,
1244 #[serde(default, skip_serializing_if = "Option::is_none")]
1245 pub system: Option<StringOrContents>,
1246 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1247 pub tools: Vec<Tool>,
1248 #[serde(default, skip_serializing_if = "Option::is_none")]
1249 pub thinking: Option<Thinking>,
1250 #[serde(default, skip_serializing_if = "Option::is_none")]
1251 pub tool_choice: Option<ToolChoice>,
1252}
1253
1254/// Response from the token counting API.
1255#[derive(Debug, Deserialize)]
1256pub struct CountTokensResponse {
1257 pub input_tokens: u64,
1258}
1259
1260/// Count the number of tokens in a message without creating it.
1261pub async fn count_tokens(
1262 client: &dyn HttpClient,
1263 api_url: &str,
1264 api_key: &str,
1265 request: CountTokensRequest,
1266) -> Result<CountTokensResponse, AnthropicError> {
1267 let uri = format!("{api_url}/v1/messages/count_tokens");
1268
1269 let request_builder = HttpRequest::builder()
1270 .method(Method::POST)
1271 .uri(uri)
1272 .header("Anthropic-Version", "2023-06-01")
1273 .header("X-Api-Key", api_key.trim())
1274 .header("Content-Type", "application/json");
1275
1276 let serialized_request =
1277 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1278 let http_request = request_builder
1279 .body(AsyncBody::from(serialized_request))
1280 .map_err(AnthropicError::BuildRequestBody)?;
1281
1282 let mut response = client
1283 .send(http_request)
1284 .await
1285 .map_err(AnthropicError::HttpSend)?;
1286
1287 let rate_limits = RateLimitInfo::from_headers(response.headers());
1288
1289 if response.status().is_success() {
1290 let mut body = String::new();
1291 response
1292 .body_mut()
1293 .read_to_string(&mut body)
1294 .await
1295 .map_err(AnthropicError::ReadResponse)?;
1296
1297 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1298 } else {
1299 Err(handle_error_response(response, rate_limits).await)
1300 }
1301}
1302
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Token count followed by the limit.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Token count with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, but the wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Count is not a number.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}