1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11pub use settings::{AnthropicAvailableModel as AvailableModel, ModelMode};
12use strum::{EnumIter, EnumString};
13use thiserror::Error;
14
15pub mod batches;
16
/// Default base URL for the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";

/// `anthropic-beta` header value opting into fine-grained tool streaming.
pub const FINE_GRAINED_TOOL_STREAMING_BETA_HEADER: &str = "fine-grained-tool-streaming-2025-05-14";
/// `anthropic-beta` header value opting into the 1M-token context window.
pub const CONTEXT_1M_BETA_HEADER: &str = "context-1m-2025-08-07";
21
/// Prompt-caching parameters for a model (see [`Model::cache_configuration`]).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Minimum total token count before caching kicks in — TODO confirm exact semantics with callers.
    pub min_total_token: u64,
    pub should_speculate: bool,
    // Upper bound on the number of cache-control anchors placed in a request.
    pub max_cache_anchors: usize,
}
29
/// Whether a model runs normally or in extended-thinking mode.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    #[default]
    Default,
    Thinking {
        /// Optional token budget for the thinking phase; `None` leaves it unset.
        budget_tokens: Option<u32>,
    },
}
39
40impl From<ModelMode> for AnthropicModelMode {
41 fn from(value: ModelMode) -> Self {
42 match value {
43 ModelMode::Default => AnthropicModelMode::Default,
44 ModelMode::Thinking { budget_tokens } => AnthropicModelMode::Thinking { budget_tokens },
45 }
46 }
47}
48
49impl From<AnthropicModelMode> for ModelMode {
50 fn from(value: AnthropicModelMode) -> Self {
51 match value {
52 AnthropicModelMode::Default => ModelMode::Default,
53 AnthropicModelMode::Thinking { budget_tokens } => ModelMode::Thinking { budget_tokens },
54 }
55 }
56}
57
/// The Anthropic models known to this crate, plus a [`Model::Custom`] escape hatch.
///
/// Each built-in variant also accepts its `-latest`-suffixed alias when
/// deserialized.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(rename = "claude-opus-4", alias = "claude-opus-4-latest")]
    ClaudeOpus4,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4Thinking,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest"
    )]
    ClaudeOpus4_6Thinking,
    // `1m-context` variants request the 1M-token context window beta.
    #[serde(
        rename = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest"
    )]
    ClaudeOpus4_6_1mContext,
    #[serde(
        rename = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6_1mContextThinking,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    // The default model for this crate.
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context",
        alias = "claude-sonnet-4-5-1m-context-latest"
    )]
    ClaudeSonnet4_5_1mContext,
    #[serde(
        rename = "claude-sonnet-4-5-1m-context-thinking",
        alias = "claude-sonnet-4-5-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_5_1mContextThinking,
    #[serde(rename = "claude-3-7-sonnet", alias = "claude-3-7-sonnet-latest")]
    Claude3_7Sonnet,
    #[serde(
        rename = "claude-3-7-sonnet-thinking",
        alias = "claude-3-7-sonnet-thinking-latest"
    )]
    Claude3_7SonnetThinking,
    #[serde(rename = "claude-3-5-sonnet", alias = "claude-3-5-sonnet-latest")]
    Claude3_5Sonnet,
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5Thinking,
    #[serde(rename = "claude-3-5-haiku", alias = "claude-3-5-haiku-latest")]
    Claude3_5Haiku,
    #[serde(rename = "claude-3-opus", alias = "claude-3-opus-latest")]
    Claude3Opus,
    #[serde(rename = "claude-3-sonnet", alias = "claude-3-sonnet-latest")]
    Claude3Sonnet,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
166
impl Model {
    /// The default fast/cheap model.
    pub fn default_fast() -> Self {
        Self::Claude3_5Haiku
    }

    /// Resolves a model from an ID by prefix matching.
    ///
    /// Longer, more specific prefixes are checked before their shorter stems
    /// (e.g. `claude-opus-4-1-thinking` before `claude-opus-4-1` before
    /// `claude-opus-4`), so dated IDs such as `claude-opus-4-20250514` still
    /// resolve to the intended variant. Keep that ordering when adding models.
    pub fn from_id(id: &str) -> Result<Self> {
        if id.starts_with("claude-opus-4-6-1m-context-thinking") {
            return Ok(Self::ClaudeOpus4_6_1mContextThinking);
        }

        if id.starts_with("claude-opus-4-6-1m-context") {
            return Ok(Self::ClaudeOpus4_6_1mContext);
        }

        if id.starts_with("claude-opus-4-6-thinking") {
            return Ok(Self::ClaudeOpus4_6Thinking);
        }

        if id.starts_with("claude-opus-4-6") {
            return Ok(Self::ClaudeOpus4_6);
        }

        if id.starts_with("claude-opus-4-5-thinking") {
            return Ok(Self::ClaudeOpus4_5Thinking);
        }

        if id.starts_with("claude-opus-4-5") {
            return Ok(Self::ClaudeOpus4_5);
        }

        if id.starts_with("claude-opus-4-1-thinking") {
            return Ok(Self::ClaudeOpus4_1Thinking);
        }

        if id.starts_with("claude-opus-4-thinking") {
            return Ok(Self::ClaudeOpus4Thinking);
        }

        if id.starts_with("claude-opus-4-1") {
            return Ok(Self::ClaudeOpus4_1);
        }

        if id.starts_with("claude-opus-4") {
            return Ok(Self::ClaudeOpus4);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context-thinking") {
            return Ok(Self::ClaudeSonnet4_5_1mContextThinking);
        }

        if id.starts_with("claude-sonnet-4-5-1m-context") {
            return Ok(Self::ClaudeSonnet4_5_1mContext);
        }

        if id.starts_with("claude-sonnet-4-5-thinking") {
            return Ok(Self::ClaudeSonnet4_5Thinking);
        }

        if id.starts_with("claude-sonnet-4-5") {
            return Ok(Self::ClaudeSonnet4_5);
        }

        if id.starts_with("claude-sonnet-4-thinking") {
            return Ok(Self::ClaudeSonnet4Thinking);
        }

        if id.starts_with("claude-sonnet-4") {
            return Ok(Self::ClaudeSonnet4);
        }

        if id.starts_with("claude-3-7-sonnet-thinking") {
            return Ok(Self::Claude3_7SonnetThinking);
        }

        if id.starts_with("claude-3-7-sonnet") {
            return Ok(Self::Claude3_7Sonnet);
        }

        if id.starts_with("claude-3-5-sonnet") {
            return Ok(Self::Claude3_5Sonnet);
        }

        if id.starts_with("claude-haiku-4-5-thinking") {
            return Ok(Self::ClaudeHaiku4_5Thinking);
        }

        if id.starts_with("claude-haiku-4-5") {
            return Ok(Self::ClaudeHaiku4_5);
        }

        if id.starts_with("claude-3-5-haiku") {
            return Ok(Self::Claude3_5Haiku);
        }

        if id.starts_with("claude-3-opus") {
            return Ok(Self::Claude3Opus);
        }

        if id.starts_with("claude-3-sonnet") {
            return Ok(Self::Claude3Sonnet);
        }

        if id.starts_with("claude-3-haiku") {
            return Ok(Self::Claude3Haiku);
        }

        Err(anyhow!("invalid model ID: {id}"))
    }

    /// The stable identifier for this model.
    ///
    /// NOTE(review): for most variants this is the `-latest` alias form rather
    /// than the bare serde `rename` — confirm that asymmetry is intentional.
    /// See [`Self::request_id`] for the ID sent to the API.
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4Thinking => "claude-opus-4-thinking-latest",
            Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking-latest",
            Self::ClaudeOpus4_6_1mContext => "claude-opus-4-6-1m-context-latest",
            Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6-1m-context-thinking-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking-latest",
            Self::ClaudeSonnet4_5_1mContext => "claude-sonnet-4-5-1m-context-latest",
            Self::ClaudeSonnet4_5_1mContextThinking => {
                "claude-sonnet-4-5-1m-context-thinking-latest"
            }
            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
            Self::Claude3_7Sonnet => "claude-3-7-sonnet-latest",
            Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-thinking-latest",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-thinking-latest",
            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
            Self::Claude3Opus => "claude-3-opus-latest",
            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// The id of the model that should be used for making API requests.
    ///
    /// Thinking and 1M-context variants of a model share the same wire ID;
    /// they differ only in request parameters and beta headers.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4Thinking => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking => "claude-opus-4-6",
            Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => "claude-sonnet-4-20250514",
            Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking => "claude-sonnet-4-5-20250929",
            Self::Claude3_5Sonnet => "claude-3-5-sonnet-latest",
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => "claude-3-7-sonnet-latest",
            Self::ClaudeHaiku4_5 | Self::ClaudeHaiku4_5Thinking => "claude-haiku-4-5-20251001",
            Self::Claude3_5Haiku => "claude-3-5-haiku-latest",
            Self::Claude3Opus => "claude-3-opus-latest",
            Self::Claude3Sonnet => "claude-3-sonnet-20240229",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }

    /// Human-readable name shown in the UI.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4Thinking => "Claude Opus 4 Thinking",
            Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
            Self::ClaudeOpus4_6_1mContext => "Claude Opus 4.6 (1M context)",
            Self::ClaudeOpus4_6_1mContextThinking => "Claude Opus 4.6 Thinking (1M context)",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
            Self::ClaudeSonnet4_5_1mContext => "Claude Sonnet 4.5 (1M context)",
            Self::ClaudeSonnet4_5_1mContextThinking => "Claude Sonnet 4.5 Thinking (1M context)",
            Self::Claude3_7Sonnet => "Claude 3.7 Sonnet",
            Self::Claude3_5Sonnet => "Claude 3.5 Sonnet",
            Self::Claude3_7SonnetThinking => "Claude 3.7 Sonnet Thinking",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::ClaudeHaiku4_5Thinking => "Claude Haiku 4.5 Thinking",
            Self::Claude3_5Haiku => "Claude 3.5 Haiku",
            Self::Claude3Opus => "Claude 3 Opus",
            Self::Claude3Sonnet => "Claude 3 Sonnet",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }

    /// Prompt-caching configuration for this model.
    ///
    /// Returns `None` for models without caching support here (the catch-all
    /// arm covers Claude 3 Opus and Claude 3 Sonnet) and for custom models
    /// that don't supply a configuration.
    pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::Claude3_5Sonnet
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3_5Haiku
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
                min_total_token: 2_048,
                should_speculate: true,
                max_cache_anchors: 4,
            }),
            Self::Custom {
                cache_configuration,
                ..
            } => cache_configuration.clone(),
            _ => None,
        }
    }

    /// Maximum context window in tokens: 1M for the `1mContext` variants,
    /// 200k for the other built-ins, and the configured value for customs.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::Claude3_5Sonnet
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3_5Haiku
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => 200_000,
            Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking => 1_000_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }

    /// Maximum number of output tokens per generation for this model.
    /// Custom models without an override default to 4,096.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::Claude3_5Sonnet | Self::Claude3_5Haiku => 8_192,
            Self::ClaudeOpus4
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_1Thinking => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking => 64_000,
            Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking => 128_000,
            Self::Claude3Opus | Self::Claude3Sonnet | Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }

    /// Default sampling temperature: 1.0 for all built-ins; custom models may
    /// override, falling back to 1.0.
    pub fn default_temperature(&self) -> f32 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::Claude3_5Sonnet
            | Self::Claude3_7Sonnet
            | Self::Claude3_7SonnetThinking
            | Self::ClaudeHaiku4_5
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3_5Haiku
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => 1.0,
            Self::Custom {
                default_temperature,
                ..
            } => default_temperature.unwrap_or(1.0),
        }
    }

    /// The [`AnthropicModelMode`] for this model. `*Thinking` variants use a
    /// fixed 4,096-token thinking budget; custom models use their configured mode.
    pub fn mode(&self) -> AnthropicModelMode {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_5_1mContext
            | Self::Claude3_5Sonnet
            | Self::Claude3_7Sonnet
            | Self::ClaudeHaiku4_5
            | Self::Claude3_5Haiku
            | Self::Claude3Opus
            | Self::Claude3Sonnet
            | Self::Claude3Haiku => AnthropicModelMode::Default,
            Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5Thinking
            | Self::ClaudeSonnet4_5_1mContextThinking
            | Self::ClaudeHaiku4_5Thinking
            | Self::Claude3_7SonnetThinking => AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            Self::Custom { mode, .. } => mode.clone(),
        }
    }

    /// The comma-joined `anthropic-beta` header value for this model, or
    /// `None` when no beta features apply (e.g. Claude 3 generation models,
    /// or custom models with no non-blank extra headers).
    pub fn beta_headers(&self) -> Option<String> {
        let mut headers = vec![];

        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeOpus4_6
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeOpus4Thinking
            | Self::ClaudeOpus4_1Thinking
            | Self::ClaudeOpus4_5Thinking
            | Self::ClaudeOpus4_6Thinking
            | Self::ClaudeSonnet4Thinking
            | Self::ClaudeSonnet4_5Thinking => {
                headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
            }
            // 1M-context variants additionally opt into the context beta.
            Self::ClaudeOpus4_6_1mContext
            | Self::ClaudeOpus4_6_1mContextThinking
            | Self::ClaudeSonnet4_5_1mContext
            | Self::ClaudeSonnet4_5_1mContextThinking => {
                headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
                headers.push(CONTEXT_1M_BETA_HEADER.to_string());
            }
            Self::Claude3_7Sonnet | Self::Claude3_7SonnetThinking => {
                // Try beta token-efficient tool use (supported in Claude 3.7 Sonnet only)
                // https://docs.anthropic.com/en/docs/build-with-claude/tool-use/token-efficient-tool-use
                headers.push("token-efficient-tools-2025-02-19".to_string());
                headers.push(FINE_GRAINED_TOOL_STREAMING_BETA_HEADER.to_string());
            }
            Self::Custom {
                extra_beta_headers, ..
            } => {
                // Blank entries from user settings are dropped.
                headers.extend(
                    extra_beta_headers
                        .iter()
                        .filter(|header| !header.trim().is_empty())
                        .cloned(),
                );
            }
            _ => {}
        }

        if headers.is_empty() {
            None
        } else {
            Some(headers.join(","))
        }
    }

    /// The model ID to use for tool calls: a custom model's `tool_override`
    /// if set, otherwise this model's own [`Self::request_id`].
    pub fn tool_model_id(&self) -> &str {
        if let Self::Custom {
            tool_override: Some(tool_override),
            ..
        } = self
        {
            tool_override
        } else {
            self.request_id()
        }
    }
}
596
597/// Generate completion with streaming.
598pub async fn stream_completion(
599 client: &dyn HttpClient,
600 api_url: &str,
601 api_key: &str,
602 request: Request,
603 beta_headers: Option<String>,
604) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
605 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
606 .await
607 .map(|output| output.0)
608}
609
610/// Generate completion without streaming.
611pub async fn non_streaming_completion(
612 client: &dyn HttpClient,
613 api_url: &str,
614 api_key: &str,
615 request: Request,
616 beta_headers: Option<String>,
617) -> Result<Response, AnthropicError> {
618 let (mut response, rate_limits) =
619 send_request(client, api_url, api_key, &request, beta_headers).await?;
620
621 if response.status().is_success() {
622 let mut body = String::new();
623 response
624 .body_mut()
625 .read_to_string(&mut body)
626 .await
627 .map_err(AnthropicError::ReadResponse)?;
628
629 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
630 } else {
631 Err(handle_error_response(response, rate_limits).await)
632 }
633}
634
635async fn send_request(
636 client: &dyn HttpClient,
637 api_url: &str,
638 api_key: &str,
639 request: impl Serialize,
640 beta_headers: Option<String>,
641) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
642 let uri = format!("{api_url}/v1/messages");
643
644 let mut request_builder = HttpRequest::builder()
645 .method(Method::POST)
646 .uri(uri)
647 .header("Anthropic-Version", "2023-06-01")
648 .header("X-Api-Key", api_key.trim())
649 .header("Content-Type", "application/json");
650
651 if let Some(beta_headers) = beta_headers {
652 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
653 }
654
655 let serialized_request =
656 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
657 let request = request_builder
658 .body(AsyncBody::from(serialized_request))
659 .map_err(AnthropicError::BuildRequestBody)?;
660
661 let response = client
662 .send(request)
663 .await
664 .map_err(AnthropicError::HttpSend)?;
665
666 let rate_limits = RateLimitInfo::from_headers(response.headers());
667
668 Ok((response, rate_limits))
669}
670
671async fn handle_error_response(
672 mut response: http::Response<AsyncBody>,
673 rate_limits: RateLimitInfo,
674) -> AnthropicError {
675 if response.status().as_u16() == 529 {
676 return AnthropicError::ServerOverloaded {
677 retry_after: rate_limits.retry_after,
678 };
679 }
680
681 if let Some(retry_after) = rate_limits.retry_after {
682 return AnthropicError::RateLimit { retry_after };
683 }
684
685 let mut body = String::new();
686 let read_result = response
687 .body_mut()
688 .read_to_string(&mut body)
689 .await
690 .map_err(AnthropicError::ReadResponse);
691
692 if let Err(err) = read_result {
693 return err;
694 }
695
696 match serde_json::from_str::<Event>(&body) {
697 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
698 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
699 status_code: response.status(),
700 message: body,
701 },
702 }
703}
704
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    // Total allowance for this resource.
    pub limit: usize,
    // Remaining allowance before the limit is hit.
    pub remaining: usize,
    // When the allowance resets (parsed from an RFC 3339 timestamp header).
    pub reset: DateTime<Utc>,
}
712
713impl RateLimit {
714 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
715 let limit =
716 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
717 let remaining = get_header(
718 &format!("anthropic-ratelimit-{resource}-remaining"),
719 headers,
720 )?
721 .parse()?;
722 let reset = DateTime::parse_from_rfc3339(get_header(
723 &format!("anthropic-ratelimit-{resource}-reset"),
724 headers,
725 )?)?
726 .to_utc();
727
728 Ok(Self {
729 limit,
730 remaining,
731 reset,
732 })
733 }
734}
735
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    // Parsed `retry-after` header, if present.
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
745
746impl RateLimitInfo {
747 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
748 // Check if any rate limit headers exist
749 let has_rate_limit_headers = headers
750 .keys()
751 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
752
753 if !has_rate_limit_headers {
754 return Self {
755 retry_after: None,
756 requests: None,
757 tokens: None,
758 input_tokens: None,
759 output_tokens: None,
760 };
761 }
762
763 Self {
764 retry_after: parse_retry_after(headers),
765 requests: RateLimit::from_headers("requests", headers).ok(),
766 tokens: RateLimit::from_headers("tokens", headers).ok(),
767 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
768 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
769 }
770 }
771}
772
773/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
774/// seconds). Note that other services might specify an HTTP date or some other format for this
775/// header. Returns `None` if the header is not present or cannot be parsed.
776pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
777 headers
778 .get("retry-after")
779 .and_then(|v| v.to_str().ok())
780 .and_then(|v| v.parse::<u64>().ok())
781 .map(Duration::from_secs)
782}
783
784fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
785 Ok(headers
786 .get(key)
787 .with_context(|| format!("missing header `{key}`"))?
788 .to_str()?)
789}
790
/// Generate completion with streaming, also returning rate-limit information
/// parsed from the response headers.
///
/// The request is sent with `stream: true`; on success the SSE body is turned
/// into a stream of parsed [`Event`]s. On a non-success status the error is
/// translated via `handle_error_response`.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // Only SSE `data: ` lines carry events; blank lines and
                        // `event:` lines are dropped by the `?` here.
                        let line = line.strip_prefix("data: ")?;
                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
834
/// The kind of cache control anchor; currently only `ephemeral` exists.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
840
/// A `cache_control` marker attached to request content blocks.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
846
/// A single conversation turn: a role plus one or more content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
852
/// Who authored a message; serialized lowercase (`user` / `assistant`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
859
/// A content block sent *to* the API, tagged by its `type` field.
/// Most variants accept an optional `cache_control` caching anchor.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    // Thinking content replayed back to the model, with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    // A tool invocation previously emitted by the model.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    // The caller-supplied result for a prior `tool_use` block.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
901
/// A tool result body: either a bare string or a list of typed parts.
/// Untagged, so deserialization tries each shape in order.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
908
/// One part of a multipart tool result (text or image).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
915
/// A content block received *from* the API, tagged by its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    // A tool call the model wants the caller to perform.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
932
/// An image payload for request content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    // Serialized as `type` — presumably e.g. "base64"; confirm against callers.
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
940
/// A tool definition offered to the model; `input_schema` is a JSON Schema value.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    pub input_schema: serde_json::Value,
}
947
/// How the model should choose among tools; tagged lowercase on `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    // Force a specific named tool.
    Tool { name: String },
    None,
}
956
/// The `thinking` request parameter; serialized as `{"type": "enabled", ...}`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
}
962
/// A value that may be a plain string or a list of content blocks
/// (used for the `system` prompt). Untagged for flexible deserialization.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
969
/// The body of a `/v1/messages` request. Optional/empty fields are omitted
/// from the serialized JSON.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    // Wire model ID (see `Model::request_id`).
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
994
/// A [`Request`] with the `stream` flag flattened alongside its fields.
#[derive(Debug, Serialize, Deserialize)]
struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
1001
/// Request metadata; carries an optional caller-assigned user identifier.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
1006
/// Token accounting reported by the API; all fields optional since different
/// events populate different subsets.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
1018
/// A complete (non-streaming) message response from the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
1033
1034#[derive(Debug, Serialize, Deserialize)]
1035#[serde(tag = "type")]
1036pub enum Event {
1037 #[serde(rename = "message_start")]
1038 MessageStart { message: Response },
1039 #[serde(rename = "content_block_start")]
1040 ContentBlockStart {
1041 index: usize,
1042 content_block: ResponseContent,
1043 },
1044 #[serde(rename = "content_block_delta")]
1045 ContentBlockDelta { index: usize, delta: ContentDelta },
1046 #[serde(rename = "content_block_stop")]
1047 ContentBlockStop { index: usize },
1048 #[serde(rename = "message_delta")]
1049 MessageDelta { delta: MessageDelta, usage: Usage },
1050 #[serde(rename = "message_stop")]
1051 MessageStop,
1052 #[serde(rename = "ping")]
1053 Ping,
1054 #[serde(rename = "error")]
1055 Error { error: ApiError },
1056}
1057
1058#[derive(Debug, Serialize, Deserialize)]
1059#[serde(tag = "type")]
1060pub enum ContentDelta {
1061 #[serde(rename = "text_delta")]
1062 TextDelta { text: String },
1063 #[serde(rename = "thinking_delta")]
1064 ThinkingDelta { thinking: String },
1065 #[serde(rename = "signature_delta")]
1066 SignatureDelta { signature: String },
1067 #[serde(rename = "input_json_delta")]
1068 InputJsonDelta { partial_json: String },
1069}
1070
/// Top-level message fields updated by a `message_delta` streaming event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    /// Why generation stopped, if known.
    pub stop_reason: Option<String>,
    /// Which custom stop sequence was hit, if any.
    pub stop_sequence: Option<String>,
}
1076
/// Errors that can occur while talking to the Anthropic API, covering the
/// full request lifecycle: serialization, transport, HTTP status handling,
/// and response deserialization.
///
/// NOTE(review): this enum only derives `Debug` here; no `Display` or
/// `std::error::Error` impl is visible in this chunk — presumably conversion
/// to a user-facing error lives elsewhere in the crate; confirm.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
1109
/// The structured error payload returned in the body of an API error
/// response. See <https://docs.anthropic.com/en/api/errors>.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    /// Machine-readable error code string, e.g. `"invalid_request_error"`;
    /// parse it with [`ApiError::code`]. Serialized as `"type"` on the wire.
    #[serde(rename = "type")]
    pub error_type: String,
    /// Human-readable description of the error.
    pub message: String,
}
1117
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// `strum`'s `serialize_all = "snake_case"` makes `from_str` accept exactly
/// the wire strings shown in the variant docs (e.g. `"rate_limit_error"`).
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
1140
1141impl ApiError {
1142 pub fn code(&self) -> Option<ApiErrorCode> {
1143 ApiErrorCode::from_str(&self.error_type).ok()
1144 }
1145
1146 pub fn is_rate_limit_error(&self) -> bool {
1147 matches!(self.error_type.as_str(), "rate_limit_error")
1148 }
1149
1150 pub fn match_window_exceeded(&self) -> Option<u64> {
1151 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
1152 return None;
1153 };
1154
1155 parse_prompt_too_long(&self.message)
1156 }
1157}
1158
/// Extracts the token count from a "prompt is too long" API error message,
/// e.g. `"prompt is too long: 220000 tokens > 200000"` → `Some(220000)`.
///
/// Returns `None` when the message does not match that shape or the count
/// does not parse as an integer.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    const PREFIX: &str = "prompt is too long: ";
    const COUNT_END: &str = " tokens";

    let remainder = message.strip_prefix(PREFIX)?;
    let count_len = remainder.find(COUNT_END)?;
    remainder[..count_len].parse().ok()
}
1167
1168/// Request body for the token counting API.
1169/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
1170#[derive(Debug, Serialize)]
1171pub struct CountTokensRequest {
1172 pub model: String,
1173 pub messages: Vec<Message>,
1174 #[serde(default, skip_serializing_if = "Option::is_none")]
1175 pub system: Option<StringOrContents>,
1176 #[serde(default, skip_serializing_if = "Vec::is_empty")]
1177 pub tools: Vec<Tool>,
1178 #[serde(default, skip_serializing_if = "Option::is_none")]
1179 pub thinking: Option<Thinking>,
1180 #[serde(default, skip_serializing_if = "Option::is_none")]
1181 pub tool_choice: Option<ToolChoice>,
1182}
1183
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Number of tokens the request's messages/system/tools would consume.
    pub input_tokens: u64,
}
1189
1190/// Count the number of tokens in a message without creating it.
1191pub async fn count_tokens(
1192 client: &dyn HttpClient,
1193 api_url: &str,
1194 api_key: &str,
1195 request: CountTokensRequest,
1196) -> Result<CountTokensResponse, AnthropicError> {
1197 let uri = format!("{api_url}/v1/messages/count_tokens");
1198
1199 let request_builder = HttpRequest::builder()
1200 .method(Method::POST)
1201 .uri(uri)
1202 .header("Anthropic-Version", "2023-06-01")
1203 .header("X-Api-Key", api_key.trim())
1204 .header("Content-Type", "application/json");
1205
1206 let serialized_request =
1207 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1208 let http_request = request_builder
1209 .body(AsyncBody::from(serialized_request))
1210 .map_err(AnthropicError::BuildRequestBody)?;
1211
1212 let mut response = client
1213 .send(http_request)
1214 .await
1215 .map_err(AnthropicError::HttpSend)?;
1216
1217 let rate_limits = RateLimitInfo::from_headers(response.headers());
1218
1219 if response.status().is_success() {
1220 let mut body = String::new();
1221 response
1222 .body_mut()
1223 .read_to_string(&mut body)
1224 .await
1225 .map_err(AnthropicError::ReadResponse)?;
1226
1227 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1228 } else {
1229 Err(handle_error_response(response, rate_limits).await)
1230 }
1231}
1232
#[test]
fn test_match_window_exceeded() {
    // (error_type, message, expected token count)
    let cases: &[(&str, &str, Option<u64>)] = &[
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1_234_953),
        ),
        ("invalid_request_error", "not a prompt length error", None),
        // Wrong error code: the message matches but the type must gate it.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Unparseable count.
        ("invalid_request_error", "prompt is too long: invalid tokens", None),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: (*error_type).to_string(),
            message: (*message).to_string(),
        };
        assert_eq!(
            error.match_window_exceeded(),
            *expected,
            "error_type: {error_type:?}, message: {message:?}"
        );
    }
}