1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11use strum::{EnumIter, EnumString};
12use thiserror::Error;
13
14pub mod batches;
15
/// Base URL of the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
17
/// Prompt-caching parameters for an Anthropic model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    // Minimum total token count before caching is worth attempting.
    pub min_total_token: u64,
    // Whether cache anchors should be placed speculatively.
    pub should_speculate: bool,
    // Maximum number of cache breakpoints that may be set in one request.
    pub max_cache_anchors: usize,
}
25
/// How the model's extended-thinking capability should be used.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No extended thinking.
    #[default]
    Default,
    /// Extended thinking with an optional explicit token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    /// The model decides how much thinking to do on its own.
    AdaptiveThinking,
}
36
/// The set of Anthropic models known to this crate, plus a `Custom` escape hatch.
///
/// The serde aliases accept the `-latest`, `-thinking`, and `-1m-context`
/// spellings of each model ID so settings written with any of those names
/// deserialize to the same variant.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    /// The default model.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
125
126impl Model {
127 pub fn default_fast() -> Self {
128 Self::ClaudeHaiku4_5
129 }
130
131 pub fn from_id(id: &str) -> Result<Self> {
132 if id.starts_with("claude-opus-4-6") {
133 return Ok(Self::ClaudeOpus4_6);
134 }
135
136 if id.starts_with("claude-opus-4-5") {
137 return Ok(Self::ClaudeOpus4_5);
138 }
139
140 if id.starts_with("claude-opus-4-1") {
141 return Ok(Self::ClaudeOpus4_1);
142 }
143
144 if id.starts_with("claude-opus-4") {
145 return Ok(Self::ClaudeOpus4);
146 }
147
148 if id.starts_with("claude-sonnet-4-6") {
149 return Ok(Self::ClaudeSonnet4_6);
150 }
151
152 if id.starts_with("claude-sonnet-4-5") {
153 return Ok(Self::ClaudeSonnet4_5);
154 }
155
156 if id.starts_with("claude-sonnet-4") {
157 return Ok(Self::ClaudeSonnet4);
158 }
159
160 if id.starts_with("claude-haiku-4-5") {
161 return Ok(Self::ClaudeHaiku4_5);
162 }
163
164 if id.starts_with("claude-3-haiku") {
165 return Ok(Self::Claude3Haiku);
166 }
167
168 Err(anyhow!("invalid model ID: {id}"))
169 }
170
171 pub fn id(&self) -> &str {
172 match self {
173 Self::ClaudeOpus4 => "claude-opus-4-latest",
174 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
175 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
176 Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
177 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
178 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
179 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
180 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
181 Self::Claude3Haiku => "claude-3-haiku-20240307",
182 Self::Custom { name, .. } => name,
183 }
184 }
185
186 /// The id of the model that should be used for making API requests
187 pub fn request_id(&self) -> &str {
188 match self {
189 Self::ClaudeOpus4 => "claude-opus-4-20250514",
190 Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
191 Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
192 Self::ClaudeOpus4_6 => "claude-opus-4-6",
193 Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
194 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-20250929",
195 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
196 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
197 Self::Claude3Haiku => "claude-3-haiku-20240307",
198 Self::Custom { name, .. } => name,
199 }
200 }
201
202 pub fn display_name(&self) -> &str {
203 match self {
204 Self::ClaudeOpus4 => "Claude Opus 4",
205 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
206 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
207 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
208 Self::ClaudeSonnet4 => "Claude Sonnet 4",
209 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
210 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
211 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
212 Self::Claude3Haiku => "Claude 3 Haiku",
213 Self::Custom {
214 name, display_name, ..
215 } => display_name.as_ref().unwrap_or(name),
216 }
217 }
218
219 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
220 match self {
221 Self::ClaudeOpus4
222 | Self::ClaudeOpus4_1
223 | Self::ClaudeOpus4_5
224 | Self::ClaudeOpus4_6
225 | Self::ClaudeSonnet4
226 | Self::ClaudeSonnet4_5
227 | Self::ClaudeSonnet4_6
228 | Self::ClaudeHaiku4_5
229 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
230 min_total_token: 2_048,
231 should_speculate: true,
232 max_cache_anchors: 4,
233 }),
234 Self::Custom {
235 cache_configuration,
236 ..
237 } => cache_configuration.clone(),
238 }
239 }
240
241 pub fn max_token_count(&self) -> u64 {
242 match self {
243 Self::ClaudeOpus4
244 | Self::ClaudeOpus4_1
245 | Self::ClaudeOpus4_5
246 | Self::ClaudeSonnet4
247 | Self::ClaudeSonnet4_5
248 | Self::ClaudeHaiku4_5
249 | Self::Claude3Haiku => 200_000,
250 Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6 => 1_000_000,
251 Self::Custom { max_tokens, .. } => *max_tokens,
252 }
253 }
254
255 pub fn max_output_tokens(&self) -> u64 {
256 match self {
257 Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
258 Self::ClaudeOpus4_5
259 | Self::ClaudeSonnet4
260 | Self::ClaudeSonnet4_5
261 | Self::ClaudeSonnet4_6
262 | Self::ClaudeHaiku4_5 => 64_000,
263 Self::ClaudeOpus4_6 => 128_000,
264 Self::Claude3Haiku => 4_096,
265 Self::Custom {
266 max_output_tokens, ..
267 } => max_output_tokens.unwrap_or(4_096),
268 }
269 }
270
271 pub fn default_temperature(&self) -> f32 {
272 match self {
273 Self::ClaudeOpus4
274 | Self::ClaudeOpus4_1
275 | Self::ClaudeOpus4_5
276 | Self::ClaudeOpus4_6
277 | Self::ClaudeSonnet4
278 | Self::ClaudeSonnet4_5
279 | Self::ClaudeSonnet4_6
280 | Self::ClaudeHaiku4_5
281 | Self::Claude3Haiku => 1.0,
282 Self::Custom {
283 default_temperature,
284 ..
285 } => default_temperature.unwrap_or(1.0),
286 }
287 }
288
289 pub fn mode(&self) -> AnthropicModelMode {
290 if self.supports_adaptive_thinking() {
291 AnthropicModelMode::AdaptiveThinking
292 } else if self.supports_thinking() {
293 AnthropicModelMode::Thinking {
294 budget_tokens: Some(4_096),
295 }
296 } else {
297 AnthropicModelMode::Default
298 }
299 }
300
301 pub fn supports_thinking(&self) -> bool {
302 matches!(
303 self,
304 Self::ClaudeOpus4
305 | Self::ClaudeOpus4_1
306 | Self::ClaudeOpus4_5
307 | Self::ClaudeOpus4_6
308 | Self::ClaudeSonnet4
309 | Self::ClaudeSonnet4_5
310 | Self::ClaudeSonnet4_6
311 | Self::ClaudeHaiku4_5
312 )
313 }
314
315 pub fn supports_adaptive_thinking(&self) -> bool {
316 matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
317 }
318
319 pub fn beta_headers(&self) -> Option<String> {
320 let mut headers = vec![];
321
322 match self {
323 Self::Custom {
324 extra_beta_headers, ..
325 } => {
326 headers.extend(
327 extra_beta_headers
328 .iter()
329 .filter(|header| !header.trim().is_empty())
330 .cloned(),
331 );
332 }
333 _ => {}
334 }
335
336 if headers.is_empty() {
337 None
338 } else {
339 Some(headers.join(","))
340 }
341 }
342
343 pub fn tool_model_id(&self) -> &str {
344 if let Self::Custom {
345 tool_override: Some(tool_override),
346 ..
347 } = self
348 {
349 tool_override
350 } else {
351 self.request_id()
352 }
353 }
354}
355
356/// Generate completion with streaming.
357pub async fn stream_completion(
358 client: &dyn HttpClient,
359 api_url: &str,
360 api_key: &str,
361 request: Request,
362 beta_headers: Option<String>,
363) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
364 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
365 .await
366 .map(|output| output.0)
367}
368
369/// Generate completion without streaming.
370pub async fn non_streaming_completion(
371 client: &dyn HttpClient,
372 api_url: &str,
373 api_key: &str,
374 request: Request,
375 beta_headers: Option<String>,
376) -> Result<Response, AnthropicError> {
377 let (mut response, rate_limits) =
378 send_request(client, api_url, api_key, &request, beta_headers).await?;
379
380 if response.status().is_success() {
381 let mut body = String::new();
382 response
383 .body_mut()
384 .read_to_string(&mut body)
385 .await
386 .map_err(AnthropicError::ReadResponse)?;
387
388 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
389 } else {
390 Err(handle_error_response(response, rate_limits).await)
391 }
392}
393
394async fn send_request(
395 client: &dyn HttpClient,
396 api_url: &str,
397 api_key: &str,
398 request: impl Serialize,
399 beta_headers: Option<String>,
400) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
401 let uri = format!("{api_url}/v1/messages");
402
403 let mut request_builder = HttpRequest::builder()
404 .method(Method::POST)
405 .uri(uri)
406 .header("Anthropic-Version", "2023-06-01")
407 .header("X-Api-Key", api_key.trim())
408 .header("Content-Type", "application/json");
409
410 if let Some(beta_headers) = beta_headers {
411 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
412 }
413
414 let serialized_request =
415 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
416 let request = request_builder
417 .body(AsyncBody::from(serialized_request))
418 .map_err(AnthropicError::BuildRequestBody)?;
419
420 let response = client
421 .send(request)
422 .await
423 .map_err(AnthropicError::HttpSend)?;
424
425 let rate_limits = RateLimitInfo::from_headers(response.headers());
426
427 Ok((response, rate_limits))
428}
429
430async fn handle_error_response(
431 mut response: http::Response<AsyncBody>,
432 rate_limits: RateLimitInfo,
433) -> AnthropicError {
434 if response.status().as_u16() == 529 {
435 return AnthropicError::ServerOverloaded {
436 retry_after: rate_limits.retry_after,
437 };
438 }
439
440 if let Some(retry_after) = rate_limits.retry_after {
441 return AnthropicError::RateLimit { retry_after };
442 }
443
444 let mut body = String::new();
445 let read_result = response
446 .body_mut()
447 .read_to_string(&mut body)
448 .await
449 .map_err(AnthropicError::ReadResponse);
450
451 if let Err(err) = read_result {
452 return err;
453 }
454
455 match serde_json::from_str::<Event>(&body) {
456 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
457 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
458 status_code: response.status(),
459 message: body,
460 },
461 }
462}
463
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    // Total quota for this resource in the current window.
    pub limit: usize,
    // Quota remaining in the current window.
    pub remaining: usize,
    // When the window resets (parsed from an RFC 3339 timestamp header).
    pub reset: DateTime<Utc>,
}
471
472impl RateLimit {
473 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
474 let limit =
475 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
476 let remaining = get_header(
477 &format!("anthropic-ratelimit-{resource}-remaining"),
478 headers,
479 )?
480 .parse()?;
481 let reset = DateTime::parse_from_rfc3339(get_header(
482 &format!("anthropic-ratelimit-{resource}-reset"),
483 headers,
484 )?)?
485 .to_utc();
486
487 Ok(Self {
488 limit,
489 remaining,
490 reset,
491 })
492 }
493}
494
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    // Parsed `Retry-After` header, if present.
    pub retry_after: Option<Duration>,
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
504
505impl RateLimitInfo {
506 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
507 // Check if any rate limit headers exist
508 let has_rate_limit_headers = headers
509 .keys()
510 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
511
512 if !has_rate_limit_headers {
513 return Self {
514 retry_after: None,
515 requests: None,
516 tokens: None,
517 input_tokens: None,
518 output_tokens: None,
519 };
520 }
521
522 Self {
523 retry_after: parse_retry_after(headers),
524 requests: RateLimit::from_headers("requests", headers).ok(),
525 tokens: RateLimit::from_headers("tokens", headers).ok(),
526 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
527 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
528 }
529 }
530}
531
532/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
533/// seconds). Note that other services might specify an HTTP date or some other format for this
534/// header. Returns `None` if the header is not present or cannot be parsed.
535pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
536 headers
537 .get("retry-after")
538 .and_then(|v| v.to_str().ok())
539 .and_then(|v| v.parse::<u64>().ok())
540 .map(Duration::from_secs)
541}
542
543fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
544 Ok(headers
545 .get(key)
546 .with_context(|| format!("missing header `{key}`"))?
547 .to_str()?)
548}
549
/// Generate completion with streaming, also returning any rate-limit
/// information reported in the response headers.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Wrap the request so `"stream": true` is serialized alongside the base fields.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        // The body is a server-sent-event stream: each event is a line whose
        // JSON payload follows a `data:` prefix (with or without a space).
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // Lines without a `data:` prefix (e.g. `event:` lines,
                        // blank keep-alives) are silently dropped via `?`.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
596
/// The kind of cache-control breakpoint; serialized as e.g. `"ephemeral"`.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
602
/// Marks a content block as a prompt-caching breakpoint.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
608
/// A single conversation turn sent in a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
614
/// Who authored a message; serialized lowercase (`"user"` / `"assistant"`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
621
/// A content block inside a request message, tagged by `"type"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text content.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A previously returned thinking block, echoed back with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Opaque redacted-thinking payload.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image attachment.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation made by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, keyed by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
663
/// Tool-result payload: either a bare string or a list of typed parts.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
670
/// One part of a multipart tool result (text or image).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
677
/// A content block returned by the API, tagged by `"type"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
694
/// Image payload for requests/results.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    // e.g. "base64" — serialized as the JSON `type` field.
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
702
/// Serde helper used with `skip_serializing_if`: true when the flag is unset.
fn is_false(value: &bool) -> bool {
    !*value
}
706
/// A tool definition advertised to the model.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    // JSON Schema describing the tool's expected input.
    pub input_schema: serde_json::Value,
    // Omitted from the wire format when false.
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
715
/// How the model may choose tools, tagged by lowercase `"type"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    Tool { name: String },
    None,
}
724
/// Request-level extended-thinking setting, tagged by lowercase `"type"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}
731
/// Output effort level; serialized/parsed in snake_case by both serde and strum.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
741
/// Optional output configuration for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
746
/// A field that may be a bare string or a list of content blocks
/// (used for the `system` prompt).
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
753
/// Request body for the Messages API (`/v1/messages`).
/// Optional fields are omitted from the serialized JSON when unset.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
782
/// Requested serving speed; serialized in snake_case.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
790
/// A [`Request`] with the `stream` flag flattened in alongside its fields.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
797
/// Request metadata passed through to the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
802
/// Token usage counters reported by the API; all fields optional.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
814
/// A complete (non-streamed) message response from the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
829
/// A server-sent event in the streaming response, tagged by `"type"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    // Keep-alive event; carries no data.
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}
853
/// An incremental update to a content block in the stream, tagged by `"type"`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    // Partial JSON for a streaming tool-use input.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
866
/// Final message-level fields delivered in a `message_delta` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
872
/// Errors that can occur while talking to the Anthropic API.
// NOTE(review): unlike `ApiError`, this enum does not derive
// `thiserror::Error`/`Display` here — confirm callers only need `Debug`.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
905
/// A structured error payload returned by the Anthropic API.
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    // Machine-readable code, e.g. "invalid_request_error"; see `ApiErrorCode`.
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
913
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from the wire string via strum's snake_case `EnumString`.
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
936
937impl ApiError {
938 pub fn code(&self) -> Option<ApiErrorCode> {
939 ApiErrorCode::from_str(&self.error_type).ok()
940 }
941
942 pub fn is_rate_limit_error(&self) -> bool {
943 matches!(self.error_type.as_str(), "rate_limit_error")
944 }
945
946 pub fn match_window_exceeded(&self) -> Option<u64> {
947 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
948 return None;
949 };
950
951 parse_prompt_too_long(&self.message)
952 }
953}
954
/// Extracts the token count from an API message of the form
/// `"prompt is too long: <N> tokens …"`; `None` if the message has a
/// different shape or the count fails to parse.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    let (count, _suffix) = rest.split_once(" tokens")?;
    count.parse().ok()
}
963
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
// NOTE(review): `#[serde(default)]` has no effect on a Serialize-only struct;
// harmless, but could be dropped.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    pub model: String,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}
979
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    pub input_tokens: u64,
}
985
986/// Count the number of tokens in a message without creating it.
987pub async fn count_tokens(
988 client: &dyn HttpClient,
989 api_url: &str,
990 api_key: &str,
991 request: CountTokensRequest,
992) -> Result<CountTokensResponse, AnthropicError> {
993 let uri = format!("{api_url}/v1/messages/count_tokens");
994
995 let request_builder = HttpRequest::builder()
996 .method(Method::POST)
997 .uri(uri)
998 .header("Anthropic-Version", "2023-06-01")
999 .header("X-Api-Key", api_key.trim())
1000 .header("Content-Type", "application/json");
1001
1002 let serialized_request =
1003 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1004 let http_request = request_builder
1005 .body(AsyncBody::from(serialized_request))
1006 .map_err(AnthropicError::BuildRequestBody)?;
1007
1008 let mut response = client
1009 .send(http_request)
1010 .await
1011 .map_err(AnthropicError::HttpSend)?;
1012
1013 let rate_limits = RateLimitInfo::from_headers(response.headers());
1014
1015 if response.status().is_success() {
1016 let mut body = String::new();
1017 response
1018 .body_mut()
1019 .read_to_string(&mut body)
1020 .await
1021 .map_err(AnthropicError::ReadResponse)?;
1022
1023 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1024 } else {
1025 Err(handle_error_response(response, rate_limits).await)
1026 }
1027}
1028
#[test]
fn test_match_window_exceeded() {
    // Helper: build an ApiError and run the extraction.
    let extract = |error_type: &str, message: &str| {
        ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        }
        .match_window_exceeded()
    };

    // Token count parsed from messages with and without trailing detail.
    assert_eq!(
        extract(
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000"
        ),
        Some(220_000)
    );
    assert_eq!(
        extract("invalid_request_error", "prompt is too long: 1234953 tokens"),
        Some(1234953)
    );

    // Wrong message shape, wrong error code, and unparseable count all yield None.
    assert_eq!(
        extract("invalid_request_error", "not a prompt length error"),
        None
    );
    assert_eq!(
        extract("rate_limit_error", "prompt is too long: 12345 tokens"),
        None
    );
    assert_eq!(
        extract("invalid_request_error", "prompt is too long: invalid tokens"),
        None
    );
}