1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11use strum::{EnumIter, EnumString};
12use thiserror::Error;
13
// Submodules declared here; their contents live outside this file.
14pub mod batches;
15pub mod completion;
16
/// Base URL of the Anthropic API (scheme and host only, no trailing slash).
///
/// The request helpers in this file take the base URL as an `api_url` argument and append
/// paths such as `/v1/messages` to it.
17pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
18
// Cache settings attached to a model; returned by `Model::cache_configuration`.
//
// Every built-in model reports `min_total_token: 2_048`, `should_speculate: true` and
// `max_cache_anchors: 4`; custom models supply their own value (or none).
//
// Plain `//` comments are used on purpose: this type derives `schemars::JsonSchema` behind
// a feature flag, and doc comments would end up in the generated schema.
19#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
20#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
21pub struct AnthropicModelCacheConfiguration {
    // NOTE(review): presumably the minimum prompt size, in tokens, before caching is
    // attempted — the consumer is not in this file, confirm there.
22 pub min_total_token: u64,
    // NOTE(review): semantics are defined by the consumer of this struct — confirm.
23 pub should_speculate: bool,
    // NOTE(review): presumably the maximum number of cache breakpoints per request — confirm.
24 pub max_cache_anchors: usize,
25}
26
// Thinking mode of a model, as reported by `Model::mode` and stored on `Model::Custom`.
//
// Plain `//` comments are used on purpose: this type derives `schemars::JsonSchema` behind
// a feature flag, and doc comments would end up in the generated schema.
27#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
28#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
29pub enum AnthropicModelMode {
    // No thinking; this is the `Default::default()` value.
30 #[default]
31 Default,
    // Thinking with an optional token budget. `Model::mode` reports `Some(4_096)` for
    // built-in models that support thinking but not adaptive thinking.
32 Thinking {
33 budget_tokens: Option<u32>,
34 },
    // Reported by `Model::mode` for models where `supports_adaptive_thinking()` is true.
35 AdaptiveThinking,
36}
37
// The set of models known to this crate, plus a user-defined `Custom` variant.
//
// Each built-in variant (de)serializes under its `rename` string and additionally accepts
// the listed `alias` strings on input (`-latest`, `-thinking`, and for the 4.6 models the
// `-1m-context` spellings), so all of those names map onto the same variant.
// `ClaudeSonnet4_6` is the `Default`.
//
// Plain `//` comments are used for the additions below: this type derives
// `schemars::JsonSchema` behind a feature flag, and doc comments would end up in the
// generated schema. The pre-existing `///` field docs are left untouched.
38#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
39#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
40pub enum Model {
41 #[serde(
42 rename = "claude-opus-4",
43 alias = "claude-opus-4-latest",
44 alias = "claude-opus-4-thinking",
45 alias = "claude-opus-4-thinking-latest"
46 )]
47 ClaudeOpus4,
48 #[serde(
49 rename = "claude-opus-4-1",
50 alias = "claude-opus-4-1-latest",
51 alias = "claude-opus-4-1-thinking",
52 alias = "claude-opus-4-1-thinking-latest"
53 )]
54 ClaudeOpus4_1,
55 #[serde(
56 rename = "claude-opus-4-5",
57 alias = "claude-opus-4-5-latest",
58 alias = "claude-opus-4-5-thinking",
59 alias = "claude-opus-4-5-thinking-latest"
60 )]
61 ClaudeOpus4_5,
62 #[serde(
63 rename = "claude-opus-4-6",
64 alias = "claude-opus-4-6-latest",
65 alias = "claude-opus-4-6-1m-context",
66 alias = "claude-opus-4-6-1m-context-latest",
67 alias = "claude-opus-4-6-thinking",
68 alias = "claude-opus-4-6-thinking-latest",
69 alias = "claude-opus-4-6-1m-context-thinking",
70 alias = "claude-opus-4-6-1m-context-thinking-latest"
71 )]
72 ClaudeOpus4_6,
73 #[serde(
74 rename = "claude-sonnet-4",
75 alias = "claude-sonnet-4-latest",
76 alias = "claude-sonnet-4-thinking",
77 alias = "claude-sonnet-4-thinking-latest"
78 )]
79 ClaudeSonnet4,
80 #[serde(
81 rename = "claude-sonnet-4-5",
82 alias = "claude-sonnet-4-5-latest",
83 alias = "claude-sonnet-4-5-thinking",
84 alias = "claude-sonnet-4-5-thinking-latest"
85 )]
86 ClaudeSonnet4_5,
    // Default model (`Model::default()`).
87 #[default]
88 #[serde(
89 rename = "claude-sonnet-4-6",
90 alias = "claude-sonnet-4-6-latest",
91 alias = "claude-sonnet-4-6-1m-context",
92 alias = "claude-sonnet-4-6-1m-context-latest",
93 alias = "claude-sonnet-4-6-thinking",
94 alias = "claude-sonnet-4-6-thinking-latest",
95 alias = "claude-sonnet-4-6-1m-context-thinking",
96 alias = "claude-sonnet-4-6-1m-context-thinking-latest"
97 )]
98 ClaudeSonnet4_6,
    // Also returned by `Model::default_fast()`.
99 #[serde(
100 rename = "claude-haiku-4-5",
101 alias = "claude-haiku-4-5-latest",
102 alias = "claude-haiku-4-5-thinking",
103 alias = "claude-haiku-4-5-thinking-latest"
104 )]
105 ClaudeHaiku4_5,
    // The only built-in model without `-thinking` aliases; `supports_thinking()` is false
    // for it.
106 #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
107 Claude3Haiku,
    // User-defined model. The accessors on `Model` read these fields directly.
108 #[serde(rename = "custom")]
109 Custom {
    // Returned verbatim by `id()` and `request_id()`, and used as the display name when
    // `display_name` is unset.
110 name: String,
    // Returned by `max_token_count()`.
111 max_tokens: u64,
112 /// The name displayed in the UI, such as in the assistant panel model dropdown menu.
113 display_name: Option<String>,
114 /// Override this model with a different Anthropic model for tool calls.
115 tool_override: Option<String>,
116 /// Indicates whether this custom model supports caching.
117 cache_configuration: Option<AnthropicModelCacheConfiguration>,
    // `max_output_tokens()` falls back to 4_096 when this is unset.
118 max_output_tokens: Option<u64>,
    // `default_temperature()` falls back to 1.0 when this is unset.
119 default_temperature: Option<f32>,
    // Non-blank entries are joined with commas by `beta_headers()`. Defaults to empty when
    // absent from the serialized form.
120 #[serde(default)]
121 extra_beta_headers: Vec<String>,
    // NOTE(review): `Model::mode()`, `supports_thinking()` and
    // `supports_adaptive_thinking()` in this file never read this field, so a custom model
    // always reports `AnthropicModelMode::Default` from `mode()`. Confirm that callers read
    // the field directly if a configured thinking mode is meant to take effect.
122 #[serde(default)]
123 mode: AnthropicModelMode,
124 },
125}
126
127impl Model {
128 pub fn default_fast() -> Self {
129 Self::ClaudeHaiku4_5
130 }
131
132 pub fn from_id(id: &str) -> Result<Self> {
133 if id.starts_with("claude-opus-4-6") {
134 return Ok(Self::ClaudeOpus4_6);
135 }
136
137 if id.starts_with("claude-opus-4-5") {
138 return Ok(Self::ClaudeOpus4_5);
139 }
140
141 if id.starts_with("claude-opus-4-1") {
142 return Ok(Self::ClaudeOpus4_1);
143 }
144
145 if id.starts_with("claude-opus-4") {
146 return Ok(Self::ClaudeOpus4);
147 }
148
149 if id.starts_with("claude-sonnet-4-6") {
150 return Ok(Self::ClaudeSonnet4_6);
151 }
152
153 if id.starts_with("claude-sonnet-4-5") {
154 return Ok(Self::ClaudeSonnet4_5);
155 }
156
157 if id.starts_with("claude-sonnet-4") {
158 return Ok(Self::ClaudeSonnet4);
159 }
160
161 if id.starts_with("claude-haiku-4-5") {
162 return Ok(Self::ClaudeHaiku4_5);
163 }
164
165 if id.starts_with("claude-3-haiku") {
166 return Ok(Self::Claude3Haiku);
167 }
168
169 Err(anyhow!("invalid model ID: {id}"))
170 }
171
172 pub fn id(&self) -> &str {
173 match self {
174 Self::ClaudeOpus4 => "claude-opus-4-latest",
175 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
176 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
177 Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
178 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
179 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
180 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
181 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
182 Self::Claude3Haiku => "claude-3-haiku-20240307",
183 Self::Custom { name, .. } => name,
184 }
185 }
186
187 /// The id of the model that should be used for making API requests
188 pub fn request_id(&self) -> &str {
189 match self {
190 Self::ClaudeOpus4 => "claude-opus-4-20250514",
191 Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
192 Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
193 Self::ClaudeOpus4_6 => "claude-opus-4-6",
194 Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
195 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-20250929",
196 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
197 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
198 Self::Claude3Haiku => "claude-3-haiku-20240307",
199 Self::Custom { name, .. } => name,
200 }
201 }
202
203 pub fn display_name(&self) -> &str {
204 match self {
205 Self::ClaudeOpus4 => "Claude Opus 4",
206 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
207 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
208 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
209 Self::ClaudeSonnet4 => "Claude Sonnet 4",
210 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
211 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
212 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
213 Self::Claude3Haiku => "Claude 3 Haiku",
214 Self::Custom {
215 name, display_name, ..
216 } => display_name.as_ref().unwrap_or(name),
217 }
218 }
219
220 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
221 match self {
222 Self::ClaudeOpus4
223 | Self::ClaudeOpus4_1
224 | Self::ClaudeOpus4_5
225 | Self::ClaudeOpus4_6
226 | Self::ClaudeSonnet4
227 | Self::ClaudeSonnet4_5
228 | Self::ClaudeSonnet4_6
229 | Self::ClaudeHaiku4_5
230 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
231 min_total_token: 2_048,
232 should_speculate: true,
233 max_cache_anchors: 4,
234 }),
235 Self::Custom {
236 cache_configuration,
237 ..
238 } => cache_configuration.clone(),
239 }
240 }
241
242 pub fn max_token_count(&self) -> u64 {
243 match self {
244 Self::ClaudeOpus4
245 | Self::ClaudeOpus4_1
246 | Self::ClaudeOpus4_5
247 | Self::ClaudeSonnet4
248 | Self::ClaudeSonnet4_5
249 | Self::ClaudeHaiku4_5
250 | Self::Claude3Haiku => 200_000,
251 Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6 => 1_000_000,
252 Self::Custom { max_tokens, .. } => *max_tokens,
253 }
254 }
255
256 pub fn max_output_tokens(&self) -> u64 {
257 match self {
258 Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
259 Self::ClaudeOpus4_5
260 | Self::ClaudeSonnet4
261 | Self::ClaudeSonnet4_5
262 | Self::ClaudeSonnet4_6
263 | Self::ClaudeHaiku4_5 => 64_000,
264 Self::ClaudeOpus4_6 => 128_000,
265 Self::Claude3Haiku => 4_096,
266 Self::Custom {
267 max_output_tokens, ..
268 } => max_output_tokens.unwrap_or(4_096),
269 }
270 }
271
272 pub fn default_temperature(&self) -> f32 {
273 match self {
274 Self::ClaudeOpus4
275 | Self::ClaudeOpus4_1
276 | Self::ClaudeOpus4_5
277 | Self::ClaudeOpus4_6
278 | Self::ClaudeSonnet4
279 | Self::ClaudeSonnet4_5
280 | Self::ClaudeSonnet4_6
281 | Self::ClaudeHaiku4_5
282 | Self::Claude3Haiku => 1.0,
283 Self::Custom {
284 default_temperature,
285 ..
286 } => default_temperature.unwrap_or(1.0),
287 }
288 }
289
290 pub fn mode(&self) -> AnthropicModelMode {
291 if self.supports_adaptive_thinking() {
292 AnthropicModelMode::AdaptiveThinking
293 } else if self.supports_thinking() {
294 AnthropicModelMode::Thinking {
295 budget_tokens: Some(4_096),
296 }
297 } else {
298 AnthropicModelMode::Default
299 }
300 }
301
302 pub fn supports_thinking(&self) -> bool {
303 matches!(
304 self,
305 Self::ClaudeOpus4
306 | Self::ClaudeOpus4_1
307 | Self::ClaudeOpus4_5
308 | Self::ClaudeOpus4_6
309 | Self::ClaudeSonnet4
310 | Self::ClaudeSonnet4_5
311 | Self::ClaudeSonnet4_6
312 | Self::ClaudeHaiku4_5
313 )
314 }
315
316 pub fn supports_adaptive_thinking(&self) -> bool {
317 matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
318 }
319
320 pub fn beta_headers(&self) -> Option<String> {
321 let mut headers = vec![];
322
323 match self {
324 Self::Custom {
325 extra_beta_headers, ..
326 } => {
327 headers.extend(
328 extra_beta_headers
329 .iter()
330 .filter(|header| !header.trim().is_empty())
331 .cloned(),
332 );
333 }
334 _ => {}
335 }
336
337 if headers.is_empty() {
338 None
339 } else {
340 Some(headers.join(","))
341 }
342 }
343
344 pub fn tool_model_id(&self) -> &str {
345 if let Self::Custom {
346 tool_override: Some(tool_override),
347 ..
348 } = self
349 {
350 tool_override
351 } else {
352 self.request_id()
353 }
354 }
355}
356
357/// Generate completion with streaming.
358pub async fn stream_completion(
359 client: &dyn HttpClient,
360 api_url: &str,
361 api_key: &str,
362 request: Request,
363 beta_headers: Option<String>,
364) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
365 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
366 .await
367 .map(|output| output.0)
368}
369
370/// Generate completion without streaming.
371pub async fn non_streaming_completion(
372 client: &dyn HttpClient,
373 api_url: &str,
374 api_key: &str,
375 request: Request,
376 beta_headers: Option<String>,
377) -> Result<Response, AnthropicError> {
378 let (mut response, rate_limits) =
379 send_request(client, api_url, api_key, &request, beta_headers).await?;
380
381 if response.status().is_success() {
382 let mut body = String::new();
383 response
384 .body_mut()
385 .read_to_string(&mut body)
386 .await
387 .map_err(AnthropicError::ReadResponse)?;
388
389 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
390 } else {
391 Err(handle_error_response(response, rate_limits).await)
392 }
393}
394
395async fn send_request(
396 client: &dyn HttpClient,
397 api_url: &str,
398 api_key: &str,
399 request: impl Serialize,
400 beta_headers: Option<String>,
401) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
402 let uri = format!("{api_url}/v1/messages");
403
404 let mut request_builder = HttpRequest::builder()
405 .method(Method::POST)
406 .uri(uri)
407 .header("Anthropic-Version", "2023-06-01")
408 .header("X-Api-Key", api_key.trim())
409 .header("Content-Type", "application/json");
410
411 if let Some(beta_headers) = beta_headers {
412 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
413 }
414
415 let serialized_request =
416 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
417 let request = request_builder
418 .body(AsyncBody::from(serialized_request))
419 .map_err(AnthropicError::BuildRequestBody)?;
420
421 let response = client
422 .send(request)
423 .await
424 .map_err(AnthropicError::HttpSend)?;
425
426 let rate_limits = RateLimitInfo::from_headers(response.headers());
427
428 Ok((response, rate_limits))
429}
430
431async fn handle_error_response(
432 mut response: http::Response<AsyncBody>,
433 rate_limits: RateLimitInfo,
434) -> AnthropicError {
435 if response.status().as_u16() == 529 {
436 return AnthropicError::ServerOverloaded {
437 retry_after: rate_limits.retry_after,
438 };
439 }
440
441 if let Some(retry_after) = rate_limits.retry_after {
442 return AnthropicError::RateLimit { retry_after };
443 }
444
445 let mut body = String::new();
446 let read_result = response
447 .body_mut()
448 .read_to_string(&mut body)
449 .await
450 .map_err(AnthropicError::ReadResponse);
451
452 if let Err(err) = read_result {
453 return err;
454 }
455
456 match serde_json::from_str::<Event>(&body) {
457 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
458 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
459 status_code: response.status(),
460 message: body,
461 },
462 }
463}
464
465/// An individual rate limit.
///
/// Populated by `RateLimit::from_headers` from the three
/// `anthropic-ratelimit-{resource}-limit`, `-remaining` and `-reset` response headers.
466#[derive(Debug)]
467pub struct RateLimit {
    /// Value of the `-limit` header.
468 pub limit: usize,
    /// Value of the `-remaining` header.
469 pub remaining: usize,
    /// Value of the `-reset` header, parsed as RFC 3339 and converted to UTC.
470 pub reset: DateTime<Utc>,
471}
472
473impl RateLimit {
474 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
475 let limit =
476 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
477 let remaining = get_header(
478 &format!("anthropic-ratelimit-{resource}-remaining"),
479 headers,
480 )?
481 .parse()?;
482 let reset = DateTime::parse_from_rfc3339(get_header(
483 &format!("anthropic-ratelimit-{resource}-reset"),
484 headers,
485 )?)?
486 .to_utc();
487
488 Ok(Self {
489 limit,
490 remaining,
491 reset,
492 })
493 }
494}
495
/// Rate-limit information extracted from the headers of an API response.
///
/// Each per-resource entry is `None` when any of its three headers is missing or fails to
/// parse (see `RateLimitInfo::from_headers`).
///
496/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
497#[derive(Debug)]
498pub struct RateLimitInfo {
    /// Value of the `retry-after` header, read as whole seconds.
499 pub retry_after: Option<Duration>,
    /// Parsed from the `anthropic-ratelimit-requests-*` headers.
500 pub requests: Option<RateLimit>,
    /// Parsed from the `anthropic-ratelimit-tokens-*` headers.
501 pub tokens: Option<RateLimit>,
    /// Parsed from the `anthropic-ratelimit-input-tokens-*` headers.
502 pub input_tokens: Option<RateLimit>,
    /// Parsed from the `anthropic-ratelimit-output-tokens-*` headers.
503 pub output_tokens: Option<RateLimit>,
504}
505
506impl RateLimitInfo {
507 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
508 // Check if any rate limit headers exist
509 let has_rate_limit_headers = headers
510 .keys()
511 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
512
513 if !has_rate_limit_headers {
514 return Self {
515 retry_after: None,
516 requests: None,
517 tokens: None,
518 input_tokens: None,
519 output_tokens: None,
520 };
521 }
522
523 Self {
524 retry_after: parse_retry_after(headers),
525 requests: RateLimit::from_headers("requests", headers).ok(),
526 tokens: RateLimit::from_headers("tokens", headers).ok(),
527 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
528 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
529 }
530 }
531}
532
533/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
534/// seconds). Note that other services might specify an HTTP date or some other format for this
535/// header. Returns `None` if the header is not present or cannot be parsed.
536pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
537 headers
538 .get("retry-after")
539 .and_then(|v| v.to_str().ok())
540 .and_then(|v| v.parse::<u64>().ok())
541 .map(Duration::from_secs)
542}
543
544fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
545 Ok(headers
546 .get(key)
547 .with_context(|| format!("missing header `{key}`"))?
548 .to_str()?)
549}
550
551pub async fn stream_completion_with_rate_limit_info(
552 client: &dyn HttpClient,
553 api_url: &str,
554 api_key: &str,
555 request: Request,
556 beta_headers: Option<String>,
557) -> Result<
558 (
559 BoxStream<'static, Result<Event, AnthropicError>>,
560 Option<RateLimitInfo>,
561 ),
562 AnthropicError,
563> {
564 let request = StreamingRequest {
565 base: request,
566 stream: true,
567 };
568
569 let (response, rate_limits) =
570 send_request(client, api_url, api_key, &request, beta_headers).await?;
571
572 if response.status().is_success() {
573 let reader = BufReader::new(response.into_body());
574 let stream = reader
575 .lines()
576 .filter_map(|line| async move {
577 match line {
578 Ok(line) => {
579 let line = line
580 .strip_prefix("data: ")
581 .or_else(|| line.strip_prefix("data:"))?;
582
583 match serde_json::from_str(line) {
584 Ok(response) => Some(Ok(response)),
585 Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
586 }
587 }
588 Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
589 }
590 })
591 .boxed();
592 Ok((stream, Some(rate_limits)))
593 } else {
594 Err(handle_error_response(response, rate_limits).await)
595 }
596}
597
/// Kind of cache control attached to a content block.
///
/// Serialized in lowercase, so `Ephemeral` is written as `"ephemeral"`.
598#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
599#[serde(rename_all = "lowercase")]
600pub enum CacheControlType {
601 Ephemeral,
602}
603
/// Cache-control marker that can be attached to request content blocks.
604#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
605pub struct CacheControl {
    /// Serialized under the JSON key `type`.
606 #[serde(rename = "type")]
607 pub cache_type: CacheControlType,
608}
609
/// One message of a conversation: who sent it and its content blocks.
610#[derive(Debug, Serialize, Deserialize)]
611pub struct Message {
    /// Sender of the message.
612 pub role: Role,
    /// Content blocks, in order.
613 pub content: Vec<RequestContent>,
614}
615
/// Sender of a message. Serialized in lowercase (`"user"`, `"assistant"`).
616#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
617#[serde(rename_all = "lowercase")]
618pub enum Role {
619 User,
620 Assistant,
621}
622
/// A content block inside a request message.
///
/// Internally tagged: the JSON object carries a `type` field holding the variant's renamed
/// name, alongside the variant's own fields. On every variant that has one,
/// `cache_control` is omitted from the output when it is `None`.
623#[derive(Debug, Serialize, Deserialize)]
624#[serde(tag = "type")]
625pub enum RequestContent {
    /// Plain text (`"type": "text"`).
626 #[serde(rename = "text")]
627 Text {
628 text: String,
629 #[serde(skip_serializing_if = "Option::is_none")]
630 cache_control: Option<CacheControl>,
631 },
    /// Thinking text together with its signature (`"type": "thinking"`).
632 #[serde(rename = "thinking")]
633 Thinking {
634 thinking: String,
635 signature: String,
636 #[serde(skip_serializing_if = "Option::is_none")]
637 cache_control: Option<CacheControl>,
638 },
    /// Opaque redacted thinking payload (`"type": "redacted_thinking"`); the only variant
    /// without a `cache_control` field.
639 #[serde(rename = "redacted_thinking")]
640 RedactedThinking { data: String },
    /// Image content (`"type": "image"`).
641 #[serde(rename = "image")]
642 Image {
643 source: ImageSource,
644 #[serde(skip_serializing_if = "Option::is_none")]
645 cache_control: Option<CacheControl>,
646 },
    /// A tool invocation with its JSON input (`"type": "tool_use"`).
647 #[serde(rename = "tool_use")]
648 ToolUse {
649 id: String,
650 name: String,
651 input: serde_json::Value,
652 #[serde(skip_serializing_if = "Option::is_none")]
653 cache_control: Option<CacheControl>,
654 },
    /// The result of a tool invocation (`"type": "tool_result"`). `is_error` is always
    /// serialized, including when it is `false`.
655 #[serde(rename = "tool_result")]
656 ToolResult {
657 tool_use_id: String,
658 is_error: bool,
659 content: ToolResultContent,
660 #[serde(skip_serializing_if = "Option::is_none")]
661 cache_control: Option<CacheControl>,
662 },
663}
664
/// Content of a tool result.
///
/// Untagged: serialized either as a bare JSON string (`Plain`) or as an array of parts
/// (`Multipart`). When deserializing, the variants are tried in declaration order.
665#[derive(Debug, Serialize, Deserialize)]
666#[serde(untagged)]
667pub enum ToolResultContent {
668 Plain(String),
669 Multipart(Vec<ToolResultPart>),
670}
671
/// One part of a multipart tool result.
///
/// Internally tagged by `type`, with lowercase variant names (`"text"`, `"image"`).
672#[derive(Debug, Serialize, Deserialize)]
673#[serde(tag = "type", rename_all = "lowercase")]
674pub enum ToolResultPart {
675 Text { text: String },
676 Image { source: ImageSource },
677}
678
/// A content block in a response, or the initial block of a `content_block_start` event.
///
/// Internally tagged by `type`. No container attribute rejects unknown fields, so extra
/// fields present on a block are ignored when deserializing.
679#[derive(Debug, Serialize, Deserialize)]
680#[serde(tag = "type")]
681pub enum ResponseContent {
    /// `"type": "text"`.
682 #[serde(rename = "text")]
683 Text { text: String },
    /// `"type": "thinking"`. Only the thinking text is captured by this variant.
684 #[serde(rename = "thinking")]
685 Thinking { thinking: String },
    /// `"type": "redacted_thinking"`.
686 #[serde(rename = "redacted_thinking")]
687 RedactedThinking { data: String },
    /// `"type": "tool_use"`.
688 #[serde(rename = "tool_use")]
689 ToolUse {
690 id: String,
691 name: String,
692 input: serde_json::Value,
693 },
694}
695
/// Source of an image content block.
696#[derive(Debug, Serialize, Deserialize)]
697pub struct ImageSource {
    /// Serialized under the JSON key `type`.
    // NOTE(review): presumably "base64" in practice — not established by this file.
698 #[serde(rename = "type")]
699 pub source_type: String,
    // NOTE(review): presumably a MIME type such as "image/png" — confirm against callers.
700 pub media_type: String,
    /// The image payload, as a string.
701 pub data: String,
702}
703
/// Returns `true` when `value` is `false`.
///
/// Takes a reference because it is used as a serde `skip_serializing_if` predicate, which
/// is called with a reference to the field.
fn is_false(value: &bool) -> bool {
    !*value
}
707
/// Definition of a tool the model may call.
708#[derive(Debug, Serialize, Deserialize)]
709pub struct Tool {
710 pub name: String,
711 pub description: String,
    /// Schema of the tool's input, as arbitrary JSON.
712 pub input_schema: serde_json::Value,
    /// Omitted from the serialized form when `false`; defaults to `false` when absent on
    /// input.
713 #[serde(default, skip_serializing_if = "is_false")]
714 pub eager_input_streaming: bool,
715}
716
/// How the model should choose among the provided tools.
///
/// Internally tagged by `type`, with lowercase variant names (`"auto"`, `"any"`, `"tool"`,
/// `"none"`).
717#[derive(Debug, Serialize, Deserialize)]
718#[serde(tag = "type", rename_all = "lowercase")]
719pub enum ToolChoice {
720 Auto,
721 Any,
    /// Selects the tool with the given name.
722 Tool { name: String },
723 None,
724}
725
/// Thinking configuration sent with a request.
///
/// Internally tagged by `type`, with lowercase variant names (`"enabled"`, `"adaptive"`).
726#[derive(Debug, Serialize, Deserialize)]
727#[serde(tag = "type", rename_all = "lowercase")]
728pub enum Thinking {
    // NOTE(review): there is no `skip_serializing_if` here, so `budget_tokens: None` is
    // written as an explicit `null`. Confirm the API accepts that, or that callers always
    // supply a budget.
729 Enabled { budget_tokens: Option<u32> },
730 Adaptive,
731}
732
/// Effort level for `OutputConfig`.
///
/// Uses snake_case names (`"low"`, `"medium"`, `"high"`, `"max"`) both for serde and for
/// the `FromStr` implementation derived by strum's `EnumString`.
733#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
734#[serde(rename_all = "snake_case")]
735#[strum(serialize_all = "snake_case")]
736pub enum Effort {
737 Low,
738 Medium,
739 High,
740 Max,
741}
742
/// Output configuration sent with a request (`Request::output_config`).
743#[derive(Debug, Clone, Serialize, Deserialize)]
744pub struct OutputConfig {
    // NOTE(review): there is no `skip_serializing_if` here, so `effort: None` is written as
    // an explicit `null` whenever an `OutputConfig` is present. Confirm that is intended.
745 pub effort: Option<Effort>,
746}
747
/// Either a plain string or a list of content blocks; used for the `system` field.
///
/// Untagged: serialized as a bare JSON string or as an array. When deserializing, the
/// variants are tried in declaration order.
748#[derive(Debug, Serialize, Deserialize)]
749#[serde(untagged)]
750pub enum StringOrContents {
751 String(String),
752 Content(Vec<RequestContent>),
753}
754
/// Body of a `/v1/messages` request.
///
/// `model`, `max_tokens` and `messages` are always serialized. Every other field is
/// omitted when empty or `None`, and defaults to empty or `None` when absent on input.
755#[derive(Debug, Serialize, Deserialize)]
756pub struct Request {
    /// Model identifier to send to the API.
757 pub model: String,
758 pub max_tokens: u64,
    /// Conversation messages, in order.
759 pub messages: Vec<Message>,
760 #[serde(default, skip_serializing_if = "Vec::is_empty")]
761 pub tools: Vec<Tool>,
762 #[serde(default, skip_serializing_if = "Option::is_none")]
763 pub thinking: Option<Thinking>,
764 #[serde(default, skip_serializing_if = "Option::is_none")]
765 pub tool_choice: Option<ToolChoice>,
    /// System prompt, as a plain string or as content blocks.
766 #[serde(default, skip_serializing_if = "Option::is_none")]
767 pub system: Option<StringOrContents>,
768 #[serde(default, skip_serializing_if = "Option::is_none")]
769 pub metadata: Option<Metadata>,
770 #[serde(default, skip_serializing_if = "Option::is_none")]
771 pub output_config: Option<OutputConfig>,
772 #[serde(default, skip_serializing_if = "Vec::is_empty")]
773 pub stop_sequences: Vec<String>,
774 #[serde(default, skip_serializing_if = "Option::is_none")]
775 pub speed: Option<Speed>,
776 #[serde(default, skip_serializing_if = "Option::is_none")]
777 pub temperature: Option<f32>,
778 #[serde(default, skip_serializing_if = "Option::is_none")]
779 pub top_k: Option<u32>,
780 #[serde(default, skip_serializing_if = "Option::is_none")]
781 pub top_p: Option<f32>,
782}
783
/// Speed setting for a request (`Request::speed`).
///
/// Serialized in snake_case (`"standard"`, `"fast"`); `Standard` is the default.
784#[derive(Debug, Default, Serialize, Deserialize)]
785#[serde(rename_all = "snake_case")]
786pub enum Speed {
787 #[default]
788 Standard,
789 Fast,
790}
791
/// A `Request` with an additional `stream` flag.
///
/// `base` is flattened, so its fields appear at the top level of the JSON object next to
/// `stream`.
792#[derive(Debug, Serialize, Deserialize)]
793pub struct StreamingRequest {
794 #[serde(flatten)]
795 pub base: Request,
    /// Set to `true` by `stream_completion_with_rate_limit_info`.
796 pub stream: bool,
797}
798
/// Request metadata (`Request::metadata`).
799#[derive(Debug, Serialize, Deserialize)]
800pub struct Metadata {
    /// Serialized as `null` when `None` (there is no `skip_serializing_if`).
801 pub user_id: Option<String>,
802}
803
/// Token usage counters reported by the API.
///
/// Every counter is optional: it is `None` when absent on input and omitted when `None` on
/// output.
804#[derive(Debug, Serialize, Deserialize, Default)]
805pub struct Usage {
806 #[serde(default, skip_serializing_if = "Option::is_none")]
807 pub input_tokens: Option<u64>,
808 #[serde(default, skip_serializing_if = "Option::is_none")]
809 pub output_tokens: Option<u64>,
810 #[serde(default, skip_serializing_if = "Option::is_none")]
811 pub cache_creation_input_tokens: Option<u64>,
812 #[serde(default, skip_serializing_if = "Option::is_none")]
813 pub cache_read_input_tokens: Option<u64>,
814}
815
/// A message returned by the API.
///
/// Used both as the result of `non_streaming_completion` and as the payload of
/// `Event::MessageStart`.
816#[derive(Debug, Serialize, Deserialize)]
817pub struct Response {
818 pub id: String,
    /// Serialized under the JSON key `type`.
819 #[serde(rename = "type")]
820 pub response_type: String,
821 pub role: Role,
    /// Content blocks of the message.
822 pub content: Vec<ResponseContent>,
823 pub model: String,
    /// `None` when absent on input; omitted when `None` on output.
824 #[serde(default, skip_serializing_if = "Option::is_none")]
825 pub stop_reason: Option<String>,
    /// `None` when absent on input; omitted when `None` on output.
826 #[serde(default, skip_serializing_if = "Option::is_none")]
827 pub stop_sequence: Option<String>,
828 pub usage: Usage,
829}
830
/// A server-sent event from the streaming API.
///
/// Internally tagged by `type`. The streaming reader in this file deserializes each
/// `data:` line into this enum, and `handle_error_response` also tries to parse error
/// response bodies as `Event::Error`.
///
/// There is no catch-all variant, so a payload with an unrecognized `type` fails to
/// deserialize.
831#[derive(Debug, Serialize, Deserialize)]
832#[serde(tag = "type")]
833pub enum Event {
834 #[serde(rename = "message_start")]
835 MessageStart { message: Response },
836 #[serde(rename = "content_block_start")]
837 ContentBlockStart {
838 index: usize,
839 content_block: ResponseContent,
840 },
841 #[serde(rename = "content_block_delta")]
842 ContentBlockDelta { index: usize, delta: ContentDelta },
843 #[serde(rename = "content_block_stop")]
844 ContentBlockStop { index: usize },
845 #[serde(rename = "message_delta")]
846 MessageDelta { delta: MessageDelta, usage: Usage },
847 #[serde(rename = "message_stop")]
848 MessageStop,
849 #[serde(rename = "ping")]
850 Ping,
    /// An error reported by the API.
851 #[serde(rename = "error")]
852 Error { error: ApiError },
853}
854
/// Incremental update carried by `Event::ContentBlockDelta`.
///
/// Internally tagged by `type`.
855#[derive(Debug, Serialize, Deserialize)]
856#[serde(tag = "type")]
857pub enum ContentDelta {
858 #[serde(rename = "text_delta")]
859 TextDelta { text: String },
860 #[serde(rename = "thinking_delta")]
861 ThinkingDelta { thinking: String },
862 #[serde(rename = "signature_delta")]
863 SignatureDelta { signature: String },
    /// A fragment of JSON text; a single fragment need not be valid JSON on its own.
864 #[serde(rename = "input_json_delta")]
865 InputJsonDelta { partial_json: String },
866}
867
/// Message-level changes carried by `Event::MessageDelta`.
///
/// Both fields are serialized as `null` when `None` (there is no `skip_serializing_if`).
868#[derive(Debug, Serialize, Deserialize)]
869pub struct MessageDelta {
870 pub stop_reason: Option<String>,
871 pub stop_sequence: Option<String>,
872}
873
/// Errors produced by the request helpers in this file.
///
/// Converted into `language_model_core::LanguageModelCompletionError` by the `From` impl
/// further down in this file.
874#[derive(Debug)]
875pub enum AnthropicError {
876 /// Failed to serialize the HTTP request body to JSON
877 SerializeRequest(serde_json::Error),
878
879 /// Failed to construct the HTTP request body
880 BuildRequestBody(http::Error),
881
882 /// Failed to send the HTTP request
883 HttpSend(anyhow::Error),
884
885 /// Failed to deserialize the response from JSON
886 DeserializeResponse(serde_json::Error),
887
888 /// Failed to read from response stream
889 ReadResponse(io::Error),
890
891 /// HTTP error response from the API
    ///
    /// Produced when the error body could not be parsed as an `Event::Error`; `message`
    /// holds the raw response body.
892 HttpResponseError {
893 status_code: StatusCode,
894 message: String,
895 },
896
897 /// Rate limit exceeded
    ///
    /// Produced for any non-529 error response that carries a parsable `retry-after`
    /// header.
898 RateLimit { retry_after: Duration },
899
900 /// Server overloaded
    ///
    /// Produced for HTTP status 529.
901 ServerOverloaded { retry_after: Option<Duration> },
902
903 /// API returned an error response
904 ApiError(ApiError),
905}
906
/// Error object returned by the API.
///
/// Displayed as `Anthropic API Error: {error_type}: {message}`.
907#[derive(Debug, Serialize, Deserialize, Error)]
908#[error("Anthropic API Error: {error_type}: {message}")]
909pub struct ApiError {
    /// Serialized under the JSON key `type`; `ApiError::code` maps known values to an
    /// `ApiErrorCode`.
910 #[serde(rename = "type")]
911 pub error_type: String,
    /// Error message text.
912 pub message: String,
913}
914
915/// An Anthropic API error code.
916/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from `ApiError::error_type` through the `FromStr` implementation derived by
/// strum, using the snake_case form of each variant name (for example
/// `"invalid_request_error"`).
917#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
918#[strum(serialize_all = "snake_case")]
919pub enum ApiErrorCode {
920 /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
921 InvalidRequestError,
922 /// 401 - `authentication_error`: There's an issue with your API key.
923 AuthenticationError,
924 /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
925 PermissionError,
926 /// 404 - `not_found_error`: The requested resource was not found.
927 NotFoundError,
928 /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
929 RequestTooLarge,
930 /// 429 - `rate_limit_error`: Your account has hit a rate limit.
931 RateLimitError,
932 /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
933 ApiError,
934 /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
935 OverloadedError,
936}
937
938impl ApiError {
939 pub fn code(&self) -> Option<ApiErrorCode> {
940 ApiErrorCode::from_str(&self.error_type).ok()
941 }
942
943 pub fn is_rate_limit_error(&self) -> bool {
944 matches!(self.error_type.as_str(), "rate_limit_error")
945 }
946
947 pub fn match_window_exceeded(&self) -> Option<u64> {
948 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
949 return None;
950 };
951
952 parse_prompt_too_long(&self.message)
953 }
954}
955
/// Extracts the token count from a message of the form
/// `prompt is too long: <N> tokens…`.
///
/// The message must start with `prompt is too long: `, and the text between that prefix
/// and the first ` tokens` must parse as a `u64`. Returns `None` otherwise.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let after_prefix = message.strip_prefix("prompt is too long: ")?;
    let (token_count, _rest) = after_prefix.split_once(" tokens")?;
    token_count.parse().ok()
}
964
965/// Request body for the token counting API.
966/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
///
/// `model` and `messages` are always serialized; every other field is omitted when empty
/// or `None`.
967#[derive(Debug, Serialize)]
968pub struct CountTokensRequest {
969 pub model: String,
970 pub messages: Vec<Message>,
971 #[serde(default, skip_serializing_if = "Option::is_none")]
972 pub system: Option<StringOrContents>,
973 #[serde(default, skip_serializing_if = "Vec::is_empty")]
974 pub tools: Vec<Tool>,
975 #[serde(default, skip_serializing_if = "Option::is_none")]
976 pub thinking: Option<Thinking>,
977 #[serde(default, skip_serializing_if = "Option::is_none")]
978 pub tool_choice: Option<ToolChoice>,
979}
980
981/// Response from the token counting API.
982#[derive(Debug, Deserialize)]
983pub struct CountTokensResponse {
    /// The `input_tokens` value from the response body.
984 pub input_tokens: u64,
985}
986
987/// Count the number of tokens in a message without creating it.
988pub async fn count_tokens(
989 client: &dyn HttpClient,
990 api_url: &str,
991 api_key: &str,
992 request: CountTokensRequest,
993) -> Result<CountTokensResponse, AnthropicError> {
994 let uri = format!("{api_url}/v1/messages/count_tokens");
995
996 let request_builder = HttpRequest::builder()
997 .method(Method::POST)
998 .uri(uri)
999 .header("Anthropic-Version", "2023-06-01")
1000 .header("X-Api-Key", api_key.trim())
1001 .header("Content-Type", "application/json");
1002
1003 let serialized_request =
1004 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1005 let http_request = request_builder
1006 .body(AsyncBody::from(serialized_request))
1007 .map_err(AnthropicError::BuildRequestBody)?;
1008
1009 let mut response = client
1010 .send(http_request)
1011 .await
1012 .map_err(AnthropicError::HttpSend)?;
1013
1014 let rate_limits = RateLimitInfo::from_headers(response.headers());
1015
1016 if response.status().is_success() {
1017 let mut body = String::new();
1018 response
1019 .body_mut()
1020 .read_to_string(&mut body)
1021 .await
1022 .map_err(AnthropicError::ReadResponse)?;
1023
1024 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1025 } else {
1026 Err(handle_error_response(response, rate_limits).await)
1027 }
1028}
1029
1030// -- Conversions from/to `language_model_core` types --
1031
1032impl From<language_model_core::Speed> for Speed {
1033 fn from(speed: language_model_core::Speed) -> Self {
1034 match speed {
1035 language_model_core::Speed::Standard => Speed::Standard,
1036 language_model_core::Speed::Fast => Speed::Fast,
1037 }
1038 }
1039}
1040
1041impl From<AnthropicError> for language_model_core::LanguageModelCompletionError {
1042 fn from(error: AnthropicError) -> Self {
1043 let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
1044 match error {
1045 AnthropicError::SerializeRequest(error) => Self::SerializeRequest { provider, error },
1046 AnthropicError::BuildRequestBody(error) => Self::BuildRequestBody { provider, error },
1047 AnthropicError::HttpSend(error) => Self::HttpSend { provider, error },
1048 AnthropicError::DeserializeResponse(error) => {
1049 Self::DeserializeResponse { provider, error }
1050 }
1051 AnthropicError::ReadResponse(error) => Self::ApiReadResponseError { provider, error },
1052 AnthropicError::HttpResponseError {
1053 status_code,
1054 message,
1055 } => Self::HttpResponseError {
1056 provider,
1057 status_code,
1058 message,
1059 },
1060 AnthropicError::RateLimit { retry_after } => Self::RateLimitExceeded {
1061 provider,
1062 retry_after: Some(retry_after),
1063 },
1064 AnthropicError::ServerOverloaded { retry_after } => Self::ServerOverloaded {
1065 provider,
1066 retry_after,
1067 },
1068 AnthropicError::ApiError(api_error) => api_error.into(),
1069 }
1070 }
1071}
1072
1073impl From<ApiError> for language_model_core::LanguageModelCompletionError {
1074 fn from(error: ApiError) -> Self {
1075 use ApiErrorCode::*;
1076 let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
1077 match error.code() {
1078 Some(code) => match code {
1079 InvalidRequestError => Self::BadRequestFormat {
1080 provider,
1081 message: error.message,
1082 },
1083 AuthenticationError => Self::AuthenticationError {
1084 provider,
1085 message: error.message,
1086 },
1087 PermissionError => Self::PermissionError {
1088 provider,
1089 message: error.message,
1090 },
1091 NotFoundError => Self::ApiEndpointNotFound { provider },
1092 RequestTooLarge => Self::PromptTooLarge {
1093 tokens: language_model_core::parse_prompt_too_long(&error.message),
1094 },
1095 RateLimitError => Self::RateLimitExceeded {
1096 provider,
1097 retry_after: None,
1098 },
1099 ApiError => Self::ApiInternalServerError {
1100 provider,
1101 message: error.message,
1102 },
1103 OverloadedError => Self::ServerOverloaded {
1104 provider,
1105 retry_after: None,
1106 },
1107 },
1108 None => Self::Other(error.into()),
1109 }
1110 }
1111}
1112
/// Checks `ApiError::match_window_exceeded` against a table of
/// `(error_type, message, expected)` cases.
#[test]
fn test_match_window_exceeded() {
    let cases: [(&str, &str, Option<u64>); 5] = [
        // Token count followed by the limit.
        (
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000",
            Some(220_000),
        ),
        // Token count with nothing after " tokens".
        (
            "invalid_request_error",
            "prompt is too long: 1234953 tokens",
            Some(1234953),
        ),
        // Right error type, unrelated message.
        ("invalid_request_error", "not a prompt length error", None),
        // Matching message, but the wrong error type.
        ("rate_limit_error", "prompt is too long: 12345 tokens", None),
        // Non-numeric token count.
        (
            "invalid_request_error",
            "prompt is too long: invalid tokens",
            None,
        ),
    ];

    for (error_type, message, expected) in cases {
        let error = ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        };
        assert_eq!(error.match_window_exceeded(), expected);
    }
}