1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11use strum::{EnumIter, EnumString};
12use thiserror::Error;
13
14pub mod batches;
15pub mod completion;
16
17pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
18
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
/// Prompt-caching parameters for a model.
pub struct AnthropicModelCacheConfiguration {
    /// Minimum total token count before caching is attempted.
    pub min_total_token: u64,
    /// Whether cache anchors should be placed speculatively.
    pub should_speculate: bool,
    /// Maximum number of cache anchors (breakpoints) allowed per request.
    pub max_cache_anchors: usize,
}
26
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
/// How a model uses extended thinking.
pub enum AnthropicModelMode {
    /// No extended thinking.
    #[default]
    Default,
    /// Extended thinking with an optional explicit token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    /// Model decides how much thinking to do on its own (no fixed budget).
    AdaptiveThinking,
}
37
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
/// The set of Anthropic models this crate knows about.
///
/// The serde `alias`es fold the `-latest`, `-thinking`, and `-1m-context`
/// variants of each model ID into a single variant; thinking/context behavior
/// is tracked separately (see [`AnthropicModelMode`] and `max_token_count`).
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-7",
        alias = "claude-opus-4-7-latest",
        alias = "claude-opus-4-7-1m-context",
        alias = "claude-opus-4-7-1m-context-latest",
        alias = "claude-opus-4-7-thinking",
        alias = "claude-opus-4-7-thinking-latest",
        alias = "claude-opus-4-7-1m-context-thinking",
        alias = "claude-opus-4-7-1m-context-thinking-latest"
    )]
    ClaudeOpus4_7,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    // Default model for new configurations.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the agent panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
137
138impl Model {
139 pub fn default_fast() -> Self {
140 Self::ClaudeHaiku4_5
141 }
142
143 pub fn from_id(id: &str) -> Result<Self> {
144 if id.starts_with("claude-opus-4-7") {
145 return Ok(Self::ClaudeOpus4_7);
146 }
147
148 if id.starts_with("claude-opus-4-6") {
149 return Ok(Self::ClaudeOpus4_6);
150 }
151
152 if id.starts_with("claude-opus-4-5") {
153 return Ok(Self::ClaudeOpus4_5);
154 }
155
156 if id.starts_with("claude-opus-4-1") {
157 return Ok(Self::ClaudeOpus4_1);
158 }
159
160 if id.starts_with("claude-opus-4") {
161 return Ok(Self::ClaudeOpus4);
162 }
163
164 if id.starts_with("claude-sonnet-4-6") {
165 return Ok(Self::ClaudeSonnet4_6);
166 }
167
168 if id.starts_with("claude-sonnet-4-5") {
169 return Ok(Self::ClaudeSonnet4_5);
170 }
171
172 if id.starts_with("claude-sonnet-4") {
173 return Ok(Self::ClaudeSonnet4);
174 }
175
176 if id.starts_with("claude-haiku-4-5") {
177 return Ok(Self::ClaudeHaiku4_5);
178 }
179
180 if id.starts_with("claude-3-haiku") {
181 return Ok(Self::Claude3Haiku);
182 }
183
184 Err(anyhow!("invalid model ID: {id}"))
185 }
186
187 pub fn id(&self) -> &str {
188 match self {
189 Self::ClaudeOpus4 => "claude-opus-4-latest",
190 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
191 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
192 Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
193 Self::ClaudeOpus4_7 => "claude-opus-4-7-latest",
194 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
195 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
196 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
197 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
198 Self::Claude3Haiku => "claude-3-haiku-20240307",
199 Self::Custom { name, .. } => name,
200 }
201 }
202
203 /// The id of the model that should be used for making API requests
204 pub fn request_id(&self) -> &str {
205 match self {
206 Self::ClaudeOpus4 => "claude-opus-4-20250514",
207 Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
208 Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
209 Self::ClaudeOpus4_6 => "claude-opus-4-6",
210 Self::ClaudeOpus4_7 => "claude-opus-4-7",
211 Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
212 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-20250929",
213 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
214 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
215 Self::Claude3Haiku => "claude-3-haiku-20240307",
216 Self::Custom { name, .. } => name,
217 }
218 }
219
220 pub fn display_name(&self) -> &str {
221 match self {
222 Self::ClaudeOpus4 => "Claude Opus 4",
223 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
224 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
225 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
226 Self::ClaudeOpus4_7 => "Claude Opus 4.7",
227 Self::ClaudeSonnet4 => "Claude Sonnet 4",
228 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
229 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
230 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
231 Self::Claude3Haiku => "Claude 3 Haiku",
232 Self::Custom {
233 name, display_name, ..
234 } => display_name.as_ref().unwrap_or(name),
235 }
236 }
237
238 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
239 match self {
240 Self::ClaudeOpus4
241 | Self::ClaudeOpus4_1
242 | Self::ClaudeOpus4_5
243 | Self::ClaudeOpus4_6
244 | Self::ClaudeOpus4_7
245 | Self::ClaudeSonnet4
246 | Self::ClaudeSonnet4_5
247 | Self::ClaudeSonnet4_6
248 | Self::ClaudeHaiku4_5
249 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
250 min_total_token: 2_048,
251 should_speculate: true,
252 max_cache_anchors: 4,
253 }),
254 Self::Custom {
255 cache_configuration,
256 ..
257 } => cache_configuration.clone(),
258 }
259 }
260
261 pub fn max_token_count(&self) -> u64 {
262 match self {
263 Self::ClaudeOpus4
264 | Self::ClaudeOpus4_1
265 | Self::ClaudeOpus4_5
266 | Self::ClaudeSonnet4
267 | Self::ClaudeSonnet4_5
268 | Self::ClaudeHaiku4_5
269 | Self::Claude3Haiku => 200_000,
270 Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 | Self::ClaudeSonnet4_6 => 1_000_000,
271 Self::Custom { max_tokens, .. } => *max_tokens,
272 }
273 }
274
275 pub fn max_output_tokens(&self) -> u64 {
276 match self {
277 Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
278 Self::ClaudeOpus4_5
279 | Self::ClaudeSonnet4
280 | Self::ClaudeSonnet4_5
281 | Self::ClaudeSonnet4_6
282 | Self::ClaudeHaiku4_5 => 64_000,
283 Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 => 128_000,
284 Self::Claude3Haiku => 4_096,
285 Self::Custom {
286 max_output_tokens, ..
287 } => max_output_tokens.unwrap_or(4_096),
288 }
289 }
290
291 pub fn default_temperature(&self) -> f32 {
292 match self {
293 Self::ClaudeOpus4
294 | Self::ClaudeOpus4_1
295 | Self::ClaudeOpus4_5
296 | Self::ClaudeOpus4_6
297 | Self::ClaudeOpus4_7
298 | Self::ClaudeSonnet4
299 | Self::ClaudeSonnet4_5
300 | Self::ClaudeSonnet4_6
301 | Self::ClaudeHaiku4_5
302 | Self::Claude3Haiku => 1.0,
303 Self::Custom {
304 default_temperature,
305 ..
306 } => default_temperature.unwrap_or(1.0),
307 }
308 }
309
310 pub fn mode(&self) -> AnthropicModelMode {
311 match self {
312 Self::Custom { mode, .. } => mode.clone(),
313 _ if self.supports_adaptive_thinking() => AnthropicModelMode::AdaptiveThinking,
314 _ if self.supports_thinking() => AnthropicModelMode::Thinking {
315 budget_tokens: Some(4_096),
316 },
317 _ => AnthropicModelMode::Default,
318 }
319 }
320
321 pub fn supports_thinking(&self) -> bool {
322 match self {
323 Self::Custom { mode, .. } => {
324 matches!(
325 mode,
326 AnthropicModelMode::Thinking { .. } | AnthropicModelMode::AdaptiveThinking
327 )
328 }
329 _ => matches!(
330 self,
331 Self::ClaudeOpus4
332 | Self::ClaudeOpus4_1
333 | Self::ClaudeOpus4_5
334 | Self::ClaudeOpus4_6
335 | Self::ClaudeOpus4_7
336 | Self::ClaudeSonnet4
337 | Self::ClaudeSonnet4_5
338 | Self::ClaudeSonnet4_6
339 | Self::ClaudeHaiku4_5
340 ),
341 }
342 }
343
344 pub fn supports_speed(&self) -> bool {
345 matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
346 }
347
348 pub fn supports_adaptive_thinking(&self) -> bool {
349 match self {
350 Self::Custom { mode, .. } => matches!(mode, AnthropicModelMode::AdaptiveThinking),
351 _ => matches!(
352 self,
353 Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 | Self::ClaudeSonnet4_6
354 ),
355 }
356 }
357
358 pub fn beta_headers(&self) -> Option<String> {
359 let mut headers = vec![];
360
361 match self {
362 Self::Custom {
363 extra_beta_headers, ..
364 } => {
365 headers.extend(
366 extra_beta_headers
367 .iter()
368 .filter(|header| !header.trim().is_empty())
369 .cloned(),
370 );
371 }
372 _ => {}
373 }
374
375 if headers.is_empty() {
376 None
377 } else {
378 Some(headers.join(","))
379 }
380 }
381
382 pub fn tool_model_id(&self) -> &str {
383 if let Self::Custom {
384 tool_override: Some(tool_override),
385 ..
386 } = self
387 {
388 tool_override
389 } else {
390 self.request_id()
391 }
392 }
393}
394
395/// Generate completion with streaming.
396pub async fn stream_completion(
397 client: &dyn HttpClient,
398 api_url: &str,
399 api_key: &str,
400 request: Request,
401 beta_headers: Option<String>,
402) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
403 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
404 .await
405 .map(|output| output.0)
406}
407
/// Generate completion without streaming.
///
/// Sends the request and buffers the entire JSON response body before
/// deserializing it into a [`Response`]. Non-success statuses are converted
/// into an [`AnthropicError`] via `handle_error_response`.
pub async fn non_streaming_completion(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<Response, AnthropicError> {
    let (mut response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        // Read the full body into memory; non-streaming responses are a
        // single JSON document.
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
432
/// Builds and sends a `POST {api_url}/v1/messages` request, returning the raw
/// HTTP response together with any rate-limit info parsed from its headers.
///
/// The request body is `request` serialized as JSON. `beta_headers`, when
/// present, is sent verbatim as the `Anthropic-Beta` header.
async fn send_request(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: impl Serialize,
    beta_headers: Option<String>,
) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
    let uri = format!("{api_url}/v1/messages");

    let mut request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Anthropic-Version", "2023-06-01")
        // Trim to tolerate keys pasted with stray whitespace.
        .header("X-Api-Key", api_key.trim())
        .header("Content-Type", "application/json");

    if let Some(beta_headers) = beta_headers {
        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
    }

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;

    // Rate-limit headers are present on both success and error responses.
    let rate_limits = RateLimitInfo::from_headers(response.headers());

    Ok((response, rate_limits))
}
468
469async fn handle_error_response(
470 mut response: http::Response<AsyncBody>,
471 rate_limits: RateLimitInfo,
472) -> AnthropicError {
473 if response.status().as_u16() == 529 {
474 return AnthropicError::ServerOverloaded {
475 retry_after: rate_limits.retry_after,
476 };
477 }
478
479 if let Some(retry_after) = rate_limits.retry_after {
480 return AnthropicError::RateLimit { retry_after };
481 }
482
483 let mut body = String::new();
484 let read_result = response
485 .body_mut()
486 .read_to_string(&mut body)
487 .await
488 .map_err(AnthropicError::ReadResponse);
489
490 if let Err(err) = read_result {
491 return err;
492 }
493
494 match serde_json::from_str::<Event>(&body) {
495 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
496 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
497 status_code: response.status(),
498 message: body,
499 },
500 }
501}
502
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    /// Total quota for the window.
    pub limit: usize,
    /// Remaining quota in the current window.
    pub remaining: usize,
    /// When the quota resets (UTC).
    pub reset: DateTime<Utc>,
}
510
511impl RateLimit {
512 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
513 let limit =
514 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
515 let remaining = get_header(
516 &format!("anthropic-ratelimit-{resource}-remaining"),
517 headers,
518 )?
519 .parse()?;
520 let reset = DateTime::parse_from_rfc3339(get_header(
521 &format!("anthropic-ratelimit-{resource}-reset"),
522 headers,
523 )?)?
524 .to_utc();
525
526 Ok(Self {
527 limit,
528 remaining,
529 reset,
530 })
531 }
532}
533
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Seconds to wait before retrying, from the `retry-after` header.
    pub retry_after: Option<Duration>,
    /// Request-count limit.
    pub requests: Option<RateLimit>,
    /// Combined token limit.
    pub tokens: Option<RateLimit>,
    /// Input-token limit.
    pub input_tokens: Option<RateLimit>,
    /// Output-token limit.
    pub output_tokens: Option<RateLimit>,
}
543
544impl RateLimitInfo {
545 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
546 // Check if any rate limit headers exist
547 let has_rate_limit_headers = headers
548 .keys()
549 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
550
551 if !has_rate_limit_headers {
552 return Self {
553 retry_after: None,
554 requests: None,
555 tokens: None,
556 input_tokens: None,
557 output_tokens: None,
558 };
559 }
560
561 Self {
562 retry_after: parse_retry_after(headers),
563 requests: RateLimit::from_headers("requests", headers).ok(),
564 tokens: RateLimit::from_headers("tokens", headers).ok(),
565 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
566 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
567 }
568 }
569}
570
571/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
572/// seconds). Note that other services might specify an HTTP date or some other format for this
573/// header. Returns `None` if the header is not present or cannot be parsed.
574pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
575 headers
576 .get("retry-after")
577 .and_then(|v| v.to_str().ok())
578 .and_then(|v| v.parse::<u64>().ok())
579 .map(Duration::from_secs)
580}
581
582fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
583 Ok(headers
584 .get(key)
585 .with_context(|| format!("missing header `{key}`"))?
586 .to_str()?)
587}
588
/// Sends a streaming completion request and returns the server-sent-event
/// stream of [`Event`]s together with rate-limit info from the initial
/// response headers.
///
/// Lines that are not SSE `data:` payloads (blank keep-alives, `event:`
/// lines) are silently dropped by the `strip_prefix` filter.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Same payload as a non-streaming request, plus `"stream": true`.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // SSE frames are `data: {...}`; accept `data:` with or
                        // without the trailing space.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
635
/// Cache strategy for a content block; Anthropic currently only supports
/// ephemeral caching.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
641
/// Marks a content block as a prompt-cache anchor.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
647
/// A single conversation turn in a request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
653
/// Who authored a message; the Messages API only distinguishes user and
/// assistant (system prompts travel separately in `Request::system`).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
660
/// A content block inside an outgoing message, tagged by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A prior thinking block being replayed, with its integrity signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Thinking content the API returned only in encrypted form.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation made by the assistant in a prior turn.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result we send back for a prior `tool_use` block.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
702
/// Tool result payload: either a bare string or structured multipart content.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
709
/// One part of a multipart tool result.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
716
/// A content block in an API response, tagged by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
733
/// An image attachment.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    // e.g. "base64"
    #[serde(rename = "type")]
    pub source_type: String,
    // e.g. "image/png"
    pub media_type: String,
    // Image bytes, encoded per `source_type`.
    pub data: String,
}
741
/// Serde helper for `skip_serializing_if`: omit the field when it is `false`.
fn is_false(value: &bool) -> bool {
    matches!(*value, false)
}
745
/// A tool the model may call, described by a JSON Schema for its input.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// JSON Schema describing the tool's `input` object.
    pub input_schema: serde_json::Value,
    /// When true, the API streams tool input deltas eagerly. Omitted from the
    /// JSON when false.
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
754
/// Controls whether/which tool the model must call.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    /// Model decides whether to call a tool.
    Auto,
    /// Model must call some tool.
    Any,
    /// Model must call the named tool.
    Tool { name: String },
    /// Model must not call tools.
    None,
}
763
/// The `thinking` request parameter.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Fixed-budget extended thinking.
    Enabled {
        budget_tokens: Option<u32>,
    },
    /// Model-controlled thinking, with optional control over how thinking is
    /// surfaced in the response.
    Adaptive {
        #[serde(default, skip_serializing_if = "Option::is_none")]
        display: Option<AdaptiveThinkingDisplay>,
    },
}
775
/// How adaptive-thinking content appears in responses.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "lowercase")]
pub enum AdaptiveThinkingDisplay {
    Omitted,
    Summarized,
}
782
/// Output effort level, sent via [`OutputConfig`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
792
/// The `output_config` request parameter.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
797
/// A system prompt: either a plain string or a list of content blocks
/// (the latter allows cache-control anchors).
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
804
/// Body of a `/v1/messages` request. Optional fields are omitted from the
/// serialized JSON when unset.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
833
/// The `speed` request parameter (only accepted by some models; see
/// `Model::supports_speed`).
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
841
/// A [`Request`] with the extra `stream` flag, flattened so the wire format
/// is the base request plus `"stream": ...`.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
848
/// Request metadata; `user_id` lets Anthropic attribute abuse per end user.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
853
/// Token accounting reported by the API. All fields optional because
/// streaming events report them incrementally.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
865
/// A complete (non-streaming) message response.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    // Always "message" for this endpoint.
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    // e.g. "end_turn", "max_tokens", "stop_sequence", "tool_use".
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    /// Which custom stop sequence was hit, if `stop_reason` is "stop_sequence".
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
880
/// A server-sent event from the streaming Messages API, tagged by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    /// Keep-alive; carries no data.
    #[serde(rename = "ping")]
    Ping,
    /// In-stream error; also the shape of non-streaming error bodies
    /// (see `handle_error_response`).
    #[serde(rename = "error")]
    Error { error: ApiError },
}
904
/// Incremental update to a content block during streaming, tagged by `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// Integrity signature for a completed thinking block.
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// A fragment of a tool call's JSON input.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
917
/// Message-level fields delivered at the end of a stream.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
923
/// Everything that can go wrong while talking to the Anthropic API, from
/// request serialization through response parsing.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
956
/// A structured error object from the API body
/// (e.g. `{"type": "rate_limit_error", "message": "..."}`).
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
964
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
987
988impl ApiError {
989 pub fn code(&self) -> Option<ApiErrorCode> {
990 ApiErrorCode::from_str(&self.error_type).ok()
991 }
992
993 pub fn is_rate_limit_error(&self) -> bool {
994 matches!(self.error_type.as_str(), "rate_limit_error")
995 }
996
997 pub fn match_window_exceeded(&self) -> Option<u64> {
998 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
999 return None;
1000 };
1001
1002 parse_prompt_too_long(&self.message)
1003 }
1004}
1005
/// Extracts the token count from a "prompt is too long: N tokens ..."
/// error message. Returns `None` if the message has a different shape.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    let (count, _) = rest.split_once(" tokens")?;
    count.parse().ok()
}
1014
1015// -- Conversions from/to `language_model_core` types --
1016
/// Maps the crate-agnostic speed setting onto the Anthropic wire enum.
impl From<language_model_core::Speed> for Speed {
    fn from(speed: language_model_core::Speed) -> Self {
        match speed {
            language_model_core::Speed::Standard => Speed::Standard,
            language_model_core::Speed::Fast => Speed::Fast,
        }
    }
}
1025
/// Maps transport/parsing errors onto the provider-agnostic completion-error
/// type, tagging each with the Anthropic provider name.
impl From<AnthropicError> for language_model_core::LanguageModelCompletionError {
    fn from(error: AnthropicError) -> Self {
        let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
        match error {
            AnthropicError::SerializeRequest(error) => Self::SerializeRequest { provider, error },
            AnthropicError::BuildRequestBody(error) => Self::BuildRequestBody { provider, error },
            AnthropicError::HttpSend(error) => Self::HttpSend { provider, error },
            AnthropicError::DeserializeResponse(error) => {
                Self::DeserializeResponse { provider, error }
            }
            AnthropicError::ReadResponse(error) => Self::ApiReadResponseError { provider, error },
            AnthropicError::HttpResponseError {
                status_code,
                message,
            } => Self::HttpResponseError {
                provider,
                status_code,
                message,
            },
            AnthropicError::RateLimit { retry_after } => Self::RateLimitExceeded {
                provider,
                retry_after: Some(retry_after),
            },
            AnthropicError::ServerOverloaded { retry_after } => Self::ServerOverloaded {
                provider,
                retry_after,
            },
            // Structured API errors get their own, finer-grained mapping below.
            AnthropicError::ApiError(api_error) => api_error.into(),
        }
    }
}
1057
/// Maps structured Anthropic API errors onto the provider-agnostic
/// completion-error type; unrecognized error codes fall through to `Other`.
impl From<ApiError> for language_model_core::LanguageModelCompletionError {
    fn from(error: ApiError) -> Self {
        use ApiErrorCode::*;
        let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
        match error.code() {
            Some(code) => match code {
                InvalidRequestError => Self::BadRequestFormat {
                    provider,
                    message: error.message,
                },
                AuthenticationError => Self::AuthenticationError {
                    provider,
                    message: error.message,
                },
                PermissionError => Self::PermissionError {
                    provider,
                    message: error.message,
                },
                NotFoundError => Self::ApiEndpointNotFound { provider },
                RequestTooLarge => Self::PromptTooLarge {
                    tokens: language_model_core::parse_prompt_too_long(&error.message),
                },
                // Retry-After is only available at the HTTP layer, so none here.
                RateLimitError => Self::RateLimitExceeded {
                    provider,
                    retry_after: None,
                },
                ApiError => Self::ApiInternalServerError {
                    provider,
                    message: error.message,
                },
                OverloadedError => Self::ServerOverloaded {
                    provider,
                    retry_after: None,
                },
            },
            None => Self::Other(error.into()),
        }
    }
}
1097
// A custom model configured with a fixed thinking budget must report that
// exact mode (not be overridden by built-in defaults).
#[test]
fn custom_mode_thinking_is_preserved() {
    let model = Model::Custom {
        name: "my-custom-model".to_string(),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: vec![],
        mode: AnthropicModelMode::Thinking {
            budget_tokens: Some(2048),
        },
    };
    assert_eq!(
        model.mode(),
        AnthropicModelMode::Thinking {
            budget_tokens: Some(2048)
        }
    );
    assert!(model.supports_thinking());
}
1121
// A custom model configured for adaptive thinking reports adaptive mode and
// counts as supporting both adaptive and regular thinking.
#[test]
fn custom_mode_adaptive_is_preserved() {
    let model = Model::Custom {
        name: "my-custom-model".to_string(),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: vec![],
        mode: AnthropicModelMode::AdaptiveThinking,
    };
    assert_eq!(model.mode(), AnthropicModelMode::AdaptiveThinking);
    assert!(model.supports_adaptive_thinking());
    assert!(model.supports_thinking());
}
1139
// A custom model in the default mode must not report any thinking support.
#[test]
fn custom_mode_default_disables_thinking() {
    let model = Model::Custom {
        name: "my-custom-model".to_string(),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: vec![],
        mode: AnthropicModelMode::Default,
    };
    assert!(!model.supports_thinking());
    assert!(!model.supports_adaptive_thinking());
}
1156
// `match_window_exceeded` should only report a token count when the error is
// an invalid_request_error whose message matches "prompt is too long: N tokens".
#[test]
fn test_match_window_exceeded() {
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 220000 tokens > 200000".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(220_000));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 1234953 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(1234953));

    // Right code, wrong message shape.
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "not a prompt length error".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    // Right message shape, wrong error code.
    let error = ApiError {
        error_type: "rate_limit_error".to_string(),
        message: "prompt is too long: 12345 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    // Unparseable token count.
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: invalid tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);
}