1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11use strum::{EnumIter, EnumString};
12use thiserror::Error;
13
14pub mod batches;
15pub mod completion;
16
/// Default base URL for the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
18
/// Prompt-caching settings for a model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    /// Minimum total token count before caching is worthwhile.
    pub min_total_token: u64,
    /// Whether cache anchors may be placed speculatively.
    pub should_speculate: bool,
    /// Maximum number of cache anchors allowed in a single request.
    pub max_cache_anchors: usize,
}
26
/// How a model's extended-thinking capability should be driven.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No extended thinking.
    #[default]
    Default,
    /// Extended thinking with an optional fixed token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    /// Extended thinking where the model chooses its own budget.
    AdaptiveThinking,
}
37
/// The set of Anthropic models known to this crate.
///
/// The serde aliases accept the `-latest`, `-thinking`, and `-1m-context` ID
/// spellings that may appear in stored settings; they all deserialize to the
/// same variant.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-7",
        alias = "claude-opus-4-7-latest",
        alias = "claude-opus-4-7-1m-context",
        alias = "claude-opus-4-7-1m-context-latest",
        alias = "claude-opus-4-7-thinking",
        alias = "claude-opus-4-7-thinking-latest",
        alias = "claude-opus-4-7-1m-context-thinking",
        alias = "claude-opus-4-7-1m-context-thinking-latest"
    )]
    ClaudeOpus4_7,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    /// The default model.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model that is not hard-coded in this crate.
    #[serde(rename = "custom")]
    Custom {
        /// The model ID sent to the API.
        name: String,
        /// Maximum context window size, in tokens.
        max_tokens: u64,
        /// The name displayed in the UI, such as in the agent panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        /// Maximum number of output tokens; falls back to 4096 when unset.
        max_output_tokens: Option<u64>,
        /// Default sampling temperature; falls back to 1.0 when unset.
        default_temperature: Option<f32>,
        /// Extra values to send in the `Anthropic-Beta` header.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        /// The model's thinking mode.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
137
138impl Model {
    /// The default model for fast, inexpensive requests.
    pub fn default_fast() -> Self {
        Self::ClaudeHaiku4_5
    }
142
143 pub fn from_id(id: &str) -> Result<Self> {
144 if id.starts_with("claude-opus-4-7") {
145 return Ok(Self::ClaudeOpus4_7);
146 }
147
148 if id.starts_with("claude-opus-4-6") {
149 return Ok(Self::ClaudeOpus4_6);
150 }
151
152 if id.starts_with("claude-opus-4-5") {
153 return Ok(Self::ClaudeOpus4_5);
154 }
155
156 if id.starts_with("claude-opus-4-1") {
157 return Ok(Self::ClaudeOpus4_1);
158 }
159
160 if id.starts_with("claude-opus-4") {
161 return Ok(Self::ClaudeOpus4);
162 }
163
164 if id.starts_with("claude-sonnet-4-6") {
165 return Ok(Self::ClaudeSonnet4_6);
166 }
167
168 if id.starts_with("claude-sonnet-4-5") {
169 return Ok(Self::ClaudeSonnet4_5);
170 }
171
172 if id.starts_with("claude-sonnet-4") {
173 return Ok(Self::ClaudeSonnet4);
174 }
175
176 if id.starts_with("claude-haiku-4-5") {
177 return Ok(Self::ClaudeHaiku4_5);
178 }
179
180 if id.starts_with("claude-3-haiku") {
181 return Ok(Self::Claude3Haiku);
182 }
183
184 Err(anyhow!("invalid model ID: {id}"))
185 }
186
    /// The stable, user-facing model identifier (generally the `-latest`
    /// alias). Use [`Self::request_id`] for the ID sent in API requests.
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-latest",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
            Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
            Self::ClaudeOpus4_7 => "claude-opus-4-7-latest",
            Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }
202
    /// The id of the model that should be used for making API requests
    /// (a dated snapshot for older models, the bare ID for newer ones).
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "claude-opus-4-20250514",
            Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
            Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
            Self::ClaudeOpus4_6 => "claude-opus-4-6",
            Self::ClaudeOpus4_7 => "claude-opus-4-7",
            Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-20250929",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
            Self::Claude3Haiku => "claude-3-haiku-20240307",
            Self::Custom { name, .. } => name,
        }
    }
219
    /// Human-readable name shown in the UI; custom models fall back to their
    /// raw `name` when no `display_name` is configured.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeOpus4 => "Claude Opus 4",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeOpus4_7 => "Claude Opus 4.7",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::Claude3Haiku => "Claude 3 Haiku",
            Self::Custom {
                name, display_name, ..
            } => display_name.as_ref().unwrap_or(name),
        }
    }
237
238 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
239 match self {
240 Self::ClaudeOpus4
241 | Self::ClaudeOpus4_1
242 | Self::ClaudeOpus4_5
243 | Self::ClaudeOpus4_6
244 | Self::ClaudeOpus4_7
245 | Self::ClaudeSonnet4
246 | Self::ClaudeSonnet4_5
247 | Self::ClaudeSonnet4_6
248 | Self::ClaudeHaiku4_5
249 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
250 min_total_token: 2_048,
251 should_speculate: true,
252 max_cache_anchors: 4,
253 }),
254 Self::Custom {
255 cache_configuration,
256 ..
257 } => cache_configuration.clone(),
258 }
259 }
260
    /// Maximum context window size, in tokens. The newest Opus/Sonnet
    /// generations get the 1M-token window; older built-ins get 200k.
    pub fn max_token_count(&self) -> u64 {
        match self {
            Self::ClaudeOpus4
            | Self::ClaudeOpus4_1
            | Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeHaiku4_5
            | Self::Claude3Haiku => 200_000,
            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 | Self::ClaudeSonnet4_6 => 1_000_000,
            Self::Custom { max_tokens, .. } => *max_tokens,
        }
    }
274
    /// Maximum number of output tokens the model may generate in one request.
    pub fn max_output_tokens(&self) -> u64 {
        match self {
            Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
            Self::ClaudeOpus4_5
            | Self::ClaudeSonnet4
            | Self::ClaudeSonnet4_5
            | Self::ClaudeSonnet4_6
            | Self::ClaudeHaiku4_5 => 64_000,
            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 => 128_000,
            Self::Claude3Haiku => 4_096,
            Self::Custom {
                max_output_tokens, ..
            } => max_output_tokens.unwrap_or(4_096),
        }
    }
290
291 pub fn default_temperature(&self) -> f32 {
292 match self {
293 Self::ClaudeOpus4
294 | Self::ClaudeOpus4_1
295 | Self::ClaudeOpus4_5
296 | Self::ClaudeOpus4_6
297 | Self::ClaudeOpus4_7
298 | Self::ClaudeSonnet4
299 | Self::ClaudeSonnet4_5
300 | Self::ClaudeSonnet4_6
301 | Self::ClaudeHaiku4_5
302 | Self::Claude3Haiku => 1.0,
303 Self::Custom {
304 default_temperature,
305 ..
306 } => default_temperature.unwrap_or(1.0),
307 }
308 }
309
    /// Returns the thinking mode for this model.
    ///
    /// Custom models report their configured mode. Built-in models prefer
    /// adaptive thinking, then fixed-budget thinking (4096 tokens), based on
    /// the `supports_*` capability checks below.
    pub fn mode(&self) -> AnthropicModelMode {
        match self {
            Self::Custom { mode, .. } => mode.clone(),
            _ if self.supports_adaptive_thinking() => AnthropicModelMode::AdaptiveThinking,
            _ if self.supports_thinking() => AnthropicModelMode::Thinking {
                budget_tokens: Some(4_096),
            },
            _ => AnthropicModelMode::Default,
        }
    }
320
321 pub fn supports_thinking(&self) -> bool {
322 match self {
323 Self::Custom { mode, .. } => {
324 matches!(
325 mode,
326 AnthropicModelMode::Thinking { .. } | AnthropicModelMode::AdaptiveThinking
327 )
328 }
329 _ => matches!(
330 self,
331 Self::ClaudeOpus4
332 | Self::ClaudeOpus4_1
333 | Self::ClaudeOpus4_5
334 | Self::ClaudeOpus4_6
335 | Self::ClaudeOpus4_7
336 | Self::ClaudeSonnet4
337 | Self::ClaudeSonnet4_5
338 | Self::ClaudeSonnet4_6
339 | Self::ClaudeHaiku4_5
340 ),
341 }
342 }
343
    /// Whether this model accepts the `speed` request parameter (see
    /// [`Speed`]); currently only Opus 4.6 and Sonnet 4.6.
    pub fn supports_speed(&self) -> bool {
        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
    }
347
    /// Whether this model supports adaptive thinking, where the model picks
    /// its own thinking budget.
    pub fn supports_adaptive_thinking(&self) -> bool {
        match self {
            Self::Custom { mode, .. } => matches!(mode, AnthropicModelMode::AdaptiveThinking),
            _ => matches!(
                self,
                Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 | Self::ClaudeSonnet4_6
            ),
        }
    }
357
358 pub fn beta_headers(&self) -> Option<String> {
359 let mut headers = vec![];
360
361 match self {
362 Self::Custom {
363 extra_beta_headers, ..
364 } => {
365 headers.extend(
366 extra_beta_headers
367 .iter()
368 .filter(|header| !header.trim().is_empty())
369 .cloned(),
370 );
371 }
372 _ => {}
373 }
374
375 if headers.is_empty() {
376 None
377 } else {
378 Some(headers.join(","))
379 }
380 }
381
382 pub fn tool_model_id(&self) -> &str {
383 if let Self::Custom {
384 tool_override: Some(tool_override),
385 ..
386 } = self
387 {
388 tool_override
389 } else {
390 self.request_id()
391 }
392 }
393}
394
/// Generate completion with streaming.
///
/// Convenience wrapper around [`stream_completion_with_rate_limit_info`]
/// that discards the rate-limit headers and returns only the event stream.
pub async fn stream_completion(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
    stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
        .await
        .map(|output| output.0)
}
407
/// Generate completion without streaming.
///
/// Sends the request via [`send_request`], reads the whole body, and
/// deserializes it into a [`Response`]. Non-success statuses are converted
/// into the most specific [`AnthropicError`] by [`handle_error_response`].
pub async fn non_streaming_completion(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<Response, AnthropicError> {
    let (mut response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
432
/// Builds and sends a POST to `{api_url}/v1/messages`, returning the raw
/// response together with any rate-limit info parsed from its headers.
///
/// The API key is trimmed before use so stray whitespace in stored settings
/// doesn't produce an invalid header. `beta_headers`, when present, is sent
/// verbatim as the `Anthropic-Beta` header.
async fn send_request(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: impl Serialize,
    beta_headers: Option<String>,
) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
    let uri = format!("{api_url}/v1/messages");

    let mut request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Anthropic-Version", "2023-06-01")
        .header("X-Api-Key", api_key.trim())
        .header("Content-Type", "application/json");

    if let Some(beta_headers) = beta_headers {
        request_builder = request_builder.header("Anthropic-Beta", beta_headers);
    }

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let response = client
        .send(request)
        .await
        .map_err(AnthropicError::HttpSend)?;

    // Rate-limit headers are present on both success and error responses.
    let rate_limits = RateLimitInfo::from_headers(response.headers());

    Ok((response, rate_limits))
}
468
469async fn handle_error_response(
470 mut response: http::Response<AsyncBody>,
471 rate_limits: RateLimitInfo,
472) -> AnthropicError {
473 if response.status().as_u16() == 529 {
474 return AnthropicError::ServerOverloaded {
475 retry_after: rate_limits.retry_after,
476 };
477 }
478
479 if let Some(retry_after) = rate_limits.retry_after {
480 return AnthropicError::RateLimit { retry_after };
481 }
482
483 let mut body = String::new();
484 let read_result = response
485 .body_mut()
486 .read_to_string(&mut body)
487 .await
488 .map_err(AnthropicError::ReadResponse);
489
490 if let Err(err) = read_result {
491 return err;
492 }
493
494 match serde_json::from_str::<Event>(&body) {
495 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
496 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
497 status_code: response.status(),
498 message: body,
499 },
500 }
501}
502
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    /// Total allowance for this resource in the current window.
    pub limit: usize,
    /// Remaining allowance in the current window.
    pub remaining: usize,
    /// When the window resets (UTC).
    pub reset: DateTime<Utc>,
}
510
511impl RateLimit {
512 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
513 let limit =
514 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
515 let remaining = get_header(
516 &format!("anthropic-ratelimit-{resource}-remaining"),
517 headers,
518 )?
519 .parse()?;
520 let reset = DateTime::parse_from_rfc3339(get_header(
521 &format!("anthropic-ratelimit-{resource}-reset"),
522 headers,
523 )?)?
524 .to_utc();
525
526 Ok(Self {
527 limit,
528 remaining,
529 reset,
530 })
531 }
532}
533
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Delay suggested by the `retry-after` header, if present.
    pub retry_after: Option<Duration>,
    /// Request-count rate limit, if reported.
    pub requests: Option<RateLimit>,
    /// Combined-token rate limit, if reported.
    pub tokens: Option<RateLimit>,
    /// Input-token rate limit, if reported.
    pub input_tokens: Option<RateLimit>,
    /// Output-token rate limit, if reported.
    pub output_tokens: Option<RateLimit>,
}
543
544impl RateLimitInfo {
545 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
546 // Check if any rate limit headers exist
547 let has_rate_limit_headers = headers
548 .keys()
549 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
550
551 if !has_rate_limit_headers {
552 return Self {
553 retry_after: None,
554 requests: None,
555 tokens: None,
556 input_tokens: None,
557 output_tokens: None,
558 };
559 }
560
561 Self {
562 retry_after: parse_retry_after(headers),
563 requests: RateLimit::from_headers("requests", headers).ok(),
564 tokens: RateLimit::from_headers("tokens", headers).ok(),
565 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
566 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
567 }
568 }
569}
570
571/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
572/// seconds). Note that other services might specify an HTTP date or some other format for this
573/// header. Returns `None` if the header is not present or cannot be parsed.
574pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
575 headers
576 .get("retry-after")
577 .and_then(|v| v.to_str().ok())
578 .and_then(|v| v.parse::<u64>().ok())
579 .map(Duration::from_secs)
580}
581
582fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
583 Ok(headers
584 .get(key)
585 .with_context(|| format!("missing header `{key}`"))?
586 .to_str()?)
587}
588
/// Generate completion with streaming, also returning any rate-limit
/// information parsed from the response headers.
///
/// On success the body is consumed as a server-sent-event stream: each line
/// prefixed with `data:` is deserialized into an [`Event`]; lines without
/// that prefix (blank keep-alives, `event:` lines) are silently dropped.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Same payload as the non-streaming endpoint, plus `"stream": true`.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // Accept both "data: {...}" and "data:{...}" framings;
                        // any other line is dropped from the stream.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
635
/// Cache-control strategy for a content block; only `ephemeral` exists today.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
641
/// Marks a content block as a prompt-cache anchor.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
647
/// A single conversation turn in a Messages API request.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
653
/// Who authored a message; the Messages API only has these two roles
/// (system prompts are carried separately in [`Request::system`]).
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
660
/// A content block sent to the API as part of a [`Message`].
///
/// Most variants carry an optional `cache_control` that marks the block as a
/// prompt-cache anchor (omitted from JSON when `None`).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A previously returned thinking block, echoed back with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Opaque, encrypted thinking content echoed back verbatim.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An inline image.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation previously produced by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of running a tool, correlated by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
702
/// Tool output: either a bare string or a list of typed parts.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
709
/// One part of a multipart tool result.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
716
/// A content block returned by the API (the response-side counterpart of
/// [`RequestContent`], without cache-control fields).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
733
/// An image payload.
///
/// NOTE(review): field semantics (e.g. whether `data` is base64 and which
/// `source_type`/`media_type` values are accepted) are not visible here —
/// confirm against the Anthropic API docs before relying on them.
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
741
/// Serde helper for `skip_serializing_if`: omit boolean fields that are `false`.
fn is_false(value: &bool) -> bool {
    !*value
}
745
/// Definition of a tool the model may call.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// JSON Schema describing the tool's input object.
    pub input_schema: serde_json::Value,
    /// Serialized only when `true`, so the field is absent for the common case.
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
754
/// Controls whether/which tool the model must call.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    /// The model decides whether to call a tool.
    Auto,
    /// The model must call some tool.
    Any,
    /// The model must call the named tool.
    Tool { name: String },
    /// The model must not call any tool.
    None,
}
763
/// Request-level extended-thinking configuration
/// (wire form of [`AnthropicModelMode`]'s non-default variants).
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    Enabled { budget_tokens: Option<u32> },
    Adaptive,
}
770
/// Output effort level, serialized in snake_case for both serde and strum.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
780
/// Output tuning options for a request.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
785
/// A system prompt: either a bare string or a list of content blocks.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
792
/// Request body for the Messages API.
///
/// Optional fields are omitted from the serialized JSON when unset so the
/// API's own defaults apply.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    /// Model ID to send (see [`Model::request_id`]).
    pub model: String,
    /// Maximum number of tokens to generate.
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
821
/// Generation speed tier; only some models accept this
/// (see [`Model::supports_speed`]).
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
829
/// A [`Request`] plus the `stream` flag, flattened into one JSON object.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
836
/// Request metadata forwarded to the API.
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    /// Opaque identifier for the end user, for abuse detection on Anthropic's side.
    pub user_id: Option<String>,
}
841
/// Token accounting reported by the API; individual counters may be absent
/// depending on the event that carries them.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
853
/// A complete (non-streaming) Messages API response; also carried inside the
/// streaming `message_start` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    /// The model that actually served the request.
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
868
/// Server-sent events emitted by the streaming Messages API.
///
/// Also reused to parse error bodies on non-success responses, since those
/// use the same `{"type": "error", ...}` envelope.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    /// Keep-alive; carries no data.
    #[serde(rename = "ping")]
    Ping,
    #[serde(rename = "error")]
    Error { error: ApiError },
}
892
/// Incremental update to a content block, addressed by index in the
/// enclosing `content_block_delta` event.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// Partial JSON fragment of a tool-use `input`; fragments concatenate
    /// into a complete JSON document.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
905
/// Top-level message fields delivered at the end of a stream.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
911
/// Errors that can occur while talking to the Anthropic API.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
944
/// An error payload returned by the API; `error_type` maps onto
/// [`ApiErrorCode`] via [`ApiError::code`].
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
952
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
975
976impl ApiError {
977 pub fn code(&self) -> Option<ApiErrorCode> {
978 ApiErrorCode::from_str(&self.error_type).ok()
979 }
980
981 pub fn is_rate_limit_error(&self) -> bool {
982 matches!(self.error_type.as_str(), "rate_limit_error")
983 }
984
985 pub fn match_window_exceeded(&self) -> Option<u64> {
986 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
987 return None;
988 };
989
990 parse_prompt_too_long(&self.message)
991 }
992}
993
/// Extracts the token count from an Anthropic "prompt is too long: N tokens…"
/// error message, or `None` if the message has a different shape.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    let (count, _) = rest.split_once(" tokens")?;
    count.parse().ok()
}
1002
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
///
/// NOTE(review): the `#[serde(default)]` attributes have no effect on a
/// Serialize-only struct; they are kept for symmetry with `Request`.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    pub model: String,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}
1018
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Number of input tokens the given request would consume.
    pub input_tokens: u64,
}
1024
/// Count the number of tokens in a message without creating it.
///
/// POSTs to `{api_url}/v1/messages/count_tokens` with the same auth and
/// version headers as [`send_request`] (duplicated here because that helper
/// hardcodes the `/v1/messages` path). Error responses are converted via
/// [`handle_error_response`].
pub async fn count_tokens(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: CountTokensRequest,
) -> Result<CountTokensResponse, AnthropicError> {
    let uri = format!("{api_url}/v1/messages/count_tokens");

    let request_builder = HttpRequest::builder()
        .method(Method::POST)
        .uri(uri)
        .header("Anthropic-Version", "2023-06-01")
        .header("X-Api-Key", api_key.trim())
        .header("Content-Type", "application/json");

    let serialized_request =
        serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
    let http_request = request_builder
        .body(AsyncBody::from(serialized_request))
        .map_err(AnthropicError::BuildRequestBody)?;

    let mut response = client
        .send(http_request)
        .await
        .map_err(AnthropicError::HttpSend)?;

    // Parsed even on success so error handling below can use retry info.
    let rate_limits = RateLimitInfo::from_headers(response.headers());

    if response.status().is_success() {
        let mut body = String::new();
        response
            .body_mut()
            .read_to_string(&mut body)
            .await
            .map_err(AnthropicError::ReadResponse)?;

        serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
1067
1068// -- Conversions from/to `language_model_core` types --
1069
/// Converts the provider-agnostic speed setting into the Anthropic wire type.
impl From<language_model_core::Speed> for Speed {
    fn from(speed: language_model_core::Speed) -> Self {
        match speed {
            language_model_core::Speed::Standard => Speed::Standard,
            language_model_core::Speed::Fast => Speed::Fast,
        }
    }
}
1078
/// Maps transport-level Anthropic errors onto the provider-agnostic
/// completion error type, tagging each with the Anthropic provider name.
impl From<AnthropicError> for language_model_core::LanguageModelCompletionError {
    fn from(error: AnthropicError) -> Self {
        let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
        match error {
            AnthropicError::SerializeRequest(error) => Self::SerializeRequest { provider, error },
            AnthropicError::BuildRequestBody(error) => Self::BuildRequestBody { provider, error },
            AnthropicError::HttpSend(error) => Self::HttpSend { provider, error },
            AnthropicError::DeserializeResponse(error) => {
                Self::DeserializeResponse { provider, error }
            }
            AnthropicError::ReadResponse(error) => Self::ApiReadResponseError { provider, error },
            AnthropicError::HttpResponseError {
                status_code,
                message,
            } => Self::HttpResponseError {
                provider,
                status_code,
                message,
            },
            AnthropicError::RateLimit { retry_after } => Self::RateLimitExceeded {
                provider,
                retry_after: Some(retry_after),
            },
            AnthropicError::ServerOverloaded { retry_after } => Self::ServerOverloaded {
                provider,
                retry_after,
            },
            // API-level errors get the finer-grained mapping below.
            AnthropicError::ApiError(api_error) => api_error.into(),
        }
    }
}
1110
1111impl From<ApiError> for language_model_core::LanguageModelCompletionError {
1112 fn from(error: ApiError) -> Self {
1113 use ApiErrorCode::*;
1114 let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
1115 match error.code() {
1116 Some(code) => match code {
1117 InvalidRequestError => Self::BadRequestFormat {
1118 provider,
1119 message: error.message,
1120 },
1121 AuthenticationError => Self::AuthenticationError {
1122 provider,
1123 message: error.message,
1124 },
1125 PermissionError => Self::PermissionError {
1126 provider,
1127 message: error.message,
1128 },
1129 NotFoundError => Self::ApiEndpointNotFound { provider },
1130 RequestTooLarge => Self::PromptTooLarge {
1131 tokens: language_model_core::parse_prompt_too_long(&error.message),
1132 },
1133 RateLimitError => Self::RateLimitExceeded {
1134 provider,
1135 retry_after: None,
1136 },
1137 ApiError => Self::ApiInternalServerError {
1138 provider,
1139 message: error.message,
1140 },
1141 OverloadedError => Self::ServerOverloaded {
1142 provider,
1143 retry_after: None,
1144 },
1145 },
1146 None => Self::Other(error.into()),
1147 }
1148 }
1149}
1150
#[test]
fn custom_mode_thinking_is_preserved() {
    // A custom model configured with an explicit thinking budget must report
    // that exact mode back and advertise thinking support.
    let mode = AnthropicModelMode::Thinking {
        budget_tokens: Some(2048),
    };
    let model = Model::Custom {
        name: String::from("my-custom-model"),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: Vec::new(),
        mode: mode.clone(),
    };
    assert_eq!(model.mode(), mode);
    assert!(model.supports_thinking());
}
1174
#[test]
fn custom_mode_adaptive_is_preserved() {
    // Adaptive thinking on a custom model must round-trip through `mode()` and
    // imply both the adaptive and the general thinking capability.
    let model = Model::Custom {
        name: String::from("my-custom-model"),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: Vec::new(),
        mode: AnthropicModelMode::AdaptiveThinking,
    };
    assert!(model.supports_adaptive_thinking());
    assert!(model.supports_thinking());
    assert_eq!(model.mode(), AnthropicModelMode::AdaptiveThinking);
}
1192
#[test]
fn custom_mode_default_disables_thinking() {
    // With the default mode, a custom model must not report any thinking support.
    let model = Model::Custom {
        name: String::from("my-custom-model"),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: Vec::new(),
        mode: AnthropicModelMode::Default,
    };
    assert!(!model.supports_adaptive_thinking());
    assert!(!model.supports_thinking());
}
1209
#[test]
fn test_match_window_exceeded() {
    // Builds an `ApiError` from the given type/message and runs the
    // context-window-exceeded matcher on it.
    let window_exceeded = |error_type: &str, message: &str| {
        ApiError {
            error_type: error_type.to_string(),
            message: message.to_string(),
        }
        .match_window_exceeded()
    };

    // Token count is parsed from the "N tokens > limit" form.
    assert_eq!(
        window_exceeded(
            "invalid_request_error",
            "prompt is too long: 220000 tokens > 200000"
        ),
        Some(220_000)
    );
    // Token count is also parsed when no limit suffix is present.
    assert_eq!(
        window_exceeded("invalid_request_error", "prompt is too long: 1234953 tokens"),
        Some(1234953)
    );
    // An unrelated message yields no match.
    assert_eq!(
        window_exceeded("invalid_request_error", "not a prompt length error"),
        None
    );
    // Only `invalid_request_error` types are considered, even with a matching message.
    assert_eq!(
        window_exceeded("rate_limit_error", "prompt is too long: 12345 tokens"),
        None
    );
    // Non-numeric token counts are rejected.
    assert_eq!(
        window_exceeded("invalid_request_error", "prompt is too long: invalid tokens"),
        None
    );
}