1use std::io;
2use std::str::FromStr;
3use std::time::Duration;
4
5use anyhow::{Context as _, Result, anyhow};
6use chrono::{DateTime, Utc};
7use futures::{AsyncBufReadExt, AsyncReadExt, StreamExt, io::BufReader, stream::BoxStream};
8use http_client::http::{self, HeaderMap, HeaderValue};
9use http_client::{AsyncBody, HttpClient, Method, Request as HttpRequest, StatusCode};
10use serde::{Deserialize, Serialize};
11use strum::{EnumIter, EnumString};
12use thiserror::Error;
13
// Submodule implementing the Message Batches API.
pub mod batches;
// Submodule implementing higher-level completion helpers.
pub mod completion;

/// Default base URL for the Anthropic REST API.
pub const ANTHROPIC_API_URL: &str = "https://api.anthropic.com";
18
/// Prompt-caching parameters for a model (see [`Model::cache_configuration`]).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct AnthropicModelCacheConfiguration {
    /// Minimum token count before caching is worthwhile (2_048 for built-in
    /// models) — presumably total prompt tokens; confirm with callers.
    pub min_total_token: u64,
    /// Whether callers should speculatively add cache anchors.
    pub should_speculate: bool,
    /// Maximum number of cache breakpoints ("anchors") per request.
    pub max_cache_anchors: usize,
}
26
/// How a model uses extended thinking (see [`Model::mode`]).
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum AnthropicModelMode {
    /// No extended thinking.
    #[default]
    Default,
    /// Extended thinking with an optional explicit token budget.
    Thinking {
        budget_tokens: Option<u32>,
    },
    /// The model decides how much thinking to do on its own.
    AdaptiveThinking,
}
37
/// The set of Anthropic models known to this crate.
///
/// `rename` gives each variant's canonical serialized ID; the `alias`es
/// accept the "-latest", "-thinking", and "-1m-context" spellings so user
/// settings written with any of those forms deserialize correctly.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    #[serde(
        rename = "claude-opus-4",
        alias = "claude-opus-4-latest",
        alias = "claude-opus-4-thinking",
        alias = "claude-opus-4-thinking-latest"
    )]
    ClaudeOpus4,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-1m-context",
        alias = "claude-opus-4-6-1m-context-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest",
        alias = "claude-opus-4-6-1m-context-thinking",
        alias = "claude-opus-4-6-1m-context-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    // The crate-wide default model.
    #[default]
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-1m-context",
        alias = "claude-sonnet-4-6-1m-context-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest",
        alias = "claude-sonnet-4-6-1m-context-thinking",
        alias = "claude-sonnet-4-6-1m-context-thinking-latest"
    )]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-haiku-4-5",
        alias = "claude-haiku-4-5-latest",
        alias = "claude-haiku-4-5-thinking",
        alias = "claude-haiku-4-5-thinking-latest"
    )]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-3-haiku", alias = "claude-3-haiku-latest")]
    Claude3Haiku,
    /// A user-configured model not in the built-in list.
    #[serde(rename = "custom")]
    Custom {
        name: String,
        max_tokens: u64,
        /// The name displayed in the UI, such as in the agent panel model dropdown menu.
        display_name: Option<String>,
        /// Override this model with a different Anthropic model for tool calls.
        tool_override: Option<String>,
        /// Indicates whether this custom model supports caching.
        cache_configuration: Option<AnthropicModelCacheConfiguration>,
        max_output_tokens: Option<u64>,
        default_temperature: Option<f32>,
        /// Extra values to send in the `Anthropic-Beta` header.
        #[serde(default)]
        extra_beta_headers: Vec<String>,
        /// Thinking mode for this model.
        #[serde(default)]
        mode: AnthropicModelMode,
    },
}
126
127impl Model {
128 pub fn default_fast() -> Self {
129 Self::ClaudeHaiku4_5
130 }
131
132 pub fn from_id(id: &str) -> Result<Self> {
133 if id.starts_with("claude-opus-4-6") {
134 return Ok(Self::ClaudeOpus4_6);
135 }
136
137 if id.starts_with("claude-opus-4-5") {
138 return Ok(Self::ClaudeOpus4_5);
139 }
140
141 if id.starts_with("claude-opus-4-1") {
142 return Ok(Self::ClaudeOpus4_1);
143 }
144
145 if id.starts_with("claude-opus-4") {
146 return Ok(Self::ClaudeOpus4);
147 }
148
149 if id.starts_with("claude-sonnet-4-6") {
150 return Ok(Self::ClaudeSonnet4_6);
151 }
152
153 if id.starts_with("claude-sonnet-4-5") {
154 return Ok(Self::ClaudeSonnet4_5);
155 }
156
157 if id.starts_with("claude-sonnet-4") {
158 return Ok(Self::ClaudeSonnet4);
159 }
160
161 if id.starts_with("claude-haiku-4-5") {
162 return Ok(Self::ClaudeHaiku4_5);
163 }
164
165 if id.starts_with("claude-3-haiku") {
166 return Ok(Self::Claude3Haiku);
167 }
168
169 Err(anyhow!("invalid model ID: {id}"))
170 }
171
172 pub fn id(&self) -> &str {
173 match self {
174 Self::ClaudeOpus4 => "claude-opus-4-latest",
175 Self::ClaudeOpus4_1 => "claude-opus-4-1-latest",
176 Self::ClaudeOpus4_5 => "claude-opus-4-5-latest",
177 Self::ClaudeOpus4_6 => "claude-opus-4-6-latest",
178 Self::ClaudeSonnet4 => "claude-sonnet-4-latest",
179 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-latest",
180 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6-latest",
181 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-latest",
182 Self::Claude3Haiku => "claude-3-haiku-20240307",
183 Self::Custom { name, .. } => name,
184 }
185 }
186
187 /// The id of the model that should be used for making API requests
188 pub fn request_id(&self) -> &str {
189 match self {
190 Self::ClaudeOpus4 => "claude-opus-4-20250514",
191 Self::ClaudeOpus4_1 => "claude-opus-4-1-20250805",
192 Self::ClaudeOpus4_5 => "claude-opus-4-5-20251101",
193 Self::ClaudeOpus4_6 => "claude-opus-4-6",
194 Self::ClaudeSonnet4 => "claude-sonnet-4-20250514",
195 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5-20250929",
196 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
197 Self::ClaudeHaiku4_5 => "claude-haiku-4-5-20251001",
198 Self::Claude3Haiku => "claude-3-haiku-20240307",
199 Self::Custom { name, .. } => name,
200 }
201 }
202
203 pub fn display_name(&self) -> &str {
204 match self {
205 Self::ClaudeOpus4 => "Claude Opus 4",
206 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
207 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
208 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
209 Self::ClaudeSonnet4 => "Claude Sonnet 4",
210 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
211 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
212 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
213 Self::Claude3Haiku => "Claude 3 Haiku",
214 Self::Custom {
215 name, display_name, ..
216 } => display_name.as_ref().unwrap_or(name),
217 }
218 }
219
220 pub fn cache_configuration(&self) -> Option<AnthropicModelCacheConfiguration> {
221 match self {
222 Self::ClaudeOpus4
223 | Self::ClaudeOpus4_1
224 | Self::ClaudeOpus4_5
225 | Self::ClaudeOpus4_6
226 | Self::ClaudeSonnet4
227 | Self::ClaudeSonnet4_5
228 | Self::ClaudeSonnet4_6
229 | Self::ClaudeHaiku4_5
230 | Self::Claude3Haiku => Some(AnthropicModelCacheConfiguration {
231 min_total_token: 2_048,
232 should_speculate: true,
233 max_cache_anchors: 4,
234 }),
235 Self::Custom {
236 cache_configuration,
237 ..
238 } => cache_configuration.clone(),
239 }
240 }
241
242 pub fn max_token_count(&self) -> u64 {
243 match self {
244 Self::ClaudeOpus4
245 | Self::ClaudeOpus4_1
246 | Self::ClaudeOpus4_5
247 | Self::ClaudeSonnet4
248 | Self::ClaudeSonnet4_5
249 | Self::ClaudeHaiku4_5
250 | Self::Claude3Haiku => 200_000,
251 Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6 => 1_000_000,
252 Self::Custom { max_tokens, .. } => *max_tokens,
253 }
254 }
255
256 pub fn max_output_tokens(&self) -> u64 {
257 match self {
258 Self::ClaudeOpus4 | Self::ClaudeOpus4_1 => 32_000,
259 Self::ClaudeOpus4_5
260 | Self::ClaudeSonnet4
261 | Self::ClaudeSonnet4_5
262 | Self::ClaudeSonnet4_6
263 | Self::ClaudeHaiku4_5 => 64_000,
264 Self::ClaudeOpus4_6 => 128_000,
265 Self::Claude3Haiku => 4_096,
266 Self::Custom {
267 max_output_tokens, ..
268 } => max_output_tokens.unwrap_or(4_096),
269 }
270 }
271
272 pub fn default_temperature(&self) -> f32 {
273 match self {
274 Self::ClaudeOpus4
275 | Self::ClaudeOpus4_1
276 | Self::ClaudeOpus4_5
277 | Self::ClaudeOpus4_6
278 | Self::ClaudeSonnet4
279 | Self::ClaudeSonnet4_5
280 | Self::ClaudeSonnet4_6
281 | Self::ClaudeHaiku4_5
282 | Self::Claude3Haiku => 1.0,
283 Self::Custom {
284 default_temperature,
285 ..
286 } => default_temperature.unwrap_or(1.0),
287 }
288 }
289
290 pub fn mode(&self) -> AnthropicModelMode {
291 match self {
292 Self::Custom { mode, .. } => mode.clone(),
293 _ if self.supports_adaptive_thinking() => AnthropicModelMode::AdaptiveThinking,
294 _ if self.supports_thinking() => AnthropicModelMode::Thinking {
295 budget_tokens: Some(4_096),
296 },
297 _ => AnthropicModelMode::Default,
298 }
299 }
300
301 pub fn supports_thinking(&self) -> bool {
302 match self {
303 Self::Custom { mode, .. } => {
304 matches!(
305 mode,
306 AnthropicModelMode::Thinking { .. } | AnthropicModelMode::AdaptiveThinking
307 )
308 }
309 _ => matches!(
310 self,
311 Self::ClaudeOpus4
312 | Self::ClaudeOpus4_1
313 | Self::ClaudeOpus4_5
314 | Self::ClaudeOpus4_6
315 | Self::ClaudeSonnet4
316 | Self::ClaudeSonnet4_5
317 | Self::ClaudeSonnet4_6
318 | Self::ClaudeHaiku4_5
319 ),
320 }
321 }
322
323 pub fn supports_adaptive_thinking(&self) -> bool {
324 match self {
325 Self::Custom { mode, .. } => matches!(mode, AnthropicModelMode::AdaptiveThinking),
326 _ => matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6),
327 }
328 }
329
330 pub fn beta_headers(&self) -> Option<String> {
331 let mut headers = vec![];
332
333 match self {
334 Self::Custom {
335 extra_beta_headers, ..
336 } => {
337 headers.extend(
338 extra_beta_headers
339 .iter()
340 .filter(|header| !header.trim().is_empty())
341 .cloned(),
342 );
343 }
344 _ => {}
345 }
346
347 if headers.is_empty() {
348 None
349 } else {
350 Some(headers.join(","))
351 }
352 }
353
354 pub fn tool_model_id(&self) -> &str {
355 if let Self::Custom {
356 tool_override: Some(tool_override),
357 ..
358 } = self
359 {
360 tool_override
361 } else {
362 self.request_id()
363 }
364 }
365}
366
367/// Generate completion with streaming.
368pub async fn stream_completion(
369 client: &dyn HttpClient,
370 api_url: &str,
371 api_key: &str,
372 request: Request,
373 beta_headers: Option<String>,
374) -> Result<BoxStream<'static, Result<Event, AnthropicError>>, AnthropicError> {
375 stream_completion_with_rate_limit_info(client, api_url, api_key, request, beta_headers)
376 .await
377 .map(|output| output.0)
378}
379
380/// Generate completion without streaming.
381pub async fn non_streaming_completion(
382 client: &dyn HttpClient,
383 api_url: &str,
384 api_key: &str,
385 request: Request,
386 beta_headers: Option<String>,
387) -> Result<Response, AnthropicError> {
388 let (mut response, rate_limits) =
389 send_request(client, api_url, api_key, &request, beta_headers).await?;
390
391 if response.status().is_success() {
392 let mut body = String::new();
393 response
394 .body_mut()
395 .read_to_string(&mut body)
396 .await
397 .map_err(AnthropicError::ReadResponse)?;
398
399 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
400 } else {
401 Err(handle_error_response(response, rate_limits).await)
402 }
403}
404
405async fn send_request(
406 client: &dyn HttpClient,
407 api_url: &str,
408 api_key: &str,
409 request: impl Serialize,
410 beta_headers: Option<String>,
411) -> Result<(http::Response<AsyncBody>, RateLimitInfo), AnthropicError> {
412 let uri = format!("{api_url}/v1/messages");
413
414 let mut request_builder = HttpRequest::builder()
415 .method(Method::POST)
416 .uri(uri)
417 .header("Anthropic-Version", "2023-06-01")
418 .header("X-Api-Key", api_key.trim())
419 .header("Content-Type", "application/json");
420
421 if let Some(beta_headers) = beta_headers {
422 request_builder = request_builder.header("Anthropic-Beta", beta_headers);
423 }
424
425 let serialized_request =
426 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
427 let request = request_builder
428 .body(AsyncBody::from(serialized_request))
429 .map_err(AnthropicError::BuildRequestBody)?;
430
431 let response = client
432 .send(request)
433 .await
434 .map_err(AnthropicError::HttpSend)?;
435
436 let rate_limits = RateLimitInfo::from_headers(response.headers());
437
438 Ok((response, rate_limits))
439}
440
441async fn handle_error_response(
442 mut response: http::Response<AsyncBody>,
443 rate_limits: RateLimitInfo,
444) -> AnthropicError {
445 if response.status().as_u16() == 529 {
446 return AnthropicError::ServerOverloaded {
447 retry_after: rate_limits.retry_after,
448 };
449 }
450
451 if let Some(retry_after) = rate_limits.retry_after {
452 return AnthropicError::RateLimit { retry_after };
453 }
454
455 let mut body = String::new();
456 let read_result = response
457 .body_mut()
458 .read_to_string(&mut body)
459 .await
460 .map_err(AnthropicError::ReadResponse);
461
462 if let Err(err) = read_result {
463 return err;
464 }
465
466 match serde_json::from_str::<Event>(&body) {
467 Ok(Event::Error { error }) => AnthropicError::ApiError(error),
468 Ok(_) | Err(_) => AnthropicError::HttpResponseError {
469 status_code: response.status(),
470 message: body,
471 },
472 }
473}
474
/// An individual rate limit.
#[derive(Debug)]
pub struct RateLimit {
    /// Maximum allowed for this resource within the window.
    pub limit: usize,
    /// How much of the limit is still available.
    pub remaining: usize,
    /// When the limit resets (RFC 3339 timestamp from the API, in UTC).
    pub reset: DateTime<Utc>,
}
482
483impl RateLimit {
484 fn from_headers(resource: &str, headers: &HeaderMap<HeaderValue>) -> Result<Self> {
485 let limit =
486 get_header(&format!("anthropic-ratelimit-{resource}-limit"), headers)?.parse()?;
487 let remaining = get_header(
488 &format!("anthropic-ratelimit-{resource}-remaining"),
489 headers,
490 )?
491 .parse()?;
492 let reset = DateTime::parse_from_rfc3339(get_header(
493 &format!("anthropic-ratelimit-{resource}-reset"),
494 headers,
495 )?)?
496 .to_utc();
497
498 Ok(Self {
499 limit,
500 remaining,
501 reset,
502 })
503 }
504}
505
/// <https://docs.anthropic.com/en/api/rate-limits#response-headers>
#[derive(Debug)]
pub struct RateLimitInfo {
    /// Parsed `retry-after` header, if present (seconds).
    pub retry_after: Option<Duration>,
    /// Per-resource limits; each is `None` when its headers were absent or
    /// malformed.
    pub requests: Option<RateLimit>,
    pub tokens: Option<RateLimit>,
    pub input_tokens: Option<RateLimit>,
    pub output_tokens: Option<RateLimit>,
}
515
516impl RateLimitInfo {
517 fn from_headers(headers: &HeaderMap<HeaderValue>) -> Self {
518 // Check if any rate limit headers exist
519 let has_rate_limit_headers = headers
520 .keys()
521 .any(|k| k == "retry-after" || k.as_str().starts_with("anthropic-ratelimit-"));
522
523 if !has_rate_limit_headers {
524 return Self {
525 retry_after: None,
526 requests: None,
527 tokens: None,
528 input_tokens: None,
529 output_tokens: None,
530 };
531 }
532
533 Self {
534 retry_after: parse_retry_after(headers),
535 requests: RateLimit::from_headers("requests", headers).ok(),
536 tokens: RateLimit::from_headers("tokens", headers).ok(),
537 input_tokens: RateLimit::from_headers("input-tokens", headers).ok(),
538 output_tokens: RateLimit::from_headers("output-tokens", headers).ok(),
539 }
540 }
541}
542
543/// Parses the Retry-After header value as an integer number of seconds (anthropic always uses
544/// seconds). Note that other services might specify an HTTP date or some other format for this
545/// header. Returns `None` if the header is not present or cannot be parsed.
546pub fn parse_retry_after(headers: &HeaderMap<HeaderValue>) -> Option<Duration> {
547 headers
548 .get("retry-after")
549 .and_then(|v| v.to_str().ok())
550 .and_then(|v| v.parse::<u64>().ok())
551 .map(Duration::from_secs)
552}
553
554fn get_header<'a>(key: &str, headers: &'a HeaderMap) -> anyhow::Result<&'a str> {
555 Ok(headers
556 .get(key)
557 .with_context(|| format!("missing header `{key}`"))?
558 .to_str()?)
559}
560
/// Like [`stream_completion`], but also returns rate-limit information parsed
/// from the response headers.
pub async fn stream_completion_with_rate_limit_info(
    client: &dyn HttpClient,
    api_url: &str,
    api_key: &str,
    request: Request,
    beta_headers: Option<String>,
) -> Result<
    (
        BoxStream<'static, Result<Event, AnthropicError>>,
        Option<RateLimitInfo>,
    ),
    AnthropicError,
> {
    // Wrap the request so the API responds with server-sent events.
    let request = StreamingRequest {
        base: request,
        stream: true,
    };

    let (response, rate_limits) =
        send_request(client, api_url, api_key, &request, beta_headers).await?;

    if response.status().is_success() {
        let reader = BufReader::new(response.into_body());
        let stream = reader
            .lines()
            .filter_map(|line| async move {
                match line {
                    Ok(line) => {
                        // SSE payload lines start with "data: " (or bare
                        // "data:"); other lines (blank separators, event
                        // names) are silently dropped by the `?` here.
                        let line = line
                            .strip_prefix("data: ")
                            .or_else(|| line.strip_prefix("data:"))?;

                        // Each data line is a complete JSON-encoded `Event`.
                        match serde_json::from_str(line) {
                            Ok(response) => Some(Ok(response)),
                            Err(error) => Some(Err(AnthropicError::DeserializeResponse(error))),
                        }
                    }
                    Err(error) => Some(Err(AnthropicError::ReadResponse(error))),
                }
            })
            .boxed();
        Ok((stream, Some(rate_limits)))
    } else {
        Err(handle_error_response(response, rate_limits).await)
    }
}
607
/// The kind of prompt cache to request; serialized lowercase ("ephemeral").
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
    Ephemeral,
}
613
/// Cache-control marker attached to a content block to create a cache anchor.
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
pub struct CacheControl {
    // Serialized as `"type"` on the wire.
    #[serde(rename = "type")]
    pub cache_type: CacheControlType,
}
619
/// A single conversation turn: a role plus one or more content blocks.
#[derive(Debug, Serialize, Deserialize)]
pub struct Message {
    pub role: Role,
    pub content: Vec<RequestContent>,
}
625
/// Who authored a message; serialized lowercase ("user" / "assistant").
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum Role {
    User,
    Assistant,
}
632
/// A content block sent *to* the API, tagged on the wire by its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum RequestContent {
    /// Plain text.
    #[serde(rename = "text")]
    Text {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A previously-produced thinking block, replayed with its signature.
    #[serde(rename = "thinking")]
    Thinking {
        thinking: String,
        signature: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// Opaque thinking content the API redacted; replayed verbatim.
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    /// An image attachment.
    #[serde(rename = "image")]
    Image {
        source: ImageSource,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// A tool invocation made by the assistant.
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
    /// The result of a tool invocation, keyed back by `tool_use_id`.
    #[serde(rename = "tool_result")]
    ToolResult {
        tool_use_id: String,
        is_error: bool,
        content: ToolResultContent,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<CacheControl>,
    },
}
674
/// A tool result body: either a bare string or a list of typed parts.
/// `untagged` lets serde pick the matching shape from the JSON.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum ToolResultContent {
    Plain(String),
    Multipart(Vec<ToolResultPart>),
}
681
/// One part of a multipart tool result; tagged by a lowercase `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolResultPart {
    Text { text: String },
    Image { source: ImageSource },
}
688
/// A content block received *from* the API, tagged by its `type` field.
/// Mirrors [`RequestContent`] minus cache-control and image blocks.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ResponseContent {
    #[serde(rename = "text")]
    Text { text: String },
    #[serde(rename = "thinking")]
    Thinking { thinking: String },
    #[serde(rename = "redacted_thinking")]
    RedactedThinking { data: String },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
        name: String,
        input: serde_json::Value,
    },
}
705
/// An image payload (e.g. base64 data plus its media type).
#[derive(Debug, Serialize, Deserialize)]
pub struct ImageSource {
    // Serialized as `"type"` on the wire.
    #[serde(rename = "type")]
    pub source_type: String,
    pub media_type: String,
    pub data: String,
}
713
/// Serde helper for `skip_serializing_if`: `true` when the flag is unset,
/// so `false`-valued fields are omitted from the serialized request.
fn is_false(value: &bool) -> bool {
    !*value
}
717
/// A tool definition offered to the model.
#[derive(Debug, Serialize, Deserialize)]
pub struct Tool {
    pub name: String,
    pub description: String,
    /// JSON Schema describing the tool's input.
    pub input_schema: serde_json::Value,
    // Omitted from the wire when false (see `is_false`).
    #[serde(default, skip_serializing_if = "is_false")]
    pub eager_input_streaming: bool,
}
726
/// How the model should choose tools; tagged by a lowercase `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum ToolChoice {
    Auto,
    Any,
    /// Force a specific tool by name.
    Tool { name: String },
    None,
}
735
/// The request-level `thinking` parameter; tagged by a lowercase `type`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum Thinking {
    /// Extended thinking with an optional token budget.
    Enabled { budget_tokens: Option<u32> },
    /// Let the model manage its own thinking budget.
    Adaptive,
}
742
/// Effort level for `OutputConfig`; serialized snake_case, and parseable
/// from strings via `EnumString`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, EnumString)]
#[serde(rename_all = "snake_case")]
#[strum(serialize_all = "snake_case")]
pub enum Effort {
    Low,
    Medium,
    High,
    Max,
}
752
/// Request-level output configuration (currently just an effort hint).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputConfig {
    pub effort: Option<Effort>,
}
757
/// A system prompt: either a bare string or structured content blocks.
/// `untagged` lets serde accept both JSON shapes.
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrContents {
    String(String),
    Content(Vec<RequestContent>),
}
764
/// Request body for the Messages API (`POST /v1/messages`). Optional fields
/// are omitted from the serialized JSON when unset.
#[derive(Debug, Serialize, Deserialize)]
pub struct Request {
    pub model: String,
    /// Maximum number of tokens to generate.
    pub max_tokens: u64,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub metadata: Option<Metadata>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_config: Option<OutputConfig>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub stop_sequences: Vec<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub speed: Option<Speed>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub temperature: Option<f32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_k: Option<u32>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub top_p: Option<f32>,
}
793
/// Request-level speed selector; serialized snake_case.
#[derive(Debug, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Speed {
    #[default]
    Standard,
    Fast,
}
801
/// A [`Request`] plus the `stream` flag; `flatten` merges the base request's
/// fields into the same JSON object.
#[derive(Debug, Serialize, Deserialize)]
pub struct StreamingRequest {
    #[serde(flatten)]
    pub base: Request,
    pub stream: bool,
}
808
/// Request metadata forwarded to the API (opaque user identifier).
#[derive(Debug, Serialize, Deserialize)]
pub struct Metadata {
    pub user_id: Option<String>,
}
813
/// Token accounting reported by the API; fields may be absent on partial
/// (streaming delta) events.
#[derive(Debug, Serialize, Deserialize, Default)]
pub struct Usage {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub input_tokens: Option<u64>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub output_tokens: Option<u64>,
    /// Tokens written to the prompt cache.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_creation_input_tokens: Option<u64>,
    /// Tokens served from the prompt cache.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_read_input_tokens: Option<u64>,
}
825
/// A complete (non-streaming) Messages API response.
#[derive(Debug, Serialize, Deserialize)]
pub struct Response {
    pub id: String,
    // Serialized as `"type"` on the wire.
    #[serde(rename = "type")]
    pub response_type: String,
    pub role: Role,
    pub content: Vec<ResponseContent>,
    pub model: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stop_sequence: Option<String>,
    pub usage: Usage,
}
840
/// A server-sent event in a streaming response, tagged by its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum Event {
    /// Opens the stream with the initial (mostly empty) message.
    #[serde(rename = "message_start")]
    MessageStart { message: Response },
    #[serde(rename = "content_block_start")]
    ContentBlockStart {
        index: usize,
        content_block: ResponseContent,
    },
    /// Incremental update to the content block at `index`.
    #[serde(rename = "content_block_delta")]
    ContentBlockDelta { index: usize, delta: ContentDelta },
    #[serde(rename = "content_block_stop")]
    ContentBlockStop { index: usize },
    /// Top-level message updates (stop reason) plus usage so far.
    #[serde(rename = "message_delta")]
    MessageDelta { delta: MessageDelta, usage: Usage },
    #[serde(rename = "message_stop")]
    MessageStop,
    /// Keep-alive; carries no data.
    #[serde(rename = "ping")]
    Ping,
    /// In-band error (also used for error response bodies).
    #[serde(rename = "error")]
    Error { error: ApiError },
}
864
/// Incremental payload inside a `content_block_delta` event, tagged by
/// its `type` field.
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum ContentDelta {
    #[serde(rename = "text_delta")]
    TextDelta { text: String },
    #[serde(rename = "thinking_delta")]
    ThinkingDelta { thinking: String },
    /// Signature chunk for a thinking block.
    #[serde(rename = "signature_delta")]
    SignatureDelta { signature: String },
    /// Partial JSON for a streaming tool-use input.
    #[serde(rename = "input_json_delta")]
    InputJsonDelta { partial_json: String },
}
877
/// Top-level message fields delivered in a `message_delta` event.
#[derive(Debug, Serialize, Deserialize)]
pub struct MessageDelta {
    pub stop_reason: Option<String>,
    pub stop_sequence: Option<String>,
}
883
/// Errors that can occur while talking to the Anthropic API.
///
/// NOTE(review): this type is `Debug`-only here; callers convert it via the
/// `From<AnthropicError>` impl below rather than through `Display`/`Error`.
#[derive(Debug)]
pub enum AnthropicError {
    /// Failed to serialize the HTTP request body to JSON
    SerializeRequest(serde_json::Error),

    /// Failed to construct the HTTP request body
    BuildRequestBody(http::Error),

    /// Failed to send the HTTP request
    HttpSend(anyhow::Error),

    /// Failed to deserialize the response from JSON
    DeserializeResponse(serde_json::Error),

    /// Failed to read from response stream
    ReadResponse(io::Error),

    /// HTTP error response from the API
    HttpResponseError {
        status_code: StatusCode,
        message: String,
    },

    /// Rate limit exceeded
    RateLimit { retry_after: Duration },

    /// Server overloaded
    ServerOverloaded { retry_after: Option<Duration> },

    /// API returned an error response
    ApiError(ApiError),
}
916
/// A structured error payload returned by the API
/// (e.g. `{"type": "rate_limit_error", "message": "..."}`).
#[derive(Debug, Serialize, Deserialize, Error)]
#[error("Anthropic API Error: {error_type}: {message}")]
pub struct ApiError {
    // Serialized as `"type"` on the wire; see `ApiErrorCode` for known values.
    #[serde(rename = "type")]
    pub error_type: String,
    pub message: String,
}
924
/// An Anthropic API error code.
/// <https://docs.anthropic.com/en/api/errors#http-errors>
///
/// Parsed from `ApiError::error_type` via `EnumString` (snake_case).
#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumString)]
#[strum(serialize_all = "snake_case")]
pub enum ApiErrorCode {
    /// 400 - `invalid_request_error`: There was an issue with the format or content of your request.
    InvalidRequestError,
    /// 401 - `authentication_error`: There's an issue with your API key.
    AuthenticationError,
    /// 403 - `permission_error`: Your API key does not have permission to use the specified resource.
    PermissionError,
    /// 404 - `not_found_error`: The requested resource was not found.
    NotFoundError,
    /// 413 - `request_too_large`: Request exceeds the maximum allowed number of bytes.
    RequestTooLarge,
    /// 429 - `rate_limit_error`: Your account has hit a rate limit.
    RateLimitError,
    /// 500 - `api_error`: An unexpected error has occurred internal to Anthropic's systems.
    ApiError,
    /// 529 - `overloaded_error`: Anthropic's API is temporarily overloaded.
    OverloadedError,
}
947
948impl ApiError {
949 pub fn code(&self) -> Option<ApiErrorCode> {
950 ApiErrorCode::from_str(&self.error_type).ok()
951 }
952
953 pub fn is_rate_limit_error(&self) -> bool {
954 matches!(self.error_type.as_str(), "rate_limit_error")
955 }
956
957 pub fn match_window_exceeded(&self) -> Option<u64> {
958 let Some(ApiErrorCode::InvalidRequestError) = self.code() else {
959 return None;
960 };
961
962 parse_prompt_too_long(&self.message)
963 }
964}
965
/// Extracts the token count from an Anthropic "prompt is too long" message,
/// e.g. `"prompt is too long: 220000 tokens > 200000"` → `Some(220000)`.
/// Returns `None` for any other message shape.
pub fn parse_prompt_too_long(message: &str) -> Option<u64> {
    let rest = message.strip_prefix("prompt is too long: ")?;
    let (count, _) = rest.split_once(" tokens")?;
    count.parse().ok()
}
974
/// Request body for the token counting API.
/// Similar to `Request` but without `max_tokens` since it's not needed for counting.
#[derive(Debug, Serialize)]
pub struct CountTokensRequest {
    pub model: String,
    pub messages: Vec<Message>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub system: Option<StringOrContents>,
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<Tool>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub thinking: Option<Thinking>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ToolChoice>,
}
990
/// Response from the token counting API.
#[derive(Debug, Deserialize)]
pub struct CountTokensResponse {
    /// Number of tokens the given request's prompt would consume.
    pub input_tokens: u64,
}
996
997/// Count the number of tokens in a message without creating it.
998pub async fn count_tokens(
999 client: &dyn HttpClient,
1000 api_url: &str,
1001 api_key: &str,
1002 request: CountTokensRequest,
1003) -> Result<CountTokensResponse, AnthropicError> {
1004 let uri = format!("{api_url}/v1/messages/count_tokens");
1005
1006 let request_builder = HttpRequest::builder()
1007 .method(Method::POST)
1008 .uri(uri)
1009 .header("Anthropic-Version", "2023-06-01")
1010 .header("X-Api-Key", api_key.trim())
1011 .header("Content-Type", "application/json");
1012
1013 let serialized_request =
1014 serde_json::to_string(&request).map_err(AnthropicError::SerializeRequest)?;
1015 let http_request = request_builder
1016 .body(AsyncBody::from(serialized_request))
1017 .map_err(AnthropicError::BuildRequestBody)?;
1018
1019 let mut response = client
1020 .send(http_request)
1021 .await
1022 .map_err(AnthropicError::HttpSend)?;
1023
1024 let rate_limits = RateLimitInfo::from_headers(response.headers());
1025
1026 if response.status().is_success() {
1027 let mut body = String::new();
1028 response
1029 .body_mut()
1030 .read_to_string(&mut body)
1031 .await
1032 .map_err(AnthropicError::ReadResponse)?;
1033
1034 serde_json::from_str(&body).map_err(AnthropicError::DeserializeResponse)
1035 } else {
1036 Err(handle_error_response(response, rate_limits).await)
1037 }
1038}
1039
1040// -- Conversions from/to `language_model_core` types --
1041
// Maps the crate-agnostic `Speed` onto this crate's wire type, one-to-one.
impl From<language_model_core::Speed> for Speed {
    fn from(speed: language_model_core::Speed) -> Self {
        match speed {
            language_model_core::Speed::Standard => Speed::Standard,
            language_model_core::Speed::Fast => Speed::Fast,
        }
    }
}
1050
// Lifts transport/API errors into the provider-agnostic completion error,
// tagging each with the Anthropic provider name.
impl From<AnthropicError> for language_model_core::LanguageModelCompletionError {
    fn from(error: AnthropicError) -> Self {
        let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
        match error {
            AnthropicError::SerializeRequest(error) => Self::SerializeRequest { provider, error },
            AnthropicError::BuildRequestBody(error) => Self::BuildRequestBody { provider, error },
            AnthropicError::HttpSend(error) => Self::HttpSend { provider, error },
            AnthropicError::DeserializeResponse(error) => {
                Self::DeserializeResponse { provider, error }
            }
            AnthropicError::ReadResponse(error) => Self::ApiReadResponseError { provider, error },
            AnthropicError::HttpResponseError {
                status_code,
                message,
            } => Self::HttpResponseError {
                provider,
                status_code,
                message,
            },
            AnthropicError::RateLimit { retry_after } => Self::RateLimitExceeded {
                provider,
                retry_after: Some(retry_after),
            },
            AnthropicError::ServerOverloaded { retry_after } => Self::ServerOverloaded {
                provider,
                retry_after,
            },
            // Structured API errors get finer-grained mapping below.
            AnthropicError::ApiError(api_error) => api_error.into(),
        }
    }
}
1082
// Maps each known Anthropic error code to the corresponding provider-agnostic
// error; unrecognized codes fall through to `Other`.
impl From<ApiError> for language_model_core::LanguageModelCompletionError {
    fn from(error: ApiError) -> Self {
        use ApiErrorCode::*;
        let provider = language_model_core::ANTHROPIC_PROVIDER_NAME;
        match error.code() {
            Some(code) => match code {
                InvalidRequestError => Self::BadRequestFormat {
                    provider,
                    message: error.message,
                },
                AuthenticationError => Self::AuthenticationError {
                    provider,
                    message: error.message,
                },
                PermissionError => Self::PermissionError {
                    provider,
                    message: error.message,
                },
                NotFoundError => Self::ApiEndpointNotFound { provider },
                // Surface the reported token count when the prompt was too big.
                RequestTooLarge => Self::PromptTooLarge {
                    tokens: language_model_core::parse_prompt_too_long(&error.message),
                },
                // The API body carries no Retry-After; that is header-driven
                // and handled in `handle_error_response`.
                RateLimitError => Self::RateLimitExceeded {
                    provider,
                    retry_after: None,
                },
                ApiError => Self::ApiInternalServerError {
                    provider,
                    message: error.message,
                },
                OverloadedError => Self::ServerOverloaded {
                    provider,
                    retry_after: None,
                },
            },
            None => Self::Other(error.into()),
        }
    }
}
1122
// A custom model's configured `Thinking` mode (and its budget) must be
// returned verbatim by `Model::mode`.
#[test]
fn custom_mode_thinking_is_preserved() {
    let model = Model::Custom {
        name: "my-custom-model".to_string(),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: vec![],
        mode: AnthropicModelMode::Thinking {
            budget_tokens: Some(2048),
        },
    };
    assert_eq!(
        model.mode(),
        AnthropicModelMode::Thinking {
            budget_tokens: Some(2048)
        }
    );
    assert!(model.supports_thinking());
}
1146
// A custom model configured for adaptive thinking reports both adaptive and
// plain thinking support.
#[test]
fn custom_mode_adaptive_is_preserved() {
    let model = Model::Custom {
        name: "my-custom-model".to_string(),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: vec![],
        mode: AnthropicModelMode::AdaptiveThinking,
    };
    assert_eq!(model.mode(), AnthropicModelMode::AdaptiveThinking);
    assert!(model.supports_adaptive_thinking());
    assert!(model.supports_thinking());
}
1164
// A custom model in `Default` mode must not advertise any thinking support.
#[test]
fn custom_mode_default_disables_thinking() {
    let model = Model::Custom {
        name: "my-custom-model".to_string(),
        max_tokens: 8192,
        display_name: None,
        tool_override: None,
        cache_configuration: None,
        max_output_tokens: None,
        default_temperature: None,
        extra_beta_headers: vec![],
        mode: AnthropicModelMode::Default,
    };
    assert!(!model.supports_thinking());
    assert!(!model.supports_adaptive_thinking());
}
1181
// `match_window_exceeded` should return the token count only for
// `invalid_request_error`s whose message matches the "prompt is too long"
// format; other error types or malformed counts yield `None`.
#[test]
fn test_match_window_exceeded() {
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 220000 tokens > 200000".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(220_000));

    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: 1234953 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), Some(1234953));

    // Message doesn't match the expected prefix.
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "not a prompt length error".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    // Wrong error type, even with a matching message.
    let error = ApiError {
        error_type: "rate_limit_error".to_string(),
        message: "prompt is too long: 12345 tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);

    // Unparseable token count.
    let error = ApiError {
        error_type: "invalid_request_error".to_string(),
        message: "prompt is too long: invalid tokens".to_string(),
    };
    assert_eq!(error.match_window_exceeded(), None);
}