1use serde::{Deserialize, Serialize};
2use strum::EnumIter;
3
// Effort level carried by `BedrockModelMode::AdaptiveThinking`.
//
// `High` is the `Default`. The lowercase strings returned by `as_str` are
// separate from the serde representation: no serde rename is applied here.
//
// Plain `//` comments are used on purpose: `///` doc comments would be picked
// up as descriptions by the optional `schemars::JsonSchema` derive.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum BedrockAdaptiveThinkingEffort {
    Low,
    Medium,
    // Value produced by `BedrockAdaptiveThinkingEffort::default()`.
    #[default]
    High,
    Max,
}
13
14impl BedrockAdaptiveThinkingEffort {
15 pub fn as_str(&self) -> &'static str {
16 match self {
17 Self::Low => "low",
18 Self::Medium => "medium",
19 Self::High => "high",
20 Self::Max => "max",
21 }
22 }
23}
24
// Reasoning mode reported for a model by `Model::mode`.
//
// Plain `//` comments are used on purpose: `///` doc comments would be picked
// up as descriptions by the optional `schemars::JsonSchema` derive.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum BedrockModelMode {
    // Returned by `Model::mode` for every model that is not a `*Thinking` variant.
    #[default]
    Default,
    // Returned for the Sonnet 4 / 4.5 and Opus 4.1 / 4.5 `*Thinking` variants,
    // always with `Some(4096)`.
    // NOTE(review): presumably `None` lets the consumer pick its own budget —
    // confirm against the request builder.
    Thinking {
        budget_tokens: Option<u64>,
    },
    // Returned for the Opus 4.6 and Sonnet 4.6 `*Thinking` variants, with the
    // default effort.
    AdaptiveThinking {
        effort: BedrockAdaptiveThinkingEffort,
    },
}
37
// Prompt-caching parameters attached to a model; see `Model::cache_configuration`
// for the built-in values (4 anchors; 1024 or 2048 minimum tokens).
//
// Plain `//` comments are used on purpose: `///` doc comments would be picked
// up as descriptions by the optional `schemars::JsonSchema` derive.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct BedrockModelCacheConfiguration {
    // NOTE(review): presumably the maximum number of cache checkpoints placed
    // in one request — confirm against the code that consumes this struct.
    pub max_cache_anchors: usize,
    // NOTE(review): presumably the minimum prompt size, in tokens, before
    // caching is attempted — confirm against the code that consumes this struct.
    pub min_total_token: u64,
}
44
// Catalog of the Bedrock models known to this file, plus a user-defined
// `Custom` entry.
//
// For every built-in variant the `serde(rename = ...)` string is the same
// string `Model::id` returns. Claude variants additionally accept a
// `-latest` alias when deserializing. `*Thinking` variants are separate
// entries that share a `request_id` with their base model and differ only in
// `Model::mode`.
//
// `EnumIter` is derived, so variant declaration order is the iteration order.
//
// Plain `//` comments are used on purpose: `///` doc comments would be picked
// up as descriptions by the optional `schemars::JsonSchema` derive.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    // Anthropic Claude 4+ models
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(rename = "claude-sonnet-4", alias = "claude-sonnet-4-latest")]
    ClaudeSonnet4,
    #[serde(
        rename = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4Thinking,
    // Value produced by `Model::default()`.
    #[default]
    #[serde(rename = "claude-sonnet-4-5", alias = "claude-sonnet-4-5-latest")]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5Thinking,
    #[serde(rename = "claude-opus-4-1", alias = "claude-opus-4-1-latest")]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1Thinking,
    #[serde(rename = "claude-opus-4-5", alias = "claude-opus-4-5-latest")]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5Thinking,
    #[serde(rename = "claude-opus-4-6", alias = "claude-opus-4-6-latest")]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest"
    )]
    ClaudeOpus4_6Thinking,
    #[serde(rename = "claude-sonnet-4-6", alias = "claude-sonnet-4-6-latest")]
    ClaudeSonnet4_6,
    #[serde(
        rename = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest"
    )]
    ClaudeSonnet4_6Thinking,

    // Meta Llama 4 models
    #[serde(rename = "llama-4-scout-17b")]
    Llama4Scout17B,
    #[serde(rename = "llama-4-maverick-17b")]
    Llama4Maverick17B,

    // Google Gemma 3 models
    #[serde(rename = "gemma-3-4b")]
    Gemma3_4B,
    #[serde(rename = "gemma-3-12b")]
    Gemma3_12B,
    #[serde(rename = "gemma-3-27b")]
    Gemma3_27B,

    // Mistral models
    #[serde(rename = "magistral-small")]
    MagistralSmall,
    #[serde(rename = "mistral-large-3")]
    MistralLarge3,
    #[serde(rename = "pixtral-large")]
    PixtralLarge,

    // Qwen models
    #[serde(rename = "qwen3-32b")]
    Qwen3_32B,
    #[serde(rename = "qwen3-vl-235b")]
    Qwen3VL235B,
    #[serde(rename = "qwen3-235b")]
    Qwen3_235B,
    #[serde(rename = "qwen3-next-80b")]
    Qwen3Next80B,
    #[serde(rename = "qwen3-coder-30b")]
    Qwen3Coder30B,
    #[serde(rename = "qwen3-coder-next")]
    Qwen3CoderNext,
    #[serde(rename = "qwen3-coder-480b")]
    Qwen3Coder480B,

    // Amazon Nova models
    #[serde(rename = "nova-lite")]
    NovaLite,
    #[serde(rename = "nova-pro")]
    NovaPro,
    #[serde(rename = "nova-premier")]
    NovaPremier,
    #[serde(rename = "nova-2-lite")]
    Nova2Lite,

    // OpenAI GPT OSS models
    #[serde(rename = "gpt-oss-20b")]
    GptOss20B,
    #[serde(rename = "gpt-oss-120b")]
    GptOss120B,

    // MiniMax models
    #[serde(rename = "minimax-m2")]
    MiniMaxM2,

    // Moonshot models
    #[serde(rename = "kimi-k2-thinking")]
    KimiK2Thinking,
    #[serde(rename = "kimi-k2-5")]
    KimiK2_5,

    // DeepSeek models
    #[serde(rename = "deepseek-r1")]
    DeepSeekR1,
    // Note: the V3.1 variant keeps the serialized name "deepseek-v3".
    #[serde(rename = "deepseek-v3")]
    DeepSeekV3_1,
    #[serde(rename = "deepseek-v3-2")]
    DeepSeekV3_2,

    // User-defined model
    #[serde(rename = "custom")]
    Custom {
        // Returned verbatim by both `Model::id` and `Model::request_id`.
        name: String,
        // Returned by `Model::max_tokens`.
        max_tokens: u64,
        // `Model::display_name` falls back to `name` when this is `None`.
        display_name: Option<String>,
        // `Model::max_output_tokens` falls back to 4_096 when this is `None`.
        max_output_tokens: Option<u64>,
        // `Model::default_temperature` falls back to 1.0 when this is `None`.
        default_temperature: Option<f32>,
        // `Model::supports_caching` is true exactly when this is `Some`.
        cache_configuration: Option<BedrockModelCacheConfiguration>,
    },
}
177
178impl Model {
179 pub fn default_fast(_region: &str) -> Self {
180 Self::ClaudeHaiku4_5
181 }
182
183 pub fn from_id(id: &str) -> anyhow::Result<Self> {
184 if id.starts_with("claude-opus-4-6-thinking") {
185 Ok(Self::ClaudeOpus4_6Thinking)
186 } else if id.starts_with("claude-opus-4-6") {
187 Ok(Self::ClaudeOpus4_6)
188 } else if id.starts_with("claude-opus-4-5-thinking") {
189 Ok(Self::ClaudeOpus4_5Thinking)
190 } else if id.starts_with("claude-opus-4-5") {
191 Ok(Self::ClaudeOpus4_5)
192 } else if id.starts_with("claude-opus-4-1-thinking") {
193 Ok(Self::ClaudeOpus4_1Thinking)
194 } else if id.starts_with("claude-opus-4-1") {
195 Ok(Self::ClaudeOpus4_1)
196 } else if id.starts_with("claude-sonnet-4-6-thinking") {
197 Ok(Self::ClaudeSonnet4_6Thinking)
198 } else if id.starts_with("claude-sonnet-4-6") {
199 Ok(Self::ClaudeSonnet4_6)
200 } else if id.starts_with("claude-sonnet-4-5-thinking") {
201 Ok(Self::ClaudeSonnet4_5Thinking)
202 } else if id.starts_with("claude-sonnet-4-5") {
203 Ok(Self::ClaudeSonnet4_5)
204 } else if id.starts_with("claude-sonnet-4-thinking") {
205 Ok(Self::ClaudeSonnet4Thinking)
206 } else if id.starts_with("claude-sonnet-4") {
207 Ok(Self::ClaudeSonnet4)
208 } else if id.starts_with("claude-haiku-4-5") {
209 Ok(Self::ClaudeHaiku4_5)
210 } else {
211 anyhow::bail!("invalid model id {id}");
212 }
213 }
214
215 pub fn id(&self) -> &str {
216 match self {
217 Self::ClaudeHaiku4_5 => "claude-haiku-4-5",
218 Self::ClaudeSonnet4 => "claude-sonnet-4",
219 Self::ClaudeSonnet4Thinking => "claude-sonnet-4-thinking",
220 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5",
221 Self::ClaudeSonnet4_5Thinking => "claude-sonnet-4-5-thinking",
222 Self::ClaudeOpus4_1 => "claude-opus-4-1",
223 Self::ClaudeOpus4_1Thinking => "claude-opus-4-1-thinking",
224 Self::ClaudeOpus4_5 => "claude-opus-4-5",
225 Self::ClaudeOpus4_5Thinking => "claude-opus-4-5-thinking",
226 Self::ClaudeOpus4_6 => "claude-opus-4-6",
227 Self::ClaudeOpus4_6Thinking => "claude-opus-4-6-thinking",
228 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
229 Self::ClaudeSonnet4_6Thinking => "claude-sonnet-4-6-thinking",
230 Self::Llama4Scout17B => "llama-4-scout-17b",
231 Self::Llama4Maverick17B => "llama-4-maverick-17b",
232 Self::Gemma3_4B => "gemma-3-4b",
233 Self::Gemma3_12B => "gemma-3-12b",
234 Self::Gemma3_27B => "gemma-3-27b",
235 Self::MagistralSmall => "magistral-small",
236 Self::MistralLarge3 => "mistral-large-3",
237 Self::PixtralLarge => "pixtral-large",
238 Self::Qwen3_32B => "qwen3-32b",
239 Self::Qwen3VL235B => "qwen3-vl-235b",
240 Self::Qwen3_235B => "qwen3-235b",
241 Self::Qwen3Next80B => "qwen3-next-80b",
242 Self::Qwen3Coder30B => "qwen3-coder-30b",
243 Self::Qwen3CoderNext => "qwen3-coder-next",
244 Self::Qwen3Coder480B => "qwen3-coder-480b",
245 Self::NovaLite => "nova-lite",
246 Self::NovaPro => "nova-pro",
247 Self::NovaPremier => "nova-premier",
248 Self::Nova2Lite => "nova-2-lite",
249 Self::GptOss20B => "gpt-oss-20b",
250 Self::GptOss120B => "gpt-oss-120b",
251 Self::MiniMaxM2 => "minimax-m2",
252 Self::KimiK2Thinking => "kimi-k2-thinking",
253 Self::KimiK2_5 => "kimi-k2-5",
254 Self::DeepSeekR1 => "deepseek-r1",
255 Self::DeepSeekV3_1 => "deepseek-v3",
256 Self::DeepSeekV3_2 => "deepseek-v3-2",
257 Self::Custom { name, .. } => name,
258 }
259 }
260
261 pub fn request_id(&self) -> &str {
262 match self {
263 Self::ClaudeHaiku4_5 => "anthropic.claude-haiku-4-5-20251001-v1:0",
264 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => {
265 "anthropic.claude-sonnet-4-20250514-v1:0"
266 }
267 Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking => {
268 "anthropic.claude-sonnet-4-5-20250929-v1:0"
269 }
270 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => {
271 "anthropic.claude-opus-4-1-20250805-v1:0"
272 }
273 Self::ClaudeOpus4_5 | Self::ClaudeOpus4_5Thinking => {
274 "anthropic.claude-opus-4-5-20251101-v1:0"
275 }
276 Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => "anthropic.claude-opus-4-6-v1",
277 Self::ClaudeSonnet4_6 | Self::ClaudeSonnet4_6Thinking => "anthropic.claude-sonnet-4-6",
278 Self::Llama4Scout17B => "meta.llama4-scout-17b-instruct-v1:0",
279 Self::Llama4Maverick17B => "meta.llama4-maverick-17b-instruct-v1:0",
280 Self::Gemma3_4B => "google.gemma-3-4b-it",
281 Self::Gemma3_12B => "google.gemma-3-12b-it",
282 Self::Gemma3_27B => "google.gemma-3-27b-it",
283 Self::MagistralSmall => "mistral.magistral-small-2509",
284 Self::MistralLarge3 => "mistral.mistral-large-3-675b-instruct",
285 Self::PixtralLarge => "mistral.pixtral-large-2502-v1:0",
286 Self::Qwen3VL235B => "qwen.qwen3-vl-235b-a22b",
287 Self::Qwen3_32B => "qwen.qwen3-32b-v1:0",
288 Self::Qwen3_235B => "qwen.qwen3-235b-a22b-2507-v1:0",
289 Self::Qwen3Next80B => "qwen.qwen3-next-80b-a3b",
290 Self::Qwen3Coder30B => "qwen.qwen3-coder-30b-a3b-v1:0",
291 Self::Qwen3CoderNext => "qwen.qwen3-coder-next",
292 Self::Qwen3Coder480B => "qwen.qwen3-coder-480b-a35b-v1:0",
293 Self::NovaLite => "amazon.nova-lite-v1:0",
294 Self::NovaPro => "amazon.nova-pro-v1:0",
295 Self::NovaPremier => "amazon.nova-premier-v1:0",
296 Self::Nova2Lite => "amazon.nova-2-lite-v1:0",
297 Self::GptOss20B => "openai.gpt-oss-20b-1:0",
298 Self::GptOss120B => "openai.gpt-oss-120b-1:0",
299 Self::MiniMaxM2 => "minimax.minimax-m2",
300 Self::KimiK2Thinking => "moonshot.kimi-k2-thinking",
301 Self::KimiK2_5 => "moonshotai.kimi-k2.5",
302 Self::DeepSeekR1 => "deepseek.r1-v1:0",
303 Self::DeepSeekV3_1 => "deepseek.v3-v1:0",
304 Self::DeepSeekV3_2 => "deepseek.v3.2",
305 Self::Custom { name, .. } => name,
306 }
307 }
308
309 pub fn display_name(&self) -> &str {
310 match self {
311 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
312 Self::ClaudeSonnet4 => "Claude Sonnet 4",
313 Self::ClaudeSonnet4Thinking => "Claude Sonnet 4 Thinking",
314 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
315 Self::ClaudeSonnet4_5Thinking => "Claude Sonnet 4.5 Thinking",
316 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
317 Self::ClaudeOpus4_1Thinking => "Claude Opus 4.1 Thinking",
318 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
319 Self::ClaudeOpus4_5Thinking => "Claude Opus 4.5 Thinking",
320 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
321 Self::ClaudeOpus4_6Thinking => "Claude Opus 4.6 Thinking",
322 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
323 Self::ClaudeSonnet4_6Thinking => "Claude Sonnet 4.6 Thinking",
324 Self::Llama4Scout17B => "Llama 4 Scout 17B",
325 Self::Llama4Maverick17B => "Llama 4 Maverick 17B",
326 Self::Gemma3_4B => "Gemma 3 4B",
327 Self::Gemma3_12B => "Gemma 3 12B",
328 Self::Gemma3_27B => "Gemma 3 27B",
329 Self::MagistralSmall => "Magistral Small",
330 Self::MistralLarge3 => "Mistral Large 3",
331 Self::PixtralLarge => "Pixtral Large",
332 Self::Qwen3VL235B => "Qwen3 VL 235B",
333 Self::Qwen3_32B => "Qwen3 32B",
334 Self::Qwen3_235B => "Qwen3 235B",
335 Self::Qwen3Next80B => "Qwen3 Next 80B",
336 Self::Qwen3Coder30B => "Qwen3 Coder 30B",
337 Self::Qwen3CoderNext => "Qwen3 Coder Next",
338 Self::Qwen3Coder480B => "Qwen3 Coder 480B",
339 Self::NovaLite => "Amazon Nova Lite",
340 Self::NovaPro => "Amazon Nova Pro",
341 Self::NovaPremier => "Amazon Nova Premier",
342 Self::Nova2Lite => "Amazon Nova 2 Lite",
343 Self::GptOss20B => "GPT OSS 20B",
344 Self::GptOss120B => "GPT OSS 120B",
345 Self::MiniMaxM2 => "MiniMax M2",
346 Self::KimiK2Thinking => "Kimi K2 Thinking",
347 Self::KimiK2_5 => "Kimi K2.5",
348 Self::DeepSeekR1 => "DeepSeek R1",
349 Self::DeepSeekV3_1 => "DeepSeek V3.1",
350 Self::DeepSeekV3_2 => "DeepSeek V3.2",
351 Self::Custom {
352 display_name, name, ..
353 } => display_name.as_deref().unwrap_or(name.as_str()),
354 }
355 }
356
357 pub fn max_token_count(&self) -> u64 {
358 self.max_tokens()
359 }
360
361 pub fn max_tokens(&self) -> u64 {
362 match self {
363 Self::ClaudeHaiku4_5
364 | Self::ClaudeSonnet4
365 | Self::ClaudeSonnet4Thinking
366 | Self::ClaudeSonnet4_5
367 | Self::ClaudeSonnet4_5Thinking
368 | Self::ClaudeOpus4_1
369 | Self::ClaudeOpus4_1Thinking
370 | Self::ClaudeOpus4_5
371 | Self::ClaudeOpus4_5Thinking
372 | Self::ClaudeOpus4_6
373 | Self::ClaudeOpus4_6Thinking
374 | Self::ClaudeSonnet4_6
375 | Self::ClaudeSonnet4_6Thinking => 200_000,
376 Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
377 Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
378 Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
379 Self::Qwen3_32B
380 | Self::Qwen3VL235B
381 | Self::Qwen3_235B
382 | Self::Qwen3Next80B
383 | Self::Qwen3Coder30B
384 | Self::Qwen3CoderNext
385 | Self::Qwen3Coder480B => 128_000,
386 Self::NovaLite | Self::NovaPro => 300_000,
387 Self::NovaPremier => 1_000_000,
388 Self::Nova2Lite => 300_000,
389 Self::GptOss20B | Self::GptOss120B => 128_000,
390 Self::MiniMaxM2 => 128_000,
391 Self::KimiK2Thinking | Self::KimiK2_5 => 128_000,
392 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 128_000,
393 Self::Custom { max_tokens, .. } => *max_tokens,
394 }
395 }
396
397 pub fn max_output_tokens(&self) -> u64 {
398 match self {
399 Self::ClaudeHaiku4_5
400 | Self::ClaudeSonnet4_5
401 | Self::ClaudeSonnet4_5Thinking
402 | Self::ClaudeOpus4_5
403 | Self::ClaudeOpus4_5Thinking
404 | Self::ClaudeSonnet4_6
405 | Self::ClaudeSonnet4_6Thinking => 64_000,
406 Self::ClaudeSonnet4 | Self::ClaudeSonnet4Thinking => 64_000,
407 Self::ClaudeOpus4_1 | Self::ClaudeOpus4_1Thinking => 32_000,
408 Self::ClaudeOpus4_6 | Self::ClaudeOpus4_6Thinking => 128_000,
409 Self::Llama4Scout17B
410 | Self::Llama4Maverick17B
411 | Self::Gemma3_4B
412 | Self::Gemma3_12B
413 | Self::Gemma3_27B
414 | Self::MagistralSmall
415 | Self::MistralLarge3
416 | Self::PixtralLarge => 8_192,
417 Self::Qwen3_32B
418 | Self::Qwen3VL235B
419 | Self::Qwen3_235B
420 | Self::Qwen3Next80B
421 | Self::Qwen3Coder30B
422 | Self::Qwen3CoderNext
423 | Self::Qwen3Coder480B => 8_192,
424 Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => 5_000,
425 Self::GptOss20B | Self::GptOss120B => 16_000,
426 Self::MiniMaxM2 => 16_000,
427 Self::KimiK2Thinking | Self::KimiK2_5 => 16_000,
428 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 16_000,
429 Self::Custom {
430 max_output_tokens, ..
431 } => max_output_tokens.unwrap_or(4_096),
432 }
433 }
434
435 pub fn default_temperature(&self) -> f32 {
436 match self {
437 Self::ClaudeHaiku4_5
438 | Self::ClaudeSonnet4
439 | Self::ClaudeSonnet4Thinking
440 | Self::ClaudeSonnet4_5
441 | Self::ClaudeSonnet4_5Thinking
442 | Self::ClaudeOpus4_1
443 | Self::ClaudeOpus4_1Thinking
444 | Self::ClaudeOpus4_5
445 | Self::ClaudeOpus4_5Thinking
446 | Self::ClaudeOpus4_6
447 | Self::ClaudeOpus4_6Thinking
448 | Self::ClaudeSonnet4_6
449 | Self::ClaudeSonnet4_6Thinking => 1.0,
450 Self::Custom {
451 default_temperature,
452 ..
453 } => default_temperature.unwrap_or(1.0),
454 _ => 1.0,
455 }
456 }
457
458 pub fn supports_tool_use(&self) -> bool {
459 match self {
460 Self::ClaudeHaiku4_5
461 | Self::ClaudeSonnet4
462 | Self::ClaudeSonnet4Thinking
463 | Self::ClaudeSonnet4_5
464 | Self::ClaudeSonnet4_5Thinking
465 | Self::ClaudeOpus4_1
466 | Self::ClaudeOpus4_1Thinking
467 | Self::ClaudeOpus4_5
468 | Self::ClaudeOpus4_5Thinking
469 | Self::ClaudeOpus4_6
470 | Self::ClaudeOpus4_6Thinking
471 | Self::ClaudeSonnet4_6
472 | Self::ClaudeSonnet4_6Thinking => true,
473 Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => true,
474 Self::MistralLarge3 | Self::PixtralLarge | Self::MagistralSmall => true,
475 // Gemma accepts toolConfig without error but produces unreliable tool
476 // calls -- malformed JSON args, hallucinated tool names, dropped calls.
477 Self::Qwen3_32B
478 | Self::Qwen3VL235B
479 | Self::Qwen3_235B
480 | Self::Qwen3Next80B
481 | Self::Qwen3Coder30B
482 | Self::Qwen3CoderNext
483 | Self::Qwen3Coder480B => true,
484 Self::MiniMaxM2 => true,
485 Self::KimiK2Thinking | Self::KimiK2_5 => true,
486 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => true,
487 _ => false,
488 }
489 }
490
491 pub fn supports_images(&self) -> bool {
492 match self {
493 Self::ClaudeHaiku4_5
494 | Self::ClaudeSonnet4
495 | Self::ClaudeSonnet4Thinking
496 | Self::ClaudeSonnet4_5
497 | Self::ClaudeSonnet4_5Thinking
498 | Self::ClaudeOpus4_1
499 | Self::ClaudeOpus4_1Thinking
500 | Self::ClaudeOpus4_5
501 | Self::ClaudeOpus4_5Thinking
502 | Self::ClaudeOpus4_6
503 | Self::ClaudeOpus4_6Thinking
504 | Self::ClaudeSonnet4_6
505 | Self::ClaudeSonnet4_6Thinking => true,
506 Self::NovaLite | Self::NovaPro => true,
507 Self::PixtralLarge => true,
508 Self::Qwen3VL235B => true,
509 Self::KimiK2_5 => true,
510 _ => false,
511 }
512 }
513
514 pub fn supports_extended_context(&self) -> bool {
515 matches!(
516 self,
517 Self::ClaudeSonnet4
518 | Self::ClaudeSonnet4Thinking
519 | Self::ClaudeSonnet4_5
520 | Self::ClaudeSonnet4_5Thinking
521 | Self::ClaudeOpus4_5
522 | Self::ClaudeOpus4_5Thinking
523 | Self::ClaudeOpus4_6
524 | Self::ClaudeOpus4_6Thinking
525 | Self::ClaudeSonnet4_6
526 | Self::ClaudeSonnet4_6Thinking
527 )
528 }
529
530 pub fn supports_caching(&self) -> bool {
531 match self {
532 Self::ClaudeHaiku4_5
533 | Self::ClaudeSonnet4
534 | Self::ClaudeSonnet4Thinking
535 | Self::ClaudeSonnet4_5
536 | Self::ClaudeSonnet4_5Thinking
537 | Self::ClaudeOpus4_1
538 | Self::ClaudeOpus4_1Thinking
539 | Self::ClaudeOpus4_5
540 | Self::ClaudeOpus4_5Thinking
541 | Self::ClaudeOpus4_6
542 | Self::ClaudeOpus4_6Thinking
543 | Self::ClaudeSonnet4_6
544 | Self::ClaudeSonnet4_6Thinking => true,
545 Self::Custom {
546 cache_configuration,
547 ..
548 } => cache_configuration.is_some(),
549 _ => false,
550 }
551 }
552
553 pub fn cache_configuration(&self) -> Option<BedrockModelCacheConfiguration> {
554 match self {
555 Self::ClaudeSonnet4
556 | Self::ClaudeSonnet4Thinking
557 | Self::ClaudeSonnet4_5
558 | Self::ClaudeSonnet4_5Thinking
559 | Self::ClaudeOpus4_1
560 | Self::ClaudeOpus4_1Thinking
561 | Self::ClaudeOpus4_5
562 | Self::ClaudeOpus4_5Thinking
563 | Self::ClaudeOpus4_6
564 | Self::ClaudeOpus4_6Thinking
565 | Self::ClaudeSonnet4_6
566 | Self::ClaudeSonnet4_6Thinking => Some(BedrockModelCacheConfiguration {
567 max_cache_anchors: 4,
568 min_total_token: 1024,
569 }),
570 Self::ClaudeHaiku4_5 => Some(BedrockModelCacheConfiguration {
571 max_cache_anchors: 4,
572 min_total_token: 2048,
573 }),
574 Self::Custom {
575 cache_configuration,
576 ..
577 } => cache_configuration.clone(),
578 _ => None,
579 }
580 }
581
582 pub fn mode(&self) -> BedrockModelMode {
583 match self {
584 Self::ClaudeSonnet4Thinking | Self::ClaudeSonnet4_5Thinking => {
585 BedrockModelMode::Thinking {
586 budget_tokens: Some(4096),
587 }
588 }
589 Self::ClaudeOpus4_1Thinking | Self::ClaudeOpus4_5Thinking => {
590 BedrockModelMode::Thinking {
591 budget_tokens: Some(4096),
592 }
593 }
594 Self::ClaudeOpus4_6Thinking => BedrockModelMode::AdaptiveThinking {
595 effort: BedrockAdaptiveThinkingEffort::default(),
596 },
597 Self::ClaudeSonnet4_6Thinking => BedrockModelMode::AdaptiveThinking {
598 effort: BedrockAdaptiveThinkingEffort::default(),
599 },
600 _ => BedrockModelMode::Default,
601 }
602 }
603
604 pub fn cross_region_inference_id(
605 &self,
606 region: &str,
607 allow_global: bool,
608 ) -> anyhow::Result<String> {
609 let model_id = self.request_id();
610
611 let supports_global = matches!(
612 self,
613 Self::ClaudeHaiku4_5
614 | Self::ClaudeSonnet4
615 | Self::ClaudeSonnet4Thinking
616 | Self::ClaudeSonnet4_5
617 | Self::ClaudeSonnet4_5Thinking
618 | Self::ClaudeOpus4_5
619 | Self::ClaudeOpus4_5Thinking
620 | Self::ClaudeOpus4_6
621 | Self::ClaudeOpus4_6Thinking
622 | Self::ClaudeSonnet4_6
623 | Self::ClaudeSonnet4_6Thinking
624 | Self::Nova2Lite
625 );
626
627 // Determine region group based on AWS region
628 let region_group = if region.starts_with("us-gov-") {
629 "us-gov"
630 } else if region.starts_with("us-") || region.starts_with("sa-") {
631 if allow_global && supports_global {
632 "global"
633 } else {
634 "us"
635 }
636 } else if region.starts_with("ca-") {
637 if allow_global && supports_global {
638 "global"
639 } else {
640 "ca"
641 }
642 } else if region.starts_with("eu-") {
643 if allow_global && supports_global {
644 "global"
645 } else {
646 "eu"
647 }
648 } else if region == "ap-southeast-2" || region == "ap-southeast-4" {
649 // Australia
650 if allow_global && supports_global {
651 "global"
652 } else {
653 "au"
654 }
655 } else if region == "ap-northeast-1" || region == "ap-northeast-3" {
656 // Japan
657 if allow_global && supports_global {
658 "global"
659 } else {
660 "jp"
661 }
662 } else if region.starts_with("ap-") || region.starts_with("me-") {
663 if allow_global && supports_global {
664 "global"
665 } else {
666 "apac"
667 }
668 } else {
669 anyhow::bail!("Unsupported Region {region}");
670 };
671
672 match (self, region_group) {
673 (Self::Custom { .. }, _) => Ok(model_id.into()),
674
675 // Global inference profiles
676 (
677 Self::ClaudeHaiku4_5
678 | Self::ClaudeSonnet4
679 | Self::ClaudeSonnet4Thinking
680 | Self::ClaudeSonnet4_5
681 | Self::ClaudeSonnet4_5Thinking
682 | Self::ClaudeOpus4_5
683 | Self::ClaudeOpus4_5Thinking
684 | Self::ClaudeOpus4_6
685 | Self::ClaudeOpus4_6Thinking
686 | Self::ClaudeSonnet4_6
687 | Self::ClaudeSonnet4_6Thinking
688 | Self::Nova2Lite,
689 "global",
690 ) => Ok(format!("{}.{}", region_group, model_id)),
691
692 // US Government region inference profiles
693 (Self::ClaudeSonnet4_5 | Self::ClaudeSonnet4_5Thinking, "us-gov") => {
694 Ok(format!("{}.{}", region_group, model_id))
695 }
696
697 // US region inference profiles
698 (
699 Self::ClaudeHaiku4_5
700 | Self::ClaudeSonnet4
701 | Self::ClaudeSonnet4Thinking
702 | Self::ClaudeSonnet4_5
703 | Self::ClaudeSonnet4_5Thinking
704 | Self::ClaudeOpus4_1
705 | Self::ClaudeOpus4_1Thinking
706 | Self::ClaudeOpus4_5
707 | Self::ClaudeOpus4_5Thinking
708 | Self::ClaudeOpus4_6
709 | Self::ClaudeOpus4_6Thinking
710 | Self::ClaudeSonnet4_6
711 | Self::ClaudeSonnet4_6Thinking
712 | Self::Llama4Scout17B
713 | Self::Llama4Maverick17B
714 | Self::NovaLite
715 | Self::NovaPro
716 | Self::NovaPremier
717 | Self::Nova2Lite
718 | Self::PixtralLarge
719 | Self::DeepSeekR1,
720 "us",
721 ) => Ok(format!("{}.{}", region_group, model_id)),
722
723 // Canada region inference profiles
724 (Self::NovaLite, "ca") => Ok(format!("{}.{}", region_group, model_id)),
725
726 // EU region inference profiles
727 (
728 Self::ClaudeHaiku4_5
729 | Self::ClaudeSonnet4
730 | Self::ClaudeSonnet4_5
731 | Self::ClaudeSonnet4_5Thinking
732 | Self::ClaudeOpus4_6
733 | Self::ClaudeOpus4_6Thinking
734 | Self::ClaudeSonnet4_6
735 | Self::ClaudeSonnet4_6Thinking
736 | Self::NovaLite
737 | Self::NovaPro
738 | Self::Nova2Lite,
739 "eu",
740 ) => Ok(format!("{}.{}", region_group, model_id)),
741
742 // Australia region inference profiles
743 (
744 Self::ClaudeHaiku4_5
745 | Self::ClaudeSonnet4_5
746 | Self::ClaudeSonnet4_5Thinking
747 | Self::ClaudeOpus4_6
748 | Self::ClaudeOpus4_6Thinking
749 | Self::ClaudeSonnet4_6
750 | Self::ClaudeSonnet4_6Thinking,
751 "au",
752 ) => Ok(format!("{}.{}", region_group, model_id)),
753
754 // Japan region inference profiles
755 (
756 Self::ClaudeHaiku4_5
757 | Self::ClaudeSonnet4_5
758 | Self::ClaudeSonnet4_5Thinking
759 | Self::ClaudeSonnet4_6
760 | Self::ClaudeSonnet4_6Thinking
761 | Self::Nova2Lite,
762 "jp",
763 ) => Ok(format!("{}.{}", region_group, model_id)),
764
765 // APAC region inference profiles (other than AU/JP)
766 (
767 Self::ClaudeHaiku4_5
768 | Self::ClaudeSonnet4
769 | Self::ClaudeSonnet4_5
770 | Self::ClaudeSonnet4_5Thinking
771 | Self::NovaLite
772 | Self::NovaPro
773 | Self::Nova2Lite,
774 "apac",
775 ) => Ok(format!("{}.{}", region_group, model_id)),
776
777 // Default: use model ID directly
778 _ => Ok(model_id.into()),
779 }
780 }
781}
782
#[cfg(test)]
mod tests {
    use super::*;

    /// One `cross_region_inference_id` expectation:
    /// `(model, region, allow_global, expected identifier)`.
    type InferenceCase = (Model, &'static str, bool, &'static str);

    /// Checks every case in order, propagating any lookup error to the test.
    fn check_inference_ids(cases: &[InferenceCase]) -> anyhow::Result<()> {
        for (model, region, allow_global, expected) in cases {
            let actual = model.cross_region_inference_id(region, *allow_global)?;
            assert_eq!(actual, *expected, "{} in {region}", model.id());
        }
        Ok(())
    }

    #[test]
    fn test_us_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(&[
            (
                Model::ClaudeSonnet4_5,
                "us-east-1",
                false,
                "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (
                Model::ClaudeSonnet4,
                "us-west-2",
                false,
                "us.anthropic.claude-sonnet-4-20250514-v1:0",
            ),
            (Model::NovaPro, "us-east-2", false, "us.amazon.nova-pro-v1:0"),
            (Model::DeepSeekR1, "us-east-1", false, "us.deepseek.r1-v1:0"),
        ])
    }

    #[test]
    fn test_eu_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(&[
            (
                Model::ClaudeSonnet4,
                "eu-west-1",
                false,
                "eu.anthropic.claude-sonnet-4-20250514-v1:0",
            ),
            (
                Model::ClaudeSonnet4_5,
                "eu-west-1",
                false,
                "eu.anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (
                Model::NovaLite,
                "eu-north-1",
                false,
                "eu.amazon.nova-lite-v1:0",
            ),
            (
                Model::ClaudeOpus4_6,
                "eu-west-1",
                false,
                "eu.anthropic.claude-opus-4-6-v1",
            ),
        ])
    }

    #[test]
    fn test_apac_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(&[
            (
                Model::ClaudeSonnet4_5,
                "ap-south-1",
                false,
                "apac.anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (
                Model::NovaLite,
                "ap-south-1",
                false,
                "apac.amazon.nova-lite-v1:0",
            ),
        ])
    }

    #[test]
    fn test_au_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(&[
            (
                Model::ClaudeHaiku4_5,
                "ap-southeast-2",
                false,
                "au.anthropic.claude-haiku-4-5-20251001-v1:0",
            ),
            (
                Model::ClaudeSonnet4_5,
                "ap-southeast-4",
                false,
                "au.anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (
                Model::ClaudeOpus4_6,
                "ap-southeast-2",
                false,
                "au.anthropic.claude-opus-4-6-v1",
            ),
        ])
    }

    #[test]
    fn test_jp_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(&[
            (
                Model::ClaudeHaiku4_5,
                "ap-northeast-1",
                false,
                "jp.anthropic.claude-haiku-4-5-20251001-v1:0",
            ),
            (
                Model::ClaudeSonnet4_5,
                "ap-northeast-3",
                false,
                "jp.anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (
                Model::Nova2Lite,
                "ap-northeast-1",
                false,
                "jp.amazon.nova-2-lite-v1:0",
            ),
        ])
    }

    #[test]
    fn test_ca_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(&[(
            Model::NovaLite,
            "ca-central-1",
            false,
            "ca.amazon.nova-lite-v1:0",
        )])
    }

    #[test]
    fn test_gov_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(&[
            (
                Model::ClaudeSonnet4_5,
                "us-gov-east-1",
                false,
                "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (
                Model::ClaudeSonnet4_5Thinking,
                "us-gov-west-1",
                false,
                "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
        ])
    }

    #[test]
    fn test_global_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(&[
            (
                Model::ClaudeSonnet4,
                "us-east-1",
                true,
                "global.anthropic.claude-sonnet-4-20250514-v1:0",
            ),
            (
                Model::ClaudeSonnet4_5,
                "eu-west-1",
                true,
                "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (
                Model::ClaudeHaiku4_5,
                "ap-south-1",
                true,
                "global.anthropic.claude-haiku-4-5-20251001-v1:0",
            ),
            (
                Model::ClaudeOpus4_6,
                "us-east-1",
                true,
                "global.anthropic.claude-opus-4-6-v1",
            ),
            (
                Model::Nova2Lite,
                "us-east-1",
                true,
                "global.amazon.nova-2-lite-v1:0",
            ),
            // Models without global support fall back to regional
            (Model::NovaPro, "us-east-1", true, "us.amazon.nova-pro-v1:0"),
        ])
    }

    #[test]
    fn test_models_without_cross_region() -> anyhow::Result<()> {
        // Models without cross-region support return their request_id directly
        check_inference_ids(&[
            (Model::Gemma3_4B, "us-east-1", false, "google.gemma-3-4b-it"),
            (
                Model::MistralLarge3,
                "eu-west-1",
                false,
                "mistral.mistral-large-3-675b-instruct",
            ),
            (
                Model::Qwen3VL235B,
                "ap-south-1",
                false,
                "qwen.qwen3-vl-235b-a22b",
            ),
            (
                Model::GptOss120B,
                "us-east-1",
                false,
                "openai.gpt-oss-120b-1:0",
            ),
            (Model::MiniMaxM2, "us-east-1", false, "minimax.minimax-m2"),
            (
                Model::KimiK2Thinking,
                "us-east-1",
                false,
                "moonshot.kimi-k2-thinking",
            ),
        ])
    }

    #[test]
    fn test_custom_model_inference_ids() -> anyhow::Result<()> {
        let custom_model = Model::Custom {
            name: "custom.my-model-v1:0".to_string(),
            max_tokens: 100000,
            display_name: Some("My Custom Model".to_string()),
            max_output_tokens: Some(8192),
            default_temperature: Some(0.7),
            cache_configuration: None,
        };

        // The custom name comes back untouched whatever the region or flag.
        for (region, allow_global) in [("us-east-1", false), ("eu-west-1", true)] {
            assert_eq!(
                custom_model.cross_region_inference_id(region, allow_global)?,
                "custom.my-model-v1:0"
            );
        }
        Ok(())
    }

    #[test]
    fn test_friendly_id_vs_request_id() {
        // (model, friendly id, request id)
        let cases = [
            (
                Model::ClaudeSonnet4_5,
                "claude-sonnet-4-5",
                "anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (Model::NovaLite, "nova-lite", "amazon.nova-lite-v1:0"),
            (Model::DeepSeekR1, "deepseek-r1", "deepseek.r1-v1:0"),
            (
                Model::Llama4Scout17B,
                "llama-4-scout-17b",
                "meta.llama4-scout-17b-instruct-v1:0",
            ),
        ];
        for (model, friendly_id, request_id) in &cases {
            assert_eq!(model.id(), *friendly_id);
            assert_eq!(model.request_id(), *request_id);
        }

        // Thinking models have different friendly IDs but same request IDs
        let base = Model::ClaudeSonnet4;
        let thinking = Model::ClaudeSonnet4Thinking;
        assert_eq!(base.id(), "claude-sonnet-4");
        assert_eq!(thinking.id(), "claude-sonnet-4-thinking");
        assert_eq!(base.request_id(), thinking.request_id());
    }

    #[test]
    fn test_model_modes() {
        let cases = [
            (Model::ClaudeSonnet4, BedrockModelMode::Default),
            (
                Model::ClaudeSonnet4Thinking,
                BedrockModelMode::Thinking {
                    budget_tokens: Some(4096),
                },
            ),
            (
                Model::ClaudeOpus4_6Thinking,
                BedrockModelMode::AdaptiveThinking {
                    effort: BedrockAdaptiveThinkingEffort::High,
                },
            ),
        ];
        for (model, expected) in &cases {
            assert_eq!(model.mode(), *expected);
        }
    }

    #[test]
    fn test_max_tokens() {
        let cases = [
            (Model::ClaudeSonnet4_5, 200_000),
            (Model::ClaudeOpus4_6, 200_000),
            (Model::Llama4Scout17B, 128_000),
            (Model::NovaPremier, 1_000_000),
        ];
        for (model, expected) in &cases {
            assert_eq!(model.max_tokens(), *expected);
        }
    }

    #[test]
    fn test_max_output_tokens() {
        let cases = [
            (Model::ClaudeSonnet4_5, 64_000),
            (Model::ClaudeOpus4_6, 128_000),
            (Model::ClaudeOpus4_1, 32_000),
            (Model::Gemma3_4B, 8_192),
        ];
        for (model, expected) in &cases {
            assert_eq!(model.max_output_tokens(), *expected);
        }
    }

    #[test]
    fn test_supports_tool_use() {
        let cases = [
            (Model::ClaudeSonnet4_5, true),
            (Model::NovaPro, true),
            (Model::MistralLarge3, true),
            (Model::Gemma3_4B, false),
            (Model::Qwen3_32B, true),
            (Model::MiniMaxM2, true),
            (Model::KimiK2_5, true),
            (Model::DeepSeekR1, true),
            (Model::Llama4Scout17B, false),
        ];
        for (model, expected) in &cases {
            assert_eq!(model.supports_tool_use(), *expected, "{}", model.id());
        }
    }

    #[test]
    fn test_supports_caching() {
        let cases = [
            (Model::ClaudeSonnet4_5, true),
            (Model::ClaudeOpus4_6, true),
            (Model::Llama4Scout17B, false),
            (Model::NovaPro, false),
        ];
        for (model, expected) in &cases {
            assert_eq!(model.supports_caching(), *expected, "{}", model.id());
        }
    }
}