1use serde::{Deserialize, Serialize};
2use strum::EnumIter;
3
4#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
5#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize, PartialEq)]
6pub enum BedrockAdaptiveThinkingEffort {
7 Low,
8 Medium,
9 #[default]
10 High,
11 Max,
12}
13
14impl BedrockAdaptiveThinkingEffort {
15 pub fn as_str(&self) -> &'static str {
16 match self {
17 Self::Low => "low",
18 Self::Medium => "medium",
19 Self::High => "high",
20 Self::Max => "max",
21 }
22 }
23}
24
25#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
26#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
27pub enum BedrockModelMode {
28 #[default]
29 Default,
30 Thinking {
31 budget_tokens: Option<u64>,
32 },
33 AdaptiveThinking {
34 effort: BedrockAdaptiveThinkingEffort,
35 },
36}
37
38#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
39#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
40pub struct BedrockModelCacheConfiguration {
41 pub max_cache_anchors: usize,
42 pub min_total_token: u64,
43}
44
45#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
46#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
47pub enum Model {
48 // Anthropic Claude 4+ models
49 #[serde(
50 rename = "claude-haiku-4-5",
51 alias = "claude-haiku-4-5-latest"
52 )]
53 ClaudeHaiku4_5,
54 #[serde(
55 rename = "claude-sonnet-4",
56 alias = "claude-sonnet-4-latest",
57 alias = "claude-sonnet-4-thinking",
58 alias = "claude-sonnet-4-thinking-latest"
59 )]
60 ClaudeSonnet4,
61 #[default]
62 #[serde(
63 rename = "claude-sonnet-4-5",
64 alias = "claude-sonnet-4-5-latest",
65 alias = "claude-sonnet-4-5-thinking",
66 alias = "claude-sonnet-4-5-thinking-latest"
67 )]
68 ClaudeSonnet4_5,
69 #[serde(
70 rename = "claude-opus-4-1",
71 alias = "claude-opus-4-1-latest",
72 alias = "claude-opus-4-1-thinking",
73 alias = "claude-opus-4-1-thinking-latest"
74 )]
75 ClaudeOpus4_1,
76 #[serde(
77 rename = "claude-opus-4-5",
78 alias = "claude-opus-4-5-latest",
79 alias = "claude-opus-4-5-thinking",
80 alias = "claude-opus-4-5-thinking-latest"
81 )]
82 ClaudeOpus4_5,
83 #[serde(
84 rename = "claude-opus-4-6",
85 alias = "claude-opus-4-6-latest",
86 alias = "claude-opus-4-6-thinking",
87 alias = "claude-opus-4-6-thinking-latest"
88 )]
89 ClaudeOpus4_6,
90 #[serde(
91 rename = "claude-sonnet-4-6",
92 alias = "claude-sonnet-4-6-latest",
93 alias = "claude-sonnet-4-6-thinking",
94 alias = "claude-sonnet-4-6-thinking-latest"
95 )]
96 ClaudeSonnet4_6,
97
98 // Meta Llama 4 models
99 #[serde(rename = "llama-4-scout-17b")]
100 Llama4Scout17B,
101 #[serde(rename = "llama-4-maverick-17b")]
102 Llama4Maverick17B,
103
104 // Google Gemma 3 models
105 #[serde(rename = "gemma-3-4b")]
106 Gemma3_4B,
107 #[serde(rename = "gemma-3-12b")]
108 Gemma3_12B,
109 #[serde(rename = "gemma-3-27b")]
110 Gemma3_27B,
111
112 // Mistral models
113 #[serde(rename = "magistral-small")]
114 MagistralSmall,
115 #[serde(rename = "mistral-large-3")]
116 MistralLarge3,
117 #[serde(rename = "pixtral-large")]
118 PixtralLarge,
119
120 // Qwen models
121 #[serde(rename = "qwen3-32b")]
122 Qwen3_32B,
123 #[serde(rename = "qwen3-vl-235b")]
124 Qwen3VL235B,
125 #[serde(rename = "qwen3-235b")]
126 Qwen3_235B,
127 #[serde(rename = "qwen3-next-80b")]
128 Qwen3Next80B,
129 #[serde(rename = "qwen3-coder-30b")]
130 Qwen3Coder30B,
131 #[serde(rename = "qwen3-coder-next")]
132 Qwen3CoderNext,
133 #[serde(rename = "qwen3-coder-480b")]
134 Qwen3Coder480B,
135
136 // Amazon Nova models
137 #[serde(rename = "nova-lite")]
138 NovaLite,
139 #[serde(rename = "nova-pro")]
140 NovaPro,
141 #[serde(rename = "nova-premier")]
142 NovaPremier,
143 #[serde(rename = "nova-2-lite")]
144 Nova2Lite,
145
146 // OpenAI GPT OSS models
147 #[serde(rename = "gpt-oss-20b")]
148 GptOss20B,
149 #[serde(rename = "gpt-oss-120b")]
150 GptOss120B,
151
152 // MiniMax models
153 #[serde(rename = "minimax-m2")]
154 MiniMaxM2,
155
156 // Moonshot models
157 #[serde(rename = "kimi-k2-thinking")]
158 KimiK2Thinking,
159 #[serde(rename = "kimi-k2-5")]
160 KimiK2_5,
161
162 // DeepSeek models
163 #[serde(rename = "deepseek-r1")]
164 DeepSeekR1,
165 #[serde(rename = "deepseek-v3")]
166 DeepSeekV3_1,
167 #[serde(rename = "deepseek-v3-2")]
168 DeepSeekV3_2,
169
170 #[serde(rename = "custom")]
171 Custom {
172 name: String,
173 max_tokens: u64,
174 display_name: Option<String>,
175 max_output_tokens: Option<u64>,
176 default_temperature: Option<f32>,
177 cache_configuration: Option<BedrockModelCacheConfiguration>,
178 },
179}
180
181impl Model {
182 pub fn default_fast(_region: &str) -> Self {
183 Self::ClaudeHaiku4_5
184 }
185
186 pub fn from_id(id: &str) -> anyhow::Result<Self> {
187 // Order matters: longer prefixes must be checked first to avoid
188 // "claude-opus-4-6" matching before "claude-opus-4-6-thinking".
189 // The "-thinking" aliases map to the same model since thinking
190 // is now controlled by the UI toggle.
191 if id.starts_with("claude-opus-4-6-thinking") || id.starts_with("claude-opus-4-6") {
192 Ok(Self::ClaudeOpus4_6)
193 } else if id.starts_with("claude-opus-4-5-thinking") || id.starts_with("claude-opus-4-5") {
194 Ok(Self::ClaudeOpus4_5)
195 } else if id.starts_with("claude-opus-4-1-thinking") || id.starts_with("claude-opus-4-1") {
196 Ok(Self::ClaudeOpus4_1)
197 } else if id.starts_with("claude-sonnet-4-6-thinking")
198 || id.starts_with("claude-sonnet-4-6")
199 {
200 Ok(Self::ClaudeSonnet4_6)
201 } else if id.starts_with("claude-sonnet-4-5-thinking")
202 || id.starts_with("claude-sonnet-4-5")
203 {
204 Ok(Self::ClaudeSonnet4_5)
205 } else if id.starts_with("claude-sonnet-4-thinking") || id.starts_with("claude-sonnet-4") {
206 Ok(Self::ClaudeSonnet4)
207 } else if id.starts_with("claude-haiku-4-5") {
208 Ok(Self::ClaudeHaiku4_5)
209 } else {
210 anyhow::bail!("invalid model id {id}");
211 }
212 }
213
214 pub fn id(&self) -> &str {
215 match self {
216 Self::ClaudeHaiku4_5 => "claude-haiku-4-5",
217 Self::ClaudeSonnet4 => "claude-sonnet-4",
218 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5",
219 Self::ClaudeOpus4_1 => "claude-opus-4-1",
220 Self::ClaudeOpus4_5 => "claude-opus-4-5",
221 Self::ClaudeOpus4_6 => "claude-opus-4-6",
222 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
223 Self::Llama4Scout17B => "llama-4-scout-17b",
224 Self::Llama4Maverick17B => "llama-4-maverick-17b",
225 Self::Gemma3_4B => "gemma-3-4b",
226 Self::Gemma3_12B => "gemma-3-12b",
227 Self::Gemma3_27B => "gemma-3-27b",
228 Self::MagistralSmall => "magistral-small",
229 Self::MistralLarge3 => "mistral-large-3",
230 Self::PixtralLarge => "pixtral-large",
231 Self::Qwen3_32B => "qwen3-32b",
232 Self::Qwen3VL235B => "qwen3-vl-235b",
233 Self::Qwen3_235B => "qwen3-235b",
234 Self::Qwen3Next80B => "qwen3-next-80b",
235 Self::Qwen3Coder30B => "qwen3-coder-30b",
236 Self::Qwen3CoderNext => "qwen3-coder-next",
237 Self::Qwen3Coder480B => "qwen3-coder-480b",
238 Self::NovaLite => "nova-lite",
239 Self::NovaPro => "nova-pro",
240 Self::NovaPremier => "nova-premier",
241 Self::Nova2Lite => "nova-2-lite",
242 Self::GptOss20B => "gpt-oss-20b",
243 Self::GptOss120B => "gpt-oss-120b",
244 Self::MiniMaxM2 => "minimax-m2",
245 Self::KimiK2Thinking => "kimi-k2-thinking",
246 Self::KimiK2_5 => "kimi-k2-5",
247 Self::DeepSeekR1 => "deepseek-r1",
248 Self::DeepSeekV3_1 => "deepseek-v3",
249 Self::DeepSeekV3_2 => "deepseek-v3-2",
250 Self::Custom { name, .. } => name,
251 }
252 }
253
254 pub fn request_id(&self) -> &str {
255 match self {
256 Self::ClaudeHaiku4_5 => "anthropic.claude-haiku-4-5-20251001-v1:0",
257 Self::ClaudeSonnet4 => "anthropic.claude-sonnet-4-20250514-v1:0",
258 Self::ClaudeSonnet4_5 => "anthropic.claude-sonnet-4-5-20250929-v1:0",
259 Self::ClaudeOpus4_1 => "anthropic.claude-opus-4-1-20250805-v1:0",
260 Self::ClaudeOpus4_5 => "anthropic.claude-opus-4-5-20251101-v1:0",
261 Self::ClaudeOpus4_6 => "anthropic.claude-opus-4-6-v1",
262 Self::ClaudeSonnet4_6 => "anthropic.claude-sonnet-4-6",
263 Self::Llama4Scout17B => "meta.llama4-scout-17b-instruct-v1:0",
264 Self::Llama4Maverick17B => "meta.llama4-maverick-17b-instruct-v1:0",
265 Self::Gemma3_4B => "google.gemma-3-4b-it",
266 Self::Gemma3_12B => "google.gemma-3-12b-it",
267 Self::Gemma3_27B => "google.gemma-3-27b-it",
268 Self::MagistralSmall => "mistral.magistral-small-2509",
269 Self::MistralLarge3 => "mistral.mistral-large-3-675b-instruct",
270 Self::PixtralLarge => "mistral.pixtral-large-2502-v1:0",
271 Self::Qwen3VL235B => "qwen.qwen3-vl-235b-a22b",
272 Self::Qwen3_32B => "qwen.qwen3-32b-v1:0",
273 Self::Qwen3_235B => "qwen.qwen3-235b-a22b-2507-v1:0",
274 Self::Qwen3Next80B => "qwen.qwen3-next-80b-a3b",
275 Self::Qwen3Coder30B => "qwen.qwen3-coder-30b-a3b-v1:0",
276 Self::Qwen3CoderNext => "qwen.qwen3-coder-next",
277 Self::Qwen3Coder480B => "qwen.qwen3-coder-480b-a35b-v1:0",
278 Self::NovaLite => "amazon.nova-lite-v1:0",
279 Self::NovaPro => "amazon.nova-pro-v1:0",
280 Self::NovaPremier => "amazon.nova-premier-v1:0",
281 Self::Nova2Lite => "amazon.nova-2-lite-v1:0",
282 Self::GptOss20B => "openai.gpt-oss-20b-1:0",
283 Self::GptOss120B => "openai.gpt-oss-120b-1:0",
284 Self::MiniMaxM2 => "minimax.minimax-m2",
285 Self::KimiK2Thinking => "moonshot.kimi-k2-thinking",
286 Self::KimiK2_5 => "moonshotai.kimi-k2.5",
287 Self::DeepSeekR1 => "deepseek.r1-v1:0",
288 Self::DeepSeekV3_1 => "deepseek.v3-v1:0",
289 Self::DeepSeekV3_2 => "deepseek.v3.2",
290 Self::Custom { name, .. } => name,
291 }
292 }
293
294 pub fn display_name(&self) -> &str {
295 match self {
296 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
297 Self::ClaudeSonnet4 => "Claude Sonnet 4",
298 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
299 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
300 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
301 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
302 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
303 Self::Llama4Scout17B => "Llama 4 Scout 17B",
304 Self::Llama4Maverick17B => "Llama 4 Maverick 17B",
305 Self::Gemma3_4B => "Gemma 3 4B",
306 Self::Gemma3_12B => "Gemma 3 12B",
307 Self::Gemma3_27B => "Gemma 3 27B",
308 Self::MagistralSmall => "Magistral Small",
309 Self::MistralLarge3 => "Mistral Large 3",
310 Self::PixtralLarge => "Pixtral Large",
311 Self::Qwen3VL235B => "Qwen3 VL 235B",
312 Self::Qwen3_32B => "Qwen3 32B",
313 Self::Qwen3_235B => "Qwen3 235B",
314 Self::Qwen3Next80B => "Qwen3 Next 80B",
315 Self::Qwen3Coder30B => "Qwen3 Coder 30B",
316 Self::Qwen3CoderNext => "Qwen3 Coder Next",
317 Self::Qwen3Coder480B => "Qwen3 Coder 480B",
318 Self::NovaLite => "Amazon Nova Lite",
319 Self::NovaPro => "Amazon Nova Pro",
320 Self::NovaPremier => "Amazon Nova Premier",
321 Self::Nova2Lite => "Amazon Nova 2 Lite",
322 Self::GptOss20B => "GPT OSS 20B",
323 Self::GptOss120B => "GPT OSS 120B",
324 Self::MiniMaxM2 => "MiniMax M2",
325 Self::KimiK2Thinking => "Kimi K2 Thinking",
326 Self::KimiK2_5 => "Kimi K2.5",
327 Self::DeepSeekR1 => "DeepSeek R1",
328 Self::DeepSeekV3_1 => "DeepSeek V3.1",
329 Self::DeepSeekV3_2 => "DeepSeek V3.2",
330 Self::Custom {
331 display_name, name, ..
332 } => display_name.as_deref().unwrap_or(name.as_str()),
333 }
334 }
335
336 pub fn max_token_count(&self) -> u64 {
337 self.max_tokens()
338 }
339
340 pub fn max_tokens(&self) -> u64 {
341 match self {
342 Self::ClaudeHaiku4_5
343 | Self::ClaudeSonnet4
344 | Self::ClaudeSonnet4_5
345 | Self::ClaudeOpus4_1
346 | Self::ClaudeOpus4_5
347 | Self::ClaudeOpus4_6
348 | Self::ClaudeSonnet4_6 => 200_000,
349 Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
350 Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
351 Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
352 Self::Qwen3_32B
353 | Self::Qwen3VL235B
354 | Self::Qwen3_235B
355 | Self::Qwen3Next80B
356 | Self::Qwen3Coder30B
357 | Self::Qwen3CoderNext
358 | Self::Qwen3Coder480B => 128_000,
359 Self::NovaLite | Self::NovaPro => 300_000,
360 Self::NovaPremier => 1_000_000,
361 Self::Nova2Lite => 300_000,
362 Self::GptOss20B | Self::GptOss120B => 128_000,
363 Self::MiniMaxM2 => 128_000,
364 Self::KimiK2Thinking | Self::KimiK2_5 => 128_000,
365 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 128_000,
366 Self::Custom { max_tokens, .. } => *max_tokens,
367 }
368 }
369
370 pub fn max_output_tokens(&self) -> u64 {
371 match self {
372 Self::ClaudeHaiku4_5
373 | Self::ClaudeSonnet4
374 | Self::ClaudeSonnet4_5
375 | Self::ClaudeOpus4_5
376 | Self::ClaudeSonnet4_6 => 64_000,
377 Self::ClaudeOpus4_1 => 32_000,
378 Self::ClaudeOpus4_6 => 128_000,
379 Self::Llama4Scout17B
380 | Self::Llama4Maverick17B
381 | Self::Gemma3_4B
382 | Self::Gemma3_12B
383 | Self::Gemma3_27B
384 | Self::MagistralSmall
385 | Self::MistralLarge3
386 | Self::PixtralLarge => 8_192,
387 Self::Qwen3_32B
388 | Self::Qwen3VL235B
389 | Self::Qwen3_235B
390 | Self::Qwen3Next80B
391 | Self::Qwen3Coder30B
392 | Self::Qwen3CoderNext
393 | Self::Qwen3Coder480B => 8_192,
394 Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => 5_000,
395 Self::GptOss20B | Self::GptOss120B => 16_000,
396 Self::MiniMaxM2 => 16_000,
397 Self::KimiK2Thinking | Self::KimiK2_5 => 16_000,
398 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 16_000,
399 Self::Custom {
400 max_output_tokens, ..
401 } => max_output_tokens.unwrap_or(4_096),
402 }
403 }
404
405 pub fn default_temperature(&self) -> f32 {
406 match self {
407 Self::ClaudeHaiku4_5
408 | Self::ClaudeSonnet4
409 | Self::ClaudeSonnet4_5
410 | Self::ClaudeOpus4_1
411 | Self::ClaudeOpus4_5
412 | Self::ClaudeOpus4_6
413 | Self::ClaudeSonnet4_6 => 1.0,
414 Self::Custom {
415 default_temperature,
416 ..
417 } => default_temperature.unwrap_or(1.0),
418 _ => 1.0,
419 }
420 }
421
422 pub fn supports_tool_use(&self) -> bool {
423 match self {
424 Self::ClaudeHaiku4_5
425 | Self::ClaudeSonnet4
426 | Self::ClaudeSonnet4_5
427 | Self::ClaudeOpus4_1
428 | Self::ClaudeOpus4_5
429 | Self::ClaudeOpus4_6
430 | Self::ClaudeSonnet4_6 => true,
431 Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => true,
432 Self::MistralLarge3 | Self::PixtralLarge | Self::MagistralSmall => true,
433 // Gemma accepts toolConfig without error but produces unreliable tool
434 // calls -- malformed JSON args, hallucinated tool names, dropped calls.
435 Self::Qwen3_32B
436 | Self::Qwen3VL235B
437 | Self::Qwen3_235B
438 | Self::Qwen3Next80B
439 | Self::Qwen3Coder30B
440 | Self::Qwen3CoderNext
441 | Self::Qwen3Coder480B => true,
442 Self::MiniMaxM2 => true,
443 Self::KimiK2Thinking | Self::KimiK2_5 => true,
444 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => true,
445 _ => false,
446 }
447 }
448
449 pub fn supports_images(&self) -> bool {
450 match self {
451 Self::ClaudeHaiku4_5
452 | Self::ClaudeSonnet4
453 | Self::ClaudeSonnet4_5
454 | Self::ClaudeOpus4_1
455 | Self::ClaudeOpus4_5
456 | Self::ClaudeOpus4_6
457 | Self::ClaudeSonnet4_6 => true,
458 Self::NovaLite | Self::NovaPro => true,
459 Self::PixtralLarge => true,
460 Self::Qwen3VL235B => true,
461 Self::KimiK2_5 => true,
462 _ => false,
463 }
464 }
465
466 pub fn supports_extended_context(&self) -> bool {
467 matches!(
468 self,
469 Self::ClaudeSonnet4
470 | Self::ClaudeSonnet4_5
471 | Self::ClaudeOpus4_5
472 | Self::ClaudeOpus4_6
473 | Self::ClaudeSonnet4_6
474 )
475 }
476
477 pub fn supports_caching(&self) -> bool {
478 match self {
479 Self::ClaudeHaiku4_5
480 | Self::ClaudeSonnet4
481 | Self::ClaudeSonnet4_5
482 | Self::ClaudeOpus4_1
483 | Self::ClaudeOpus4_5
484 | Self::ClaudeOpus4_6
485 | Self::ClaudeSonnet4_6 => true,
486 Self::Custom {
487 cache_configuration,
488 ..
489 } => cache_configuration.is_some(),
490 _ => false,
491 }
492 }
493
494 pub fn cache_configuration(&self) -> Option<BedrockModelCacheConfiguration> {
495 match self {
496 Self::ClaudeSonnet4
497 | Self::ClaudeSonnet4_5
498 | Self::ClaudeOpus4_1
499 | Self::ClaudeOpus4_5
500 | Self::ClaudeOpus4_6
501 | Self::ClaudeSonnet4_6 => Some(BedrockModelCacheConfiguration {
502 max_cache_anchors: 4,
503 min_total_token: 1024,
504 }),
505 Self::ClaudeHaiku4_5 => Some(BedrockModelCacheConfiguration {
506 max_cache_anchors: 4,
507 min_total_token: 2048,
508 }),
509 Self::Custom {
510 cache_configuration,
511 ..
512 } => cache_configuration.clone(),
513 _ => None,
514 }
515 }
516
517 pub fn supports_thinking(&self) -> bool {
518 matches!(
519 self,
520 Self::ClaudeHaiku4_5
521 | Self::ClaudeSonnet4
522 | Self::ClaudeSonnet4_5
523 | Self::ClaudeOpus4_1
524 | Self::ClaudeOpus4_5
525 | Self::ClaudeOpus4_6
526 | Self::ClaudeSonnet4_6
527 )
528 }
529
530 pub fn supports_adaptive_thinking(&self) -> bool {
531 matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
532 }
533
534 pub fn thinking_mode(&self) -> BedrockModelMode {
535 if self.supports_adaptive_thinking() {
536 BedrockModelMode::AdaptiveThinking {
537 effort: BedrockAdaptiveThinkingEffort::default(),
538 }
539 } else if self.supports_thinking() {
540 BedrockModelMode::Thinking {
541 budget_tokens: Some(4096),
542 }
543 } else {
544 BedrockModelMode::Default
545 }
546 }
547
548 pub fn cross_region_inference_id(
549 &self,
550 region: &str,
551 allow_global: bool,
552 ) -> anyhow::Result<String> {
553 let model_id = self.request_id();
554
555 let supports_global = matches!(
556 self,
557 Self::ClaudeHaiku4_5
558 | Self::ClaudeSonnet4
559 | Self::ClaudeSonnet4_5
560 | Self::ClaudeOpus4_5
561 | Self::ClaudeOpus4_6
562 | Self::ClaudeSonnet4_6
563 | Self::Nova2Lite
564 );
565
566 // Determine region group based on AWS region
567 let region_group = if region.starts_with("us-gov-") {
568 "us-gov"
569 } else if region.starts_with("us-") || region.starts_with("sa-") {
570 if allow_global && supports_global {
571 "global"
572 } else {
573 "us"
574 }
575 } else if region.starts_with("ca-") {
576 if allow_global && supports_global {
577 "global"
578 } else {
579 "ca"
580 }
581 } else if region.starts_with("eu-") {
582 if allow_global && supports_global {
583 "global"
584 } else {
585 "eu"
586 }
587 } else if region == "ap-southeast-2" || region == "ap-southeast-4" {
588 // Australia
589 if allow_global && supports_global {
590 "global"
591 } else {
592 "au"
593 }
594 } else if region == "ap-northeast-1" || region == "ap-northeast-3" {
595 // Japan
596 if allow_global && supports_global {
597 "global"
598 } else {
599 "jp"
600 }
601 } else if region.starts_with("ap-") || region.starts_with("me-") {
602 if allow_global && supports_global {
603 "global"
604 } else {
605 "apac"
606 }
607 } else {
608 anyhow::bail!("Unsupported Region {region}");
609 };
610
611 match (self, region_group) {
612 (Self::Custom { .. }, _) => Ok(model_id.into()),
613
614 // Global inference profiles
615 (
616 Self::ClaudeHaiku4_5
617 | Self::ClaudeSonnet4
618 | Self::ClaudeSonnet4_5
619 | Self::ClaudeOpus4_5
620 | Self::ClaudeOpus4_6
621 | Self::ClaudeSonnet4_6
622 | Self::Nova2Lite,
623 "global",
624 ) => Ok(format!("{}.{}", region_group, model_id)),
625
626 // US Government region inference profiles
627 (Self::ClaudeSonnet4_5, "us-gov") => {
628 Ok(format!("{}.{}", region_group, model_id))
629 }
630
631 // US region inference profiles
632 (
633 Self::ClaudeHaiku4_5
634 | Self::ClaudeSonnet4
635 | Self::ClaudeSonnet4_5
636 | Self::ClaudeOpus4_1
637 | Self::ClaudeOpus4_5
638 | Self::ClaudeOpus4_6
639 | Self::ClaudeSonnet4_6
640 | Self::Llama4Scout17B
641 | Self::Llama4Maverick17B
642 | Self::NovaLite
643 | Self::NovaPro
644 | Self::NovaPremier
645 | Self::Nova2Lite
646 | Self::PixtralLarge
647 | Self::DeepSeekR1,
648 "us",
649 ) => Ok(format!("{}.{}", region_group, model_id)),
650
651 // Canada region inference profiles
652 (Self::NovaLite, "ca") => Ok(format!("{}.{}", region_group, model_id)),
653
654 // EU region inference profiles
655 (
656 Self::ClaudeHaiku4_5
657 | Self::ClaudeSonnet4
658 | Self::ClaudeSonnet4_5
659 | Self::ClaudeOpus4_6
660 | Self::ClaudeSonnet4_6
661 | Self::NovaLite
662 | Self::NovaPro
663 | Self::Nova2Lite,
664 "eu",
665 ) => Ok(format!("{}.{}", region_group, model_id)),
666
667 // Australia region inference profiles
668 (
669 Self::ClaudeHaiku4_5
670 | Self::ClaudeSonnet4_5
671 | Self::ClaudeOpus4_6
672 | Self::ClaudeSonnet4_6,
673 "au",
674 ) => Ok(format!("{}.{}", region_group, model_id)),
675
676 // Japan region inference profiles
677 (
678 Self::ClaudeHaiku4_5
679 | Self::ClaudeSonnet4_5
680 | Self::ClaudeSonnet4_6
681 | Self::Nova2Lite,
682 "jp",
683 ) => Ok(format!("{}.{}", region_group, model_id)),
684
685 // APAC region inference profiles (other than AU/JP)
686 (
687 Self::ClaudeHaiku4_5
688 | Self::ClaudeSonnet4
689 | Self::ClaudeSonnet4_5
690 | Self::NovaLite
691 | Self::NovaPro
692 | Self::Nova2Lite,
693 "apac",
694 ) => Ok(format!("{}.{}", region_group, model_id)),
695
696 // Default: use model ID directly
697 _ => Ok(model_id.into()),
698 }
699 }
700}
701
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks every `(model, region, expected id)` case against
    /// `Model::cross_region_inference_id`, using the same `allow_global` flag for all cases.
    fn assert_inference_ids(
        allow_global: bool,
        cases: &[(Model, &str, &str)],
    ) -> anyhow::Result<()> {
        for (model, region, expected) in cases {
            assert_eq!(
                model.cross_region_inference_id(region, allow_global)?,
                *expected
            );
        }
        Ok(())
    }

    #[test]
    fn test_us_region_inference_ids() -> anyhow::Result<()> {
        assert_inference_ids(
            false,
            &[
                (
                    Model::ClaudeSonnet4_5,
                    "us-east-1",
                    "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::ClaudeSonnet4,
                    "us-west-2",
                    "us.anthropic.claude-sonnet-4-20250514-v1:0",
                ),
                (Model::NovaPro, "us-east-2", "us.amazon.nova-pro-v1:0"),
                (Model::DeepSeekR1, "us-east-1", "us.deepseek.r1-v1:0"),
            ],
        )
    }

    #[test]
    fn test_eu_region_inference_ids() -> anyhow::Result<()> {
        assert_inference_ids(
            false,
            &[
                (
                    Model::ClaudeSonnet4,
                    "eu-west-1",
                    "eu.anthropic.claude-sonnet-4-20250514-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "eu-west-1",
                    "eu.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (Model::NovaLite, "eu-north-1", "eu.amazon.nova-lite-v1:0"),
                (
                    Model::ClaudeOpus4_6,
                    "eu-west-1",
                    "eu.anthropic.claude-opus-4-6-v1",
                ),
            ],
        )
    }

    #[test]
    fn test_apac_region_inference_ids() -> anyhow::Result<()> {
        assert_inference_ids(
            false,
            &[
                (
                    Model::ClaudeSonnet4_5,
                    "ap-south-1",
                    "apac.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (Model::NovaLite, "ap-south-1", "apac.amazon.nova-lite-v1:0"),
            ],
        )
    }

    #[test]
    fn test_au_region_inference_ids() -> anyhow::Result<()> {
        assert_inference_ids(
            false,
            &[
                (
                    Model::ClaudeHaiku4_5,
                    "ap-southeast-2",
                    "au.anthropic.claude-haiku-4-5-20251001-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "ap-southeast-4",
                    "au.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::ClaudeOpus4_6,
                    "ap-southeast-2",
                    "au.anthropic.claude-opus-4-6-v1",
                ),
            ],
        )
    }

    #[test]
    fn test_jp_region_inference_ids() -> anyhow::Result<()> {
        assert_inference_ids(
            false,
            &[
                (
                    Model::ClaudeHaiku4_5,
                    "ap-northeast-1",
                    "jp.anthropic.claude-haiku-4-5-20251001-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "ap-northeast-3",
                    "jp.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::Nova2Lite,
                    "ap-northeast-1",
                    "jp.amazon.nova-2-lite-v1:0",
                ),
            ],
        )
    }

    #[test]
    fn test_ca_region_inference_ids() -> anyhow::Result<()> {
        assert_inference_ids(
            false,
            &[(Model::NovaLite, "ca-central-1", "ca.amazon.nova-lite-v1:0")],
        )
    }

    #[test]
    fn test_gov_region_inference_ids() -> anyhow::Result<()> {
        assert_inference_ids(
            false,
            &[
                (
                    Model::ClaudeSonnet4_5,
                    "us-gov-east-1",
                    "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "us-gov-west-1",
                    "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
            ],
        )
    }

    #[test]
    fn test_global_inference_ids() -> anyhow::Result<()> {
        // Models with a global profile use it, whatever the regional group would have been.
        assert_inference_ids(
            true,
            &[
                (
                    Model::ClaudeSonnet4,
                    "us-east-1",
                    "global.anthropic.claude-sonnet-4-20250514-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "eu-west-1",
                    "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::ClaudeHaiku4_5,
                    "ap-south-1",
                    "global.anthropic.claude-haiku-4-5-20251001-v1:0",
                ),
                (
                    Model::ClaudeOpus4_6,
                    "us-east-1",
                    "global.anthropic.claude-opus-4-6-v1",
                ),
                (
                    Model::Nova2Lite,
                    "us-east-1",
                    "global.amazon.nova-2-lite-v1:0",
                ),
            ],
        )?;

        // A model without a global profile keeps its regional group even when global
        // inference is allowed.
        assert_inference_ids(
            true,
            &[(Model::NovaPro, "us-east-1", "us.amazon.nova-pro-v1:0")],
        )
    }

    #[test]
    fn test_models_without_cross_region() -> anyhow::Result<()> {
        // With no profile listed for the region group, the plain request id comes back.
        assert_inference_ids(
            false,
            &[
                (Model::Gemma3_4B, "us-east-1", "google.gemma-3-4b-it"),
                (
                    Model::MistralLarge3,
                    "eu-west-1",
                    "mistral.mistral-large-3-675b-instruct",
                ),
                (Model::Qwen3VL235B, "ap-south-1", "qwen.qwen3-vl-235b-a22b"),
                (Model::GptOss120B, "us-east-1", "openai.gpt-oss-120b-1:0"),
                (Model::MiniMaxM2, "us-east-1", "minimax.minimax-m2"),
                (
                    Model::KimiK2Thinking,
                    "us-east-1",
                    "moonshot.kimi-k2-thinking",
                ),
            ],
        )
    }

    #[test]
    fn test_custom_model_inference_ids() -> anyhow::Result<()> {
        let custom = Model::Custom {
            name: String::from("custom.my-model-v1:0"),
            max_tokens: 100000,
            display_name: Some(String::from("My Custom Model")),
            max_output_tokens: Some(8192),
            default_temperature: Some(0.7),
            cache_configuration: None,
        };

        // The configured name is returned untouched, with or without global inference.
        let regional = custom.cross_region_inference_id("us-east-1", false)?;
        assert_eq!(regional, "custom.my-model-v1:0");

        let global = custom.cross_region_inference_id("eu-west-1", true)?;
        assert_eq!(global, "custom.my-model-v1:0");
        Ok(())
    }

    #[test]
    fn test_friendly_id_vs_request_id() {
        // (model, friendly id, request id)
        let expectations = [
            (
                Model::ClaudeSonnet4_5,
                "claude-sonnet-4-5",
                "anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (Model::NovaLite, "nova-lite", "amazon.nova-lite-v1:0"),
            (Model::DeepSeekR1, "deepseek-r1", "deepseek.r1-v1:0"),
            (
                Model::Llama4Scout17B,
                "llama-4-scout-17b",
                "meta.llama4-scout-17b-instruct-v1:0",
            ),
        ];
        for (model, friendly_id, request_id) in &expectations {
            assert_eq!(model.id(), *friendly_id);
            assert_eq!(model.request_id(), *request_id);
        }

        // `from_id` maps the "-thinking" spelling onto the base model.
        assert_eq!(Model::ClaudeSonnet4.id(), "claude-sonnet-4");
        let from_thinking_alias = Model::from_id("claude-sonnet-4-thinking").unwrap();
        assert_eq!(from_thinking_alias.id(), "claude-sonnet-4");
    }

    #[test]
    fn test_thinking_modes() {
        let thinking_models = [
            Model::ClaudeHaiku4_5,
            Model::ClaudeSonnet4,
            Model::ClaudeSonnet4_5,
            Model::ClaudeOpus4_6,
        ];
        for model in &thinking_models {
            assert!(model.supports_thinking());
        }

        assert!(!Model::ClaudeSonnet4.supports_adaptive_thinking());
        for model in &[Model::ClaudeOpus4_6, Model::ClaudeSonnet4_6] {
            assert!(model.supports_adaptive_thinking());
        }

        let budgeted = BedrockModelMode::Thinking {
            budget_tokens: Some(4096),
        };
        let adaptive = BedrockModelMode::AdaptiveThinking {
            effort: BedrockAdaptiveThinkingEffort::High,
        };
        assert_eq!(Model::ClaudeSonnet4.thinking_mode(), budgeted);
        assert_eq!(Model::ClaudeOpus4_6.thinking_mode(), adaptive);
        assert_eq!(Model::ClaudeHaiku4_5.thinking_mode(), budgeted);
    }

    #[test]
    fn test_max_tokens() {
        let expectations: [(Model, u64); 4] = [
            (Model::ClaudeSonnet4_5, 200_000),
            (Model::ClaudeOpus4_6, 200_000),
            (Model::Llama4Scout17B, 128_000),
            (Model::NovaPremier, 1_000_000),
        ];
        for (model, expected) in &expectations {
            assert_eq!(model.max_tokens(), *expected);
        }
    }

    #[test]
    fn test_max_output_tokens() {
        let expectations: [(Model, u64); 4] = [
            (Model::ClaudeSonnet4_5, 64_000),
            (Model::ClaudeOpus4_6, 128_000),
            (Model::ClaudeOpus4_1, 32_000),
            (Model::Gemma3_4B, 8_192),
        ];
        for (model, expected) in &expectations {
            assert_eq!(model.max_output_tokens(), *expected);
        }
    }

    #[test]
    fn test_supports_tool_use() {
        let with_tools = [
            Model::ClaudeSonnet4_5,
            Model::NovaPro,
            Model::MistralLarge3,
            Model::Qwen3_32B,
            Model::MiniMaxM2,
            Model::KimiK2_5,
            Model::DeepSeekR1,
        ];
        for model in &with_tools {
            assert!(model.supports_tool_use());
        }

        for model in &[Model::Gemma3_4B, Model::Llama4Scout17B] {
            assert!(!model.supports_tool_use());
        }
    }

    #[test]
    fn test_supports_caching() {
        for model in &[Model::ClaudeSonnet4_5, Model::ClaudeOpus4_6] {
            assert!(model.supports_caching());
        }
        for model in &[Model::Llama4Scout17B, Model::NovaPro] {
            assert!(!model.supports_caching());
        }
    }
}