models.rs

  1use serde::{Deserialize, Serialize};
  2use strum::EnumIter;
  3
/// Effort level carried by `BedrockModelMode::AdaptiveThinking`.
///
/// `High` is the default level.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum BedrockAdaptiveThinkingEffort {
    Low,
    Medium,
    #[default]
    High,
    Max,
}
 13
 14impl BedrockAdaptiveThinkingEffort {
 15    pub fn as_str(&self) -> &'static str {
 16        match self {
 17            Self::Low => "low",
 18            Self::Medium => "medium",
 19            Self::High => "high",
 20            Self::Max => "max",
 21        }
 22    }
 23}
 24
/// Thinking configuration for a model, as produced by `Model::thinking_mode`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum BedrockModelMode {
    /// No thinking configuration; this is the default mode.
    #[default]
    Default,
    /// Thinking with an optional token budget.
    Thinking {
        /// Token budget for thinking; `Model::thinking_mode` uses `Some(4096)`.
        budget_tokens: Option<u64>,
    },
    /// Adaptive thinking, parameterised by an effort level rather than a budget.
    AdaptiveThinking {
        effort: BedrockAdaptiveThinkingEffort,
    },
}
 37
/// Prompt-cache parameters for a model, as returned by
/// `Model::cache_configuration`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct BedrockModelCacheConfiguration {
    /// NOTE(review): presumably the maximum number of cache anchor points per
    /// request -- confirm. Built-in Claude models use 4.
    pub max_cache_anchors: usize,
    /// NOTE(review): presumably the minimum token count before caching
    /// applies -- confirm. Built-in Claude models use 1024 or 2048.
    pub min_total_token: u64,
}
 44
/// Models known to this module.
///
/// Each built-in variant is (de)serialized under the friendly id given by its
/// `serde(rename)`, which is the same string `Model::id` returns. The listed
/// `alias` names are additionally accepted when deserializing.
/// `ClaudeSonnet4_5` is the default model.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    // Anthropic Claude 4+ models
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[default]
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest"
    )]
    ClaudeSonnet4_6,

    // Meta Llama 4 models
    #[serde(rename = "llama-4-scout-17b")]
    Llama4Scout17B,
    #[serde(rename = "llama-4-maverick-17b")]
    Llama4Maverick17B,

    // Google Gemma 3 models
    #[serde(rename = "gemma-3-4b")]
    Gemma3_4B,
    #[serde(rename = "gemma-3-12b")]
    Gemma3_12B,
    #[serde(rename = "gemma-3-27b")]
    Gemma3_27B,

    // Mistral models
    #[serde(rename = "magistral-small")]
    MagistralSmall,
    #[serde(rename = "mistral-large-3")]
    MistralLarge3,
    #[serde(rename = "pixtral-large")]
    PixtralLarge,

    // Qwen models
    #[serde(rename = "qwen3-32b")]
    Qwen3_32B,
    #[serde(rename = "qwen3-vl-235b")]
    Qwen3VL235B,
    #[serde(rename = "qwen3-235b")]
    Qwen3_235B,
    #[serde(rename = "qwen3-next-80b")]
    Qwen3Next80B,
    #[serde(rename = "qwen3-coder-30b")]
    Qwen3Coder30B,
    #[serde(rename = "qwen3-coder-next")]
    Qwen3CoderNext,
    #[serde(rename = "qwen3-coder-480b")]
    Qwen3Coder480B,

    // Amazon Nova models
    #[serde(rename = "nova-lite")]
    NovaLite,
    #[serde(rename = "nova-pro")]
    NovaPro,
    #[serde(rename = "nova-premier")]
    NovaPremier,
    #[serde(rename = "nova-2-lite")]
    Nova2Lite,

    // OpenAI GPT OSS models
    #[serde(rename = "gpt-oss-20b")]
    GptOss20B,
    #[serde(rename = "gpt-oss-120b")]
    GptOss120B,

    // MiniMax models
    #[serde(rename = "minimax-m2")]
    MiniMaxM2,

    // Moonshot models
    #[serde(rename = "kimi-k2-thinking")]
    KimiK2Thinking,
    #[serde(rename = "kimi-k2-5")]
    KimiK2_5,

    // DeepSeek models
    // Note: the `DeepSeekV3_1` variant is serialized as "deepseek-v3".
    #[serde(rename = "deepseek-r1")]
    DeepSeekR1,
    #[serde(rename = "deepseek-v3")]
    DeepSeekV3_1,
    #[serde(rename = "deepseek-v3-2")]
    DeepSeekV3_2,

    /// A user-defined model, serialized under the tag "custom".
    #[serde(rename = "custom")]
    Custom {
        /// Returned verbatim by both `Model::id` and `Model::request_id`.
        name: String,
        /// Value reported by `Model::max_tokens`.
        max_tokens: u64,
        /// Name shown to users; `Model::display_name` falls back to `name`.
        display_name: Option<String>,
        /// Output token limit; `Model::max_output_tokens` uses 4096 when unset.
        max_output_tokens: Option<u64>,
        /// Temperature; `Model::default_temperature` uses 1.0 when unset.
        default_temperature: Option<f32>,
        /// Cache settings; `Model::supports_caching` is true only when set.
        cache_configuration: Option<BedrockModelCacheConfiguration>,
    },
}
177
178impl Model {
179    pub fn default_fast(_region: &str) -> Self {
180        Self::ClaudeHaiku4_5
181    }
182
183    pub fn from_id(id: &str) -> anyhow::Result<Self> {
184        if id.starts_with("claude-opus-4-6") {
185            Ok(Self::ClaudeOpus4_6)
186        } else if id.starts_with("claude-opus-4-5") {
187            Ok(Self::ClaudeOpus4_5)
188        } else if id.starts_with("claude-opus-4-1") {
189            Ok(Self::ClaudeOpus4_1)
190        } else if id.starts_with("claude-sonnet-4-6") {
191            Ok(Self::ClaudeSonnet4_6)
192        } else if id.starts_with("claude-sonnet-4-5") {
193            Ok(Self::ClaudeSonnet4_5)
194        } else if id.starts_with("claude-sonnet-4") {
195            Ok(Self::ClaudeSonnet4)
196        } else if id.starts_with("claude-haiku-4-5") {
197            Ok(Self::ClaudeHaiku4_5)
198        } else {
199            anyhow::bail!("invalid model id {id}");
200        }
201    }
202
    /// Returns the friendly identifier of the model.
    ///
    /// For built-in models this is the same string the variant is serialized
    /// under; for `Custom` models it is the configured `name`.
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5",
            Self::ClaudeSonnet4 => "claude-sonnet-4",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5",
            Self::ClaudeOpus4_1 => "claude-opus-4-1",
            Self::ClaudeOpus4_5 => "claude-opus-4-5",
            Self::ClaudeOpus4_6 => "claude-opus-4-6",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
            Self::Llama4Scout17B => "llama-4-scout-17b",
            Self::Llama4Maverick17B => "llama-4-maverick-17b",
            Self::Gemma3_4B => "gemma-3-4b",
            Self::Gemma3_12B => "gemma-3-12b",
            Self::Gemma3_27B => "gemma-3-27b",
            Self::MagistralSmall => "magistral-small",
            Self::MistralLarge3 => "mistral-large-3",
            Self::PixtralLarge => "pixtral-large",
            Self::Qwen3_32B => "qwen3-32b",
            Self::Qwen3VL235B => "qwen3-vl-235b",
            Self::Qwen3_235B => "qwen3-235b",
            Self::Qwen3Next80B => "qwen3-next-80b",
            Self::Qwen3Coder30B => "qwen3-coder-30b",
            Self::Qwen3CoderNext => "qwen3-coder-next",
            Self::Qwen3Coder480B => "qwen3-coder-480b",
            Self::NovaLite => "nova-lite",
            Self::NovaPro => "nova-pro",
            Self::NovaPremier => "nova-premier",
            Self::Nova2Lite => "nova-2-lite",
            Self::GptOss20B => "gpt-oss-20b",
            Self::GptOss120B => "gpt-oss-120b",
            Self::MiniMaxM2 => "minimax-m2",
            Self::KimiK2Thinking => "kimi-k2-thinking",
            Self::KimiK2_5 => "kimi-k2-5",
            Self::DeepSeekR1 => "deepseek-r1",
            // The V3.1 variant keeps the unversioned "deepseek-v3" id.
            Self::DeepSeekV3_1 => "deepseek-v3",
            Self::DeepSeekV3_2 => "deepseek-v3-2",
            Self::Custom { name, .. } => name,
        }
    }
242
    /// Returns the provider-qualified model identifier
    /// (e.g. `anthropic.claude-sonnet-4-5-20250929-v1:0`).
    ///
    /// This is the base id that `cross_region_inference_id` optionally
    /// prefixes with a region group. `Custom` models return their `name`
    /// unchanged.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeHaiku4_5 => "anthropic.claude-haiku-4-5-20251001-v1:0",
            Self::ClaudeSonnet4 => "anthropic.claude-sonnet-4-20250514-v1:0",
            Self::ClaudeSonnet4_5 => "anthropic.claude-sonnet-4-5-20250929-v1:0",
            Self::ClaudeOpus4_1 => "anthropic.claude-opus-4-1-20250805-v1:0",
            Self::ClaudeOpus4_5 => "anthropic.claude-opus-4-5-20251101-v1:0",
            Self::ClaudeOpus4_6 => "anthropic.claude-opus-4-6-v1",
            Self::ClaudeSonnet4_6 => "anthropic.claude-sonnet-4-6",
            Self::Llama4Scout17B => "meta.llama4-scout-17b-instruct-v1:0",
            Self::Llama4Maverick17B => "meta.llama4-maverick-17b-instruct-v1:0",
            Self::Gemma3_4B => "google.gemma-3-4b-it",
            Self::Gemma3_12B => "google.gemma-3-12b-it",
            Self::Gemma3_27B => "google.gemma-3-27b-it",
            Self::MagistralSmall => "mistral.magistral-small-2509",
            Self::MistralLarge3 => "mistral.mistral-large-3-675b-instruct",
            Self::PixtralLarge => "mistral.pixtral-large-2502-v1:0",
            Self::Qwen3VL235B => "qwen.qwen3-vl-235b-a22b",
            Self::Qwen3_32B => "qwen.qwen3-32b-v1:0",
            Self::Qwen3_235B => "qwen.qwen3-235b-a22b-2507-v1:0",
            Self::Qwen3Next80B => "qwen.qwen3-next-80b-a3b",
            Self::Qwen3Coder30B => "qwen.qwen3-coder-30b-a3b-v1:0",
            Self::Qwen3CoderNext => "qwen.qwen3-coder-next",
            Self::Qwen3Coder480B => "qwen.qwen3-coder-480b-a35b-v1:0",
            Self::NovaLite => "amazon.nova-lite-v1:0",
            Self::NovaPro => "amazon.nova-pro-v1:0",
            Self::NovaPremier => "amazon.nova-premier-v1:0",
            Self::Nova2Lite => "amazon.nova-2-lite-v1:0",
            Self::GptOss20B => "openai.gpt-oss-20b-1:0",
            Self::GptOss120B => "openai.gpt-oss-120b-1:0",
            Self::MiniMaxM2 => "minimax.minimax-m2",
            // NOTE(review): the two Kimi ids use different provider prefixes
            // ("moonshot" vs "moonshotai") -- confirm against the provider's
            // model list before normalising.
            Self::KimiK2Thinking => "moonshot.kimi-k2-thinking",
            Self::KimiK2_5 => "moonshotai.kimi-k2.5",
            Self::DeepSeekR1 => "deepseek.r1-v1:0",
            Self::DeepSeekV3_1 => "deepseek.v3-v1:0",
            Self::DeepSeekV3_2 => "deepseek.v3.2",
            Self::Custom { name, .. } => name,
        }
    }
282
    /// Returns the human-readable name of the model.
    ///
    /// `Custom` models use their configured `display_name`, falling back to
    /// `name` when none was provided.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::Llama4Scout17B => "Llama 4 Scout 17B",
            Self::Llama4Maverick17B => "Llama 4 Maverick 17B",
            Self::Gemma3_4B => "Gemma 3 4B",
            Self::Gemma3_12B => "Gemma 3 12B",
            Self::Gemma3_27B => "Gemma 3 27B",
            Self::MagistralSmall => "Magistral Small",
            Self::MistralLarge3 => "Mistral Large 3",
            Self::PixtralLarge => "Pixtral Large",
            Self::Qwen3VL235B => "Qwen3 VL 235B",
            Self::Qwen3_32B => "Qwen3 32B",
            Self::Qwen3_235B => "Qwen3 235B",
            Self::Qwen3Next80B => "Qwen3 Next 80B",
            Self::Qwen3Coder30B => "Qwen3 Coder 30B",
            Self::Qwen3CoderNext => "Qwen3 Coder Next",
            Self::Qwen3Coder480B => "Qwen3 Coder 480B",
            Self::NovaLite => "Amazon Nova Lite",
            Self::NovaPro => "Amazon Nova Pro",
            Self::NovaPremier => "Amazon Nova Premier",
            Self::Nova2Lite => "Amazon Nova 2 Lite",
            Self::GptOss20B => "GPT OSS 20B",
            Self::GptOss120B => "GPT OSS 120B",
            Self::MiniMaxM2 => "MiniMax M2",
            Self::KimiK2Thinking => "Kimi K2 Thinking",
            Self::KimiK2_5 => "Kimi K2.5",
            Self::DeepSeekR1 => "DeepSeek R1",
            Self::DeepSeekV3_1 => "DeepSeek V3.1",
            Self::DeepSeekV3_2 => "DeepSeek V3.2",
            Self::Custom {
                display_name, name, ..
            } => display_name.as_deref().unwrap_or(name.as_str()),
        }
    }
324
325    pub fn max_token_count(&self) -> u64 {
326        self.max_tokens()
327    }
328
329    pub fn max_tokens(&self) -> u64 {
330        match self {
331            Self::ClaudeHaiku4_5
332            | Self::ClaudeSonnet4
333            | Self::ClaudeSonnet4_5
334            | Self::ClaudeOpus4_1
335            | Self::ClaudeOpus4_5
336            | Self::ClaudeOpus4_6
337            | Self::ClaudeSonnet4_6 => 200_000,
338            Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
339            Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
340            Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
341            Self::Qwen3_32B
342            | Self::Qwen3VL235B
343            | Self::Qwen3_235B
344            | Self::Qwen3Next80B
345            | Self::Qwen3Coder30B
346            | Self::Qwen3CoderNext
347            | Self::Qwen3Coder480B => 128_000,
348            Self::NovaLite | Self::NovaPro => 300_000,
349            Self::NovaPremier => 1_000_000,
350            Self::Nova2Lite => 300_000,
351            Self::GptOss20B | Self::GptOss120B => 128_000,
352            Self::MiniMaxM2 => 128_000,
353            Self::KimiK2Thinking | Self::KimiK2_5 => 128_000,
354            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 128_000,
355            Self::Custom { max_tokens, .. } => *max_tokens,
356        }
357    }
358
359    pub fn max_output_tokens(&self) -> u64 {
360        match self {
361            Self::ClaudeHaiku4_5
362            | Self::ClaudeSonnet4
363            | Self::ClaudeSonnet4_5
364            | Self::ClaudeOpus4_5
365            | Self::ClaudeSonnet4_6 => 64_000,
366            Self::ClaudeOpus4_1 => 32_000,
367            Self::ClaudeOpus4_6 => 128_000,
368            Self::Llama4Scout17B
369            | Self::Llama4Maverick17B
370            | Self::Gemma3_4B
371            | Self::Gemma3_12B
372            | Self::Gemma3_27B
373            | Self::MagistralSmall
374            | Self::MistralLarge3
375            | Self::PixtralLarge => 8_192,
376            Self::Qwen3_32B
377            | Self::Qwen3VL235B
378            | Self::Qwen3_235B
379            | Self::Qwen3Next80B
380            | Self::Qwen3Coder30B
381            | Self::Qwen3CoderNext
382            | Self::Qwen3Coder480B => 8_192,
383            Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => 5_000,
384            Self::GptOss20B | Self::GptOss120B => 16_000,
385            Self::MiniMaxM2 => 16_000,
386            Self::KimiK2Thinking | Self::KimiK2_5 => 16_000,
387            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 16_000,
388            Self::Custom {
389                max_output_tokens, ..
390            } => max_output_tokens.unwrap_or(4_096),
391        }
392    }
393
394    pub fn default_temperature(&self) -> f32 {
395        match self {
396            Self::ClaudeHaiku4_5
397            | Self::ClaudeSonnet4
398            | Self::ClaudeSonnet4_5
399            | Self::ClaudeOpus4_1
400            | Self::ClaudeOpus4_5
401            | Self::ClaudeOpus4_6
402            | Self::ClaudeSonnet4_6 => 1.0,
403            Self::Custom {
404                default_temperature,
405                ..
406            } => default_temperature.unwrap_or(1.0),
407            _ => 1.0,
408        }
409    }
410
411    pub fn supports_tool_use(&self) -> bool {
412        match self {
413            Self::ClaudeHaiku4_5
414            | Self::ClaudeSonnet4
415            | Self::ClaudeSonnet4_5
416            | Self::ClaudeOpus4_1
417            | Self::ClaudeOpus4_5
418            | Self::ClaudeOpus4_6
419            | Self::ClaudeSonnet4_6 => true,
420            Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => true,
421            Self::MistralLarge3 | Self::PixtralLarge | Self::MagistralSmall => true,
422            // Gemma accepts toolConfig without error but produces unreliable tool
423            // calls -- malformed JSON args, hallucinated tool names, dropped calls.
424            Self::Qwen3_32B
425            | Self::Qwen3VL235B
426            | Self::Qwen3_235B
427            | Self::Qwen3Next80B
428            | Self::Qwen3Coder30B
429            | Self::Qwen3CoderNext
430            | Self::Qwen3Coder480B => true,
431            Self::MiniMaxM2 => true,
432            Self::KimiK2Thinking | Self::KimiK2_5 => true,
433            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => true,
434            _ => false,
435        }
436    }
437
438    pub fn supports_images(&self) -> bool {
439        match self {
440            Self::ClaudeHaiku4_5
441            | Self::ClaudeSonnet4
442            | Self::ClaudeSonnet4_5
443            | Self::ClaudeOpus4_1
444            | Self::ClaudeOpus4_5
445            | Self::ClaudeOpus4_6
446            | Self::ClaudeSonnet4_6 => true,
447            Self::NovaLite | Self::NovaPro => true,
448            Self::PixtralLarge => true,
449            Self::Qwen3VL235B => true,
450            Self::KimiK2_5 => true,
451            _ => false,
452        }
453    }
454
455    pub fn supports_extended_context(&self) -> bool {
456        matches!(
457            self,
458            Self::ClaudeSonnet4
459                | Self::ClaudeSonnet4_5
460                | Self::ClaudeOpus4_5
461                | Self::ClaudeOpus4_6
462                | Self::ClaudeSonnet4_6
463        )
464    }
465
466    pub fn supports_caching(&self) -> bool {
467        match self {
468            Self::ClaudeHaiku4_5
469            | Self::ClaudeSonnet4
470            | Self::ClaudeSonnet4_5
471            | Self::ClaudeOpus4_1
472            | Self::ClaudeOpus4_5
473            | Self::ClaudeOpus4_6
474            | Self::ClaudeSonnet4_6 => true,
475            Self::Custom {
476                cache_configuration,
477                ..
478            } => cache_configuration.is_some(),
479            _ => false,
480        }
481    }
482
483    pub fn cache_configuration(&self) -> Option<BedrockModelCacheConfiguration> {
484        match self {
485            Self::ClaudeSonnet4
486            | Self::ClaudeSonnet4_5
487            | Self::ClaudeOpus4_1
488            | Self::ClaudeOpus4_5
489            | Self::ClaudeOpus4_6
490            | Self::ClaudeSonnet4_6 => Some(BedrockModelCacheConfiguration {
491                max_cache_anchors: 4,
492                min_total_token: 1024,
493            }),
494            Self::ClaudeHaiku4_5 => Some(BedrockModelCacheConfiguration {
495                max_cache_anchors: 4,
496                min_total_token: 2048,
497            }),
498            Self::Custom {
499                cache_configuration,
500                ..
501            } => cache_configuration.clone(),
502            _ => None,
503        }
504    }
505
506    pub fn supports_thinking(&self) -> bool {
507        matches!(
508            self,
509            Self::ClaudeHaiku4_5
510                | Self::ClaudeSonnet4
511                | Self::ClaudeSonnet4_5
512                | Self::ClaudeOpus4_1
513                | Self::ClaudeOpus4_5
514                | Self::ClaudeOpus4_6
515                | Self::ClaudeSonnet4_6
516        )
517    }
518
519    pub fn supports_adaptive_thinking(&self) -> bool {
520        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
521    }
522
523    pub fn thinking_mode(&self) -> BedrockModelMode {
524        if self.supports_adaptive_thinking() {
525            BedrockModelMode::AdaptiveThinking {
526                effort: BedrockAdaptiveThinkingEffort::default(),
527            }
528        } else if self.supports_thinking() {
529            BedrockModelMode::Thinking {
530                budget_tokens: Some(4096),
531            }
532        } else {
533            BedrockModelMode::Default
534        }
535    }
536
537    pub fn cross_region_inference_id(
538        &self,
539        region: &str,
540        allow_global: bool,
541    ) -> anyhow::Result<String> {
542        let model_id = self.request_id();
543
544        let supports_global = matches!(
545            self,
546            Self::ClaudeHaiku4_5
547                | Self::ClaudeSonnet4
548                | Self::ClaudeSonnet4_5
549                | Self::ClaudeOpus4_5
550                | Self::ClaudeOpus4_6
551                | Self::ClaudeSonnet4_6
552                | Self::Nova2Lite
553        );
554
555        // Determine region group based on AWS region
556        let region_group = if region.starts_with("us-gov-") {
557            "us-gov"
558        } else if region.starts_with("us-") || region.starts_with("sa-") {
559            if allow_global && supports_global {
560                "global"
561            } else {
562                "us"
563            }
564        } else if region.starts_with("ca-") {
565            if allow_global && supports_global {
566                "global"
567            } else {
568                "ca"
569            }
570        } else if region.starts_with("eu-") {
571            if allow_global && supports_global {
572                "global"
573            } else {
574                "eu"
575            }
576        } else if region == "ap-southeast-2" || region == "ap-southeast-4" {
577            // Australia
578            if allow_global && supports_global {
579                "global"
580            } else {
581                "au"
582            }
583        } else if region == "ap-northeast-1" || region == "ap-northeast-3" {
584            // Japan
585            if allow_global && supports_global {
586                "global"
587            } else {
588                "jp"
589            }
590        } else if region.starts_with("ap-") || region.starts_with("me-") {
591            if allow_global && supports_global {
592                "global"
593            } else {
594                "apac"
595            }
596        } else {
597            anyhow::bail!("Unsupported Region {region}");
598        };
599
600        match (self, region_group) {
601            (Self::Custom { .. }, _) => Ok(model_id.into()),
602
603            // Global inference profiles
604            (
605                Self::ClaudeHaiku4_5
606                | Self::ClaudeSonnet4
607                | Self::ClaudeSonnet4_5
608                | Self::ClaudeOpus4_5
609                | Self::ClaudeOpus4_6
610                | Self::ClaudeSonnet4_6
611                | Self::Nova2Lite,
612                "global",
613            ) => Ok(format!("{}.{}", region_group, model_id)),
614
615            // US Government region inference profiles
616            (Self::ClaudeSonnet4_5, "us-gov") => Ok(format!("{}.{}", region_group, model_id)),
617
618            // US region inference profiles
619            (
620                Self::ClaudeHaiku4_5
621                | Self::ClaudeSonnet4
622                | Self::ClaudeSonnet4_5
623                | Self::ClaudeOpus4_1
624                | Self::ClaudeOpus4_5
625                | Self::ClaudeOpus4_6
626                | Self::ClaudeSonnet4_6
627                | Self::Llama4Scout17B
628                | Self::Llama4Maverick17B
629                | Self::NovaLite
630                | Self::NovaPro
631                | Self::NovaPremier
632                | Self::Nova2Lite
633                | Self::PixtralLarge
634                | Self::DeepSeekR1,
635                "us",
636            ) => Ok(format!("{}.{}", region_group, model_id)),
637
638            // Canada region inference profiles
639            (Self::NovaLite, "ca") => Ok(format!("{}.{}", region_group, model_id)),
640
641            // EU region inference profiles
642            (
643                Self::ClaudeHaiku4_5
644                | Self::ClaudeSonnet4
645                | Self::ClaudeSonnet4_5
646                | Self::ClaudeOpus4_6
647                | Self::ClaudeSonnet4_6
648                | Self::NovaLite
649                | Self::NovaPro
650                | Self::Nova2Lite,
651                "eu",
652            ) => Ok(format!("{}.{}", region_group, model_id)),
653
654            // Australia region inference profiles
655            (
656                Self::ClaudeHaiku4_5
657                | Self::ClaudeSonnet4_5
658                | Self::ClaudeOpus4_6
659                | Self::ClaudeSonnet4_6,
660                "au",
661            ) => Ok(format!("{}.{}", region_group, model_id)),
662
663            // Japan region inference profiles
664            (
665                Self::ClaudeHaiku4_5
666                | Self::ClaudeSonnet4_5
667                | Self::ClaudeSonnet4_6
668                | Self::Nova2Lite,
669                "jp",
670            ) => Ok(format!("{}.{}", region_group, model_id)),
671
672            // APAC region inference profiles (other than AU/JP)
673            (
674                Self::ClaudeHaiku4_5
675                | Self::ClaudeSonnet4
676                | Self::ClaudeSonnet4_5
677                | Self::NovaLite
678                | Self::NovaPro
679                | Self::Nova2Lite,
680                "apac",
681            ) => Ok(format!("{}.{}", region_group, model_id)),
682
683            // Default: use model ID directly
684            _ => Ok(model_id.into()),
685        }
686    }
687}
688
689#[cfg(test)]
690mod tests {
691    use super::*;
692
693    #[test]
694    fn test_us_region_inference_ids() -> anyhow::Result<()> {
695        assert_eq!(
696            Model::ClaudeSonnet4_5.cross_region_inference_id("us-east-1", false)?,
697            "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
698        );
699        assert_eq!(
700            Model::ClaudeSonnet4.cross_region_inference_id("us-west-2", false)?,
701            "us.anthropic.claude-sonnet-4-20250514-v1:0"
702        );
703        assert_eq!(
704            Model::NovaPro.cross_region_inference_id("us-east-2", false)?,
705            "us.amazon.nova-pro-v1:0"
706        );
707        assert_eq!(
708            Model::DeepSeekR1.cross_region_inference_id("us-east-1", false)?,
709            "us.deepseek.r1-v1:0"
710        );
711        Ok(())
712    }
713
714    #[test]
715    fn test_eu_region_inference_ids() -> anyhow::Result<()> {
716        assert_eq!(
717            Model::ClaudeSonnet4.cross_region_inference_id("eu-west-1", false)?,
718            "eu.anthropic.claude-sonnet-4-20250514-v1:0"
719        );
720        assert_eq!(
721            Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", false)?,
722            "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
723        );
724        assert_eq!(
725            Model::NovaLite.cross_region_inference_id("eu-north-1", false)?,
726            "eu.amazon.nova-lite-v1:0"
727        );
728        assert_eq!(
729            Model::ClaudeOpus4_6.cross_region_inference_id("eu-west-1", false)?,
730            "eu.anthropic.claude-opus-4-6-v1"
731        );
732        Ok(())
733    }
734
735    #[test]
736    fn test_apac_region_inference_ids() -> anyhow::Result<()> {
737        assert_eq!(
738            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-south-1", false)?,
739            "apac.anthropic.claude-sonnet-4-5-20250929-v1:0"
740        );
741        assert_eq!(
742            Model::NovaLite.cross_region_inference_id("ap-south-1", false)?,
743            "apac.amazon.nova-lite-v1:0"
744        );
745        Ok(())
746    }
747
748    #[test]
749    fn test_au_region_inference_ids() -> anyhow::Result<()> {
750        assert_eq!(
751            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-southeast-2", false)?,
752            "au.anthropic.claude-haiku-4-5-20251001-v1:0"
753        );
754        assert_eq!(
755            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-southeast-4", false)?,
756            "au.anthropic.claude-sonnet-4-5-20250929-v1:0"
757        );
758        assert_eq!(
759            Model::ClaudeOpus4_6.cross_region_inference_id("ap-southeast-2", false)?,
760            "au.anthropic.claude-opus-4-6-v1"
761        );
762        Ok(())
763    }
764
765    #[test]
766    fn test_jp_region_inference_ids() -> anyhow::Result<()> {
767        assert_eq!(
768            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-northeast-1", false)?,
769            "jp.anthropic.claude-haiku-4-5-20251001-v1:0"
770        );
771        assert_eq!(
772            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-northeast-3", false)?,
773            "jp.anthropic.claude-sonnet-4-5-20250929-v1:0"
774        );
775        assert_eq!(
776            Model::Nova2Lite.cross_region_inference_id("ap-northeast-1", false)?,
777            "jp.amazon.nova-2-lite-v1:0"
778        );
779        Ok(())
780    }
781
782    #[test]
783    fn test_ca_region_inference_ids() -> anyhow::Result<()> {
784        assert_eq!(
785            Model::NovaLite.cross_region_inference_id("ca-central-1", false)?,
786            "ca.amazon.nova-lite-v1:0"
787        );
788        Ok(())
789    }
790
791    #[test]
792    fn test_gov_region_inference_ids() -> anyhow::Result<()> {
793        assert_eq!(
794            Model::ClaudeSonnet4_5.cross_region_inference_id("us-gov-east-1", false)?,
795            "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0"
796        );
797        assert_eq!(
798            Model::ClaudeSonnet4_5.cross_region_inference_id("us-gov-west-1", false)?,
799            "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0"
800        );
801        Ok(())
802    }
803
804    #[test]
805    fn test_global_inference_ids() -> anyhow::Result<()> {
806        assert_eq!(
807            Model::ClaudeSonnet4.cross_region_inference_id("us-east-1", true)?,
808            "global.anthropic.claude-sonnet-4-20250514-v1:0"
809        );
810        assert_eq!(
811            Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", true)?,
812            "global.anthropic.claude-sonnet-4-5-20250929-v1:0"
813        );
814        assert_eq!(
815            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-south-1", true)?,
816            "global.anthropic.claude-haiku-4-5-20251001-v1:0"
817        );
818        assert_eq!(
819            Model::ClaudeOpus4_6.cross_region_inference_id("us-east-1", true)?,
820            "global.anthropic.claude-opus-4-6-v1"
821        );
822        assert_eq!(
823            Model::Nova2Lite.cross_region_inference_id("us-east-1", true)?,
824            "global.amazon.nova-2-lite-v1:0"
825        );
826
827        // Models without global support fall back to regional
828        assert_eq!(
829            Model::NovaPro.cross_region_inference_id("us-east-1", true)?,
830            "us.amazon.nova-pro-v1:0"
831        );
832        Ok(())
833    }
834
835    #[test]
836    fn test_models_without_cross_region() -> anyhow::Result<()> {
837        // Models without cross-region support return their request_id directly
838        assert_eq!(
839            Model::Gemma3_4B.cross_region_inference_id("us-east-1", false)?,
840            "google.gemma-3-4b-it"
841        );
842        assert_eq!(
843            Model::MistralLarge3.cross_region_inference_id("eu-west-1", false)?,
844            "mistral.mistral-large-3-675b-instruct"
845        );
846        assert_eq!(
847            Model::Qwen3VL235B.cross_region_inference_id("ap-south-1", false)?,
848            "qwen.qwen3-vl-235b-a22b"
849        );
850        assert_eq!(
851            Model::GptOss120B.cross_region_inference_id("us-east-1", false)?,
852            "openai.gpt-oss-120b-1:0"
853        );
854        assert_eq!(
855            Model::MiniMaxM2.cross_region_inference_id("us-east-1", false)?,
856            "minimax.minimax-m2"
857        );
858        assert_eq!(
859            Model::KimiK2Thinking.cross_region_inference_id("us-east-1", false)?,
860            "moonshot.kimi-k2-thinking"
861        );
862        Ok(())
863    }
864
865    #[test]
866    fn test_custom_model_inference_ids() -> anyhow::Result<()> {
867        let custom_model = Model::Custom {
868            name: "custom.my-model-v1:0".to_string(),
869            max_tokens: 100000,
870            display_name: Some("My Custom Model".to_string()),
871            max_output_tokens: Some(8192),
872            default_temperature: Some(0.7),
873            cache_configuration: None,
874        };
875
876        assert_eq!(
877            custom_model.cross_region_inference_id("us-east-1", false)?,
878            "custom.my-model-v1:0"
879        );
880        assert_eq!(
881            custom_model.cross_region_inference_id("eu-west-1", true)?,
882            "custom.my-model-v1:0"
883        );
884        Ok(())
885    }
886
887    #[test]
888    fn test_friendly_id_vs_request_id() {
889        assert_eq!(Model::ClaudeSonnet4_5.id(), "claude-sonnet-4-5");
890        assert_eq!(Model::NovaLite.id(), "nova-lite");
891        assert_eq!(Model::DeepSeekR1.id(), "deepseek-r1");
892        assert_eq!(Model::Llama4Scout17B.id(), "llama-4-scout-17b");
893
894        assert_eq!(
895            Model::ClaudeSonnet4_5.request_id(),
896            "anthropic.claude-sonnet-4-5-20250929-v1:0"
897        );
898        assert_eq!(Model::NovaLite.request_id(), "amazon.nova-lite-v1:0");
899        assert_eq!(Model::DeepSeekR1.request_id(), "deepseek.r1-v1:0");
900        assert_eq!(
901            Model::Llama4Scout17B.request_id(),
902            "meta.llama4-scout-17b-instruct-v1:0"
903        );
904
905        // Thinking aliases deserialize to the same model
906        assert_eq!(Model::ClaudeSonnet4.id(), "claude-sonnet-4");
907        assert_eq!(
908            Model::from_id("claude-sonnet-4-thinking").unwrap().id(),
909            "claude-sonnet-4"
910        );
911    }
912
913    #[test]
914    fn test_thinking_modes() {
915        assert!(Model::ClaudeHaiku4_5.supports_thinking());
916        assert!(Model::ClaudeSonnet4.supports_thinking());
917        assert!(Model::ClaudeSonnet4_5.supports_thinking());
918        assert!(Model::ClaudeOpus4_6.supports_thinking());
919
920        assert!(!Model::ClaudeSonnet4.supports_adaptive_thinking());
921        assert!(Model::ClaudeOpus4_6.supports_adaptive_thinking());
922        assert!(Model::ClaudeSonnet4_6.supports_adaptive_thinking());
923
924        assert_eq!(
925            Model::ClaudeSonnet4.thinking_mode(),
926            BedrockModelMode::Thinking {
927                budget_tokens: Some(4096)
928            }
929        );
930        assert_eq!(
931            Model::ClaudeOpus4_6.thinking_mode(),
932            BedrockModelMode::AdaptiveThinking {
933                effort: BedrockAdaptiveThinkingEffort::High
934            }
935        );
936        assert_eq!(
937            Model::ClaudeHaiku4_5.thinking_mode(),
938            BedrockModelMode::Thinking {
939                budget_tokens: Some(4096)
940            }
941        );
942    }
943
944    #[test]
945    fn test_max_tokens() {
946        assert_eq!(Model::ClaudeSonnet4_5.max_tokens(), 200_000);
947        assert_eq!(Model::ClaudeOpus4_6.max_tokens(), 200_000);
948        assert_eq!(Model::Llama4Scout17B.max_tokens(), 128_000);
949        assert_eq!(Model::NovaPremier.max_tokens(), 1_000_000);
950    }
951
952    #[test]
953    fn test_max_output_tokens() {
954        assert_eq!(Model::ClaudeSonnet4_5.max_output_tokens(), 64_000);
955        assert_eq!(Model::ClaudeOpus4_6.max_output_tokens(), 128_000);
956        assert_eq!(Model::ClaudeOpus4_1.max_output_tokens(), 32_000);
957        assert_eq!(Model::Gemma3_4B.max_output_tokens(), 8_192);
958    }
959
960    #[test]
961    fn test_supports_tool_use() {
962        assert!(Model::ClaudeSonnet4_5.supports_tool_use());
963        assert!(Model::NovaPro.supports_tool_use());
964        assert!(Model::MistralLarge3.supports_tool_use());
965        assert!(!Model::Gemma3_4B.supports_tool_use());
966        assert!(Model::Qwen3_32B.supports_tool_use());
967        assert!(Model::MiniMaxM2.supports_tool_use());
968        assert!(Model::KimiK2_5.supports_tool_use());
969        assert!(Model::DeepSeekR1.supports_tool_use());
970        assert!(!Model::Llama4Scout17B.supports_tool_use());
971    }
972
973    #[test]
974    fn test_supports_caching() {
975        assert!(Model::ClaudeSonnet4_5.supports_caching());
976        assert!(Model::ClaudeOpus4_6.supports_caching());
977        assert!(!Model::Llama4Scout17B.supports_caching());
978        assert!(!Model::NovaPro.supports_caching());
979    }
980}