models.rs

   1use serde::{Deserialize, Serialize};
   2use strum::EnumIter;
   3
   4#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
   5#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize, PartialEq)]
   6pub enum BedrockAdaptiveThinkingEffort {
   7    Low,
   8    Medium,
   9    #[default]
  10    High,
  11    Max,
  12}
  13
  14impl BedrockAdaptiveThinkingEffort {
  15    pub fn as_str(&self) -> &'static str {
  16        match self {
  17            Self::Low => "low",
  18            Self::Medium => "medium",
  19            Self::High => "high",
  20            Self::Max => "max",
  21        }
  22    }
  23}
  24
  25#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  26#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  27pub enum BedrockModelMode {
  28    #[default]
  29    Default,
  30    Thinking {
  31        budget_tokens: Option<u64>,
  32    },
  33    AdaptiveThinking {
  34        effort: BedrockAdaptiveThinkingEffort,
  35    },
  36}
  37
  38#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  39#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
  40pub struct BedrockModelCacheConfiguration {
  41    pub max_cache_anchors: usize,
  42    pub min_total_token: u64,
  43}
  44
  45#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
  46#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
  47pub enum Model {
  48    // Anthropic Claude 4+ models
  49    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
  50    ClaudeHaiku4_5,
  51    #[serde(
  52        rename = "claude-sonnet-4",
  53        alias = "claude-sonnet-4-latest",
  54        alias = "claude-sonnet-4-thinking",
  55        alias = "claude-sonnet-4-thinking-latest"
  56    )]
  57    ClaudeSonnet4,
  58    #[default]
  59    #[serde(
  60        rename = "claude-sonnet-4-5",
  61        alias = "claude-sonnet-4-5-latest",
  62        alias = "claude-sonnet-4-5-thinking",
  63        alias = "claude-sonnet-4-5-thinking-latest"
  64    )]
  65    ClaudeSonnet4_5,
  66    #[serde(
  67        rename = "claude-opus-4-1",
  68        alias = "claude-opus-4-1-latest",
  69        alias = "claude-opus-4-1-thinking",
  70        alias = "claude-opus-4-1-thinking-latest"
  71    )]
  72    ClaudeOpus4_1,
  73    #[serde(
  74        rename = "claude-opus-4-5",
  75        alias = "claude-opus-4-5-latest",
  76        alias = "claude-opus-4-5-thinking",
  77        alias = "claude-opus-4-5-thinking-latest"
  78    )]
  79    ClaudeOpus4_5,
  80    #[serde(
  81        rename = "claude-opus-4-6",
  82        alias = "claude-opus-4-6-latest",
  83        alias = "claude-opus-4-6-thinking",
  84        alias = "claude-opus-4-6-thinking-latest"
  85    )]
  86    ClaudeOpus4_6,
  87    #[serde(
  88        rename = "claude-opus-4-7",
  89        alias = "claude-opus-4-7-latest",
  90        alias = "claude-opus-4-7-thinking",
  91        alias = "claude-opus-4-7-thinking-latest"
  92    )]
  93    ClaudeOpus4_7,
  94    #[serde(
  95        rename = "claude-sonnet-4-6",
  96        alias = "claude-sonnet-4-6-latest",
  97        alias = "claude-sonnet-4-6-thinking",
  98        alias = "claude-sonnet-4-6-thinking-latest"
  99    )]
 100    ClaudeSonnet4_6,
 101
 102    // Meta Llama 4 models
 103    #[serde(rename = "llama-4-scout-17b")]
 104    Llama4Scout17B,
 105    #[serde(rename = "llama-4-maverick-17b")]
 106    Llama4Maverick17B,
 107
 108    // Google Gemma 3 models
 109    #[serde(rename = "gemma-3-4b")]
 110    Gemma3_4B,
 111    #[serde(rename = "gemma-3-12b")]
 112    Gemma3_12B,
 113    #[serde(rename = "gemma-3-27b")]
 114    Gemma3_27B,
 115
 116    // Mistral models
 117    #[serde(rename = "magistral-small")]
 118    MagistralSmall,
 119    #[serde(rename = "mistral-large-3")]
 120    MistralLarge3,
 121    #[serde(rename = "pixtral-large")]
 122    PixtralLarge,
 123    #[serde(rename = "devstral-2-123b")]
 124    Devstral2_123B,
 125    #[serde(rename = "ministral-14b")]
 126    Ministral14B,
 127
 128    // Qwen models
 129    #[serde(rename = "qwen3-32b")]
 130    Qwen3_32B,
 131    #[serde(rename = "qwen3-vl-235b")]
 132    Qwen3VL235B,
 133    #[serde(rename = "qwen3-235b")]
 134    Qwen3_235B,
 135    #[serde(rename = "qwen3-next-80b")]
 136    Qwen3Next80B,
 137    #[serde(rename = "qwen3-coder-30b")]
 138    Qwen3Coder30B,
 139    #[serde(rename = "qwen3-coder-next")]
 140    Qwen3CoderNext,
 141    #[serde(rename = "qwen3-coder-480b")]
 142    Qwen3Coder480B,
 143
 144    // Amazon Nova models
 145    #[serde(rename = "nova-lite")]
 146    NovaLite,
 147    #[serde(rename = "nova-pro")]
 148    NovaPro,
 149    #[serde(rename = "nova-premier")]
 150    NovaPremier,
 151    #[serde(rename = "nova-2-lite")]
 152    Nova2Lite,
 153
 154    // OpenAI GPT OSS models
 155    #[serde(rename = "gpt-oss-20b")]
 156    GptOss20B,
 157    #[serde(rename = "gpt-oss-120b")]
 158    GptOss120B,
 159
 160    // NVIDIA Nemotron models
 161    #[serde(rename = "nemotron-super-3-120b")]
 162    NemotronSuper3_120B,
 163    #[serde(rename = "nemotron-nano-3-30b")]
 164    NemotronNano3_30B,
 165
 166    // MiniMax models
 167    #[serde(rename = "minimax-m2")]
 168    MiniMaxM2,
 169    #[serde(rename = "minimax-m2-1")]
 170    MiniMaxM2_1,
 171    #[serde(rename = "minimax-m2-5")]
 172    MiniMaxM2_5,
 173
 174    // Z.AI GLM models
 175    #[serde(rename = "glm-5")]
 176    GLM5,
 177    #[serde(rename = "glm-4-7")]
 178    GLM4_7,
 179    #[serde(rename = "glm-4-7-flash")]
 180    GLM4_7Flash,
 181
 182    // Moonshot models
 183    #[serde(rename = "kimi-k2-thinking")]
 184    KimiK2Thinking,
 185    #[serde(rename = "kimi-k2-5")]
 186    KimiK2_5,
 187
 188    // DeepSeek models
 189    #[serde(rename = "deepseek-r1")]
 190    DeepSeekR1,
 191    #[serde(rename = "deepseek-v3")]
 192    DeepSeekV3_1,
 193    #[serde(rename = "deepseek-v3-2")]
 194    DeepSeekV3_2,
 195
 196    #[serde(rename = "custom")]
 197    Custom {
 198        name: String,
 199        max_tokens: u64,
 200        display_name: Option<String>,
 201        max_output_tokens: Option<u64>,
 202        default_temperature: Option<f32>,
 203        cache_configuration: Option<BedrockModelCacheConfiguration>,
 204    },
 205}
 206
 207impl Model {
 208    pub fn default_fast(_region: &str) -> Self {
 209        Self::ClaudeHaiku4_5
 210    }
 211
 212    pub fn from_id(id: &str) -> anyhow::Result<Self> {
 213        if id.starts_with("claude-opus-4-7") {
 214            Ok(Self::ClaudeOpus4_7)
 215        } else if id.starts_with("claude-opus-4-6") {
 216            Ok(Self::ClaudeOpus4_6)
 217        } else if id.starts_with("claude-opus-4-5") {
 218            Ok(Self::ClaudeOpus4_5)
 219        } else if id.starts_with("claude-opus-4-1") {
 220            Ok(Self::ClaudeOpus4_1)
 221        } else if id.starts_with("claude-sonnet-4-6") {
 222            Ok(Self::ClaudeSonnet4_6)
 223        } else if id.starts_with("claude-sonnet-4-5") {
 224            Ok(Self::ClaudeSonnet4_5)
 225        } else if id.starts_with("claude-sonnet-4") {
 226            Ok(Self::ClaudeSonnet4)
 227        } else if id.starts_with("claude-haiku-4-5") {
 228            Ok(Self::ClaudeHaiku4_5)
 229        } else {
 230            anyhow::bail!("invalid model id {id}");
 231        }
 232    }
 233
 234    pub fn id(&self) -> &str {
 235        match self {
 236            Self::ClaudeHaiku4_5 => "claude-haiku-4-5",
 237            Self::ClaudeSonnet4 => "claude-sonnet-4",
 238            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5",
 239            Self::ClaudeOpus4_1 => "claude-opus-4-1",
 240            Self::ClaudeOpus4_5 => "claude-opus-4-5",
 241            Self::ClaudeOpus4_6 => "claude-opus-4-6",
 242            Self::ClaudeOpus4_7 => "claude-opus-4-7",
 243            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
 244            Self::Llama4Scout17B => "llama-4-scout-17b",
 245            Self::Llama4Maverick17B => "llama-4-maverick-17b",
 246            Self::Gemma3_4B => "gemma-3-4b",
 247            Self::Gemma3_12B => "gemma-3-12b",
 248            Self::Gemma3_27B => "gemma-3-27b",
 249            Self::MagistralSmall => "magistral-small",
 250            Self::MistralLarge3 => "mistral-large-3",
 251            Self::PixtralLarge => "pixtral-large",
 252            Self::Devstral2_123B => "devstral-2-123b",
 253            Self::Ministral14B => "ministral-14b",
 254            Self::Qwen3_32B => "qwen3-32b",
 255            Self::Qwen3VL235B => "qwen3-vl-235b",
 256            Self::Qwen3_235B => "qwen3-235b",
 257            Self::Qwen3Next80B => "qwen3-next-80b",
 258            Self::Qwen3Coder30B => "qwen3-coder-30b",
 259            Self::Qwen3CoderNext => "qwen3-coder-next",
 260            Self::Qwen3Coder480B => "qwen3-coder-480b",
 261            Self::NovaLite => "nova-lite",
 262            Self::NovaPro => "nova-pro",
 263            Self::NovaPremier => "nova-premier",
 264            Self::Nova2Lite => "nova-2-lite",
 265            Self::GptOss20B => "gpt-oss-20b",
 266            Self::GptOss120B => "gpt-oss-120b",
 267            Self::NemotronSuper3_120B => "nemotron-super-3-120b",
 268            Self::NemotronNano3_30B => "nemotron-nano-3-30b",
 269            Self::MiniMaxM2 => "minimax-m2",
 270            Self::MiniMaxM2_1 => "minimax-m2-1",
 271            Self::MiniMaxM2_5 => "minimax-m2-5",
 272            Self::GLM5 => "glm-5",
 273            Self::GLM4_7 => "glm-4-7",
 274            Self::GLM4_7Flash => "glm-4-7-flash",
 275            Self::KimiK2Thinking => "kimi-k2-thinking",
 276            Self::KimiK2_5 => "kimi-k2-5",
 277            Self::DeepSeekR1 => "deepseek-r1",
 278            Self::DeepSeekV3_1 => "deepseek-v3",
 279            Self::DeepSeekV3_2 => "deepseek-v3-2",
 280            Self::Custom { name, .. } => name,
 281        }
 282    }
 283
 284    pub fn request_id(&self) -> &str {
 285        match self {
 286            Self::ClaudeHaiku4_5 => "anthropic.claude-haiku-4-5-20251001-v1:0",
 287            Self::ClaudeSonnet4 => "anthropic.claude-sonnet-4-20250514-v1:0",
 288            Self::ClaudeSonnet4_5 => "anthropic.claude-sonnet-4-5-20250929-v1:0",
 289            Self::ClaudeOpus4_1 => "anthropic.claude-opus-4-1-20250805-v1:0",
 290            Self::ClaudeOpus4_5 => "anthropic.claude-opus-4-5-20251101-v1:0",
 291            Self::ClaudeOpus4_6 => "anthropic.claude-opus-4-6-v1",
 292            Self::ClaudeOpus4_7 => "anthropic.claude-opus-4-7",
 293            Self::ClaudeSonnet4_6 => "anthropic.claude-sonnet-4-6",
 294            Self::Llama4Scout17B => "meta.llama4-scout-17b-instruct-v1:0",
 295            Self::Llama4Maverick17B => "meta.llama4-maverick-17b-instruct-v1:0",
 296            Self::Gemma3_4B => "google.gemma-3-4b-it",
 297            Self::Gemma3_12B => "google.gemma-3-12b-it",
 298            Self::Gemma3_27B => "google.gemma-3-27b-it",
 299            Self::MagistralSmall => "mistral.magistral-small-2509",
 300            Self::MistralLarge3 => "mistral.mistral-large-3-675b-instruct",
 301            Self::PixtralLarge => "mistral.pixtral-large-2502-v1:0",
 302            Self::Devstral2_123B => "mistral.devstral-2-123b",
 303            Self::Ministral14B => "mistral.ministral-3-14b-instruct",
 304            Self::Qwen3VL235B => "qwen.qwen3-vl-235b-a22b",
 305            Self::Qwen3_32B => "qwen.qwen3-32b-v1:0",
 306            Self::Qwen3_235B => "qwen.qwen3-235b-a22b-2507-v1:0",
 307            Self::Qwen3Next80B => "qwen.qwen3-next-80b-a3b",
 308            Self::Qwen3Coder30B => "qwen.qwen3-coder-30b-a3b-v1:0",
 309            Self::Qwen3CoderNext => "qwen.qwen3-coder-next",
 310            Self::Qwen3Coder480B => "qwen.qwen3-coder-480b-a35b-v1:0",
 311            Self::NovaLite => "amazon.nova-lite-v1:0",
 312            Self::NovaPro => "amazon.nova-pro-v1:0",
 313            Self::NovaPremier => "amazon.nova-premier-v1:0",
 314            Self::Nova2Lite => "amazon.nova-2-lite-v1:0",
 315            Self::GptOss20B => "openai.gpt-oss-20b-1:0",
 316            Self::GptOss120B => "openai.gpt-oss-120b-1:0",
 317            Self::NemotronSuper3_120B => "nvidia.nemotron-super-3-120b",
 318            Self::NemotronNano3_30B => "nvidia.nemotron-nano-3-30b",
 319            Self::MiniMaxM2 => "minimax.minimax-m2",
 320            Self::MiniMaxM2_1 => "minimax.minimax-m2.1",
 321            Self::MiniMaxM2_5 => "minimax.minimax-m2.5",
 322            Self::GLM5 => "zai.glm-5",
 323            Self::GLM4_7 => "zai.glm-4.7",
 324            Self::GLM4_7Flash => "zai.glm-4.7-flash",
 325            Self::KimiK2Thinking => "moonshot.kimi-k2-thinking",
 326            Self::KimiK2_5 => "moonshotai.kimi-k2.5",
 327            Self::DeepSeekR1 => "deepseek.r1-v1:0",
 328            Self::DeepSeekV3_1 => "deepseek.v3-v1:0",
 329            Self::DeepSeekV3_2 => "deepseek.v3.2",
 330            Self::Custom { name, .. } => name,
 331        }
 332    }
 333
 334    pub fn display_name(&self) -> &str {
 335        match self {
 336            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
 337            Self::ClaudeSonnet4 => "Claude Sonnet 4",
 338            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
 339            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
 340            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
 341            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
 342            Self::ClaudeOpus4_7 => "Claude Opus 4.7",
 343            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
 344            Self::Llama4Scout17B => "Llama 4 Scout 17B",
 345            Self::Llama4Maverick17B => "Llama 4 Maverick 17B",
 346            Self::Gemma3_4B => "Gemma 3 4B",
 347            Self::Gemma3_12B => "Gemma 3 12B",
 348            Self::Gemma3_27B => "Gemma 3 27B",
 349            Self::MagistralSmall => "Magistral Small",
 350            Self::MistralLarge3 => "Mistral Large 3",
 351            Self::PixtralLarge => "Pixtral Large",
 352            Self::Devstral2_123B => "Devstral 2 123B",
 353            Self::Ministral14B => "Ministral 14B",
 354            Self::Qwen3VL235B => "Qwen3 VL 235B",
 355            Self::Qwen3_32B => "Qwen3 32B",
 356            Self::Qwen3_235B => "Qwen3 235B",
 357            Self::Qwen3Next80B => "Qwen3 Next 80B",
 358            Self::Qwen3Coder30B => "Qwen3 Coder 30B",
 359            Self::Qwen3CoderNext => "Qwen3 Coder Next",
 360            Self::Qwen3Coder480B => "Qwen3 Coder 480B",
 361            Self::NovaLite => "Amazon Nova Lite",
 362            Self::NovaPro => "Amazon Nova Pro",
 363            Self::NovaPremier => "Amazon Nova Premier",
 364            Self::Nova2Lite => "Amazon Nova 2 Lite",
 365            Self::GptOss20B => "GPT OSS 20B",
 366            Self::GptOss120B => "GPT OSS 120B",
 367            Self::NemotronSuper3_120B => "Nemotron Super 3 120B",
 368            Self::NemotronNano3_30B => "Nemotron Nano 3 30B",
 369            Self::MiniMaxM2 => "MiniMax M2",
 370            Self::MiniMaxM2_1 => "MiniMax M2.1",
 371            Self::MiniMaxM2_5 => "MiniMax M2.5",
 372            Self::GLM5 => "GLM 5",
 373            Self::GLM4_7 => "GLM 4.7",
 374            Self::GLM4_7Flash => "GLM 4.7 Flash",
 375            Self::KimiK2Thinking => "Kimi K2 Thinking",
 376            Self::KimiK2_5 => "Kimi K2.5",
 377            Self::DeepSeekR1 => "DeepSeek R1",
 378            Self::DeepSeekV3_1 => "DeepSeek V3.1",
 379            Self::DeepSeekV3_2 => "DeepSeek V3.2",
 380            Self::Custom {
 381                display_name, name, ..
 382            } => display_name.as_deref().unwrap_or(name.as_str()),
 383        }
 384    }
 385
 386    pub fn max_token_count(&self) -> u64 {
 387        self.max_tokens()
 388    }
 389
 390    pub fn max_tokens(&self) -> u64 {
 391        match self {
 392            Self::ClaudeHaiku4_5
 393            | Self::ClaudeSonnet4
 394            | Self::ClaudeSonnet4_5
 395            | Self::ClaudeOpus4_1
 396            | Self::ClaudeOpus4_5
 397            | Self::ClaudeOpus4_6
 398            | Self::ClaudeOpus4_7
 399            | Self::ClaudeSonnet4_6 => 200_000,
 400            Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
 401            Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
 402            Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
 403            Self::Devstral2_123B | Self::Ministral14B => 256_000,
 404            Self::Qwen3_32B
 405            | Self::Qwen3VL235B
 406            | Self::Qwen3_235B
 407            | Self::Qwen3Next80B
 408            | Self::Qwen3Coder30B
 409            | Self::Qwen3CoderNext
 410            | Self::Qwen3Coder480B => 128_000,
 411            Self::NovaLite | Self::NovaPro => 300_000,
 412            Self::NovaPremier => 1_000_000,
 413            Self::Nova2Lite => 300_000,
 414            Self::GptOss20B | Self::GptOss120B => 128_000,
 415            Self::NemotronSuper3_120B | Self::NemotronNano3_30B => 262_000,
 416            Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => 196_000,
 417            Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => 203_000,
 418            Self::KimiK2Thinking | Self::KimiK2_5 => 128_000,
 419            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 128_000,
 420            Self::Custom { max_tokens, .. } => *max_tokens,
 421        }
 422    }
 423
 424    pub fn max_output_tokens(&self) -> u64 {
 425        match self {
 426            Self::ClaudeHaiku4_5
 427            | Self::ClaudeSonnet4
 428            | Self::ClaudeSonnet4_5
 429            | Self::ClaudeOpus4_5
 430            | Self::ClaudeSonnet4_6 => 64_000,
 431            Self::ClaudeOpus4_1 => 32_000,
 432            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 => 128_000,
 433            Self::Llama4Scout17B
 434            | Self::Llama4Maverick17B
 435            | Self::Gemma3_4B
 436            | Self::Gemma3_12B
 437            | Self::Gemma3_27B
 438            | Self::MagistralSmall
 439            | Self::MistralLarge3
 440            | Self::PixtralLarge => 8_192,
 441            Self::Devstral2_123B | Self::Ministral14B => 131_000,
 442            Self::Qwen3_32B
 443            | Self::Qwen3VL235B
 444            | Self::Qwen3_235B
 445            | Self::Qwen3Next80B
 446            | Self::Qwen3Coder30B
 447            | Self::Qwen3CoderNext
 448            | Self::Qwen3Coder480B => 8_192,
 449            Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => 5_000,
 450            Self::GptOss20B | Self::GptOss120B => 16_000,
 451            Self::NemotronSuper3_120B | Self::NemotronNano3_30B => 131_000,
 452            Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => 98_000,
 453            Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => 101_000,
 454            Self::KimiK2Thinking | Self::KimiK2_5 => 16_000,
 455            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 16_000,
 456            Self::Custom {
 457                max_output_tokens, ..
 458            } => max_output_tokens.unwrap_or(4_096),
 459        }
 460    }
 461
 462    pub fn default_temperature(&self) -> f32 {
 463        match self {
 464            Self::ClaudeHaiku4_5
 465            | Self::ClaudeSonnet4
 466            | Self::ClaudeSonnet4_5
 467            | Self::ClaudeOpus4_1
 468            | Self::ClaudeOpus4_5
 469            | Self::ClaudeOpus4_6
 470            | Self::ClaudeOpus4_7
 471            | Self::ClaudeSonnet4_6 => 1.0,
 472            Self::Custom {
 473                default_temperature,
 474                ..
 475            } => default_temperature.unwrap_or(1.0),
 476            _ => 1.0,
 477        }
 478    }
 479
 480    pub fn supports_tool_use(&self) -> bool {
 481        match self {
 482            Self::ClaudeHaiku4_5
 483            | Self::ClaudeSonnet4
 484            | Self::ClaudeSonnet4_5
 485            | Self::ClaudeOpus4_1
 486            | Self::ClaudeOpus4_5
 487            | Self::ClaudeOpus4_6
 488            | Self::ClaudeOpus4_7
 489            | Self::ClaudeSonnet4_6 => true,
 490            Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => true,
 491            Self::MistralLarge3 | Self::PixtralLarge | Self::MagistralSmall => true,
 492            Self::Devstral2_123B | Self::Ministral14B => true,
 493            // Gemma accepts toolConfig without error but produces unreliable tool
 494            // calls -- malformed JSON args, hallucinated tool names, dropped calls.
 495            Self::Qwen3_32B
 496            | Self::Qwen3VL235B
 497            | Self::Qwen3_235B
 498            | Self::Qwen3Next80B
 499            | Self::Qwen3Coder30B
 500            | Self::Qwen3CoderNext
 501            | Self::Qwen3Coder480B => true,
 502            Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => true,
 503            Self::NemotronSuper3_120B | Self::NemotronNano3_30B => true,
 504            Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => true,
 505            Self::KimiK2Thinking | Self::KimiK2_5 => true,
 506            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => true,
 507            _ => false,
 508        }
 509    }
 510
 511    pub fn supports_images(&self) -> bool {
 512        match self {
 513            Self::ClaudeHaiku4_5
 514            | Self::ClaudeSonnet4
 515            | Self::ClaudeSonnet4_5
 516            | Self::ClaudeOpus4_1
 517            | Self::ClaudeOpus4_5
 518            | Self::ClaudeOpus4_6
 519            | Self::ClaudeOpus4_7
 520            | Self::ClaudeSonnet4_6 => true,
 521            Self::NovaLite | Self::NovaPro => true,
 522            Self::PixtralLarge => true,
 523            Self::Qwen3VL235B => true,
 524            Self::KimiK2_5 => true,
 525            _ => false,
 526        }
 527    }
 528
 529    pub fn supports_extended_context(&self) -> bool {
 530        matches!(
 531            self,
 532            Self::ClaudeSonnet4
 533                | Self::ClaudeSonnet4_5
 534                | Self::ClaudeOpus4_5
 535                | Self::ClaudeOpus4_6
 536                | Self::ClaudeOpus4_7
 537                | Self::ClaudeSonnet4_6
 538        )
 539    }
 540
 541    pub fn supports_caching(&self) -> bool {
 542        match self {
 543            Self::ClaudeHaiku4_5
 544            | Self::ClaudeSonnet4
 545            | Self::ClaudeSonnet4_5
 546            | Self::ClaudeOpus4_1
 547            | Self::ClaudeOpus4_5
 548            | Self::ClaudeOpus4_6
 549            | Self::ClaudeOpus4_7
 550            | Self::ClaudeSonnet4_6 => true,
 551            Self::Custom {
 552                cache_configuration,
 553                ..
 554            } => cache_configuration.is_some(),
 555            _ => false,
 556        }
 557    }
 558
 559    pub fn cache_configuration(&self) -> Option<BedrockModelCacheConfiguration> {
 560        match self {
 561            Self::ClaudeSonnet4
 562            | Self::ClaudeSonnet4_5
 563            | Self::ClaudeOpus4_1
 564            | Self::ClaudeOpus4_5
 565            | Self::ClaudeOpus4_6
 566            | Self::ClaudeOpus4_7
 567            | Self::ClaudeSonnet4_6 => Some(BedrockModelCacheConfiguration {
 568                max_cache_anchors: 4,
 569                min_total_token: 1024,
 570            }),
 571            Self::ClaudeHaiku4_5 => Some(BedrockModelCacheConfiguration {
 572                max_cache_anchors: 4,
 573                min_total_token: 2048,
 574            }),
 575            Self::Custom {
 576                cache_configuration,
 577                ..
 578            } => cache_configuration.clone(),
 579            _ => None,
 580        }
 581    }
 582
 583    pub fn supports_thinking(&self) -> bool {
 584        matches!(
 585            self,
 586            Self::ClaudeHaiku4_5
 587                | Self::ClaudeSonnet4
 588                | Self::ClaudeSonnet4_5
 589                | Self::ClaudeOpus4_1
 590                | Self::ClaudeOpus4_5
 591                | Self::ClaudeOpus4_6
 592                | Self::ClaudeOpus4_7
 593                | Self::ClaudeSonnet4_6
 594        )
 595    }
 596
 597    pub fn supports_adaptive_thinking(&self) -> bool {
 598        matches!(
 599            self,
 600            Self::ClaudeOpus4_6 | Self::ClaudeOpus4_7 | Self::ClaudeSonnet4_6
 601        )
 602    }
 603
 604    pub fn thinking_mode(&self) -> BedrockModelMode {
 605        if self.supports_adaptive_thinking() {
 606            BedrockModelMode::AdaptiveThinking {
 607                effort: BedrockAdaptiveThinkingEffort::default(),
 608            }
 609        } else if self.supports_thinking() {
 610            BedrockModelMode::Thinking {
 611                budget_tokens: Some(4096),
 612            }
 613        } else {
 614            BedrockModelMode::Default
 615        }
 616    }
 617
 618    pub fn cross_region_inference_id(
 619        &self,
 620        region: &str,
 621        allow_global: bool,
 622    ) -> anyhow::Result<String> {
 623        let model_id = self.request_id();
 624
 625        let supports_global = matches!(
 626            self,
 627            Self::ClaudeHaiku4_5
 628                | Self::ClaudeSonnet4
 629                | Self::ClaudeSonnet4_5
 630                | Self::ClaudeOpus4_5
 631                | Self::ClaudeOpus4_6
 632                | Self::ClaudeOpus4_7
 633                | Self::ClaudeSonnet4_6
 634                | Self::Nova2Lite
 635        );
 636
 637        // Determine region group based on AWS region
 638        let region_group = if region.starts_with("us-gov-") {
 639            "us-gov"
 640        } else if region.starts_with("us-") || region.starts_with("sa-") {
 641            if allow_global && supports_global {
 642                "global"
 643            } else {
 644                "us"
 645            }
 646        } else if region.starts_with("ca-") {
 647            if allow_global && supports_global {
 648                "global"
 649            } else {
 650                "ca"
 651            }
 652        } else if region.starts_with("eu-") {
 653            if allow_global && supports_global {
 654                "global"
 655            } else {
 656                "eu"
 657            }
 658        } else if region == "ap-southeast-2" || region == "ap-southeast-4" {
 659            // Australia
 660            if allow_global && supports_global {
 661                "global"
 662            } else {
 663                "au"
 664            }
 665        } else if region == "ap-northeast-1" || region == "ap-northeast-3" {
 666            // Japan
 667            if allow_global && supports_global {
 668                "global"
 669            } else {
 670                "jp"
 671            }
 672        } else if region.starts_with("ap-") || region.starts_with("me-") {
 673            if allow_global && supports_global {
 674                "global"
 675            } else {
 676                "apac"
 677            }
 678        } else {
 679            anyhow::bail!("Unsupported Region {region}");
 680        };
 681
 682        match (self, region_group) {
 683            (Self::Custom { .. }, _) => Ok(model_id.into()),
 684
 685            // Global inference profiles
 686            (
 687                Self::ClaudeHaiku4_5
 688                | Self::ClaudeSonnet4
 689                | Self::ClaudeSonnet4_5
 690                | Self::ClaudeOpus4_5
 691                | Self::ClaudeOpus4_6
 692                | Self::ClaudeOpus4_7
 693                | Self::ClaudeSonnet4_6
 694                | Self::Nova2Lite,
 695                "global",
 696            ) => Ok(format!("{}.{}", region_group, model_id)),
 697
 698            // US Government region inference profiles
 699            (Self::ClaudeSonnet4_5, "us-gov") => Ok(format!("{}.{}", region_group, model_id)),
 700
 701            // US region inference profiles
 702            (
 703                Self::ClaudeHaiku4_5
 704                | Self::ClaudeSonnet4
 705                | Self::ClaudeSonnet4_5
 706                | Self::ClaudeOpus4_1
 707                | Self::ClaudeOpus4_5
 708                | Self::ClaudeOpus4_6
 709                | Self::ClaudeOpus4_7
 710                | Self::ClaudeSonnet4_6
 711                | Self::Llama4Scout17B
 712                | Self::Llama4Maverick17B
 713                | Self::NovaLite
 714                | Self::NovaPro
 715                | Self::NovaPremier
 716                | Self::Nova2Lite
 717                | Self::PixtralLarge
 718                | Self::DeepSeekR1,
 719                "us",
 720            ) => Ok(format!("{}.{}", region_group, model_id)),
 721
 722            // Canada region inference profiles
 723            (Self::NovaLite, "ca") => Ok(format!("{}.{}", region_group, model_id)),
 724
 725            // EU region inference profiles
 726            (
 727                Self::ClaudeHaiku4_5
 728                | Self::ClaudeSonnet4
 729                | Self::ClaudeSonnet4_5
 730                | Self::ClaudeOpus4_6
 731                | Self::ClaudeOpus4_7
 732                | Self::ClaudeSonnet4_6
 733                | Self::NovaLite
 734                | Self::NovaPro
 735                | Self::Nova2Lite,
 736                "eu",
 737            ) => Ok(format!("{}.{}", region_group, model_id)),
 738
 739            // Australia region inference profiles
 740            (
 741                Self::ClaudeHaiku4_5
 742                | Self::ClaudeSonnet4_5
 743                | Self::ClaudeOpus4_6
 744                | Self::ClaudeOpus4_7
 745                | Self::ClaudeSonnet4_6,
 746                "au",
 747            ) => Ok(format!("{}.{}", region_group, model_id)),
 748
 749            // Japan region inference profiles
 750            (
 751                Self::ClaudeHaiku4_5
 752                | Self::ClaudeSonnet4_5
 753                | Self::ClaudeSonnet4_6
 754                | Self::Nova2Lite,
 755                "jp",
 756            ) => Ok(format!("{}.{}", region_group, model_id)),
 757
 758            // APAC region inference profiles (other than AU/JP)
 759            (
 760                Self::ClaudeHaiku4_5
 761                | Self::ClaudeSonnet4
 762                | Self::ClaudeSonnet4_5
 763                | Self::NovaLite
 764                | Self::NovaPro
 765                | Self::Nova2Lite,
 766                "apac",
 767            ) => Ok(format!("{}.{}", region_group, model_id)),
 768
 769            // Default: use model ID directly
 770            _ => Ok(model_id.into()),
 771        }
 772    }
 773}
 774
 775#[cfg(test)]
 776mod tests {
 777    use super::*;
 778
 779    #[test]
 780    fn test_us_region_inference_ids() -> anyhow::Result<()> {
 781        assert_eq!(
 782            Model::ClaudeSonnet4_5.cross_region_inference_id("us-east-1", false)?,
 783            "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
 784        );
 785        assert_eq!(
 786            Model::ClaudeSonnet4.cross_region_inference_id("us-west-2", false)?,
 787            "us.anthropic.claude-sonnet-4-20250514-v1:0"
 788        );
 789        assert_eq!(
 790            Model::NovaPro.cross_region_inference_id("us-east-2", false)?,
 791            "us.amazon.nova-pro-v1:0"
 792        );
 793        assert_eq!(
 794            Model::DeepSeekR1.cross_region_inference_id("us-east-1", false)?,
 795            "us.deepseek.r1-v1:0"
 796        );
 797        Ok(())
 798    }
 799
 800    #[test]
 801    fn test_eu_region_inference_ids() -> anyhow::Result<()> {
 802        assert_eq!(
 803            Model::ClaudeSonnet4.cross_region_inference_id("eu-west-1", false)?,
 804            "eu.anthropic.claude-sonnet-4-20250514-v1:0"
 805        );
 806        assert_eq!(
 807            Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", false)?,
 808            "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
 809        );
 810        assert_eq!(
 811            Model::NovaLite.cross_region_inference_id("eu-north-1", false)?,
 812            "eu.amazon.nova-lite-v1:0"
 813        );
 814        assert_eq!(
 815            Model::ClaudeOpus4_6.cross_region_inference_id("eu-west-1", false)?,
 816            "eu.anthropic.claude-opus-4-6-v1"
 817        );
 818        assert_eq!(
 819            Model::ClaudeOpus4_7.cross_region_inference_id("eu-west-1", false)?,
 820            "eu.anthropic.claude-opus-4-7"
 821        );
 822        Ok(())
 823    }
 824
 825    #[test]
 826    fn test_apac_region_inference_ids() -> anyhow::Result<()> {
 827        assert_eq!(
 828            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-south-1", false)?,
 829            "apac.anthropic.claude-sonnet-4-5-20250929-v1:0"
 830        );
 831        assert_eq!(
 832            Model::NovaLite.cross_region_inference_id("ap-south-1", false)?,
 833            "apac.amazon.nova-lite-v1:0"
 834        );
 835        Ok(())
 836    }
 837
 838    #[test]
 839    fn test_au_region_inference_ids() -> anyhow::Result<()> {
 840        assert_eq!(
 841            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-southeast-2", false)?,
 842            "au.anthropic.claude-haiku-4-5-20251001-v1:0"
 843        );
 844        assert_eq!(
 845            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-southeast-4", false)?,
 846            "au.anthropic.claude-sonnet-4-5-20250929-v1:0"
 847        );
 848        assert_eq!(
 849            Model::ClaudeOpus4_6.cross_region_inference_id("ap-southeast-2", false)?,
 850            "au.anthropic.claude-opus-4-6-v1"
 851        );
 852        assert_eq!(
 853            Model::ClaudeOpus4_7.cross_region_inference_id("ap-southeast-2", false)?,
 854            "au.anthropic.claude-opus-4-7"
 855        );
 856        Ok(())
 857    }
 858
 859    #[test]
 860    fn test_jp_region_inference_ids() -> anyhow::Result<()> {
 861        assert_eq!(
 862            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-northeast-1", false)?,
 863            "jp.anthropic.claude-haiku-4-5-20251001-v1:0"
 864        );
 865        assert_eq!(
 866            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-northeast-3", false)?,
 867            "jp.anthropic.claude-sonnet-4-5-20250929-v1:0"
 868        );
 869        assert_eq!(
 870            Model::Nova2Lite.cross_region_inference_id("ap-northeast-1", false)?,
 871            "jp.amazon.nova-2-lite-v1:0"
 872        );
 873        Ok(())
 874    }
 875
 876    #[test]
 877    fn test_ca_region_inference_ids() -> anyhow::Result<()> {
 878        assert_eq!(
 879            Model::NovaLite.cross_region_inference_id("ca-central-1", false)?,
 880            "ca.amazon.nova-lite-v1:0"
 881        );
 882        Ok(())
 883    }
 884
 885    #[test]
 886    fn test_gov_region_inference_ids() -> anyhow::Result<()> {
 887        assert_eq!(
 888            Model::ClaudeSonnet4_5.cross_region_inference_id("us-gov-east-1", false)?,
 889            "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0"
 890        );
 891        assert_eq!(
 892            Model::ClaudeSonnet4_5.cross_region_inference_id("us-gov-west-1", false)?,
 893            "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0"
 894        );
 895        Ok(())
 896    }
 897
 898    #[test]
 899    fn test_global_inference_ids() -> anyhow::Result<()> {
 900        assert_eq!(
 901            Model::ClaudeSonnet4.cross_region_inference_id("us-east-1", true)?,
 902            "global.anthropic.claude-sonnet-4-20250514-v1:0"
 903        );
 904        assert_eq!(
 905            Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", true)?,
 906            "global.anthropic.claude-sonnet-4-5-20250929-v1:0"
 907        );
 908        assert_eq!(
 909            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-south-1", true)?,
 910            "global.anthropic.claude-haiku-4-5-20251001-v1:0"
 911        );
 912        assert_eq!(
 913            Model::ClaudeOpus4_6.cross_region_inference_id("us-east-1", true)?,
 914            "global.anthropic.claude-opus-4-6-v1"
 915        );
 916        assert_eq!(
 917            Model::ClaudeOpus4_7.cross_region_inference_id("us-east-1", true)?,
 918            "global.anthropic.claude-opus-4-7"
 919        );
 920        assert_eq!(
 921            Model::Nova2Lite.cross_region_inference_id("us-east-1", true)?,
 922            "global.amazon.nova-2-lite-v1:0"
 923        );
 924
 925        // Models without global support fall back to regional
 926        assert_eq!(
 927            Model::NovaPro.cross_region_inference_id("us-east-1", true)?,
 928            "us.amazon.nova-pro-v1:0"
 929        );
 930        Ok(())
 931    }
 932
 933    #[test]
 934    fn test_models_without_cross_region() -> anyhow::Result<()> {
 935        // Models without cross-region support return their request_id directly
 936        assert_eq!(
 937            Model::Gemma3_4B.cross_region_inference_id("us-east-1", false)?,
 938            "google.gemma-3-4b-it"
 939        );
 940        assert_eq!(
 941            Model::MistralLarge3.cross_region_inference_id("eu-west-1", false)?,
 942            "mistral.mistral-large-3-675b-instruct"
 943        );
 944        assert_eq!(
 945            Model::Qwen3VL235B.cross_region_inference_id("ap-south-1", false)?,
 946            "qwen.qwen3-vl-235b-a22b"
 947        );
 948        assert_eq!(
 949            Model::GptOss120B.cross_region_inference_id("us-east-1", false)?,
 950            "openai.gpt-oss-120b-1:0"
 951        );
 952        assert_eq!(
 953            Model::MiniMaxM2.cross_region_inference_id("us-east-1", false)?,
 954            "minimax.minimax-m2"
 955        );
 956        assert_eq!(
 957            Model::KimiK2Thinking.cross_region_inference_id("us-east-1", false)?,
 958            "moonshot.kimi-k2-thinking"
 959        );
 960        Ok(())
 961    }
 962
 963    #[test]
 964    fn test_custom_model_inference_ids() -> anyhow::Result<()> {
 965        let custom_model = Model::Custom {
 966            name: "custom.my-model-v1:0".to_string(),
 967            max_tokens: 100000,
 968            display_name: Some("My Custom Model".to_string()),
 969            max_output_tokens: Some(8192),
 970            default_temperature: Some(0.7),
 971            cache_configuration: None,
 972        };
 973
 974        assert_eq!(
 975            custom_model.cross_region_inference_id("us-east-1", false)?,
 976            "custom.my-model-v1:0"
 977        );
 978        assert_eq!(
 979            custom_model.cross_region_inference_id("eu-west-1", true)?,
 980            "custom.my-model-v1:0"
 981        );
 982        Ok(())
 983    }
 984
 985    #[test]
 986    fn test_friendly_id_vs_request_id() {
 987        assert_eq!(Model::ClaudeSonnet4_5.id(), "claude-sonnet-4-5");
 988        assert_eq!(Model::NovaLite.id(), "nova-lite");
 989        assert_eq!(Model::DeepSeekR1.id(), "deepseek-r1");
 990        assert_eq!(Model::Llama4Scout17B.id(), "llama-4-scout-17b");
 991
 992        assert_eq!(
 993            Model::ClaudeSonnet4_5.request_id(),
 994            "anthropic.claude-sonnet-4-5-20250929-v1:0"
 995        );
 996        assert_eq!(Model::NovaLite.request_id(), "amazon.nova-lite-v1:0");
 997        assert_eq!(Model::DeepSeekR1.request_id(), "deepseek.r1-v1:0");
 998        assert_eq!(
 999            Model::Llama4Scout17B.request_id(),
1000            "meta.llama4-scout-17b-instruct-v1:0"
1001        );
1002
1003        // Thinking aliases deserialize to the same model
1004        assert_eq!(Model::ClaudeSonnet4.id(), "claude-sonnet-4");
1005        assert_eq!(
1006            Model::from_id("claude-sonnet-4-thinking").unwrap().id(),
1007            "claude-sonnet-4"
1008        );
1009    }
1010
1011    #[test]
1012    fn test_thinking_modes() {
1013        assert!(Model::ClaudeHaiku4_5.supports_thinking());
1014        assert!(Model::ClaudeSonnet4.supports_thinking());
1015        assert!(Model::ClaudeSonnet4_5.supports_thinking());
1016        assert!(Model::ClaudeOpus4_6.supports_thinking());
1017
1018        assert!(!Model::ClaudeSonnet4.supports_adaptive_thinking());
1019        assert!(Model::ClaudeOpus4_6.supports_adaptive_thinking());
1020        assert!(Model::ClaudeSonnet4_6.supports_adaptive_thinking());
1021
1022        assert_eq!(
1023            Model::ClaudeSonnet4.thinking_mode(),
1024            BedrockModelMode::Thinking {
1025                budget_tokens: Some(4096)
1026            }
1027        );
1028        assert_eq!(
1029            Model::ClaudeOpus4_6.thinking_mode(),
1030            BedrockModelMode::AdaptiveThinking {
1031                effort: BedrockAdaptiveThinkingEffort::High
1032            }
1033        );
1034        assert_eq!(
1035            Model::ClaudeHaiku4_5.thinking_mode(),
1036            BedrockModelMode::Thinking {
1037                budget_tokens: Some(4096)
1038            }
1039        );
1040    }
1041
1042    #[test]
1043    fn test_max_tokens() {
1044        assert_eq!(Model::ClaudeSonnet4_5.max_tokens(), 200_000);
1045        assert_eq!(Model::ClaudeOpus4_6.max_tokens(), 200_000);
1046        assert_eq!(Model::Llama4Scout17B.max_tokens(), 128_000);
1047        assert_eq!(Model::NovaPremier.max_tokens(), 1_000_000);
1048    }
1049
1050    #[test]
1051    fn test_max_output_tokens() {
1052        assert_eq!(Model::ClaudeSonnet4_5.max_output_tokens(), 64_000);
1053        assert_eq!(Model::ClaudeOpus4_6.max_output_tokens(), 128_000);
1054        assert_eq!(Model::ClaudeOpus4_1.max_output_tokens(), 32_000);
1055        assert_eq!(Model::Gemma3_4B.max_output_tokens(), 8_192);
1056    }
1057
1058    #[test]
1059    fn test_supports_tool_use() {
1060        assert!(Model::ClaudeSonnet4_5.supports_tool_use());
1061        assert!(Model::NovaPro.supports_tool_use());
1062        assert!(Model::MistralLarge3.supports_tool_use());
1063        assert!(!Model::Gemma3_4B.supports_tool_use());
1064        assert!(Model::Qwen3_32B.supports_tool_use());
1065        assert!(Model::MiniMaxM2.supports_tool_use());
1066        assert!(Model::KimiK2_5.supports_tool_use());
1067        assert!(Model::DeepSeekR1.supports_tool_use());
1068        assert!(!Model::Llama4Scout17B.supports_tool_use());
1069    }
1070
1071    #[test]
1072    fn test_supports_caching() {
1073        assert!(Model::ClaudeSonnet4_5.supports_caching());
1074        assert!(Model::ClaudeOpus4_6.supports_caching());
1075        assert!(!Model::Llama4Scout17B.supports_caching());
1076        assert!(!Model::NovaPro.supports_caching());
1077    }
1078}