models.rs

   1use serde::{Deserialize, Serialize};
   2use strum::EnumIter;
   3
// Effort setting carried by `BedrockModelMode::AdaptiveThinking`.
//
// `High` is the value produced by `Default`. No serde renames are applied
// here, so the serialized variant names are the identifiers as written
// (`Low`, `Medium`, ...), unlike the lowercase strings from `as_str`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum BedrockAdaptiveThinkingEffort {
    Low,
    Medium,
    #[default]
    High,
    Max,
}
  13
  14impl BedrockAdaptiveThinkingEffort {
  15    pub fn as_str(&self) -> &'static str {
  16        match self {
  17            Self::Low => "low",
  18            Self::Medium => "medium",
  19            Self::High => "high",
  20            Self::Max => "max",
  21        }
  22    }
  23}
  24
// Mode a model is run in: no thinking (`Default`, also the `Default` value),
// thinking with an optional token budget, or adaptive thinking with an
// effort level.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub enum BedrockModelMode {
    #[default]
    Default,
    Thinking {
        // `Model::thinking_mode` fills this with `Some(4096)`.
        // NOTE(review): the meaning of `None` is decided by the consumer of
        // this enum, which is not part of this file -- confirm there.
        budget_tokens: Option<u64>,
    },
    AdaptiveThinking {
        effort: BedrockAdaptiveThinkingEffort,
    },
}
  37
// Prompt-caching parameters attached to a model.
//
// Built-in Claude models get their values from `Model::cache_configuration`;
// custom models supply their own through `Model::Custom`.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct BedrockModelCacheConfiguration {
    // Presumably the maximum number of cache checkpoints placed in one
    // request -- TODO confirm against the request builder.
    pub max_cache_anchors: usize,
    // Presumably the minimum prompt size, in tokens, before caching is
    // attempted -- TODO confirm against the request builder.
    pub min_total_token: u64,
}
  44
// Catalogue of models known to this provider, plus a user-defined `Custom`
// entry.
//
// Each built-in variant's serde `rename` is the same string that `Model::id`
// returns for it. The Claude variants additionally accept `-latest`
// (and, except Haiku, `-thinking` / `-thinking-latest`) aliases when
// deserializing. `ClaudeSonnet4_5` is the `Default` value.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
pub enum Model {
    // Anthropic Claude 4+ models
    #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
    ClaudeHaiku4_5,
    #[serde(
        rename = "claude-sonnet-4",
        alias = "claude-sonnet-4-latest",
        alias = "claude-sonnet-4-thinking",
        alias = "claude-sonnet-4-thinking-latest"
    )]
    ClaudeSonnet4,
    #[default]
    #[serde(
        rename = "claude-sonnet-4-5",
        alias = "claude-sonnet-4-5-latest",
        alias = "claude-sonnet-4-5-thinking",
        alias = "claude-sonnet-4-5-thinking-latest"
    )]
    ClaudeSonnet4_5,
    #[serde(
        rename = "claude-opus-4-1",
        alias = "claude-opus-4-1-latest",
        alias = "claude-opus-4-1-thinking",
        alias = "claude-opus-4-1-thinking-latest"
    )]
    ClaudeOpus4_1,
    #[serde(
        rename = "claude-opus-4-5",
        alias = "claude-opus-4-5-latest",
        alias = "claude-opus-4-5-thinking",
        alias = "claude-opus-4-5-thinking-latest"
    )]
    ClaudeOpus4_5,
    #[serde(
        rename = "claude-opus-4-6",
        alias = "claude-opus-4-6-latest",
        alias = "claude-opus-4-6-thinking",
        alias = "claude-opus-4-6-thinking-latest"
    )]
    ClaudeOpus4_6,
    #[serde(
        rename = "claude-sonnet-4-6",
        alias = "claude-sonnet-4-6-latest",
        alias = "claude-sonnet-4-6-thinking",
        alias = "claude-sonnet-4-6-thinking-latest"
    )]
    ClaudeSonnet4_6,

    // Meta Llama 4 models
    #[serde(rename = "llama-4-scout-17b")]
    Llama4Scout17B,
    #[serde(rename = "llama-4-maverick-17b")]
    Llama4Maverick17B,

    // Google Gemma 3 models
    #[serde(rename = "gemma-3-4b")]
    Gemma3_4B,
    #[serde(rename = "gemma-3-12b")]
    Gemma3_12B,
    #[serde(rename = "gemma-3-27b")]
    Gemma3_27B,

    // Mistral models
    #[serde(rename = "magistral-small")]
    MagistralSmall,
    #[serde(rename = "mistral-large-3")]
    MistralLarge3,
    #[serde(rename = "pixtral-large")]
    PixtralLarge,
    #[serde(rename = "devstral-2-123b")]
    Devstral2_123B,
    #[serde(rename = "ministral-14b")]
    Ministral14B,

    // Qwen models
    #[serde(rename = "qwen3-32b")]
    Qwen3_32B,
    #[serde(rename = "qwen3-vl-235b")]
    Qwen3VL235B,
    #[serde(rename = "qwen3-235b")]
    Qwen3_235B,
    #[serde(rename = "qwen3-next-80b")]
    Qwen3Next80B,
    #[serde(rename = "qwen3-coder-30b")]
    Qwen3Coder30B,
    #[serde(rename = "qwen3-coder-next")]
    Qwen3CoderNext,
    #[serde(rename = "qwen3-coder-480b")]
    Qwen3Coder480B,

    // Amazon Nova models
    #[serde(rename = "nova-lite")]
    NovaLite,
    #[serde(rename = "nova-pro")]
    NovaPro,
    #[serde(rename = "nova-premier")]
    NovaPremier,
    #[serde(rename = "nova-2-lite")]
    Nova2Lite,

    // OpenAI GPT OSS models
    #[serde(rename = "gpt-oss-20b")]
    GptOss20B,
    #[serde(rename = "gpt-oss-120b")]
    GptOss120B,

    // NVIDIA Nemotron models
    #[serde(rename = "nemotron-super-3-120b")]
    NemotronSuper3_120B,
    #[serde(rename = "nemotron-nano-3-30b")]
    NemotronNano3_30B,

    // MiniMax models
    #[serde(rename = "minimax-m2")]
    MiniMaxM2,
    #[serde(rename = "minimax-m2-1")]
    MiniMaxM2_1,
    #[serde(rename = "minimax-m2-5")]
    MiniMaxM2_5,

    // Z.AI GLM models
    #[serde(rename = "glm-5")]
    GLM5,
    #[serde(rename = "glm-4-7")]
    GLM4_7,
    #[serde(rename = "glm-4-7-flash")]
    GLM4_7Flash,

    // Moonshot models
    #[serde(rename = "kimi-k2-thinking")]
    KimiK2Thinking,
    #[serde(rename = "kimi-k2-5")]
    KimiK2_5,

    // DeepSeek models
    // Note: the `DeepSeekV3_1` variant is serialized as "deepseek-v3".
    #[serde(rename = "deepseek-r1")]
    DeepSeekR1,
    #[serde(rename = "deepseek-v3")]
    DeepSeekV3_1,
    #[serde(rename = "deepseek-v3-2")]
    DeepSeekV3_2,

    // User-defined model.
    #[serde(rename = "custom")]
    Custom {
        // Returned verbatim by both `id` and `request_id`.
        name: String,
        // Returned by `max_tokens`.
        max_tokens: u64,
        // Shown by `display_name`; `name` is used when this is `None`.
        display_name: Option<String>,
        // `max_output_tokens` falls back to 4_096 when this is `None`.
        max_output_tokens: Option<u64>,
        // `default_temperature` falls back to 1.0 when this is `None`.
        default_temperature: Option<f32>,
        // Caching is reported as supported only when this is `Some`.
        cache_configuration: Option<BedrockModelCacheConfiguration>,
    },
}
 199
 200impl Model {
    /// Returns the model used as the default "fast" model.
    ///
    /// The `_region` argument is currently ignored: the result is always
    /// [`Model::ClaudeHaiku4_5`].
    pub fn default_fast(_region: &str) -> Self {
        Self::ClaudeHaiku4_5
    }
 204
 205    pub fn from_id(id: &str) -> anyhow::Result<Self> {
 206        if id.starts_with("claude-opus-4-6") {
 207            Ok(Self::ClaudeOpus4_6)
 208        } else if id.starts_with("claude-opus-4-5") {
 209            Ok(Self::ClaudeOpus4_5)
 210        } else if id.starts_with("claude-opus-4-1") {
 211            Ok(Self::ClaudeOpus4_1)
 212        } else if id.starts_with("claude-sonnet-4-6") {
 213            Ok(Self::ClaudeSonnet4_6)
 214        } else if id.starts_with("claude-sonnet-4-5") {
 215            Ok(Self::ClaudeSonnet4_5)
 216        } else if id.starts_with("claude-sonnet-4") {
 217            Ok(Self::ClaudeSonnet4)
 218        } else if id.starts_with("claude-haiku-4-5") {
 219            Ok(Self::ClaudeHaiku4_5)
 220        } else {
 221            anyhow::bail!("invalid model id {id}");
 222        }
 223    }
 224
    /// Returns the short identifier of this model.
    ///
    /// For built-in models this is the same string as the variant's serde
    /// name; for [`Model::Custom`] it is the configured `name`.
    pub fn id(&self) -> &str {
        match self {
            Self::ClaudeHaiku4_5 => "claude-haiku-4-5",
            Self::ClaudeSonnet4 => "claude-sonnet-4",
            Self::ClaudeSonnet4_5 => "claude-sonnet-4-5",
            Self::ClaudeOpus4_1 => "claude-opus-4-1",
            Self::ClaudeOpus4_5 => "claude-opus-4-5",
            Self::ClaudeOpus4_6 => "claude-opus-4-6",
            Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
            Self::Llama4Scout17B => "llama-4-scout-17b",
            Self::Llama4Maverick17B => "llama-4-maverick-17b",
            Self::Gemma3_4B => "gemma-3-4b",
            Self::Gemma3_12B => "gemma-3-12b",
            Self::Gemma3_27B => "gemma-3-27b",
            Self::MagistralSmall => "magistral-small",
            Self::MistralLarge3 => "mistral-large-3",
            Self::PixtralLarge => "pixtral-large",
            Self::Devstral2_123B => "devstral-2-123b",
            Self::Ministral14B => "ministral-14b",
            Self::Qwen3_32B => "qwen3-32b",
            Self::Qwen3VL235B => "qwen3-vl-235b",
            Self::Qwen3_235B => "qwen3-235b",
            Self::Qwen3Next80B => "qwen3-next-80b",
            Self::Qwen3Coder30B => "qwen3-coder-30b",
            Self::Qwen3CoderNext => "qwen3-coder-next",
            Self::Qwen3Coder480B => "qwen3-coder-480b",
            Self::NovaLite => "nova-lite",
            Self::NovaPro => "nova-pro",
            Self::NovaPremier => "nova-premier",
            Self::Nova2Lite => "nova-2-lite",
            Self::GptOss20B => "gpt-oss-20b",
            Self::GptOss120B => "gpt-oss-120b",
            Self::NemotronSuper3_120B => "nemotron-super-3-120b",
            Self::NemotronNano3_30B => "nemotron-nano-3-30b",
            Self::MiniMaxM2 => "minimax-m2",
            Self::MiniMaxM2_1 => "minimax-m2-1",
            Self::MiniMaxM2_5 => "minimax-m2-5",
            Self::GLM5 => "glm-5",
            Self::GLM4_7 => "glm-4-7",
            Self::GLM4_7Flash => "glm-4-7-flash",
            Self::KimiK2Thinking => "kimi-k2-thinking",
            Self::KimiK2_5 => "kimi-k2-5",
            Self::DeepSeekR1 => "deepseek-r1",
            Self::DeepSeekV3_1 => "deepseek-v3",
            Self::DeepSeekV3_2 => "deepseek-v3-2",
            Self::Custom { name, .. } => name,
        }
    }
 273
    /// Returns the provider-qualified model id for this model (for example
    /// `anthropic.claude-sonnet-4-5-20250929-v1:0`), without any region prefix.
    ///
    /// `cross_region_inference_id` uses this value as its base and may
    /// prepend a region group to it. For [`Model::Custom`] the configured
    /// `name` is returned unchanged.
    pub fn request_id(&self) -> &str {
        match self {
            Self::ClaudeHaiku4_5 => "anthropic.claude-haiku-4-5-20251001-v1:0",
            Self::ClaudeSonnet4 => "anthropic.claude-sonnet-4-20250514-v1:0",
            Self::ClaudeSonnet4_5 => "anthropic.claude-sonnet-4-5-20250929-v1:0",
            Self::ClaudeOpus4_1 => "anthropic.claude-opus-4-1-20250805-v1:0",
            Self::ClaudeOpus4_5 => "anthropic.claude-opus-4-5-20251101-v1:0",
            Self::ClaudeOpus4_6 => "anthropic.claude-opus-4-6-v1",
            Self::ClaudeSonnet4_6 => "anthropic.claude-sonnet-4-6",
            Self::Llama4Scout17B => "meta.llama4-scout-17b-instruct-v1:0",
            Self::Llama4Maverick17B => "meta.llama4-maverick-17b-instruct-v1:0",
            Self::Gemma3_4B => "google.gemma-3-4b-it",
            Self::Gemma3_12B => "google.gemma-3-12b-it",
            Self::Gemma3_27B => "google.gemma-3-27b-it",
            Self::MagistralSmall => "mistral.magistral-small-2509",
            Self::MistralLarge3 => "mistral.mistral-large-3-675b-instruct",
            Self::PixtralLarge => "mistral.pixtral-large-2502-v1:0",
            Self::Devstral2_123B => "mistral.devstral-2-123b",
            Self::Ministral14B => "mistral.ministral-3-14b-instruct",
            Self::Qwen3VL235B => "qwen.qwen3-vl-235b-a22b",
            Self::Qwen3_32B => "qwen.qwen3-32b-v1:0",
            Self::Qwen3_235B => "qwen.qwen3-235b-a22b-2507-v1:0",
            Self::Qwen3Next80B => "qwen.qwen3-next-80b-a3b",
            Self::Qwen3Coder30B => "qwen.qwen3-coder-30b-a3b-v1:0",
            Self::Qwen3CoderNext => "qwen.qwen3-coder-next",
            Self::Qwen3Coder480B => "qwen.qwen3-coder-480b-a35b-v1:0",
            Self::NovaLite => "amazon.nova-lite-v1:0",
            Self::NovaPro => "amazon.nova-pro-v1:0",
            Self::NovaPremier => "amazon.nova-premier-v1:0",
            Self::Nova2Lite => "amazon.nova-2-lite-v1:0",
            Self::GptOss20B => "openai.gpt-oss-20b-1:0",
            Self::GptOss120B => "openai.gpt-oss-120b-1:0",
            Self::NemotronSuper3_120B => "nvidia.nemotron-super-3-120b",
            Self::NemotronNano3_30B => "nvidia.nemotron-nano-3-30b",
            Self::MiniMaxM2 => "minimax.minimax-m2",
            Self::MiniMaxM2_1 => "minimax.minimax-m2.1",
            Self::MiniMaxM2_5 => "minimax.minimax-m2.5",
            Self::GLM5 => "zai.glm-5",
            Self::GLM4_7 => "zai.glm-4.7",
            Self::GLM4_7Flash => "zai.glm-4.7-flash",
            // NOTE(review): the two Kimi ids use different vendor prefixes
            // ("moonshot." vs "moonshotai.") -- confirm both against the
            // Bedrock model catalogue.
            Self::KimiK2Thinking => "moonshot.kimi-k2-thinking",
            Self::KimiK2_5 => "moonshotai.kimi-k2.5",
            Self::DeepSeekR1 => "deepseek.r1-v1:0",
            Self::DeepSeekV3_1 => "deepseek.v3-v1:0",
            Self::DeepSeekV3_2 => "deepseek.v3.2",
            Self::Custom { name, .. } => name,
        }
    }
 322
    /// Returns the human-readable name of this model.
    ///
    /// For [`Model::Custom`] this is the configured `display_name`, falling
    /// back to `name` when no display name was given.
    pub fn display_name(&self) -> &str {
        match self {
            Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
            Self::ClaudeSonnet4 => "Claude Sonnet 4",
            Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
            Self::ClaudeOpus4_1 => "Claude Opus 4.1",
            Self::ClaudeOpus4_5 => "Claude Opus 4.5",
            Self::ClaudeOpus4_6 => "Claude Opus 4.6",
            Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
            Self::Llama4Scout17B => "Llama 4 Scout 17B",
            Self::Llama4Maverick17B => "Llama 4 Maverick 17B",
            Self::Gemma3_4B => "Gemma 3 4B",
            Self::Gemma3_12B => "Gemma 3 12B",
            Self::Gemma3_27B => "Gemma 3 27B",
            Self::MagistralSmall => "Magistral Small",
            Self::MistralLarge3 => "Mistral Large 3",
            Self::PixtralLarge => "Pixtral Large",
            Self::Devstral2_123B => "Devstral 2 123B",
            Self::Ministral14B => "Ministral 14B",
            Self::Qwen3VL235B => "Qwen3 VL 235B",
            Self::Qwen3_32B => "Qwen3 32B",
            Self::Qwen3_235B => "Qwen3 235B",
            Self::Qwen3Next80B => "Qwen3 Next 80B",
            Self::Qwen3Coder30B => "Qwen3 Coder 30B",
            Self::Qwen3CoderNext => "Qwen3 Coder Next",
            Self::Qwen3Coder480B => "Qwen3 Coder 480B",
            Self::NovaLite => "Amazon Nova Lite",
            Self::NovaPro => "Amazon Nova Pro",
            Self::NovaPremier => "Amazon Nova Premier",
            Self::Nova2Lite => "Amazon Nova 2 Lite",
            Self::GptOss20B => "GPT OSS 20B",
            Self::GptOss120B => "GPT OSS 120B",
            Self::NemotronSuper3_120B => "Nemotron Super 3 120B",
            Self::NemotronNano3_30B => "Nemotron Nano 3 30B",
            Self::MiniMaxM2 => "MiniMax M2",
            Self::MiniMaxM2_1 => "MiniMax M2.1",
            Self::MiniMaxM2_5 => "MiniMax M2.5",
            Self::GLM5 => "GLM 5",
            Self::GLM4_7 => "GLM 4.7",
            Self::GLM4_7Flash => "GLM 4.7 Flash",
            Self::KimiK2Thinking => "Kimi K2 Thinking",
            Self::KimiK2_5 => "Kimi K2.5",
            Self::DeepSeekR1 => "DeepSeek R1",
            Self::DeepSeekV3_1 => "DeepSeek V3.1",
            Self::DeepSeekV3_2 => "DeepSeek V3.2",
            Self::Custom {
                display_name, name, ..
            } => display_name.as_deref().unwrap_or(name.as_str()),
        }
    }
 373
    /// Alias for [`Model::max_tokens`]; returns exactly the same value.
    pub fn max_token_count(&self) -> u64 {
        self.max_tokens()
    }
 377
 378    pub fn max_tokens(&self) -> u64 {
 379        match self {
 380            Self::ClaudeHaiku4_5
 381            | Self::ClaudeSonnet4
 382            | Self::ClaudeSonnet4_5
 383            | Self::ClaudeOpus4_1
 384            | Self::ClaudeOpus4_5
 385            | Self::ClaudeOpus4_6
 386            | Self::ClaudeSonnet4_6 => 200_000,
 387            Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
 388            Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
 389            Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
 390            Self::Devstral2_123B | Self::Ministral14B => 256_000,
 391            Self::Qwen3_32B
 392            | Self::Qwen3VL235B
 393            | Self::Qwen3_235B
 394            | Self::Qwen3Next80B
 395            | Self::Qwen3Coder30B
 396            | Self::Qwen3CoderNext
 397            | Self::Qwen3Coder480B => 128_000,
 398            Self::NovaLite | Self::NovaPro => 300_000,
 399            Self::NovaPremier => 1_000_000,
 400            Self::Nova2Lite => 300_000,
 401            Self::GptOss20B | Self::GptOss120B => 128_000,
 402            Self::NemotronSuper3_120B | Self::NemotronNano3_30B => 262_000,
 403            Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => 196_000,
 404            Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => 203_000,
 405            Self::KimiK2Thinking | Self::KimiK2_5 => 128_000,
 406            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 128_000,
 407            Self::Custom { max_tokens, .. } => *max_tokens,
 408        }
 409    }
 410
 411    pub fn max_output_tokens(&self) -> u64 {
 412        match self {
 413            Self::ClaudeHaiku4_5
 414            | Self::ClaudeSonnet4
 415            | Self::ClaudeSonnet4_5
 416            | Self::ClaudeOpus4_5
 417            | Self::ClaudeSonnet4_6 => 64_000,
 418            Self::ClaudeOpus4_1 => 32_000,
 419            Self::ClaudeOpus4_6 => 128_000,
 420            Self::Llama4Scout17B
 421            | Self::Llama4Maverick17B
 422            | Self::Gemma3_4B
 423            | Self::Gemma3_12B
 424            | Self::Gemma3_27B
 425            | Self::MagistralSmall
 426            | Self::MistralLarge3
 427            | Self::PixtralLarge => 8_192,
 428            Self::Devstral2_123B | Self::Ministral14B => 131_000,
 429            Self::Qwen3_32B
 430            | Self::Qwen3VL235B
 431            | Self::Qwen3_235B
 432            | Self::Qwen3Next80B
 433            | Self::Qwen3Coder30B
 434            | Self::Qwen3CoderNext
 435            | Self::Qwen3Coder480B => 8_192,
 436            Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => 5_000,
 437            Self::GptOss20B | Self::GptOss120B => 16_000,
 438            Self::NemotronSuper3_120B | Self::NemotronNano3_30B => 131_000,
 439            Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => 98_000,
 440            Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => 101_000,
 441            Self::KimiK2Thinking | Self::KimiK2_5 => 16_000,
 442            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 16_000,
 443            Self::Custom {
 444                max_output_tokens, ..
 445            } => max_output_tokens.unwrap_or(4_096),
 446        }
 447    }
 448
 449    pub fn default_temperature(&self) -> f32 {
 450        match self {
 451            Self::ClaudeHaiku4_5
 452            | Self::ClaudeSonnet4
 453            | Self::ClaudeSonnet4_5
 454            | Self::ClaudeOpus4_1
 455            | Self::ClaudeOpus4_5
 456            | Self::ClaudeOpus4_6
 457            | Self::ClaudeSonnet4_6 => 1.0,
 458            Self::Custom {
 459                default_temperature,
 460                ..
 461            } => default_temperature.unwrap_or(1.0),
 462            _ => 1.0,
 463        }
 464    }
 465
 466    pub fn supports_tool_use(&self) -> bool {
 467        match self {
 468            Self::ClaudeHaiku4_5
 469            | Self::ClaudeSonnet4
 470            | Self::ClaudeSonnet4_5
 471            | Self::ClaudeOpus4_1
 472            | Self::ClaudeOpus4_5
 473            | Self::ClaudeOpus4_6
 474            | Self::ClaudeSonnet4_6 => true,
 475            Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => true,
 476            Self::MistralLarge3 | Self::PixtralLarge | Self::MagistralSmall => true,
 477            Self::Devstral2_123B | Self::Ministral14B => true,
 478            // Gemma accepts toolConfig without error but produces unreliable tool
 479            // calls -- malformed JSON args, hallucinated tool names, dropped calls.
 480            Self::Qwen3_32B
 481            | Self::Qwen3VL235B
 482            | Self::Qwen3_235B
 483            | Self::Qwen3Next80B
 484            | Self::Qwen3Coder30B
 485            | Self::Qwen3CoderNext
 486            | Self::Qwen3Coder480B => true,
 487            Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => true,
 488            Self::NemotronSuper3_120B | Self::NemotronNano3_30B => true,
 489            Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => true,
 490            Self::KimiK2Thinking | Self::KimiK2_5 => true,
 491            Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => true,
 492            _ => false,
 493        }
 494    }
 495
 496    pub fn supports_images(&self) -> bool {
 497        match self {
 498            Self::ClaudeHaiku4_5
 499            | Self::ClaudeSonnet4
 500            | Self::ClaudeSonnet4_5
 501            | Self::ClaudeOpus4_1
 502            | Self::ClaudeOpus4_5
 503            | Self::ClaudeOpus4_6
 504            | Self::ClaudeSonnet4_6 => true,
 505            Self::NovaLite | Self::NovaPro => true,
 506            Self::PixtralLarge => true,
 507            Self::Qwen3VL235B => true,
 508            Self::KimiK2_5 => true,
 509            _ => false,
 510        }
 511    }
 512
 513    pub fn supports_extended_context(&self) -> bool {
 514        matches!(
 515            self,
 516            Self::ClaudeSonnet4
 517                | Self::ClaudeSonnet4_5
 518                | Self::ClaudeOpus4_5
 519                | Self::ClaudeOpus4_6
 520                | Self::ClaudeSonnet4_6
 521        )
 522    }
 523
 524    pub fn supports_caching(&self) -> bool {
 525        match self {
 526            Self::ClaudeHaiku4_5
 527            | Self::ClaudeSonnet4
 528            | Self::ClaudeSonnet4_5
 529            | Self::ClaudeOpus4_1
 530            | Self::ClaudeOpus4_5
 531            | Self::ClaudeOpus4_6
 532            | Self::ClaudeSonnet4_6 => true,
 533            Self::Custom {
 534                cache_configuration,
 535                ..
 536            } => cache_configuration.is_some(),
 537            _ => false,
 538        }
 539    }
 540
 541    pub fn cache_configuration(&self) -> Option<BedrockModelCacheConfiguration> {
 542        match self {
 543            Self::ClaudeSonnet4
 544            | Self::ClaudeSonnet4_5
 545            | Self::ClaudeOpus4_1
 546            | Self::ClaudeOpus4_5
 547            | Self::ClaudeOpus4_6
 548            | Self::ClaudeSonnet4_6 => Some(BedrockModelCacheConfiguration {
 549                max_cache_anchors: 4,
 550                min_total_token: 1024,
 551            }),
 552            Self::ClaudeHaiku4_5 => Some(BedrockModelCacheConfiguration {
 553                max_cache_anchors: 4,
 554                min_total_token: 2048,
 555            }),
 556            Self::Custom {
 557                cache_configuration,
 558                ..
 559            } => cache_configuration.clone(),
 560            _ => None,
 561        }
 562    }
 563
 564    pub fn supports_thinking(&self) -> bool {
 565        matches!(
 566            self,
 567            Self::ClaudeHaiku4_5
 568                | Self::ClaudeSonnet4
 569                | Self::ClaudeSonnet4_5
 570                | Self::ClaudeOpus4_1
 571                | Self::ClaudeOpus4_5
 572                | Self::ClaudeOpus4_6
 573                | Self::ClaudeSonnet4_6
 574        )
 575    }
 576
 577    pub fn supports_adaptive_thinking(&self) -> bool {
 578        matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
 579    }
 580
 581    pub fn thinking_mode(&self) -> BedrockModelMode {
 582        if self.supports_adaptive_thinking() {
 583            BedrockModelMode::AdaptiveThinking {
 584                effort: BedrockAdaptiveThinkingEffort::default(),
 585            }
 586        } else if self.supports_thinking() {
 587            BedrockModelMode::Thinking {
 588                budget_tokens: Some(4096),
 589            }
 590        } else {
 591            BedrockModelMode::Default
 592        }
 593    }
 594
 595    pub fn cross_region_inference_id(
 596        &self,
 597        region: &str,
 598        allow_global: bool,
 599    ) -> anyhow::Result<String> {
 600        let model_id = self.request_id();
 601
 602        let supports_global = matches!(
 603            self,
 604            Self::ClaudeHaiku4_5
 605                | Self::ClaudeSonnet4
 606                | Self::ClaudeSonnet4_5
 607                | Self::ClaudeOpus4_5
 608                | Self::ClaudeOpus4_6
 609                | Self::ClaudeSonnet4_6
 610                | Self::Nova2Lite
 611        );
 612
 613        // Determine region group based on AWS region
 614        let region_group = if region.starts_with("us-gov-") {
 615            "us-gov"
 616        } else if region.starts_with("us-") || region.starts_with("sa-") {
 617            if allow_global && supports_global {
 618                "global"
 619            } else {
 620                "us"
 621            }
 622        } else if region.starts_with("ca-") {
 623            if allow_global && supports_global {
 624                "global"
 625            } else {
 626                "ca"
 627            }
 628        } else if region.starts_with("eu-") {
 629            if allow_global && supports_global {
 630                "global"
 631            } else {
 632                "eu"
 633            }
 634        } else if region == "ap-southeast-2" || region == "ap-southeast-4" {
 635            // Australia
 636            if allow_global && supports_global {
 637                "global"
 638            } else {
 639                "au"
 640            }
 641        } else if region == "ap-northeast-1" || region == "ap-northeast-3" {
 642            // Japan
 643            if allow_global && supports_global {
 644                "global"
 645            } else {
 646                "jp"
 647            }
 648        } else if region.starts_with("ap-") || region.starts_with("me-") {
 649            if allow_global && supports_global {
 650                "global"
 651            } else {
 652                "apac"
 653            }
 654        } else {
 655            anyhow::bail!("Unsupported Region {region}");
 656        };
 657
 658        match (self, region_group) {
 659            (Self::Custom { .. }, _) => Ok(model_id.into()),
 660
 661            // Global inference profiles
 662            (
 663                Self::ClaudeHaiku4_5
 664                | Self::ClaudeSonnet4
 665                | Self::ClaudeSonnet4_5
 666                | Self::ClaudeOpus4_5
 667                | Self::ClaudeOpus4_6
 668                | Self::ClaudeSonnet4_6
 669                | Self::Nova2Lite,
 670                "global",
 671            ) => Ok(format!("{}.{}", region_group, model_id)),
 672
 673            // US Government region inference profiles
 674            (Self::ClaudeSonnet4_5, "us-gov") => Ok(format!("{}.{}", region_group, model_id)),
 675
 676            // US region inference profiles
 677            (
 678                Self::ClaudeHaiku4_5
 679                | Self::ClaudeSonnet4
 680                | Self::ClaudeSonnet4_5
 681                | Self::ClaudeOpus4_1
 682                | Self::ClaudeOpus4_5
 683                | Self::ClaudeOpus4_6
 684                | Self::ClaudeSonnet4_6
 685                | Self::Llama4Scout17B
 686                | Self::Llama4Maverick17B
 687                | Self::NovaLite
 688                | Self::NovaPro
 689                | Self::NovaPremier
 690                | Self::Nova2Lite
 691                | Self::PixtralLarge
 692                | Self::DeepSeekR1,
 693                "us",
 694            ) => Ok(format!("{}.{}", region_group, model_id)),
 695
 696            // Canada region inference profiles
 697            (Self::NovaLite, "ca") => Ok(format!("{}.{}", region_group, model_id)),
 698
 699            // EU region inference profiles
 700            (
 701                Self::ClaudeHaiku4_5
 702                | Self::ClaudeSonnet4
 703                | Self::ClaudeSonnet4_5
 704                | Self::ClaudeOpus4_6
 705                | Self::ClaudeSonnet4_6
 706                | Self::NovaLite
 707                | Self::NovaPro
 708                | Self::Nova2Lite,
 709                "eu",
 710            ) => Ok(format!("{}.{}", region_group, model_id)),
 711
 712            // Australia region inference profiles
 713            (
 714                Self::ClaudeHaiku4_5
 715                | Self::ClaudeSonnet4_5
 716                | Self::ClaudeOpus4_6
 717                | Self::ClaudeSonnet4_6,
 718                "au",
 719            ) => Ok(format!("{}.{}", region_group, model_id)),
 720
 721            // Japan region inference profiles
 722            (
 723                Self::ClaudeHaiku4_5
 724                | Self::ClaudeSonnet4_5
 725                | Self::ClaudeSonnet4_6
 726                | Self::Nova2Lite,
 727                "jp",
 728            ) => Ok(format!("{}.{}", region_group, model_id)),
 729
 730            // APAC region inference profiles (other than AU/JP)
 731            (
 732                Self::ClaudeHaiku4_5
 733                | Self::ClaudeSonnet4
 734                | Self::ClaudeSonnet4_5
 735                | Self::NovaLite
 736                | Self::NovaPro
 737                | Self::Nova2Lite,
 738                "apac",
 739            ) => Ok(format!("{}.{}", region_group, model_id)),
 740
 741            // Default: use model ID directly
 742            _ => Ok(model_id.into()),
 743        }
 744    }
 745}
 746
 747#[cfg(test)]
 748mod tests {
 749    use super::*;
 750
 751    #[test]
 752    fn test_us_region_inference_ids() -> anyhow::Result<()> {
 753        assert_eq!(
 754            Model::ClaudeSonnet4_5.cross_region_inference_id("us-east-1", false)?,
 755            "us.anthropic.claude-sonnet-4-5-20250929-v1:0"
 756        );
 757        assert_eq!(
 758            Model::ClaudeSonnet4.cross_region_inference_id("us-west-2", false)?,
 759            "us.anthropic.claude-sonnet-4-20250514-v1:0"
 760        );
 761        assert_eq!(
 762            Model::NovaPro.cross_region_inference_id("us-east-2", false)?,
 763            "us.amazon.nova-pro-v1:0"
 764        );
 765        assert_eq!(
 766            Model::DeepSeekR1.cross_region_inference_id("us-east-1", false)?,
 767            "us.deepseek.r1-v1:0"
 768        );
 769        Ok(())
 770    }
 771
 772    #[test]
 773    fn test_eu_region_inference_ids() -> anyhow::Result<()> {
 774        assert_eq!(
 775            Model::ClaudeSonnet4.cross_region_inference_id("eu-west-1", false)?,
 776            "eu.anthropic.claude-sonnet-4-20250514-v1:0"
 777        );
 778        assert_eq!(
 779            Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", false)?,
 780            "eu.anthropic.claude-sonnet-4-5-20250929-v1:0"
 781        );
 782        assert_eq!(
 783            Model::NovaLite.cross_region_inference_id("eu-north-1", false)?,
 784            "eu.amazon.nova-lite-v1:0"
 785        );
 786        assert_eq!(
 787            Model::ClaudeOpus4_6.cross_region_inference_id("eu-west-1", false)?,
 788            "eu.anthropic.claude-opus-4-6-v1"
 789        );
 790        Ok(())
 791    }
 792
 793    #[test]
 794    fn test_apac_region_inference_ids() -> anyhow::Result<()> {
 795        assert_eq!(
 796            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-south-1", false)?,
 797            "apac.anthropic.claude-sonnet-4-5-20250929-v1:0"
 798        );
 799        assert_eq!(
 800            Model::NovaLite.cross_region_inference_id("ap-south-1", false)?,
 801            "apac.amazon.nova-lite-v1:0"
 802        );
 803        Ok(())
 804    }
 805
 806    #[test]
 807    fn test_au_region_inference_ids() -> anyhow::Result<()> {
 808        assert_eq!(
 809            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-southeast-2", false)?,
 810            "au.anthropic.claude-haiku-4-5-20251001-v1:0"
 811        );
 812        assert_eq!(
 813            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-southeast-4", false)?,
 814            "au.anthropic.claude-sonnet-4-5-20250929-v1:0"
 815        );
 816        assert_eq!(
 817            Model::ClaudeOpus4_6.cross_region_inference_id("ap-southeast-2", false)?,
 818            "au.anthropic.claude-opus-4-6-v1"
 819        );
 820        Ok(())
 821    }
 822
 823    #[test]
 824    fn test_jp_region_inference_ids() -> anyhow::Result<()> {
 825        assert_eq!(
 826            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-northeast-1", false)?,
 827            "jp.anthropic.claude-haiku-4-5-20251001-v1:0"
 828        );
 829        assert_eq!(
 830            Model::ClaudeSonnet4_5.cross_region_inference_id("ap-northeast-3", false)?,
 831            "jp.anthropic.claude-sonnet-4-5-20250929-v1:0"
 832        );
 833        assert_eq!(
 834            Model::Nova2Lite.cross_region_inference_id("ap-northeast-1", false)?,
 835            "jp.amazon.nova-2-lite-v1:0"
 836        );
 837        Ok(())
 838    }
 839
 840    #[test]
 841    fn test_ca_region_inference_ids() -> anyhow::Result<()> {
 842        assert_eq!(
 843            Model::NovaLite.cross_region_inference_id("ca-central-1", false)?,
 844            "ca.amazon.nova-lite-v1:0"
 845        );
 846        Ok(())
 847    }
 848
 849    #[test]
 850    fn test_gov_region_inference_ids() -> anyhow::Result<()> {
 851        assert_eq!(
 852            Model::ClaudeSonnet4_5.cross_region_inference_id("us-gov-east-1", false)?,
 853            "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0"
 854        );
 855        assert_eq!(
 856            Model::ClaudeSonnet4_5.cross_region_inference_id("us-gov-west-1", false)?,
 857            "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0"
 858        );
 859        Ok(())
 860    }
 861
 862    #[test]
 863    fn test_global_inference_ids() -> anyhow::Result<()> {
 864        assert_eq!(
 865            Model::ClaudeSonnet4.cross_region_inference_id("us-east-1", true)?,
 866            "global.anthropic.claude-sonnet-4-20250514-v1:0"
 867        );
 868        assert_eq!(
 869            Model::ClaudeSonnet4_5.cross_region_inference_id("eu-west-1", true)?,
 870            "global.anthropic.claude-sonnet-4-5-20250929-v1:0"
 871        );
 872        assert_eq!(
 873            Model::ClaudeHaiku4_5.cross_region_inference_id("ap-south-1", true)?,
 874            "global.anthropic.claude-haiku-4-5-20251001-v1:0"
 875        );
 876        assert_eq!(
 877            Model::ClaudeOpus4_6.cross_region_inference_id("us-east-1", true)?,
 878            "global.anthropic.claude-opus-4-6-v1"
 879        );
 880        assert_eq!(
 881            Model::Nova2Lite.cross_region_inference_id("us-east-1", true)?,
 882            "global.amazon.nova-2-lite-v1:0"
 883        );
 884
 885        // Models without global support fall back to regional
 886        assert_eq!(
 887            Model::NovaPro.cross_region_inference_id("us-east-1", true)?,
 888            "us.amazon.nova-pro-v1:0"
 889        );
 890        Ok(())
 891    }
 892
 893    #[test]
 894    fn test_models_without_cross_region() -> anyhow::Result<()> {
 895        // Models without cross-region support return their request_id directly
 896        assert_eq!(
 897            Model::Gemma3_4B.cross_region_inference_id("us-east-1", false)?,
 898            "google.gemma-3-4b-it"
 899        );
 900        assert_eq!(
 901            Model::MistralLarge3.cross_region_inference_id("eu-west-1", false)?,
 902            "mistral.mistral-large-3-675b-instruct"
 903        );
 904        assert_eq!(
 905            Model::Qwen3VL235B.cross_region_inference_id("ap-south-1", false)?,
 906            "qwen.qwen3-vl-235b-a22b"
 907        );
 908        assert_eq!(
 909            Model::GptOss120B.cross_region_inference_id("us-east-1", false)?,
 910            "openai.gpt-oss-120b-1:0"
 911        );
 912        assert_eq!(
 913            Model::MiniMaxM2.cross_region_inference_id("us-east-1", false)?,
 914            "minimax.minimax-m2"
 915        );
 916        assert_eq!(
 917            Model::KimiK2Thinking.cross_region_inference_id("us-east-1", false)?,
 918            "moonshot.kimi-k2-thinking"
 919        );
 920        Ok(())
 921    }
 922
 923    #[test]
 924    fn test_custom_model_inference_ids() -> anyhow::Result<()> {
 925        let custom_model = Model::Custom {
 926            name: "custom.my-model-v1:0".to_string(),
 927            max_tokens: 100000,
 928            display_name: Some("My Custom Model".to_string()),
 929            max_output_tokens: Some(8192),
 930            default_temperature: Some(0.7),
 931            cache_configuration: None,
 932        };
 933
 934        assert_eq!(
 935            custom_model.cross_region_inference_id("us-east-1", false)?,
 936            "custom.my-model-v1:0"
 937        );
 938        assert_eq!(
 939            custom_model.cross_region_inference_id("eu-west-1", true)?,
 940            "custom.my-model-v1:0"
 941        );
 942        Ok(())
 943    }
 944
 945    #[test]
 946    fn test_friendly_id_vs_request_id() {
 947        assert_eq!(Model::ClaudeSonnet4_5.id(), "claude-sonnet-4-5");
 948        assert_eq!(Model::NovaLite.id(), "nova-lite");
 949        assert_eq!(Model::DeepSeekR1.id(), "deepseek-r1");
 950        assert_eq!(Model::Llama4Scout17B.id(), "llama-4-scout-17b");
 951
 952        assert_eq!(
 953            Model::ClaudeSonnet4_5.request_id(),
 954            "anthropic.claude-sonnet-4-5-20250929-v1:0"
 955        );
 956        assert_eq!(Model::NovaLite.request_id(), "amazon.nova-lite-v1:0");
 957        assert_eq!(Model::DeepSeekR1.request_id(), "deepseek.r1-v1:0");
 958        assert_eq!(
 959            Model::Llama4Scout17B.request_id(),
 960            "meta.llama4-scout-17b-instruct-v1:0"
 961        );
 962
 963        // Thinking aliases deserialize to the same model
 964        assert_eq!(Model::ClaudeSonnet4.id(), "claude-sonnet-4");
 965        assert_eq!(
 966            Model::from_id("claude-sonnet-4-thinking").unwrap().id(),
 967            "claude-sonnet-4"
 968        );
 969    }
 970
 971    #[test]
 972    fn test_thinking_modes() {
 973        assert!(Model::ClaudeHaiku4_5.supports_thinking());
 974        assert!(Model::ClaudeSonnet4.supports_thinking());
 975        assert!(Model::ClaudeSonnet4_5.supports_thinking());
 976        assert!(Model::ClaudeOpus4_6.supports_thinking());
 977
 978        assert!(!Model::ClaudeSonnet4.supports_adaptive_thinking());
 979        assert!(Model::ClaudeOpus4_6.supports_adaptive_thinking());
 980        assert!(Model::ClaudeSonnet4_6.supports_adaptive_thinking());
 981
 982        assert_eq!(
 983            Model::ClaudeSonnet4.thinking_mode(),
 984            BedrockModelMode::Thinking {
 985                budget_tokens: Some(4096)
 986            }
 987        );
 988        assert_eq!(
 989            Model::ClaudeOpus4_6.thinking_mode(),
 990            BedrockModelMode::AdaptiveThinking {
 991                effort: BedrockAdaptiveThinkingEffort::High
 992            }
 993        );
 994        assert_eq!(
 995            Model::ClaudeHaiku4_5.thinking_mode(),
 996            BedrockModelMode::Thinking {
 997                budget_tokens: Some(4096)
 998            }
 999        );
1000    }
1001
1002    #[test]
1003    fn test_max_tokens() {
1004        assert_eq!(Model::ClaudeSonnet4_5.max_tokens(), 200_000);
1005        assert_eq!(Model::ClaudeOpus4_6.max_tokens(), 200_000);
1006        assert_eq!(Model::Llama4Scout17B.max_tokens(), 128_000);
1007        assert_eq!(Model::NovaPremier.max_tokens(), 1_000_000);
1008    }
1009
1010    #[test]
1011    fn test_max_output_tokens() {
1012        assert_eq!(Model::ClaudeSonnet4_5.max_output_tokens(), 64_000);
1013        assert_eq!(Model::ClaudeOpus4_6.max_output_tokens(), 128_000);
1014        assert_eq!(Model::ClaudeOpus4_1.max_output_tokens(), 32_000);
1015        assert_eq!(Model::Gemma3_4B.max_output_tokens(), 8_192);
1016    }
1017
1018    #[test]
1019    fn test_supports_tool_use() {
1020        assert!(Model::ClaudeSonnet4_5.supports_tool_use());
1021        assert!(Model::NovaPro.supports_tool_use());
1022        assert!(Model::MistralLarge3.supports_tool_use());
1023        assert!(!Model::Gemma3_4B.supports_tool_use());
1024        assert!(Model::Qwen3_32B.supports_tool_use());
1025        assert!(Model::MiniMaxM2.supports_tool_use());
1026        assert!(Model::KimiK2_5.supports_tool_use());
1027        assert!(Model::DeepSeekR1.supports_tool_use());
1028        assert!(!Model::Llama4Scout17B.supports_tool_use());
1029    }
1030
1031    #[test]
1032    fn test_supports_caching() {
1033        assert!(Model::ClaudeSonnet4_5.supports_caching());
1034        assert!(Model::ClaudeOpus4_6.supports_caching());
1035        assert!(!Model::Llama4Scout17B.supports_caching());
1036        assert!(!Model::NovaPro.supports_caching());
1037    }
1038}