1use serde::{Deserialize, Serialize};
2use strum::EnumIter;
3
4#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
5#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize, PartialEq)]
6pub enum BedrockAdaptiveThinkingEffort {
7 Low,
8 Medium,
9 #[default]
10 High,
11 Max,
12}
13
14impl BedrockAdaptiveThinkingEffort {
15 pub fn as_str(&self) -> &'static str {
16 match self {
17 Self::Low => "low",
18 Self::Medium => "medium",
19 Self::High => "high",
20 Self::Max => "max",
21 }
22 }
23}
24
25#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
26#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
27pub enum BedrockModelMode {
28 #[default]
29 Default,
30 Thinking {
31 budget_tokens: Option<u64>,
32 },
33 AdaptiveThinking {
34 effort: BedrockAdaptiveThinkingEffort,
35 },
36}
37
38#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
39#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
40pub struct BedrockModelCacheConfiguration {
41 pub max_cache_anchors: usize,
42 pub min_total_token: u64,
43}
44
45#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
46#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, EnumIter)]
47pub enum Model {
48 // Anthropic Claude 4+ models
49 #[serde(rename = "claude-haiku-4-5", alias = "claude-haiku-4-5-latest")]
50 ClaudeHaiku4_5,
51 #[serde(
52 rename = "claude-sonnet-4",
53 alias = "claude-sonnet-4-latest",
54 alias = "claude-sonnet-4-thinking",
55 alias = "claude-sonnet-4-thinking-latest"
56 )]
57 ClaudeSonnet4,
58 #[default]
59 #[serde(
60 rename = "claude-sonnet-4-5",
61 alias = "claude-sonnet-4-5-latest",
62 alias = "claude-sonnet-4-5-thinking",
63 alias = "claude-sonnet-4-5-thinking-latest"
64 )]
65 ClaudeSonnet4_5,
66 #[serde(
67 rename = "claude-opus-4-1",
68 alias = "claude-opus-4-1-latest",
69 alias = "claude-opus-4-1-thinking",
70 alias = "claude-opus-4-1-thinking-latest"
71 )]
72 ClaudeOpus4_1,
73 #[serde(
74 rename = "claude-opus-4-5",
75 alias = "claude-opus-4-5-latest",
76 alias = "claude-opus-4-5-thinking",
77 alias = "claude-opus-4-5-thinking-latest"
78 )]
79 ClaudeOpus4_5,
80 #[serde(
81 rename = "claude-opus-4-6",
82 alias = "claude-opus-4-6-latest",
83 alias = "claude-opus-4-6-thinking",
84 alias = "claude-opus-4-6-thinking-latest"
85 )]
86 ClaudeOpus4_6,
87 #[serde(
88 rename = "claude-sonnet-4-6",
89 alias = "claude-sonnet-4-6-latest",
90 alias = "claude-sonnet-4-6-thinking",
91 alias = "claude-sonnet-4-6-thinking-latest"
92 )]
93 ClaudeSonnet4_6,
94
95 // Meta Llama 4 models
96 #[serde(rename = "llama-4-scout-17b")]
97 Llama4Scout17B,
98 #[serde(rename = "llama-4-maverick-17b")]
99 Llama4Maverick17B,
100
101 // Google Gemma 3 models
102 #[serde(rename = "gemma-3-4b")]
103 Gemma3_4B,
104 #[serde(rename = "gemma-3-12b")]
105 Gemma3_12B,
106 #[serde(rename = "gemma-3-27b")]
107 Gemma3_27B,
108
109 // Mistral models
110 #[serde(rename = "magistral-small")]
111 MagistralSmall,
112 #[serde(rename = "mistral-large-3")]
113 MistralLarge3,
114 #[serde(rename = "pixtral-large")]
115 PixtralLarge,
116 #[serde(rename = "devstral-2-123b")]
117 Devstral2_123B,
118 #[serde(rename = "ministral-14b")]
119 Ministral14B,
120
121 // Qwen models
122 #[serde(rename = "qwen3-32b")]
123 Qwen3_32B,
124 #[serde(rename = "qwen3-vl-235b")]
125 Qwen3VL235B,
126 #[serde(rename = "qwen3-235b")]
127 Qwen3_235B,
128 #[serde(rename = "qwen3-next-80b")]
129 Qwen3Next80B,
130 #[serde(rename = "qwen3-coder-30b")]
131 Qwen3Coder30B,
132 #[serde(rename = "qwen3-coder-next")]
133 Qwen3CoderNext,
134 #[serde(rename = "qwen3-coder-480b")]
135 Qwen3Coder480B,
136
137 // Amazon Nova models
138 #[serde(rename = "nova-lite")]
139 NovaLite,
140 #[serde(rename = "nova-pro")]
141 NovaPro,
142 #[serde(rename = "nova-premier")]
143 NovaPremier,
144 #[serde(rename = "nova-2-lite")]
145 Nova2Lite,
146
147 // OpenAI GPT OSS models
148 #[serde(rename = "gpt-oss-20b")]
149 GptOss20B,
150 #[serde(rename = "gpt-oss-120b")]
151 GptOss120B,
152
153 // NVIDIA Nemotron models
154 #[serde(rename = "nemotron-super-3-120b")]
155 NemotronSuper3_120B,
156 #[serde(rename = "nemotron-nano-3-30b")]
157 NemotronNano3_30B,
158
159 // MiniMax models
160 #[serde(rename = "minimax-m2")]
161 MiniMaxM2,
162 #[serde(rename = "minimax-m2-1")]
163 MiniMaxM2_1,
164 #[serde(rename = "minimax-m2-5")]
165 MiniMaxM2_5,
166
167 // Z.AI GLM models
168 #[serde(rename = "glm-5")]
169 GLM5,
170 #[serde(rename = "glm-4-7")]
171 GLM4_7,
172 #[serde(rename = "glm-4-7-flash")]
173 GLM4_7Flash,
174
175 // Moonshot models
176 #[serde(rename = "kimi-k2-thinking")]
177 KimiK2Thinking,
178 #[serde(rename = "kimi-k2-5")]
179 KimiK2_5,
180
181 // DeepSeek models
182 #[serde(rename = "deepseek-r1")]
183 DeepSeekR1,
184 #[serde(rename = "deepseek-v3")]
185 DeepSeekV3_1,
186 #[serde(rename = "deepseek-v3-2")]
187 DeepSeekV3_2,
188
189 #[serde(rename = "custom")]
190 Custom {
191 name: String,
192 max_tokens: u64,
193 display_name: Option<String>,
194 max_output_tokens: Option<u64>,
195 default_temperature: Option<f32>,
196 cache_configuration: Option<BedrockModelCacheConfiguration>,
197 },
198}
199
200impl Model {
201 pub fn default_fast(_region: &str) -> Self {
202 Self::ClaudeHaiku4_5
203 }
204
205 pub fn from_id(id: &str) -> anyhow::Result<Self> {
206 if id.starts_with("claude-opus-4-6") {
207 Ok(Self::ClaudeOpus4_6)
208 } else if id.starts_with("claude-opus-4-5") {
209 Ok(Self::ClaudeOpus4_5)
210 } else if id.starts_with("claude-opus-4-1") {
211 Ok(Self::ClaudeOpus4_1)
212 } else if id.starts_with("claude-sonnet-4-6") {
213 Ok(Self::ClaudeSonnet4_6)
214 } else if id.starts_with("claude-sonnet-4-5") {
215 Ok(Self::ClaudeSonnet4_5)
216 } else if id.starts_with("claude-sonnet-4") {
217 Ok(Self::ClaudeSonnet4)
218 } else if id.starts_with("claude-haiku-4-5") {
219 Ok(Self::ClaudeHaiku4_5)
220 } else {
221 anyhow::bail!("invalid model id {id}");
222 }
223 }
224
225 pub fn id(&self) -> &str {
226 match self {
227 Self::ClaudeHaiku4_5 => "claude-haiku-4-5",
228 Self::ClaudeSonnet4 => "claude-sonnet-4",
229 Self::ClaudeSonnet4_5 => "claude-sonnet-4-5",
230 Self::ClaudeOpus4_1 => "claude-opus-4-1",
231 Self::ClaudeOpus4_5 => "claude-opus-4-5",
232 Self::ClaudeOpus4_6 => "claude-opus-4-6",
233 Self::ClaudeSonnet4_6 => "claude-sonnet-4-6",
234 Self::Llama4Scout17B => "llama-4-scout-17b",
235 Self::Llama4Maverick17B => "llama-4-maverick-17b",
236 Self::Gemma3_4B => "gemma-3-4b",
237 Self::Gemma3_12B => "gemma-3-12b",
238 Self::Gemma3_27B => "gemma-3-27b",
239 Self::MagistralSmall => "magistral-small",
240 Self::MistralLarge3 => "mistral-large-3",
241 Self::PixtralLarge => "pixtral-large",
242 Self::Devstral2_123B => "devstral-2-123b",
243 Self::Ministral14B => "ministral-14b",
244 Self::Qwen3_32B => "qwen3-32b",
245 Self::Qwen3VL235B => "qwen3-vl-235b",
246 Self::Qwen3_235B => "qwen3-235b",
247 Self::Qwen3Next80B => "qwen3-next-80b",
248 Self::Qwen3Coder30B => "qwen3-coder-30b",
249 Self::Qwen3CoderNext => "qwen3-coder-next",
250 Self::Qwen3Coder480B => "qwen3-coder-480b",
251 Self::NovaLite => "nova-lite",
252 Self::NovaPro => "nova-pro",
253 Self::NovaPremier => "nova-premier",
254 Self::Nova2Lite => "nova-2-lite",
255 Self::GptOss20B => "gpt-oss-20b",
256 Self::GptOss120B => "gpt-oss-120b",
257 Self::NemotronSuper3_120B => "nemotron-super-3-120b",
258 Self::NemotronNano3_30B => "nemotron-nano-3-30b",
259 Self::MiniMaxM2 => "minimax-m2",
260 Self::MiniMaxM2_1 => "minimax-m2-1",
261 Self::MiniMaxM2_5 => "minimax-m2-5",
262 Self::GLM5 => "glm-5",
263 Self::GLM4_7 => "glm-4-7",
264 Self::GLM4_7Flash => "glm-4-7-flash",
265 Self::KimiK2Thinking => "kimi-k2-thinking",
266 Self::KimiK2_5 => "kimi-k2-5",
267 Self::DeepSeekR1 => "deepseek-r1",
268 Self::DeepSeekV3_1 => "deepseek-v3",
269 Self::DeepSeekV3_2 => "deepseek-v3-2",
270 Self::Custom { name, .. } => name,
271 }
272 }
273
274 pub fn request_id(&self) -> &str {
275 match self {
276 Self::ClaudeHaiku4_5 => "anthropic.claude-haiku-4-5-20251001-v1:0",
277 Self::ClaudeSonnet4 => "anthropic.claude-sonnet-4-20250514-v1:0",
278 Self::ClaudeSonnet4_5 => "anthropic.claude-sonnet-4-5-20250929-v1:0",
279 Self::ClaudeOpus4_1 => "anthropic.claude-opus-4-1-20250805-v1:0",
280 Self::ClaudeOpus4_5 => "anthropic.claude-opus-4-5-20251101-v1:0",
281 Self::ClaudeOpus4_6 => "anthropic.claude-opus-4-6-v1",
282 Self::ClaudeSonnet4_6 => "anthropic.claude-sonnet-4-6",
283 Self::Llama4Scout17B => "meta.llama4-scout-17b-instruct-v1:0",
284 Self::Llama4Maverick17B => "meta.llama4-maverick-17b-instruct-v1:0",
285 Self::Gemma3_4B => "google.gemma-3-4b-it",
286 Self::Gemma3_12B => "google.gemma-3-12b-it",
287 Self::Gemma3_27B => "google.gemma-3-27b-it",
288 Self::MagistralSmall => "mistral.magistral-small-2509",
289 Self::MistralLarge3 => "mistral.mistral-large-3-675b-instruct",
290 Self::PixtralLarge => "mistral.pixtral-large-2502-v1:0",
291 Self::Devstral2_123B => "mistral.devstral-2-123b",
292 Self::Ministral14B => "mistral.ministral-3-14b-instruct",
293 Self::Qwen3VL235B => "qwen.qwen3-vl-235b-a22b",
294 Self::Qwen3_32B => "qwen.qwen3-32b-v1:0",
295 Self::Qwen3_235B => "qwen.qwen3-235b-a22b-2507-v1:0",
296 Self::Qwen3Next80B => "qwen.qwen3-next-80b-a3b",
297 Self::Qwen3Coder30B => "qwen.qwen3-coder-30b-a3b-v1:0",
298 Self::Qwen3CoderNext => "qwen.qwen3-coder-next",
299 Self::Qwen3Coder480B => "qwen.qwen3-coder-480b-a35b-v1:0",
300 Self::NovaLite => "amazon.nova-lite-v1:0",
301 Self::NovaPro => "amazon.nova-pro-v1:0",
302 Self::NovaPremier => "amazon.nova-premier-v1:0",
303 Self::Nova2Lite => "amazon.nova-2-lite-v1:0",
304 Self::GptOss20B => "openai.gpt-oss-20b-1:0",
305 Self::GptOss120B => "openai.gpt-oss-120b-1:0",
306 Self::NemotronSuper3_120B => "nvidia.nemotron-super-3-120b",
307 Self::NemotronNano3_30B => "nvidia.nemotron-nano-3-30b",
308 Self::MiniMaxM2 => "minimax.minimax-m2",
309 Self::MiniMaxM2_1 => "minimax.minimax-m2.1",
310 Self::MiniMaxM2_5 => "minimax.minimax-m2.5",
311 Self::GLM5 => "zai.glm-5",
312 Self::GLM4_7 => "zai.glm-4.7",
313 Self::GLM4_7Flash => "zai.glm-4.7-flash",
314 Self::KimiK2Thinking => "moonshot.kimi-k2-thinking",
315 Self::KimiK2_5 => "moonshotai.kimi-k2.5",
316 Self::DeepSeekR1 => "deepseek.r1-v1:0",
317 Self::DeepSeekV3_1 => "deepseek.v3-v1:0",
318 Self::DeepSeekV3_2 => "deepseek.v3.2",
319 Self::Custom { name, .. } => name,
320 }
321 }
322
323 pub fn display_name(&self) -> &str {
324 match self {
325 Self::ClaudeHaiku4_5 => "Claude Haiku 4.5",
326 Self::ClaudeSonnet4 => "Claude Sonnet 4",
327 Self::ClaudeSonnet4_5 => "Claude Sonnet 4.5",
328 Self::ClaudeOpus4_1 => "Claude Opus 4.1",
329 Self::ClaudeOpus4_5 => "Claude Opus 4.5",
330 Self::ClaudeOpus4_6 => "Claude Opus 4.6",
331 Self::ClaudeSonnet4_6 => "Claude Sonnet 4.6",
332 Self::Llama4Scout17B => "Llama 4 Scout 17B",
333 Self::Llama4Maverick17B => "Llama 4 Maverick 17B",
334 Self::Gemma3_4B => "Gemma 3 4B",
335 Self::Gemma3_12B => "Gemma 3 12B",
336 Self::Gemma3_27B => "Gemma 3 27B",
337 Self::MagistralSmall => "Magistral Small",
338 Self::MistralLarge3 => "Mistral Large 3",
339 Self::PixtralLarge => "Pixtral Large",
340 Self::Devstral2_123B => "Devstral 2 123B",
341 Self::Ministral14B => "Ministral 14B",
342 Self::Qwen3VL235B => "Qwen3 VL 235B",
343 Self::Qwen3_32B => "Qwen3 32B",
344 Self::Qwen3_235B => "Qwen3 235B",
345 Self::Qwen3Next80B => "Qwen3 Next 80B",
346 Self::Qwen3Coder30B => "Qwen3 Coder 30B",
347 Self::Qwen3CoderNext => "Qwen3 Coder Next",
348 Self::Qwen3Coder480B => "Qwen3 Coder 480B",
349 Self::NovaLite => "Amazon Nova Lite",
350 Self::NovaPro => "Amazon Nova Pro",
351 Self::NovaPremier => "Amazon Nova Premier",
352 Self::Nova2Lite => "Amazon Nova 2 Lite",
353 Self::GptOss20B => "GPT OSS 20B",
354 Self::GptOss120B => "GPT OSS 120B",
355 Self::NemotronSuper3_120B => "Nemotron Super 3 120B",
356 Self::NemotronNano3_30B => "Nemotron Nano 3 30B",
357 Self::MiniMaxM2 => "MiniMax M2",
358 Self::MiniMaxM2_1 => "MiniMax M2.1",
359 Self::MiniMaxM2_5 => "MiniMax M2.5",
360 Self::GLM5 => "GLM 5",
361 Self::GLM4_7 => "GLM 4.7",
362 Self::GLM4_7Flash => "GLM 4.7 Flash",
363 Self::KimiK2Thinking => "Kimi K2 Thinking",
364 Self::KimiK2_5 => "Kimi K2.5",
365 Self::DeepSeekR1 => "DeepSeek R1",
366 Self::DeepSeekV3_1 => "DeepSeek V3.1",
367 Self::DeepSeekV3_2 => "DeepSeek V3.2",
368 Self::Custom {
369 display_name, name, ..
370 } => display_name.as_deref().unwrap_or(name.as_str()),
371 }
372 }
373
374 pub fn max_token_count(&self) -> u64 {
375 self.max_tokens()
376 }
377
378 pub fn max_tokens(&self) -> u64 {
379 match self {
380 Self::ClaudeHaiku4_5
381 | Self::ClaudeSonnet4
382 | Self::ClaudeSonnet4_5
383 | Self::ClaudeOpus4_1
384 | Self::ClaudeOpus4_5
385 | Self::ClaudeOpus4_6
386 | Self::ClaudeSonnet4_6 => 200_000,
387 Self::Llama4Scout17B | Self::Llama4Maverick17B => 128_000,
388 Self::Gemma3_4B | Self::Gemma3_12B | Self::Gemma3_27B => 128_000,
389 Self::MagistralSmall | Self::MistralLarge3 | Self::PixtralLarge => 128_000,
390 Self::Devstral2_123B | Self::Ministral14B => 256_000,
391 Self::Qwen3_32B
392 | Self::Qwen3VL235B
393 | Self::Qwen3_235B
394 | Self::Qwen3Next80B
395 | Self::Qwen3Coder30B
396 | Self::Qwen3CoderNext
397 | Self::Qwen3Coder480B => 128_000,
398 Self::NovaLite | Self::NovaPro => 300_000,
399 Self::NovaPremier => 1_000_000,
400 Self::Nova2Lite => 300_000,
401 Self::GptOss20B | Self::GptOss120B => 128_000,
402 Self::NemotronSuper3_120B | Self::NemotronNano3_30B => 262_000,
403 Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => 196_000,
404 Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => 203_000,
405 Self::KimiK2Thinking | Self::KimiK2_5 => 128_000,
406 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 128_000,
407 Self::Custom { max_tokens, .. } => *max_tokens,
408 }
409 }
410
411 pub fn max_output_tokens(&self) -> u64 {
412 match self {
413 Self::ClaudeHaiku4_5
414 | Self::ClaudeSonnet4
415 | Self::ClaudeSonnet4_5
416 | Self::ClaudeOpus4_5
417 | Self::ClaudeSonnet4_6 => 64_000,
418 Self::ClaudeOpus4_1 => 32_000,
419 Self::ClaudeOpus4_6 => 128_000,
420 Self::Llama4Scout17B
421 | Self::Llama4Maverick17B
422 | Self::Gemma3_4B
423 | Self::Gemma3_12B
424 | Self::Gemma3_27B
425 | Self::MagistralSmall
426 | Self::MistralLarge3
427 | Self::PixtralLarge => 8_192,
428 Self::Devstral2_123B | Self::Ministral14B => 131_000,
429 Self::Qwen3_32B
430 | Self::Qwen3VL235B
431 | Self::Qwen3_235B
432 | Self::Qwen3Next80B
433 | Self::Qwen3Coder30B
434 | Self::Qwen3CoderNext
435 | Self::Qwen3Coder480B => 8_192,
436 Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => 5_000,
437 Self::GptOss20B | Self::GptOss120B => 16_000,
438 Self::NemotronSuper3_120B | Self::NemotronNano3_30B => 131_000,
439 Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => 98_000,
440 Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => 101_000,
441 Self::KimiK2Thinking | Self::KimiK2_5 => 16_000,
442 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => 16_000,
443 Self::Custom {
444 max_output_tokens, ..
445 } => max_output_tokens.unwrap_or(4_096),
446 }
447 }
448
449 pub fn default_temperature(&self) -> f32 {
450 match self {
451 Self::ClaudeHaiku4_5
452 | Self::ClaudeSonnet4
453 | Self::ClaudeSonnet4_5
454 | Self::ClaudeOpus4_1
455 | Self::ClaudeOpus4_5
456 | Self::ClaudeOpus4_6
457 | Self::ClaudeSonnet4_6 => 1.0,
458 Self::Custom {
459 default_temperature,
460 ..
461 } => default_temperature.unwrap_or(1.0),
462 _ => 1.0,
463 }
464 }
465
466 pub fn supports_tool_use(&self) -> bool {
467 match self {
468 Self::ClaudeHaiku4_5
469 | Self::ClaudeSonnet4
470 | Self::ClaudeSonnet4_5
471 | Self::ClaudeOpus4_1
472 | Self::ClaudeOpus4_5
473 | Self::ClaudeOpus4_6
474 | Self::ClaudeSonnet4_6 => true,
475 Self::NovaLite | Self::NovaPro | Self::NovaPremier | Self::Nova2Lite => true,
476 Self::MistralLarge3 | Self::PixtralLarge | Self::MagistralSmall => true,
477 Self::Devstral2_123B | Self::Ministral14B => true,
478 // Gemma accepts toolConfig without error but produces unreliable tool
479 // calls -- malformed JSON args, hallucinated tool names, dropped calls.
480 Self::Qwen3_32B
481 | Self::Qwen3VL235B
482 | Self::Qwen3_235B
483 | Self::Qwen3Next80B
484 | Self::Qwen3Coder30B
485 | Self::Qwen3CoderNext
486 | Self::Qwen3Coder480B => true,
487 Self::MiniMaxM2 | Self::MiniMaxM2_1 | Self::MiniMaxM2_5 => true,
488 Self::NemotronSuper3_120B | Self::NemotronNano3_30B => true,
489 Self::GLM5 | Self::GLM4_7 | Self::GLM4_7Flash => true,
490 Self::KimiK2Thinking | Self::KimiK2_5 => true,
491 Self::DeepSeekR1 | Self::DeepSeekV3_1 | Self::DeepSeekV3_2 => true,
492 _ => false,
493 }
494 }
495
496 pub fn supports_images(&self) -> bool {
497 match self {
498 Self::ClaudeHaiku4_5
499 | Self::ClaudeSonnet4
500 | Self::ClaudeSonnet4_5
501 | Self::ClaudeOpus4_1
502 | Self::ClaudeOpus4_5
503 | Self::ClaudeOpus4_6
504 | Self::ClaudeSonnet4_6 => true,
505 Self::NovaLite | Self::NovaPro => true,
506 Self::PixtralLarge => true,
507 Self::Qwen3VL235B => true,
508 Self::KimiK2_5 => true,
509 _ => false,
510 }
511 }
512
513 pub fn supports_extended_context(&self) -> bool {
514 matches!(
515 self,
516 Self::ClaudeSonnet4
517 | Self::ClaudeSonnet4_5
518 | Self::ClaudeOpus4_5
519 | Self::ClaudeOpus4_6
520 | Self::ClaudeSonnet4_6
521 )
522 }
523
524 pub fn supports_caching(&self) -> bool {
525 match self {
526 Self::ClaudeHaiku4_5
527 | Self::ClaudeSonnet4
528 | Self::ClaudeSonnet4_5
529 | Self::ClaudeOpus4_1
530 | Self::ClaudeOpus4_5
531 | Self::ClaudeOpus4_6
532 | Self::ClaudeSonnet4_6 => true,
533 Self::Custom {
534 cache_configuration,
535 ..
536 } => cache_configuration.is_some(),
537 _ => false,
538 }
539 }
540
541 pub fn cache_configuration(&self) -> Option<BedrockModelCacheConfiguration> {
542 match self {
543 Self::ClaudeSonnet4
544 | Self::ClaudeSonnet4_5
545 | Self::ClaudeOpus4_1
546 | Self::ClaudeOpus4_5
547 | Self::ClaudeOpus4_6
548 | Self::ClaudeSonnet4_6 => Some(BedrockModelCacheConfiguration {
549 max_cache_anchors: 4,
550 min_total_token: 1024,
551 }),
552 Self::ClaudeHaiku4_5 => Some(BedrockModelCacheConfiguration {
553 max_cache_anchors: 4,
554 min_total_token: 2048,
555 }),
556 Self::Custom {
557 cache_configuration,
558 ..
559 } => cache_configuration.clone(),
560 _ => None,
561 }
562 }
563
564 pub fn supports_thinking(&self) -> bool {
565 matches!(
566 self,
567 Self::ClaudeHaiku4_5
568 | Self::ClaudeSonnet4
569 | Self::ClaudeSonnet4_5
570 | Self::ClaudeOpus4_1
571 | Self::ClaudeOpus4_5
572 | Self::ClaudeOpus4_6
573 | Self::ClaudeSonnet4_6
574 )
575 }
576
577 pub fn supports_adaptive_thinking(&self) -> bool {
578 matches!(self, Self::ClaudeOpus4_6 | Self::ClaudeSonnet4_6)
579 }
580
581 pub fn thinking_mode(&self) -> BedrockModelMode {
582 if self.supports_adaptive_thinking() {
583 BedrockModelMode::AdaptiveThinking {
584 effort: BedrockAdaptiveThinkingEffort::default(),
585 }
586 } else if self.supports_thinking() {
587 BedrockModelMode::Thinking {
588 budget_tokens: Some(4096),
589 }
590 } else {
591 BedrockModelMode::Default
592 }
593 }
594
595 pub fn cross_region_inference_id(
596 &self,
597 region: &str,
598 allow_global: bool,
599 ) -> anyhow::Result<String> {
600 let model_id = self.request_id();
601
602 let supports_global = matches!(
603 self,
604 Self::ClaudeHaiku4_5
605 | Self::ClaudeSonnet4
606 | Self::ClaudeSonnet4_5
607 | Self::ClaudeOpus4_5
608 | Self::ClaudeOpus4_6
609 | Self::ClaudeSonnet4_6
610 | Self::Nova2Lite
611 );
612
613 // Determine region group based on AWS region
614 let region_group = if region.starts_with("us-gov-") {
615 "us-gov"
616 } else if region.starts_with("us-") || region.starts_with("sa-") {
617 if allow_global && supports_global {
618 "global"
619 } else {
620 "us"
621 }
622 } else if region.starts_with("ca-") {
623 if allow_global && supports_global {
624 "global"
625 } else {
626 "ca"
627 }
628 } else if region.starts_with("eu-") {
629 if allow_global && supports_global {
630 "global"
631 } else {
632 "eu"
633 }
634 } else if region == "ap-southeast-2" || region == "ap-southeast-4" {
635 // Australia
636 if allow_global && supports_global {
637 "global"
638 } else {
639 "au"
640 }
641 } else if region == "ap-northeast-1" || region == "ap-northeast-3" {
642 // Japan
643 if allow_global && supports_global {
644 "global"
645 } else {
646 "jp"
647 }
648 } else if region.starts_with("ap-") || region.starts_with("me-") {
649 if allow_global && supports_global {
650 "global"
651 } else {
652 "apac"
653 }
654 } else {
655 anyhow::bail!("Unsupported Region {region}");
656 };
657
658 match (self, region_group) {
659 (Self::Custom { .. }, _) => Ok(model_id.into()),
660
661 // Global inference profiles
662 (
663 Self::ClaudeHaiku4_5
664 | Self::ClaudeSonnet4
665 | Self::ClaudeSonnet4_5
666 | Self::ClaudeOpus4_5
667 | Self::ClaudeOpus4_6
668 | Self::ClaudeSonnet4_6
669 | Self::Nova2Lite,
670 "global",
671 ) => Ok(format!("{}.{}", region_group, model_id)),
672
673 // US Government region inference profiles
674 (Self::ClaudeSonnet4_5, "us-gov") => Ok(format!("{}.{}", region_group, model_id)),
675
676 // US region inference profiles
677 (
678 Self::ClaudeHaiku4_5
679 | Self::ClaudeSonnet4
680 | Self::ClaudeSonnet4_5
681 | Self::ClaudeOpus4_1
682 | Self::ClaudeOpus4_5
683 | Self::ClaudeOpus4_6
684 | Self::ClaudeSonnet4_6
685 | Self::Llama4Scout17B
686 | Self::Llama4Maverick17B
687 | Self::NovaLite
688 | Self::NovaPro
689 | Self::NovaPremier
690 | Self::Nova2Lite
691 | Self::PixtralLarge
692 | Self::DeepSeekR1,
693 "us",
694 ) => Ok(format!("{}.{}", region_group, model_id)),
695
696 // Canada region inference profiles
697 (Self::NovaLite, "ca") => Ok(format!("{}.{}", region_group, model_id)),
698
699 // EU region inference profiles
700 (
701 Self::ClaudeHaiku4_5
702 | Self::ClaudeSonnet4
703 | Self::ClaudeSonnet4_5
704 | Self::ClaudeOpus4_6
705 | Self::ClaudeSonnet4_6
706 | Self::NovaLite
707 | Self::NovaPro
708 | Self::Nova2Lite,
709 "eu",
710 ) => Ok(format!("{}.{}", region_group, model_id)),
711
712 // Australia region inference profiles
713 (
714 Self::ClaudeHaiku4_5
715 | Self::ClaudeSonnet4_5
716 | Self::ClaudeOpus4_6
717 | Self::ClaudeSonnet4_6,
718 "au",
719 ) => Ok(format!("{}.{}", region_group, model_id)),
720
721 // Japan region inference profiles
722 (
723 Self::ClaudeHaiku4_5
724 | Self::ClaudeSonnet4_5
725 | Self::ClaudeSonnet4_6
726 | Self::Nova2Lite,
727 "jp",
728 ) => Ok(format!("{}.{}", region_group, model_id)),
729
730 // APAC region inference profiles (other than AU/JP)
731 (
732 Self::ClaudeHaiku4_5
733 | Self::ClaudeSonnet4
734 | Self::ClaudeSonnet4_5
735 | Self::NovaLite
736 | Self::NovaPro
737 | Self::Nova2Lite,
738 "apac",
739 ) => Ok(format!("{}.{}", region_group, model_id)),
740
741 // Default: use model ID directly
742 _ => Ok(model_id.into()),
743 }
744 }
745}
746
#[cfg(test)]
mod tests {
    use super::*;

    // Checks every `(model, region, expected id)` row against
    // `Model::cross_region_inference_id` with the given `allow_global` flag.
    fn check_inference_ids(
        allow_global: bool,
        rows: &[(Model, &str, &str)],
    ) -> anyhow::Result<()> {
        for (model, region, expected) in rows {
            let actual = model.cross_region_inference_id(region, allow_global)?;
            assert_eq!(actual, *expected, "{model:?} in {region}");
        }
        Ok(())
    }

    #[test]
    fn test_us_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(
            false,
            &[
                (
                    Model::ClaudeSonnet4_5,
                    "us-east-1",
                    "us.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::ClaudeSonnet4,
                    "us-west-2",
                    "us.anthropic.claude-sonnet-4-20250514-v1:0",
                ),
                (Model::NovaPro, "us-east-2", "us.amazon.nova-pro-v1:0"),
                (Model::DeepSeekR1, "us-east-1", "us.deepseek.r1-v1:0"),
            ],
        )
    }

    #[test]
    fn test_eu_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(
            false,
            &[
                (
                    Model::ClaudeSonnet4,
                    "eu-west-1",
                    "eu.anthropic.claude-sonnet-4-20250514-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "eu-west-1",
                    "eu.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (Model::NovaLite, "eu-north-1", "eu.amazon.nova-lite-v1:0"),
                (
                    Model::ClaudeOpus4_6,
                    "eu-west-1",
                    "eu.anthropic.claude-opus-4-6-v1",
                ),
            ],
        )
    }

    #[test]
    fn test_apac_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(
            false,
            &[
                (
                    Model::ClaudeSonnet4_5,
                    "ap-south-1",
                    "apac.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (Model::NovaLite, "ap-south-1", "apac.amazon.nova-lite-v1:0"),
            ],
        )
    }

    #[test]
    fn test_au_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(
            false,
            &[
                (
                    Model::ClaudeHaiku4_5,
                    "ap-southeast-2",
                    "au.anthropic.claude-haiku-4-5-20251001-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "ap-southeast-4",
                    "au.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::ClaudeOpus4_6,
                    "ap-southeast-2",
                    "au.anthropic.claude-opus-4-6-v1",
                ),
            ],
        )
    }

    #[test]
    fn test_jp_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(
            false,
            &[
                (
                    Model::ClaudeHaiku4_5,
                    "ap-northeast-1",
                    "jp.anthropic.claude-haiku-4-5-20251001-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "ap-northeast-3",
                    "jp.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::Nova2Lite,
                    "ap-northeast-1",
                    "jp.amazon.nova-2-lite-v1:0",
                ),
            ],
        )
    }

    #[test]
    fn test_ca_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(
            false,
            &[(Model::NovaLite, "ca-central-1", "ca.amazon.nova-lite-v1:0")],
        )
    }

    #[test]
    fn test_gov_region_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(
            false,
            &[
                (
                    Model::ClaudeSonnet4_5,
                    "us-gov-east-1",
                    "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "us-gov-west-1",
                    "us-gov.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
            ],
        )
    }

    #[test]
    fn test_global_inference_ids() -> anyhow::Result<()> {
        check_inference_ids(
            true,
            &[
                (
                    Model::ClaudeSonnet4,
                    "us-east-1",
                    "global.anthropic.claude-sonnet-4-20250514-v1:0",
                ),
                (
                    Model::ClaudeSonnet4_5,
                    "eu-west-1",
                    "global.anthropic.claude-sonnet-4-5-20250929-v1:0",
                ),
                (
                    Model::ClaudeHaiku4_5,
                    "ap-south-1",
                    "global.anthropic.claude-haiku-4-5-20251001-v1:0",
                ),
                (
                    Model::ClaudeOpus4_6,
                    "us-east-1",
                    "global.anthropic.claude-opus-4-6-v1",
                ),
                (
                    Model::Nova2Lite,
                    "us-east-1",
                    "global.amazon.nova-2-lite-v1:0",
                ),
                // A model without a global profile falls back to its regional one.
                (Model::NovaPro, "us-east-1", "us.amazon.nova-pro-v1:0"),
            ],
        )
    }

    #[test]
    fn test_models_without_cross_region() -> anyhow::Result<()> {
        // With no inference profile, the plain request id comes back.
        check_inference_ids(
            false,
            &[
                (Model::Gemma3_4B, "us-east-1", "google.gemma-3-4b-it"),
                (
                    Model::MistralLarge3,
                    "eu-west-1",
                    "mistral.mistral-large-3-675b-instruct",
                ),
                (Model::Qwen3VL235B, "ap-south-1", "qwen.qwen3-vl-235b-a22b"),
                (Model::GptOss120B, "us-east-1", "openai.gpt-oss-120b-1:0"),
                (Model::MiniMaxM2, "us-east-1", "minimax.minimax-m2"),
                (
                    Model::KimiK2Thinking,
                    "us-east-1",
                    "moonshot.kimi-k2-thinking",
                ),
            ],
        )
    }

    #[test]
    fn test_custom_model_inference_ids() -> anyhow::Result<()> {
        let custom_model = Model::Custom {
            name: "custom.my-model-v1:0".to_string(),
            max_tokens: 100000,
            display_name: Some("My Custom Model".to_string()),
            max_output_tokens: Some(8192),
            default_temperature: Some(0.7),
            cache_configuration: None,
        };

        // The name is returned untouched regardless of region or the global flag.
        for (region, allow_global) in [("us-east-1", false), ("eu-west-1", true)] {
            assert_eq!(
                custom_model.cross_region_inference_id(region, allow_global)?,
                "custom.my-model-v1:0"
            );
        }
        Ok(())
    }

    #[test]
    fn test_friendly_id_vs_request_id() {
        let rows = [
            (
                Model::ClaudeSonnet4_5,
                "claude-sonnet-4-5",
                "anthropic.claude-sonnet-4-5-20250929-v1:0",
            ),
            (Model::NovaLite, "nova-lite", "amazon.nova-lite-v1:0"),
            (Model::DeepSeekR1, "deepseek-r1", "deepseek.r1-v1:0"),
            (
                Model::Llama4Scout17B,
                "llama-4-scout-17b",
                "meta.llama4-scout-17b-instruct-v1:0",
            ),
        ];
        for (model, friendly_id, _) in &rows {
            assert_eq!(model.id(), *friendly_id);
        }
        for (model, _, request_id) in &rows {
            assert_eq!(model.request_id(), *request_id);
        }

        // A thinking-suffixed id resolves to the same model through `from_id`.
        assert_eq!(Model::ClaudeSonnet4.id(), "claude-sonnet-4");
        assert_eq!(
            Model::from_id("claude-sonnet-4-thinking").unwrap().id(),
            "claude-sonnet-4"
        );
    }

    #[test]
    fn test_thinking_modes() {
        for model in [
            Model::ClaudeHaiku4_5,
            Model::ClaudeSonnet4,
            Model::ClaudeSonnet4_5,
            Model::ClaudeOpus4_6,
        ] {
            assert!(model.supports_thinking(), "{model:?}");
        }

        assert!(!Model::ClaudeSonnet4.supports_adaptive_thinking());
        for model in [Model::ClaudeOpus4_6, Model::ClaudeSonnet4_6] {
            assert!(model.supports_adaptive_thinking(), "{model:?}");
        }

        let budgeted = BedrockModelMode::Thinking {
            budget_tokens: Some(4096),
        };
        assert_eq!(Model::ClaudeSonnet4.thinking_mode(), budgeted);
        assert_eq!(
            Model::ClaudeOpus4_6.thinking_mode(),
            BedrockModelMode::AdaptiveThinking {
                effort: BedrockAdaptiveThinkingEffort::High
            }
        );
        assert_eq!(Model::ClaudeHaiku4_5.thinking_mode(), budgeted);
    }

    #[test]
    fn test_max_tokens() {
        for (model, expected) in [
            (Model::ClaudeSonnet4_5, 200_000),
            (Model::ClaudeOpus4_6, 200_000),
            (Model::Llama4Scout17B, 128_000),
            (Model::NovaPremier, 1_000_000),
        ] {
            assert_eq!(model.max_tokens(), expected, "{model:?}");
        }
    }

    #[test]
    fn test_max_output_tokens() {
        for (model, expected) in [
            (Model::ClaudeSonnet4_5, 64_000),
            (Model::ClaudeOpus4_6, 128_000),
            (Model::ClaudeOpus4_1, 32_000),
            (Model::Gemma3_4B, 8_192),
        ] {
            assert_eq!(model.max_output_tokens(), expected, "{model:?}");
        }
    }

    #[test]
    fn test_supports_tool_use() {
        for model in [
            Model::ClaudeSonnet4_5,
            Model::NovaPro,
            Model::MistralLarge3,
            Model::Qwen3_32B,
            Model::MiniMaxM2,
            Model::KimiK2_5,
            Model::DeepSeekR1,
        ] {
            assert!(model.supports_tool_use(), "{model:?}");
        }
        for model in [Model::Gemma3_4B, Model::Llama4Scout17B] {
            assert!(!model.supports_tool_use(), "{model:?}");
        }
    }

    #[test]
    fn test_supports_caching() {
        for model in [Model::ClaudeSonnet4_5, Model::ClaudeOpus4_6] {
            assert!(model.supports_caching(), "{model:?}");
        }
        for model in [Model::Llama4Scout17B, Model::NovaPro] {
            assert!(!model.supports_caching(), "{model:?}");
        }
    }
}