diff --git a/cmd/neuralwatt/main.go b/cmd/neuralwatt/main.go index 750f135f66feeb85fd4b970fd2483d0156d1244e..12ae884e31a7b22c7fd0935dcfe5618c728f838b 100644 --- a/cmd/neuralwatt/main.go +++ b/cmd/neuralwatt/main.go @@ -18,126 +18,52 @@ import ( "charm.land/catwalk/pkg/catwalk" ) -type NeuralwattModel struct { - ID string `json:"id"` - MaxModelLen int64 `json:"max_model_len"` +type Pricing struct { + InputPerMillion *float64 `json:"input_per_million"` + OutputPerMillion *float64 `json:"output_per_million"` + CachedInputPerMillion *float64 `json:"cached_input_per_million"` + CachedOutputPerMillion *float64 `json:"cached_output_per_million"` + PricingTBD bool `json:"pricing_tbd"` } -type ModelsResponse struct { - Data []NeuralwattModel `json:"data"` +type Capabilities struct { + Tools bool `json:"tools"` + Vision bool `json:"vision"` + Reasoning bool `json:"reasoning"` + ReasoningEffort bool `json:"reasoning_effort"` } -// ModelMeta contains the hardcoded metadata for a Neuralwatt model. -// The API only returns id and max_model_len, so pricing and capabilities -// are sourced from the pricing page at https://portal.neuralwatt.com/pricing. -type ModelMeta struct { - Tools bool - Reasoning bool - Vision bool - CostPer1MIn float64 - CostPer1MOut float64 +type Limits struct { + MaxOutputTokens *int64 `json:"max_output_tokens"` } -var modelMetadata = map[string]ModelMeta{ - "mistralai/Devstral-Small-2-24B-Instruct-2512": { - Tools: true, - Reasoning: false, - Vision: true, - CostPer1MIn: 0.1, - CostPer1MOut: 0.3, - }, - "zai-org/GLM-5.1-FP8": { - Tools: true, - Reasoning: true, - Vision: false, - CostPer1MIn: 1.1, - CostPer1MOut: 3.6, - }, - "glm-5.1-fast": { - Tools: true, - Reasoning: false, - Vision: false, - CostPer1MIn: 1.1, - CostPer1MOut: 3.6, - }, - "openai/gpt-oss-20b": { - Tools: true, - Reasoning: false, - Vision: false, - CostPer1MIn: 0.0, - CostPer1MOut: 0.2, - }, - "moonshotai/Kimi-K2.5": { - Tools: true, - Reasoning: false, - Vision: true, - CostPer1MIn: 0.5, - CostPer1MOut: 2.6, - }, - "kimi-k2.5-fast": { - Tools: true, - Reasoning: false, - Vision: true, - CostPer1MIn: 0.5, - CostPer1MOut: 2.6, - }, - "MiniMaxAI/MiniMax-M2.5": { - Tools: true, - Reasoning: true, - Vision: false, - CostPer1MIn: 0.3, - CostPer1MOut: 1.4, - }, - "Qwen/Qwen3.5-35B-A3B": { - Tools: true, - Reasoning: true, - Vision: false, - CostPer1MIn: 0.3, - CostPer1MOut: 1.1, - }, - "Qwen/Qwen3.5-397B-A17B-FP8": { - Tools: true, - Reasoning: true, - Vision: false, - CostPer1MIn: 0.7, - CostPer1MOut: 4.1, - }, - "qwen3.5-397b-fast": { - Tools: true, - Reasoning: false, - Vision: false, - CostPer1MIn: 0.7, - CostPer1MOut: 4.1, - }, +type Metadata struct { + DisplayName string `json:"display_name"` + Pricing Pricing `json:"pricing"` + Capabilities Capabilities `json:"capabilities"` + Limits Limits `json:"limits"` + Deprecated bool `json:"deprecated"` } -// modelNames provides display names for Neuralwatt-owned models that lack an -// org prefix and use lowercase IDs. -var modelNames = map[string]string{ - "glm-5.1-fast": "GLM 5.1 Fast", - "kimi-k2.5-fast": "Kimi K2.5 Fast", - "qwen3.5-397b-fast": "Qwen3.5 397B Fast", +type NeuralwattModel struct { + ID string `json:"id"` + MaxModelLen int64 `json:"max_model_len"` + Metadata Metadata `json:"metadata"` +} + +type ModelsResponse struct { + Data []NeuralwattModel `json:"data"` } func roundCost(v float64) float64 { return math.Round(v*1e5) / 1e5 } -// modelDisplayName converts a model ID to a human-readable display name. For -// models with an org prefix (e.g. "zai-org/GLM-5-FP8"), the prefix is stripped. -// Neuralwatt-owned models without a prefix are looked up in modelNames for -// proper casing. -func modelDisplayName(id string) string { - if name, ok := modelNames[id]; ok { - return name - } - - name := id - if idx := strings.Index(name, "/"); idx != -1 { - name = name[idx+1:] +func ptrDeref[T any](v *T, fallback T) T { + if v == nil { + return fallback } - name = strings.ReplaceAll(name, "-", " ") - return name + return *v } func fetchNeuralwattModels(apiEndpoint string) (*ModelsResponse, error) { @@ -171,6 +97,14 @@ func fetchNeuralwattModels(apiEndpoint string) (*ModelsResponse, error) { return &mr, nil } +func fallbackDisplayName(id string) string { + name := id + if idx := strings.Index(name, "/"); idx != -1 { + name = name[idx+1:] + } + return strings.ReplaceAll(name, "-", " ") +} + func main() { neuralwattProvider := catwalk.Provider{ Name: "Neuralwatt", @@ -188,6 +122,13 @@ func main() { } for _, model := range modelsResp.Data { + meta := model.Metadata + + if meta.Deprecated { + fmt.Printf("Skipping deprecated model %s\n", model.ID) + continue + } + // Skip models with small context windows if model.MaxModelLen < 20000 { fmt.Printf("Skipping model %s: context %d < 20000\n", @@ -195,37 +136,49 @@ func main() { continue } - meta, ok := modelMetadata[model.ID] - if !ok { - fmt.Printf("Skipping unknown model %s (no metadata)\n", model.ID) + if !meta.Capabilities.Tools { + fmt.Printf("Skipping model %s (no tool support)\n", model.ID) continue } - // Only include models that support tools - if !meta.Tools { - continue + costIn := ptrDeref(meta.Pricing.InputPerMillion, 0) + costOut := ptrDeref(meta.Pricing.OutputPerMillion, 0) + // Null cached pricing means same as non-cached + costInCached := ptrDeref(meta.Pricing.CachedInputPerMillion, costIn) + costOutCached := ptrDeref(meta.Pricing.CachedOutputPerMillion, costOut) + + var defaultMaxTokens int64 + if meta.Limits.MaxOutputTokens != nil { + defaultMaxTokens = *meta.Limits.MaxOutputTokens + } else { + defaultMaxTokens = model.MaxModelLen / 10 } var reasoningLevels []string var defaultReasoning string - if meta.Reasoning { + if meta.Capabilities.ReasoningEffort { reasoningLevels = []string{"low", "medium", "high"} defaultReasoning = "medium" } + name := meta.DisplayName + if name == "" { + name = fallbackDisplayName(model.ID) + } + m := catwalk.Model{ ID: model.ID, - Name: modelDisplayName(model.ID), - CostPer1MIn: roundCost(meta.CostPer1MIn), - CostPer1MOut: roundCost(meta.CostPer1MOut), - CostPer1MInCached: 0, // Not available - CostPer1MOutCached: 0, // Not available + Name: name, + CostPer1MIn: roundCost(costIn), + CostPer1MOut: roundCost(costOut), + CostPer1MInCached: roundCost(costInCached), + CostPer1MOutCached: roundCost(costOutCached), ContextWindow: model.MaxModelLen, - DefaultMaxTokens: model.MaxModelLen / 10, - CanReason: meta.Reasoning, + DefaultMaxTokens: defaultMaxTokens, + CanReason: meta.Capabilities.Reasoning, DefaultReasoningEffort: defaultReasoning, ReasoningLevels: reasoningLevels, - SupportsImages: meta.Vision, + SupportsImages: meta.Capabilities.Vision, } neuralwattProvider.Models = append(neuralwattProvider.Models, m) diff --git a/internal/providers/configs/neuralwatt.json b/internal/providers/configs/neuralwatt.json index 743a3d485a4e9c6b024d0ab26aa37482fe275550..735961fd79591f69a309bdcbdf3644b2e49f00b5 100644 --- a/internal/providers/configs/neuralwatt.json +++ b/internal/providers/configs/neuralwatt.json @@ -9,41 +9,47 @@ "models": [ { "id": "mistralai/Devstral-Small-2-24B-Instruct-2512", - "name": "Devstral Small 2 24B Instruct 2512", - "cost_per_1m_in": 0.1, - "cost_per_1m_out": 0.3, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "name": "Devstral-Small-2-24B-Instruct-2512", + "cost_per_1m_in": 0.12, + "cost_per_1m_out": 0.35, + "cost_per_1m_in_cached": 0.12, + "cost_per_1m_out_cached": 0.35, "context_window": 262144, "default_max_tokens": 26214, "can_reason": false, "supports_attachments": true }, + { + "id": "glm-5-fast", + "name": "GLM-5 Fast", + "cost_per_1m_in": 1.1, + "cost_per_1m_out": 3.6, + "cost_per_1m_in_cached": 1.1, + "cost_per_1m_out_cached": 3.6, + "context_window": 202752, + "default_max_tokens": 20275, + "can_reason": false, + "supports_attachments": false + }, { "id": "zai-org/GLM-5.1-FP8", - "name": "GLM 5.1 FP8", + "name": "GLM-5.1", "cost_per_1m_in": 1.1, "cost_per_1m_out": 3.6, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_in_cached": 1.1, + "cost_per_1m_out_cached": 3.6, "context_window": 202752, "default_max_tokens": 20275, "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", "supports_attachments": false }, { "id": "glm-5.1-fast", - "name": "GLM 5.1 Fast", + "name": "GLM-5.1 Fast", "cost_per_1m_in": 1.1, "cost_per_1m_out": 3.6, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_in_cached": 1.1, + "cost_per_1m_out_cached": 3.6, "context_window": 202752, "default_max_tokens": 20275, "can_reason": false, @@ -52,74 +58,110 @@ { "id": "moonshotai/Kimi-K2.5", "name": "Kimi K2.5", - "cost_per_1m_in": 0.5, - "cost_per_1m_out": 2.6, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_in": 0.52, + "cost_per_1m_out": 2.59, + "cost_per_1m_in_cached": 0.52, + "cost_per_1m_out_cached": 2.59, "context_window": 262144, "default_max_tokens": 26214, - "can_reason": false, + "can_reason": true, "supports_attachments": true }, { "id": "kimi-k2.5-fast", "name": "Kimi K2.5 Fast", - "cost_per_1m_in": 0.5, - "cost_per_1m_out": 2.6, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_in": 0.52, + "cost_per_1m_out": 2.59, + "cost_per_1m_in_cached": 0.52, + "cost_per_1m_out_cached": 2.59, "context_window": 262144, "default_max_tokens": 26214, "can_reason": false, "supports_attachments": true }, + { + "id": "moonshotai/Kimi-K2.6", + "name": "Kimi K2.6", + "cost_per_1m_in": 0.69, + "cost_per_1m_out": 3.22, + "cost_per_1m_in_cached": 0.69, + "cost_per_1m_out_cached": 3.22, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "supports_attachments": true + }, + { + "id": "kimi-k2.6-fast", + "name": "Kimi K2.6 Fast", + "cost_per_1m_in": 0.69, + "cost_per_1m_out": 3.22, + "cost_per_1m_in_cached": 0.69, + "cost_per_1m_out_cached": 3.22, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "supports_attachments": true + }, { "id": "MiniMaxAI/MiniMax-M2.5", "name": "MiniMax M2.5", - "cost_per_1m_in": 0.3, - "cost_per_1m_out": 1.4, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_in": 0.35, + "cost_per_1m_out": 1.38, + "cost_per_1m_in_cached": 0.35, + "cost_per_1m_out_cached": 1.38, "context_window": 196608, "default_max_tokens": 19660, "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", "supports_attachments": false }, { "id": "Qwen/Qwen3.5-397B-A17B-FP8", - "name": "Qwen3.5 397B A17B FP8", - "cost_per_1m_in": 0.7, - "cost_per_1m_out": 4.1, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "name": "Qwen3.5 397B", + "cost_per_1m_in": 0.69, + "cost_per_1m_out": 4.14, + "cost_per_1m_in_cached": 0.69, + "cost_per_1m_out_cached": 4.14, "context_window": 262144, "default_max_tokens": 26214, "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", "supports_attachments": false }, { "id": "qwen3.5-397b-fast", "name": "Qwen3.5 397B Fast", - "cost_per_1m_in": 0.7, - "cost_per_1m_out": 4.1, - "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0, + "cost_per_1m_in": 0.69, + "cost_per_1m_out": 4.14, + "cost_per_1m_in_cached": 0.69, + "cost_per_1m_out_cached": 4.14, "context_window": 262144, "default_max_tokens": 26214, "can_reason": false, "supports_attachments": false + }, + { + "id": "Qwen/Qwen3.6-35B-A3B", + "name": "Qwen3.6 35B", + "cost_per_1m_in": 0.05, + "cost_per_1m_out": 0.1, + "cost_per_1m_in_cached": 0.05, + "cost_per_1m_out_cached": 0.1, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "supports_attachments": false + }, + { + "id": "qwen3.6-35b-fast", + "name": "Qwen3.6 35B Fast", + "cost_per_1m_in": 0.05, + "cost_per_1m_out": 0.1, + "cost_per_1m_in_cached": 0.05, + "cost_per_1m_out_cached": 0.1, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false } ] }