fix(neuralwatt): remove hardcoded overrides (#268)

Authored by Amolith

As a consequence, this change also adds some new models.

Change summary

cmd/neuralwatt/main.go                     | 193 +++++++++--------------
internal/providers/configs/neuralwatt.json | 144 +++++++++++------
2 files changed, 166 insertions(+), 171 deletions(-)

Detailed changes

cmd/neuralwatt/main.go 🔗

@@ -18,126 +18,52 @@ import (
 	"charm.land/catwalk/pkg/catwalk"
 )
 
-type NeuralwattModel struct {
-	ID          string `json:"id"`
-	MaxModelLen int64  `json:"max_model_len"`
+type Pricing struct {
+	InputPerMillion        *float64 `json:"input_per_million"`
+	OutputPerMillion       *float64 `json:"output_per_million"`
+	CachedInputPerMillion  *float64 `json:"cached_input_per_million"`
+	CachedOutputPerMillion *float64 `json:"cached_output_per_million"`
+	PricingTBD             bool     `json:"pricing_tbd"`
 }
 
-type ModelsResponse struct {
-	Data []NeuralwattModel `json:"data"`
+type Capabilities struct {
+	Tools           bool `json:"tools"`
+	Vision          bool `json:"vision"`
+	Reasoning       bool `json:"reasoning"`
+	ReasoningEffort bool `json:"reasoning_effort"`
 }
 
-// ModelMeta contains the hardcoded metadata for a Neuralwatt model.
-// The API only returns id and max_model_len, so pricing and capabilities
-// are sourced from the pricing page at https://portal.neuralwatt.com/pricing.
-type ModelMeta struct {
-	Tools        bool
-	Reasoning    bool
-	Vision       bool
-	CostPer1MIn  float64
-	CostPer1MOut float64
+type Limits struct {
+	MaxOutputTokens *int64 `json:"max_output_tokens"`
 }
 
-var modelMetadata = map[string]ModelMeta{
-	"mistralai/Devstral-Small-2-24B-Instruct-2512": {
-		Tools:        true,
-		Reasoning:    false,
-		Vision:       true,
-		CostPer1MIn:  0.1,
-		CostPer1MOut: 0.3,
-	},
-	"zai-org/GLM-5.1-FP8": {
-		Tools:        true,
-		Reasoning:    true,
-		Vision:       false,
-		CostPer1MIn:  1.1,
-		CostPer1MOut: 3.6,
-	},
-	"glm-5.1-fast": {
-		Tools:        true,
-		Reasoning:    false,
-		Vision:       false,
-		CostPer1MIn:  1.1,
-		CostPer1MOut: 3.6,
-	},
-	"openai/gpt-oss-20b": {
-		Tools:        true,
-		Reasoning:    false,
-		Vision:       false,
-		CostPer1MIn:  0.0,
-		CostPer1MOut: 0.2,
-	},
-	"moonshotai/Kimi-K2.5": {
-		Tools:        true,
-		Reasoning:    false,
-		Vision:       true,
-		CostPer1MIn:  0.5,
-		CostPer1MOut: 2.6,
-	},
-	"kimi-k2.5-fast": {
-		Tools:        true,
-		Reasoning:    false,
-		Vision:       true,
-		CostPer1MIn:  0.5,
-		CostPer1MOut: 2.6,
-	},
-	"MiniMaxAI/MiniMax-M2.5": {
-		Tools:        true,
-		Reasoning:    true,
-		Vision:       false,
-		CostPer1MIn:  0.3,
-		CostPer1MOut: 1.4,
-	},
-	"Qwen/Qwen3.5-35B-A3B": {
-		Tools:        true,
-		Reasoning:    true,
-		Vision:       false,
-		CostPer1MIn:  0.3,
-		CostPer1MOut: 1.1,
-	},
-	"Qwen/Qwen3.5-397B-A17B-FP8": {
-		Tools:        true,
-		Reasoning:    true,
-		Vision:       false,
-		CostPer1MIn:  0.7,
-		CostPer1MOut: 4.1,
-	},
-	"qwen3.5-397b-fast": {
-		Tools:        true,
-		Reasoning:    false,
-		Vision:       false,
-		CostPer1MIn:  0.7,
-		CostPer1MOut: 4.1,
-	},
+type Metadata struct {
+	DisplayName  string       `json:"display_name"`
+	Pricing      Pricing      `json:"pricing"`
+	Capabilities Capabilities `json:"capabilities"`
+	Limits       Limits       `json:"limits"`
+	Deprecated   bool         `json:"deprecated"`
 }
 
-// modelNames provides display names for Neuralwatt-owned models that lack an
-// org prefix and use lowercase IDs.
-var modelNames = map[string]string{
-	"glm-5.1-fast":      "GLM 5.1 Fast",
-	"kimi-k2.5-fast":    "Kimi K2.5 Fast",
-	"qwen3.5-397b-fast": "Qwen3.5 397B Fast",
+type NeuralwattModel struct {
+	ID          string   `json:"id"`
+	MaxModelLen int64    `json:"max_model_len"`
+	Metadata    Metadata `json:"metadata"`
+}
+
+type ModelsResponse struct {
+	Data []NeuralwattModel `json:"data"`
 }
 
 func roundCost(v float64) float64 {
 	return math.Round(v*1e5) / 1e5
 }
 
-// modelDisplayName converts a model ID to a human-readable display name. For
-// models with an org prefix (e.g. "zai-org/GLM-5-FP8"), the prefix is stripped.
-// Neuralwatt-owned models without a prefix are looked up in modelNames for
-// proper casing.
-func modelDisplayName(id string) string {
-	if name, ok := modelNames[id]; ok {
-		return name
-	}
-
-	name := id
-	if idx := strings.Index(name, "/"); idx != -1 {
-		name = name[idx+1:]
+func ptrDeref[T any](v *T, fallback T) T {
+	if v == nil {
+		return fallback
 	}
-	name = strings.ReplaceAll(name, "-", " ")
-	return name
+	return *v
 }
 
 func fetchNeuralwattModels(apiEndpoint string) (*ModelsResponse, error) {
@@ -171,6 +97,14 @@ func fetchNeuralwattModels(apiEndpoint string) (*ModelsResponse, error) {
 	return &mr, nil
 }
 
+func fallbackDisplayName(id string) string {
+	name := id
+	if idx := strings.Index(name, "/"); idx != -1 {
+		name = name[idx+1:]
+	}
+	return strings.ReplaceAll(name, "-", " ")
+}
+
 func main() {
 	neuralwattProvider := catwalk.Provider{
 		Name:                "Neuralwatt",
@@ -188,6 +122,13 @@ func main() {
 	}
 
 	for _, model := range modelsResp.Data {
+		meta := model.Metadata
+
+		if meta.Deprecated {
+			fmt.Printf("Skipping deprecated model %s\n", model.ID)
+			continue
+		}
+
 		// Skip models with small context windows
 		if model.MaxModelLen < 20000 {
 			fmt.Printf("Skipping model %s: context %d < 20000\n",
@@ -195,37 +136,49 @@ func main() {
 			continue
 		}
 
-		meta, ok := modelMetadata[model.ID]
-		if !ok {
-			fmt.Printf("Skipping unknown model %s (no metadata)\n", model.ID)
+		if !meta.Capabilities.Tools {
+			fmt.Printf("Skipping model %s (no tool support)\n", model.ID)
 			continue
 		}
 
-		// Only include models that support tools
-		if !meta.Tools {
-			continue
+		costIn := ptrDeref(meta.Pricing.InputPerMillion, 0)
+		costOut := ptrDeref(meta.Pricing.OutputPerMillion, 0)
+		// Null cached pricing means same as non-cached
+		costInCached := ptrDeref(meta.Pricing.CachedInputPerMillion, costIn)
+		costOutCached := ptrDeref(meta.Pricing.CachedOutputPerMillion, costOut)
+
+		var defaultMaxTokens int64
+		if meta.Limits.MaxOutputTokens != nil {
+			defaultMaxTokens = *meta.Limits.MaxOutputTokens
+		} else {
+			defaultMaxTokens = model.MaxModelLen / 10
 		}
 
 		var reasoningLevels []string
 		var defaultReasoning string
-		if meta.Reasoning {
+		if meta.Capabilities.ReasoningEffort {
 			reasoningLevels = []string{"low", "medium", "high"}
 			defaultReasoning = "medium"
 		}
 
+		name := meta.DisplayName
+		if name == "" {
+			name = fallbackDisplayName(model.ID)
+		}
+
 		m := catwalk.Model{
 			ID:                     model.ID,
-			Name:                   modelDisplayName(model.ID),
-			CostPer1MIn:            roundCost(meta.CostPer1MIn),
-			CostPer1MOut:           roundCost(meta.CostPer1MOut),
-			CostPer1MInCached:      0, // Not available
-			CostPer1MOutCached:     0, // Not available
+			Name:                   name,
+			CostPer1MIn:            roundCost(costIn),
+			CostPer1MOut:           roundCost(costOut),
+			CostPer1MInCached:      roundCost(costInCached),
+			CostPer1MOutCached:     roundCost(costOutCached),
 			ContextWindow:          model.MaxModelLen,
-			DefaultMaxTokens:       model.MaxModelLen / 10,
-			CanReason:              meta.Reasoning,
+			DefaultMaxTokens:       defaultMaxTokens,
+			CanReason:              meta.Capabilities.Reasoning,
 			DefaultReasoningEffort: defaultReasoning,
 			ReasoningLevels:        reasoningLevels,
-			SupportsImages:         meta.Vision,
+			SupportsImages:         meta.Capabilities.Vision,
 		}
 
 		neuralwattProvider.Models = append(neuralwattProvider.Models, m)

internal/providers/configs/neuralwatt.json 🔗

@@ -9,41 +9,47 @@
   "models": [
     {
       "id": "mistralai/Devstral-Small-2-24B-Instruct-2512",
-      "name": "Devstral Small 2 24B Instruct 2512",
-      "cost_per_1m_in": 0.1,
-      "cost_per_1m_out": 0.3,
-      "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0,
+      "name": "Devstral-Small-2-24B-Instruct-2512",
+      "cost_per_1m_in": 0.12,
+      "cost_per_1m_out": 0.35,
+      "cost_per_1m_in_cached": 0.12,
+      "cost_per_1m_out_cached": 0.35,
       "context_window": 262144,
       "default_max_tokens": 26214,
       "can_reason": false,
       "supports_attachments": true
     },
+    {
+      "id": "glm-5-fast",
+      "name": "GLM-5 Fast",
+      "cost_per_1m_in": 1.1,
+      "cost_per_1m_out": 3.6,
+      "cost_per_1m_in_cached": 1.1,
+      "cost_per_1m_out_cached": 3.6,
+      "context_window": 202752,
+      "default_max_tokens": 20275,
+      "can_reason": false,
+      "supports_attachments": false
+    },
     {
       "id": "zai-org/GLM-5.1-FP8",
-      "name": "GLM 5.1 FP8",
+      "name": "GLM-5.1",
       "cost_per_1m_in": 1.1,
       "cost_per_1m_out": 3.6,
-      "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0,
+      "cost_per_1m_in_cached": 1.1,
+      "cost_per_1m_out_cached": 3.6,
       "context_window": 202752,
       "default_max_tokens": 20275,
       "can_reason": true,
-      "reasoning_levels": [
-        "low",
-        "medium",
-        "high"
-      ],
-      "default_reasoning_effort": "medium",
       "supports_attachments": false
     },
     {
       "id": "glm-5.1-fast",
-      "name": "GLM 5.1 Fast",
+      "name": "GLM-5.1 Fast",
       "cost_per_1m_in": 1.1,
       "cost_per_1m_out": 3.6,
-      "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0,
+      "cost_per_1m_in_cached": 1.1,
+      "cost_per_1m_out_cached": 3.6,
       "context_window": 202752,
       "default_max_tokens": 20275,
       "can_reason": false,
@@ -52,74 +58,110 @@
     {
       "id": "moonshotai/Kimi-K2.5",
       "name": "Kimi K2.5",
-      "cost_per_1m_in": 0.5,
-      "cost_per_1m_out": 2.6,
-      "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0,
+      "cost_per_1m_in": 0.52,
+      "cost_per_1m_out": 2.59,
+      "cost_per_1m_in_cached": 0.52,
+      "cost_per_1m_out_cached": 2.59,
       "context_window": 262144,
       "default_max_tokens": 26214,
-      "can_reason": false,
+      "can_reason": true,
       "supports_attachments": true
     },
     {
       "id": "kimi-k2.5-fast",
       "name": "Kimi K2.5 Fast",
-      "cost_per_1m_in": 0.5,
-      "cost_per_1m_out": 2.6,
-      "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0,
+      "cost_per_1m_in": 0.52,
+      "cost_per_1m_out": 2.59,
+      "cost_per_1m_in_cached": 0.52,
+      "cost_per_1m_out_cached": 2.59,
       "context_window": 262144,
       "default_max_tokens": 26214,
       "can_reason": false,
       "supports_attachments": true
     },
+    {
+      "id": "moonshotai/Kimi-K2.6",
+      "name": "Kimi K2.6",
+      "cost_per_1m_in": 0.69,
+      "cost_per_1m_out": 3.22,
+      "cost_per_1m_in_cached": 0.69,
+      "cost_per_1m_out_cached": 3.22,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "supports_attachments": true
+    },
+    {
+      "id": "kimi-k2.6-fast",
+      "name": "Kimi K2.6 Fast",
+      "cost_per_1m_in": 0.69,
+      "cost_per_1m_out": 3.22,
+      "cost_per_1m_in_cached": 0.69,
+      "cost_per_1m_out_cached": 3.22,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "supports_attachments": true
+    },
     {
       "id": "MiniMaxAI/MiniMax-M2.5",
       "name": "MiniMax M2.5",
-      "cost_per_1m_in": 0.3,
-      "cost_per_1m_out": 1.4,
-      "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0,
+      "cost_per_1m_in": 0.35,
+      "cost_per_1m_out": 1.38,
+      "cost_per_1m_in_cached": 0.35,
+      "cost_per_1m_out_cached": 1.38,
       "context_window": 196608,
       "default_max_tokens": 19660,
       "can_reason": true,
-      "reasoning_levels": [
-        "low",
-        "medium",
-        "high"
-      ],
-      "default_reasoning_effort": "medium",
       "supports_attachments": false
     },
     {
       "id": "Qwen/Qwen3.5-397B-A17B-FP8",
-      "name": "Qwen3.5 397B A17B FP8",
-      "cost_per_1m_in": 0.7,
-      "cost_per_1m_out": 4.1,
-      "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0,
+      "name": "Qwen3.5 397B",
+      "cost_per_1m_in": 0.69,
+      "cost_per_1m_out": 4.14,
+      "cost_per_1m_in_cached": 0.69,
+      "cost_per_1m_out_cached": 4.14,
       "context_window": 262144,
       "default_max_tokens": 26214,
       "can_reason": true,
-      "reasoning_levels": [
-        "low",
-        "medium",
-        "high"
-      ],
-      "default_reasoning_effort": "medium",
       "supports_attachments": false
     },
     {
       "id": "qwen3.5-397b-fast",
       "name": "Qwen3.5 397B Fast",
-      "cost_per_1m_in": 0.7,
-      "cost_per_1m_out": 4.1,
-      "cost_per_1m_in_cached": 0,
-      "cost_per_1m_out_cached": 0,
+      "cost_per_1m_in": 0.69,
+      "cost_per_1m_out": 4.14,
+      "cost_per_1m_in_cached": 0.69,
+      "cost_per_1m_out_cached": 4.14,
       "context_window": 262144,
       "default_max_tokens": 26214,
       "can_reason": false,
       "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3.6-35B-A3B",
+      "name": "Qwen3.6 35B",
+      "cost_per_1m_in": 0.05,
+      "cost_per_1m_out": 0.1,
+      "cost_per_1m_in_cached": 0.05,
+      "cost_per_1m_out_cached": 0.1,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen3.6-35b-fast",
+      "name": "Qwen3.6 35B Fast",
+      "cost_per_1m_in": 0.05,
+      "cost_per_1m_out": 0.1,
+      "cost_per_1m_in_cached": 0.05,
+      "cost_per_1m_out_cached": 0.1,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": false,
+      "supports_attachments": false
     }
   ]
 }