@@ -18,126 +18,52 @@ import (
"charm.land/catwalk/pkg/catwalk"
)
-type NeuralwattModel struct {
- ID string `json:"id"`
- MaxModelLen int64 `json:"max_model_len"`
+type Pricing struct {
+ InputPerMillion *float64 `json:"input_per_million"`
+ OutputPerMillion *float64 `json:"output_per_million"`
+ CachedInputPerMillion *float64 `json:"cached_input_per_million"`
+ CachedOutputPerMillion *float64 `json:"cached_output_per_million"`
+ PricingTBD bool `json:"pricing_tbd"` // NOTE(review): decoded but never checked — should models with TBD pricing be skipped instead of emitted at cost 0? Confirm.
}
-type ModelsResponse struct {
- Data []NeuralwattModel `json:"data"`
+type Capabilities struct {
+ Tools bool `json:"tools"`
+ Vision bool `json:"vision"`
+ Reasoning bool `json:"reasoning"`
+ ReasoningEffort bool `json:"reasoning_effort"`
}
-// ModelMeta contains the hardcoded metadata for a Neuralwatt model.
-// The API only returns id and max_model_len, so pricing and capabilities
-// are sourced from the pricing page at https://portal.neuralwatt.com/pricing.
-type ModelMeta struct {
- Tools bool
- Reasoning bool
- Vision bool
- CostPer1MIn float64
- CostPer1MOut float64
+type Limits struct {
+ MaxOutputTokens *int64 `json:"max_output_tokens"`
}
-var modelMetadata = map[string]ModelMeta{
- "mistralai/Devstral-Small-2-24B-Instruct-2512": {
- Tools: true,
- Reasoning: false,
- Vision: true,
- CostPer1MIn: 0.1,
- CostPer1MOut: 0.3,
- },
- "zai-org/GLM-5.1-FP8": {
- Tools: true,
- Reasoning: true,
- Vision: false,
- CostPer1MIn: 1.1,
- CostPer1MOut: 3.6,
- },
- "glm-5.1-fast": {
- Tools: true,
- Reasoning: false,
- Vision: false,
- CostPer1MIn: 1.1,
- CostPer1MOut: 3.6,
- },
- "openai/gpt-oss-20b": {
- Tools: true,
- Reasoning: false,
- Vision: false,
- CostPer1MIn: 0.0,
- CostPer1MOut: 0.2,
- },
- "moonshotai/Kimi-K2.5": {
- Tools: true,
- Reasoning: false,
- Vision: true,
- CostPer1MIn: 0.5,
- CostPer1MOut: 2.6,
- },
- "kimi-k2.5-fast": {
- Tools: true,
- Reasoning: false,
- Vision: true,
- CostPer1MIn: 0.5,
- CostPer1MOut: 2.6,
- },
- "MiniMaxAI/MiniMax-M2.5": {
- Tools: true,
- Reasoning: true,
- Vision: false,
- CostPer1MIn: 0.3,
- CostPer1MOut: 1.4,
- },
- "Qwen/Qwen3.5-35B-A3B": {
- Tools: true,
- Reasoning: true,
- Vision: false,
- CostPer1MIn: 0.3,
- CostPer1MOut: 1.1,
- },
- "Qwen/Qwen3.5-397B-A17B-FP8": {
- Tools: true,
- Reasoning: true,
- Vision: false,
- CostPer1MIn: 0.7,
- CostPer1MOut: 4.1,
- },
- "qwen3.5-397b-fast": {
- Tools: true,
- Reasoning: false,
- Vision: false,
- CostPer1MIn: 0.7,
- CostPer1MOut: 4.1,
- },
+type Metadata struct {
+ DisplayName string `json:"display_name"`
+ Pricing Pricing `json:"pricing"`
+ Capabilities Capabilities `json:"capabilities"`
+ Limits Limits `json:"limits"`
+ Deprecated bool `json:"deprecated"`
}
-// modelNames provides display names for Neuralwatt-owned models that lack an
-// org prefix and use lowercase IDs.
-var modelNames = map[string]string{
- "glm-5.1-fast": "GLM 5.1 Fast",
- "kimi-k2.5-fast": "Kimi K2.5 Fast",
- "qwen3.5-397b-fast": "Qwen3.5 397B Fast",
+type NeuralwattModel struct {
+ ID string `json:"id"`
+ MaxModelLen int64 `json:"max_model_len"`
+ Metadata Metadata `json:"metadata"`
+}
+
+type ModelsResponse struct {
+ Data []NeuralwattModel `json:"data"`
}
func roundCost(v float64) float64 {
return math.Round(v*1e5) / 1e5
}
-// modelDisplayName converts a model ID to a human-readable display name. For
-// models with an org prefix (e.g. "zai-org/GLM-5-FP8"), the prefix is stripped.
-// Neuralwatt-owned models without a prefix are looked up in modelNames for
-// proper casing.
-func modelDisplayName(id string) string {
- if name, ok := modelNames[id]; ok {
- return name
- }
-
- name := id
- if idx := strings.Index(name, "/"); idx != -1 {
- name = name[idx+1:]
+func ptrDeref[T any](v *T, fallback T) T {
+ if v == nil {
+ return fallback
}
- name = strings.ReplaceAll(name, "-", " ")
- return name
+ return *v
}
func fetchNeuralwattModels(apiEndpoint string) (*ModelsResponse, error) {
@@ -171,6 +97,14 @@ func fetchNeuralwattModels(apiEndpoint string) (*ModelsResponse, error) {
return &mr, nil
}
+func fallbackDisplayName(id string) string {
+ name := id
+ if idx := strings.Index(name, "/"); idx != -1 {
+ name = name[idx+1:]
+ }
+ return strings.ReplaceAll(name, "-", " ")
+}
+
func main() {
neuralwattProvider := catwalk.Provider{
Name: "Neuralwatt",
@@ -188,6 +122,13 @@ func main() {
}
for _, model := range modelsResp.Data {
+ meta := model.Metadata
+
+ if meta.Deprecated {
+ fmt.Printf("Skipping deprecated model %s\n", model.ID)
+ continue
+ }
+
// Skip models with small context windows
if model.MaxModelLen < 20000 {
fmt.Printf("Skipping model %s: context %d < 20000\n",
@@ -195,37 +136,49 @@ func main() {
continue
}
- meta, ok := modelMetadata[model.ID]
- if !ok {
- fmt.Printf("Skipping unknown model %s (no metadata)\n", model.ID)
+ if !meta.Capabilities.Tools {
+ fmt.Printf("Skipping model %s (no tool support)\n", model.ID)
continue
}
- // Only include models that support tools
- if !meta.Tools {
- continue
+ costIn := ptrDeref(meta.Pricing.InputPerMillion, 0)
+ costOut := ptrDeref(meta.Pricing.OutputPerMillion, 0)
+ // A nil cached price means the API charges the regular (non-cached) rate for cached tokens.
+ costInCached := ptrDeref(meta.Pricing.CachedInputPerMillion, costIn)
+ costOutCached := ptrDeref(meta.Pricing.CachedOutputPerMillion, costOut)
+
+ var defaultMaxTokens int64
+ if meta.Limits.MaxOutputTokens != nil {
+ defaultMaxTokens = *meta.Limits.MaxOutputTokens
+ } else {
+ defaultMaxTokens = model.MaxModelLen / 10
}
var reasoningLevels []string
var defaultReasoning string
- if meta.Reasoning {
+ if meta.Capabilities.ReasoningEffort {
reasoningLevels = []string{"low", "medium", "high"}
defaultReasoning = "medium"
}
+ name := meta.DisplayName
+ if name == "" {
+ name = fallbackDisplayName(model.ID)
+ }
+
m := catwalk.Model{
ID: model.ID,
- Name: modelDisplayName(model.ID),
- CostPer1MIn: roundCost(meta.CostPer1MIn),
- CostPer1MOut: roundCost(meta.CostPer1MOut),
- CostPer1MInCached: 0, // Not available
- CostPer1MOutCached: 0, // Not available
+ Name: name,
+ CostPer1MIn: roundCost(costIn),
+ CostPer1MOut: roundCost(costOut),
+ CostPer1MInCached: roundCost(costInCached),
+ CostPer1MOutCached: roundCost(costOutCached),
ContextWindow: model.MaxModelLen,
- DefaultMaxTokens: model.MaxModelLen / 10,
- CanReason: meta.Reasoning,
+ DefaultMaxTokens: defaultMaxTokens,
+ CanReason: meta.Capabilities.Reasoning,
DefaultReasoningEffort: defaultReasoning,
ReasoningLevels: reasoningLevels,
- SupportsImages: meta.Vision,
+ SupportsImages: meta.Capabilities.Vision,
}
neuralwattProvider.Models = append(neuralwattProvider.Models, m)
@@ -9,41 +9,47 @@
"models": [
{
"id": "mistralai/Devstral-Small-2-24B-Instruct-2512",
- "name": "Devstral Small 2 24B Instruct 2512",
- "cost_per_1m_in": 0.1,
- "cost_per_1m_out": 0.3,
- "cost_per_1m_in_cached": 0,
- "cost_per_1m_out_cached": 0,
+ "name": "Devstral-Small-2-24B-Instruct-2512",
+ "cost_per_1m_in": 0.12,
+ "cost_per_1m_out": 0.35,
+ "cost_per_1m_in_cached": 0.12,
+ "cost_per_1m_out_cached": 0.35,
"context_window": 262144,
"default_max_tokens": 26214,
"can_reason": false,
"supports_attachments": true
},
+ {
+ "id": "glm-5-fast",
+ "name": "GLM-5 Fast",
+ "cost_per_1m_in": 1.1,
+ "cost_per_1m_out": 3.6,
+ "cost_per_1m_in_cached": 1.1,
+ "cost_per_1m_out_cached": 3.6,
+ "context_window": 202752,
+ "default_max_tokens": 20275,
+ "can_reason": false,
+ "supports_attachments": false
+ },
{
"id": "zai-org/GLM-5.1-FP8",
- "name": "GLM 5.1 FP8",
+ "name": "GLM-5.1",
"cost_per_1m_in": 1.1,
"cost_per_1m_out": 3.6,
- "cost_per_1m_in_cached": 0,
- "cost_per_1m_out_cached": 0,
+ "cost_per_1m_in_cached": 1.1,
+ "cost_per_1m_out_cached": 3.6,
"context_window": 202752,
"default_max_tokens": 20275,
"can_reason": true,
- "reasoning_levels": [
- "low",
- "medium",
- "high"
- ],
- "default_reasoning_effort": "medium",
"supports_attachments": false
},
{
"id": "glm-5.1-fast",
- "name": "GLM 5.1 Fast",
+ "name": "GLM-5.1 Fast",
"cost_per_1m_in": 1.1,
"cost_per_1m_out": 3.6,
- "cost_per_1m_in_cached": 0,
- "cost_per_1m_out_cached": 0,
+ "cost_per_1m_in_cached": 1.1,
+ "cost_per_1m_out_cached": 3.6,
"context_window": 202752,
"default_max_tokens": 20275,
"can_reason": false,
@@ -52,74 +58,110 @@
{
"id": "moonshotai/Kimi-K2.5",
"name": "Kimi K2.5",
- "cost_per_1m_in": 0.5,
- "cost_per_1m_out": 2.6,
- "cost_per_1m_in_cached": 0,
- "cost_per_1m_out_cached": 0,
+ "cost_per_1m_in": 0.52,
+ "cost_per_1m_out": 2.59,
+ "cost_per_1m_in_cached": 0.52,
+ "cost_per_1m_out_cached": 2.59,
"context_window": 262144,
"default_max_tokens": 26214,
- "can_reason": false,
+ "can_reason": true,
"supports_attachments": true
},
{
"id": "kimi-k2.5-fast",
"name": "Kimi K2.5 Fast",
- "cost_per_1m_in": 0.5,
- "cost_per_1m_out": 2.6,
- "cost_per_1m_in_cached": 0,
- "cost_per_1m_out_cached": 0,
+ "cost_per_1m_in": 0.52,
+ "cost_per_1m_out": 2.59,
+ "cost_per_1m_in_cached": 0.52,
+ "cost_per_1m_out_cached": 2.59,
"context_window": 262144,
"default_max_tokens": 26214,
"can_reason": false,
"supports_attachments": true
},
+ {
+ "id": "moonshotai/Kimi-K2.6",
+ "name": "Kimi K2.6",
+ "cost_per_1m_in": 0.69,
+ "cost_per_1m_out": 3.22,
+ "cost_per_1m_in_cached": 0.69,
+ "cost_per_1m_out_cached": 3.22,
+ "context_window": 262144,
+ "default_max_tokens": 26214,
+ "can_reason": true,
+ "supports_attachments": true
+ },
+ {
+ "id": "kimi-k2.6-fast",
+ "name": "Kimi K2.6 Fast",
+ "cost_per_1m_in": 0.69,
+ "cost_per_1m_out": 3.22,
+ "cost_per_1m_in_cached": 0.69,
+ "cost_per_1m_out_cached": 3.22,
+ "context_window": 262144,
+ "default_max_tokens": 26214,
+ "can_reason": true,
+ "supports_attachments": true
+ },
{
"id": "MiniMaxAI/MiniMax-M2.5",
"name": "MiniMax M2.5",
- "cost_per_1m_in": 0.3,
- "cost_per_1m_out": 1.4,
- "cost_per_1m_in_cached": 0,
- "cost_per_1m_out_cached": 0,
+ "cost_per_1m_in": 0.35,
+ "cost_per_1m_out": 1.38,
+ "cost_per_1m_in_cached": 0.35,
+ "cost_per_1m_out_cached": 1.38,
"context_window": 196608,
"default_max_tokens": 19660,
"can_reason": true,
- "reasoning_levels": [
- "low",
- "medium",
- "high"
- ],
- "default_reasoning_effort": "medium",
"supports_attachments": false
},
{
"id": "Qwen/Qwen3.5-397B-A17B-FP8",
- "name": "Qwen3.5 397B A17B FP8",
- "cost_per_1m_in": 0.7,
- "cost_per_1m_out": 4.1,
- "cost_per_1m_in_cached": 0,
- "cost_per_1m_out_cached": 0,
+ "name": "Qwen3.5 397B",
+ "cost_per_1m_in": 0.69,
+ "cost_per_1m_out": 4.14,
+ "cost_per_1m_in_cached": 0.69,
+ "cost_per_1m_out_cached": 4.14,
"context_window": 262144,
"default_max_tokens": 26214,
"can_reason": true,
- "reasoning_levels": [
- "low",
- "medium",
- "high"
- ],
- "default_reasoning_effort": "medium",
"supports_attachments": false
},
{
"id": "qwen3.5-397b-fast",
"name": "Qwen3.5 397B Fast",
- "cost_per_1m_in": 0.7,
- "cost_per_1m_out": 4.1,
- "cost_per_1m_in_cached": 0,
- "cost_per_1m_out_cached": 0,
+ "cost_per_1m_in": 0.69,
+ "cost_per_1m_out": 4.14,
+ "cost_per_1m_in_cached": 0.69,
+ "cost_per_1m_out_cached": 4.14,
"context_window": 262144,
"default_max_tokens": 26214,
"can_reason": false,
"supports_attachments": false
+ },
+ {
+ "id": "Qwen/Qwen3.6-35B-A3B",
+ "name": "Qwen3.6 35B",
+ "cost_per_1m_in": 0.05,
+ "cost_per_1m_out": 0.1,
+ "cost_per_1m_in_cached": 0.05,
+ "cost_per_1m_out_cached": 0.1,
+ "context_window": 131072,
+ "default_max_tokens": 13107,
+ "can_reason": true,
+ "supports_attachments": false
+ },
+ {
+ "id": "qwen3.6-35b-fast",
+ "name": "Qwen3.6 35B Fast",
+ "cost_per_1m_in": 0.05,
+ "cost_per_1m_out": 0.1,
+ "cost_per_1m_in_cached": 0.05,
+ "cost_per_1m_out_cached": 0.1,
+ "context_window": 131072,
+ "default_max_tokens": 13107,
+ "can_reason": false,
+ "supports_attachments": false
}
]
}