chore: new provider: nebius token factory (#227)

Harm Weites and Andrey Nering created 1 month ago

Co-authored-by: Andrey Nering <andreynering@users.noreply.github.com>

Change summary

Taskfile.yaml                          |   5 
cmd/nebius/main.go                     | 172 +++++++
internal/providers/configs/nebius.json | 653 +++++++++++++++++++++++++++
internal/providers/providers.go        |   8 
pkg/catwalk/provider.go                |   2 
5 files changed, 840 insertions(+)

Detailed changes

Taskfile.yaml 🔗

@@ -99,6 +99,11 @@ tasks:
     cmds:
       - go run cmd/ionet/main.go
 
+  gen:nebius:
+    desc: Generate Nebius provider configurations
+    cmds:
+      - go run cmd/nebius/main.go
+
   gen:openrouter:
     desc: Generate openrouter provider configurations
     cmds:

cmd/nebius/main.go 🔗

@@ -0,0 +1,172 @@
+// Package main provides a command-line tool to fetch models from Nebius Token Factory
+// and generate a configuration file for the provider.
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"math"
+	"net/http"
+	"os"
+	"slices"
+	"strconv"
+	"strings"
+	"time"
+
+	"charm.land/catwalk/pkg/catwalk"
+)
+
+// Model represents a model from the Nebius Token Factory API.
+//
+// It is decoded from one entry of the models response. The generator copies
+// ID and DisplayName (JSON key "name") into the catwalk model, uses
+// ContextLength as the context window, checks SupportedFeatures through
+// hasFeature for "tools" and "reasoning", and looks for the substring
+// "image" in Architecture.Modality to decide image support.
+type Model struct {
+	ID                string   `json:"id"`
+	DisplayName       string   `json:"name"`
+	ContextLength     int64    `json:"context_length"`
+	SupportedFeatures []string `json:"supported_features,omitempty"`
+	Pricing           Pricing  `json:"pricing"`
+	Architecture      struct {
+		Modality string `json:"modality"`
+	} `json:"architecture,omitempty"`
+}
+
+// Pricing holds the prompt and completion prices reported for a model.
+// Both values are kept as the strings found in the JSON; the generator
+// parses them with strconv.ParseFloat and multiplies by 1,000,000 to
+// obtain the per-1M-token costs it writes out.
+type Pricing struct {
+	Prompt     string `json:"prompt"`
+	Completion string `json:"completion"`
+}
+
+// ModelsResponse is the decoded body of the models endpoint; the list of
+// models is read from its "data" key.
+type ModelsResponse struct {
+	Data []Model `json:"data"`
+}
+
+// hasFeature reports whether featureValue is listed in the model's
+// SupportedFeatures. The comparison ignores case, and a model without any
+// listed features never matches.
+func (m Model) hasFeature(featureValue string) bool {
+	return slices.ContainsFunc(m.SupportedFeatures, func(candidate string) bool {
+		return strings.EqualFold(candidate, featureValue)
+	})
+}
+
+// fetchNebiusModels retrieves the verbose model list from the Nebius Token
+// Factory API, authenticating with the key found in $NEBIUS_API_KEY.
+//
+// It returns an error when the key is unset, when the request cannot be
+// built or sent, when the server answers with a status other than 200 (the
+// response body is included in the error), or when the body cannot be
+// decoded into a ModelsResponse.
+func fetchNebiusModels() (*ModelsResponse, error) {
+	// Fail fast: without a key there is no point in building the request.
+	apiKey := os.Getenv("NEBIUS_API_KEY")
+	if apiKey == "" {
+		return nil, fmt.Errorf("$NEBIUS_API_KEY is required")
+	}
+
+	req, err := http.NewRequestWithContext(
+		context.Background(),
+		http.MethodGet,
+		"https://api.tokenfactory.nebius.com/v1/models?verbose=true",
+		nil,
+	)
+	if err != nil {
+		return nil, fmt.Errorf("building models request: %w", err)
+	}
+	req.Header.Set("User-Agent", "Crush-Client/1.0")
+	req.Header.Set("Authorization", "Bearer "+apiKey)
+
+	client := &http.Client{Timeout: 30 * time.Second}
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("requesting models: %w", err)
+	}
+	defer resp.Body.Close() //nolint:errcheck
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body is only used to enrich the error message,
+		// so a read failure here is deliberately ignored.
+		body, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, body)
+	}
+
+	var mr ModelsResponse
+	if err := json.NewDecoder(resp.Body).Decode(&mr); err != nil {
+		return nil, fmt.Errorf("decoding models response: %w", err)
+	}
+	return &mr, nil
+}
+
+// costPer1M converts price, treated as a per-token price in decimal string
+// form, into a cost per one million tokens rounded to two decimal places.
+//
+// An empty or unparseable price yields 0. A non-empty value that fails to
+// parse is logged with the model ID and price kind so a bad upstream value
+// is not silently recorded as free.
+func costPer1M(modelID, kind, price string) float64 {
+	perToken, err := strconv.ParseFloat(price, 64)
+	if err != nil {
+		if price != "" {
+			log.Printf("Warning: model %s has invalid %s price %q, using 0", modelID, kind, price)
+		}
+		return 0
+	}
+	return math.Round(perToken*1_000_000*100) / 100 // Round to 2 decimal places
+}
+
+// main fetches the Nebius model list, keeps the models that support tool
+// calling, converts them to catwalk models, sorts them by name and writes
+// the resulting provider as indented JSON to
+// internal/providers/configs/nebius.json. The output path is relative to the
+// current working directory. Any fetch, marshal or write failure terminates
+// the program via log.Fatalf.
+func main() {
+	modelsResp, err := fetchNebiusModels()
+	if err != nil {
+		// Fatalf instead of Fatal: Fatal joins a string and an error
+		// without a separating space.
+		log.Fatalf("Error fetching Nebius models: %v", err)
+	}
+
+	nebiusProvider := catwalk.Provider{
+		Name:                "Nebius Token Factory",
+		ID:                  catwalk.InferenceProviderNebius,
+		APIKey:              "$NEBIUS_API_KEY",
+		APIEndpoint:         "https://api.tokenfactory.nebius.com/v1", // this is their default region, eu-north1
+		Type:                catwalk.TypeOpenAICompat,
+		DefaultLargeModelID: "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+		DefaultSmallModelID: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B",
+	}
+
+	for _, model := range modelsResp.Data {
+		// we skip models that don't support tool calling
+		if !model.hasFeature("tools") {
+			continue
+		}
+
+		var (
+			supportsImages   = strings.Contains(strings.ToLower(model.Architecture.Modality), "image")
+			canReason        = model.hasFeature("reasoning")
+			reasoningLevels  []string
+			defaultReasoning string
+		)
+		if canReason {
+			reasoningLevels = []string{"low", "medium", "high"}
+			defaultReasoning = "medium"
+		}
+
+		m := catwalk.Model{
+			ID:                     model.ID,
+			Name:                   model.DisplayName,
+			CostPer1MIn:            costPer1M(model.ID, "prompt", model.Pricing.Prompt),
+			CostPer1MOut:           costPer1M(model.ID, "completion", model.Pricing.Completion),
+			CostPer1MInCached:      0,
+			CostPer1MOutCached:     0,
+			ContextWindow:          model.ContextLength,
+			DefaultMaxTokens:       model.ContextLength / 10, // there is no MaxTokens exposed, so play safe
+			CanReason:              canReason,
+			ReasoningLevels:        reasoningLevels,
+			DefaultReasoningEffort: defaultReasoning,
+			SupportsImages:         supportsImages,
+		}
+
+		nebiusProvider.Models = append(nebiusProvider.Models, m)
+		fmt.Printf("Added model %s with context window %d\n", model.ID, model.ContextLength)
+	}
+
+	// Sort by display name, falling back to the ID so that models sharing a
+	// name always land in the same order and the generated file is stable.
+	slices.SortFunc(nebiusProvider.Models, func(a catwalk.Model, b catwalk.Model) int {
+		if c := strings.Compare(a.Name, b.Name); c != 0 {
+			return c
+		}
+		return strings.Compare(a.ID, b.ID)
+	})
+
+	// Save the JSON in internal/providers/configs/nebius.json
+	data, err := json.MarshalIndent(nebiusProvider, "", "  ")
+	if err != nil {
+		log.Fatalf("Error marshaling Nebius provider: %v", err)
+	}
+	data = append(data, '\n')
+
+	if err := os.WriteFile("internal/providers/configs/nebius.json", data, 0o600); err != nil {
+		log.Fatalf("Error writing Nebius provider config: %v", err)
+	}
+
+	fmt.Printf("Generated nebius.json with %d models\n", len(nebiusProvider.Models))
+}

internal/providers/configs/nebius.json 🔗

@@ -0,0 +1,653 @@
+{
+  "name": "Nebius Token Factory",
+  "id": "nebius",
+  "api_key": "$NEBIUS_API_KEY",
+  "api_endpoint": "https://api.tokenfactory.nebius.com/v1",
+  "type": "openai-compat",
+  "default_large_model_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+  "default_small_model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B",
+  "models": [
+    {
+      "id": "deepseek-ai/DeepSeek-R1-0528",
+      "name": "DeepSeek-R1-0528",
+      "cost_per_1m_in": 0.8,
+      "cost_per_1m_out": 2.4,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 163840,
+      "default_max_tokens": 16384,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "deepseek-ai/DeepSeek-V3-0324",
+      "name": "DeepSeek-V3-0324",
+      "cost_per_1m_in": 0.5,
+      "cost_per_1m_out": 1.5,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 163840,
+      "default_max_tokens": 16384,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "deepseek-ai/DeepSeek-V3.2",
+      "name": "DeepSeek-V3.2",
+      "cost_per_1m_in": 0.3,
+      "cost_per_1m_out": 0.45,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 163000,
+      "default_max_tokens": 16300,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "deepseek-ai/DeepSeek-V3.2-fast",
+      "name": "DeepSeek-V3.2 (fast)",
+      "cost_per_1m_in": 0.4,
+      "cost_per_1m_out": 2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 8000,
+      "default_max_tokens": 800,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "zai-org/GLM-4.5",
+      "name": "GLM-4.5",
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 2.2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "zai-org/GLM-4.5-Air",
+      "name": "GLM-4.5-Air",
+      "cost_per_1m_in": 0.2,
+      "cost_per_1m_out": 1.2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "zai-org/GLM-4.7-FP8",
+      "name": "GLM-4.7",
+      "cost_per_1m_in": 0.4,
+      "cost_per_1m_out": 2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 202752,
+      "default_max_tokens": 20275,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "zai-org/GLM-5",
+      "name": "GLM-5",
+      "cost_per_1m_in": 1,
+      "cost_per_1m_out": 3.2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 202752,
+      "default_max_tokens": 20275,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "NousResearch/Hermes-4-405B",
+      "name": "Hermes-4-405B",
+      "cost_per_1m_in": 1,
+      "cost_per_1m_out": 3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "NousResearch/Hermes-4-70B",
+      "name": "Hermes-4-70B",
+      "cost_per_1m_in": 0.13,
+      "cost_per_1m_out": 0.4,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "PrimeIntellect/INTELLECT-3",
+      "name": "INTELLECT-3",
+      "cost_per_1m_in": 0.2,
+      "cost_per_1m_out": 1.1,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "moonshotai/Kimi-K2-Instruct",
+      "name": "Kimi-K2-Instruct",
+      "cost_per_1m_in": 0.5,
+      "cost_per_1m_out": 2.4,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "moonshotai/Kimi-K2-Thinking",
+      "name": "Kimi-K2-Thinking",
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 2.5,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "moonshotai/Kimi-K2.5",
+      "name": "Kimi-K2.5",
+      "cost_per_1m_in": 0.5,
+      "cost_per_1m_out": 2.5,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "moonshotai/Kimi-K2.5-fast",
+      "name": "Kimi-K2.5 (fast)",
+      "cost_per_1m_in": 0.5,
+      "cost_per_1m_out": 2.5,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 8000,
+      "default_max_tokens": 800,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "meta-llama/Llama-3.3-70B-Instruct",
+      "name": "Llama-3.3-70B-Instruct",
+      "cost_per_1m_in": 0.13,
+      "cost_per_1m_out": 0.4,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "meta-llama/Llama-3.3-70B-Instruct-fast",
+      "name": "Llama-3.3-70B-Instruct (fast)",
+      "cost_per_1m_in": 0.25,
+      "cost_per_1m_out": 0.75,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+      "name": "Meta-Llama-3.1-8B-Instruct",
+      "cost_per_1m_in": 0.02,
+      "cost_per_1m_out": 0.06,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "meta-llama/Meta-Llama-3.1-8B-Instruct-fast",
+      "name": "Meta-Llama-3.1-8B-Instruct (fast)",
+      "cost_per_1m_in": 0.03,
+      "cost_per_1m_out": 0.09,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "MiniMaxAI/MiniMax-M2.1",
+      "name": "MiniMax-M2.1",
+      "cost_per_1m_in": 0.3,
+      "cost_per_1m_out": 1.2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 196608,
+      "default_max_tokens": 19660,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "MiniMaxAI/MiniMax-M2.5",
+      "name": "MiniMax-M2.5",
+      "cost_per_1m_in": 0.3,
+      "cost_per_1m_out": 1.2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 196608,
+      "default_max_tokens": 19660,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B",
+      "name": "Nemotron-3-Nano-30B-A3B",
+      "cost_per_1m_in": 0.06,
+      "cost_per_1m_out": 0.24,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "nvidia/nemotron-3-super-120b-a12b",
+      "name": "Nemotron-3-Super-120b-a12b",
+      "cost_per_1m_in": 0.3,
+      "cost_per_1m_out": 0.9,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
+      "name": "Qwen3-235B-A22B-Instruct-2507",
+      "cost_per_1m_in": 0.2,
+      "cost_per_1m_out": 0.6,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
+      "name": "Qwen3-235B-A22B-Thinking-2507",
+      "cost_per_1m_in": 0.2,
+      "cost_per_1m_out": 0.8,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-235B-A22B-Thinking-2507-fast",
+      "name": "Qwen3-235B-A22B-Thinking-2507 (fast)",
+      "cost_per_1m_in": 0.5,
+      "cost_per_1m_out": 2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 8000,
+      "default_max_tokens": 800,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
+      "name": "Qwen3-30B-A3B-Instruct-2507",
+      "cost_per_1m_in": 0.1,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
+      "name": "Qwen3-30B-A3B-Thinking-2507",
+      "cost_per_1m_in": 0.1,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-32B",
+      "name": "Qwen3-32B",
+      "cost_per_1m_in": 0.1,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 40960,
+      "default_max_tokens": 4096,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-32B-fast",
+      "name": "Qwen3-32B (fast)",
+      "cost_per_1m_in": 0.2,
+      "cost_per_1m_out": 0.6,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 40960,
+      "default_max_tokens": 4096,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+      "name": "Qwen3-Coder-30B-A3B-Instruct",
+      "cost_per_1m_in": 0.1,
+      "cost_per_1m_out": 0.3,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
+      "name": "Qwen3-Coder-480B-A35B-Instruct",
+      "cost_per_1m_in": 0.4,
+      "cost_per_1m_out": 1.8,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-Next-80B-A3B-Thinking",
+      "name": "Qwen3-Next-80B-A3B-Thinking",
+      "cost_per_1m_in": 0.15,
+      "cost_per_1m_out": 1.2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 128000,
+      "default_max_tokens": 12800,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3-Next-80B-A3B-Thinking-fast",
+      "name": "Qwen3-Next-80B-A3B-Thinking (fast)",
+      "cost_per_1m_in": 0.15,
+      "cost_per_1m_out": 1.2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 8000,
+      "default_max_tokens": 800,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3.5-397B-A17B",
+      "name": "Qwen3.5-397B-A17B",
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 3.6,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 262144,
+      "default_max_tokens": 26214,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "Qwen/Qwen3.5-397B-A17B-fast",
+      "name": "Qwen3.5-397B-A17B (fast)",
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 3.6,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 8000,
+      "default_max_tokens": 800,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "openai/gpt-oss-120b",
+      "name": "gpt-oss-120b",
+      "cost_per_1m_in": 0.15,
+      "cost_per_1m_out": 0.6,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "openai/gpt-oss-120b-fast",
+      "name": "gpt-oss-120b (fast)",
+      "cost_per_1m_in": 0.1,
+      "cost_per_1m_out": 0.5,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 8000,
+      "default_max_tokens": 800,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    },
+    {
+      "id": "openai/gpt-oss-20b",
+      "name": "gpt-oss-20b",
+      "cost_per_1m_in": 0.05,
+      "cost_per_1m_out": 0.2,
+      "cost_per_1m_in_cached": 0,
+      "cost_per_1m_out_cached": 0,
+      "context_window": 131072,
+      "default_max_tokens": 13107,
+      "can_reason": true,
+      "reasoning_levels": [
+        "low",
+        "medium",
+        "high"
+      ],
+      "default_reasoning_effort": "medium",
+      "supports_attachments": false
+    }
+  ]
+}

internal/providers/providers.go 🔗

@@ -42,6 +42,9 @@ var zhipuConfig []byte
 //go:embed configs/zhipu-coding.json
 var zhipuCodingConfig []byte
 
+//go:embed configs/nebius.json
+var nebiusConfig []byte
+
 //go:embed configs/bedrock.json
 var bedrockConfig []byte
 
@@ -125,6 +128,7 @@ var providerRegistry = []ProviderFunc{
 	ioNetProvider,
 	qiniuCloudProvider,
 	avianProvider,
+	nebiusProvider,
 }
 
 // GetAll returns all registered providers.
@@ -253,6 +257,10 @@ func qiniuCloudProvider() catwalk.Provider {
 	return loadProviderFromConfig(qiniuCloudConfig)
 }
 
+// nebiusProvider returns the Nebius provider obtained by passing the
+// embedded configs/nebius.json bytes to loadProviderFromConfig.
+func nebiusProvider() catwalk.Provider {
+	return loadProviderFromConfig(nebiusConfig)
+}
+
 func avianProvider() catwalk.Provider {
 	return loadProviderFromConfig(avianConfig)
 }

pkg/catwalk/provider.go 🔗

@@ -48,6 +48,7 @@ const (
 	InferenceProviderIoNet        InferenceProvider = "ionet"
 	InferenceProviderQiniuCloud   InferenceProvider = "qiniucloud"
 	InferenceProviderAvian        InferenceProvider = "avian"
+	InferenceProviderNebius       InferenceProvider = "nebius"
 )
 
 // Provider represents an AI provider configuration.
@@ -119,6 +120,7 @@ func KnownProviders() []InferenceProvider {
 		InferenceProviderMiniMaxChina,
 		InferenceProviderQiniuCloud,
 		InferenceProviderAvian,
+		InferenceProviderNebius,
 	}
 }