diff --git a/Taskfile.yaml b/Taskfile.yaml index 2b1421fa55a542fe3f5659e4801edfbd03ed6253..1fde70ae786efdae23725505ed3942668850658e 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -99,6 +99,11 @@ tasks: cmds: - go run cmd/ionet/main.go + gen:nebius: + desc: Generate Nebius provider configurations + cmds: + - go run cmd/nebius/main.go + gen:openrouter: desc: Generate openrouter provider configurations cmds: diff --git a/cmd/nebius/main.go b/cmd/nebius/main.go new file mode 100644 index 0000000000000000000000000000000000000000..b6c413877edc707739be7da84efe211ee45c5ed5 --- /dev/null +++ b/cmd/nebius/main.go @@ -0,0 +1,172 @@ +// Package main provides a command-line tool to fetch models from Nebius Token Factory +// and generate a configuration file for the provider. +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log" + "math" + "net/http" + "os" + "slices" + "strconv" + "strings" + "time" + + "charm.land/catwalk/pkg/catwalk" +) + +// Model represents a model from the Nebius Token Factory API. +type Model struct { + ID string `json:"id"` + DisplayName string `json:"name"` + ContextLength int64 `json:"context_length"` + SupportedFeatures []string `json:"supported_features,omitempty"` + Pricing Pricing `json:"pricing"` + Architecture struct { + Modality string `json:"modality"` + } `json:"architecture,omitempty"` +} + +type Pricing struct { + Prompt string `json:"prompt"` + Completion string `json:"completion"` +} + +type ModelsResponse struct { + Data []Model `json:"data"` +} + +func (m Model) hasFeature(featureValue string) bool { + if m.SupportedFeatures != nil { + for _, feature := range m.SupportedFeatures { + if strings.EqualFold(feature, featureValue) { + return true + } + } + } + return false +} + +func fetchNebiusModels() (*ModelsResponse, error) { + client := &http.Client{Timeout: 30 * time.Second} + req, _ := http.NewRequestWithContext( + context.Background(), + "GET", + "https://api.tokenfactory.nebius.com/v1/models?verbose=true", + nil, + ) + req.Header.Set("User-Agent", "Crush-Client/1.0") + + // Read API key from environment variable + apiKey := os.Getenv("NEBIUS_API_KEY") + if apiKey == "" { + return nil, fmt.Errorf("$NEBIUS_API_KEY is required") + } + req.Header.Set("Authorization", "Bearer "+apiKey) + + resp, err := client.Do(req) + if err != nil { + return nil, err //nolint:wrapcheck + } + defer resp.Body.Close() //nolint:errcheck + if resp.StatusCode != 200 { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("status %d: %s", resp.StatusCode, body) + } + var mr ModelsResponse + if err := json.NewDecoder(resp.Body).Decode(&mr); err != nil { + return nil, err //nolint:wrapcheck + } + return &mr, nil +} + +func main() { + modelsResp, err := fetchNebiusModels() + if err != nil { + log.Fatal("Error fetching Nebius models:", err) + } + + nebiusProvider := catwalk.Provider{ + Name: "Nebius Token Factory", + ID: catwalk.InferenceProviderNebius, + APIKey: "$NEBIUS_API_KEY", + APIEndpoint: "https://api.tokenfactory.nebius.com/v1", // this is their default region, eu-north1 + Type: catwalk.TypeOpenAICompat, + DefaultLargeModelID: "Qwen/Qwen3-Coder-30B-A3B-Instruct", + DefaultSmallModelID: "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B", + } + + for _, model := range modelsResp.Data { + // we skip models that don't support tool calling + if !model.hasFeature("tools") { + continue + } + + // Convert pricing from string to float64 + var costPer1MIn, costPer1MOut float64 + + // Handle prompt price conversion + promptPrice, err := strconv.ParseFloat(model.Pricing.Prompt, 64) + if err != nil { + promptPrice = 0.0 + } + costPer1MIn = math.Round(promptPrice*1_000_000*100) / 100 // Round to 2 decimal places + + // Handle completion price conversion + completionPrice, err := strconv.ParseFloat(model.Pricing.Completion, 64) + if err != nil { + completionPrice = 0.0 + } + costPer1MOut = math.Round(completionPrice*1_000_000*100) / 100 // Round to 2 decimal places + + var ( + supportsImages = strings.Contains(strings.ToLower(model.Architecture.Modality), "image") + canReason = model.hasFeature("reasoning") + reasoningLevels []string + defaultReasoning string + ) + if canReason { + reasoningLevels = []string{"low", "medium", "high"} + defaultReasoning = "medium" + } + + m := catwalk.Model{ + ID: model.ID, + Name: model.DisplayName, + CostPer1MIn: costPer1MIn, + CostPer1MOut: costPer1MOut, + CostPer1MInCached: 0, + CostPer1MOutCached: 0, + ContextWindow: model.ContextLength, + DefaultMaxTokens: model.ContextLength / 10, // there is no MaxTokens exposed, so play safe + CanReason: canReason, + ReasoningLevels: reasoningLevels, + DefaultReasoningEffort: defaultReasoning, + SupportsImages: supportsImages, + } + + nebiusProvider.Models = append(nebiusProvider.Models, m) + fmt.Printf("Added model %s with context window %d\n", model.ID, model.ContextLength) + } + + slices.SortFunc(nebiusProvider.Models, func(a catwalk.Model, b catwalk.Model) int { + return strings.Compare(a.Name, b.Name) + }) + + // Save the JSON in internal/providers/configs/nebius.json + data, err := json.MarshalIndent(nebiusProvider, "", " ") + if err != nil { + log.Fatal("Error marshaling Nebius provider:", err) + } + data = append(data, '\n') + + if err := os.WriteFile("internal/providers/configs/nebius.json", data, 0o600); err != nil { + log.Fatal("Error writing Nebius provider config:", err) + } + + fmt.Printf("Generated nebius.json with %d models\n", len(nebiusProvider.Models)) +} diff --git a/internal/providers/configs/nebius.json b/internal/providers/configs/nebius.json new file mode 100644 index 0000000000000000000000000000000000000000..5c9db54924bc8cad9cdf075046ebd10d23e84309 --- /dev/null +++ b/internal/providers/configs/nebius.json @@ -0,0 +1,653 @@ +{ + "name": "Nebius Token Factory", + "id": "nebius", + "api_key": "$NEBIUS_API_KEY", + "api_endpoint": "https://api.tokenfactory.nebius.com/v1", + "type": "openai-compat", + "default_large_model_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "default_small_model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B", + "models": [ + { + "id": "deepseek-ai/DeepSeek-R1-0528", + "name": "DeepSeek-R1-0528", + "cost_per_1m_in": 0.8, + "cost_per_1m_out": 2.4, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 163840, + "default_max_tokens": 16384, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "deepseek-ai/DeepSeek-V3-0324", + "name": "DeepSeek-V3-0324", + "cost_per_1m_in": 0.5, + "cost_per_1m_out": 1.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 163840, + "default_max_tokens": 16384, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "deepseek-ai/DeepSeek-V3.2", + "name": "DeepSeek-V3.2", + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 0.45, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 163000, + "default_max_tokens": 16300, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "deepseek-ai/DeepSeek-V3.2-fast", + "name": "DeepSeek-V3.2 (fast)", + "cost_per_1m_in": 0.4, + "cost_per_1m_out": 2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 8000, + "default_max_tokens": 800, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "zai-org/GLM-4.5", + "name": "GLM-4.5", + "cost_per_1m_in": 0.6, + "cost_per_1m_out": 2.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "zai-org/GLM-4.5-Air", + "name": "GLM-4.5-Air", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 1.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "zai-org/GLM-4.7-FP8", + "name": "GLM-4.7", + "cost_per_1m_in": 0.4, + "cost_per_1m_out": 2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 20275, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "zai-org/GLM-5", + "name": "GLM-5", + "cost_per_1m_in": 1, + "cost_per_1m_out": 3.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 20275, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "NousResearch/Hermes-4-405B", + "name": "Hermes-4-405B", + "cost_per_1m_in": 1, + "cost_per_1m_out": 3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "NousResearch/Hermes-4-70B", + "name": "Hermes-4-70B", + "cost_per_1m_in": 0.13, + "cost_per_1m_out": 0.4, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "PrimeIntellect/INTELLECT-3", + "name": "INTELLECT-3", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 1.1, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "moonshotai/Kimi-K2-Instruct", + "name": "Kimi-K2-Instruct", + "cost_per_1m_in": 0.5, + "cost_per_1m_out": 2.4, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "moonshotai/Kimi-K2-Thinking", + "name": "Kimi-K2-Thinking", + "cost_per_1m_in": 0.6, + "cost_per_1m_out": 2.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "moonshotai/Kimi-K2.5", + "name": "Kimi-K2.5", + "cost_per_1m_in": 0.5, + "cost_per_1m_out": 2.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "moonshotai/Kimi-K2.5-fast", + "name": "Kimi-K2.5 (fast)", + "cost_per_1m_in": 0.5, + "cost_per_1m_out": 2.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 8000, + "default_max_tokens": 800, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "meta-llama/Llama-3.3-70B-Instruct", + "name": "Llama-3.3-70B-Instruct", + "cost_per_1m_in": 0.13, + "cost_per_1m_out": 0.4, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "meta-llama/Llama-3.3-70B-Instruct-fast", + "name": "Llama-3.3-70B-Instruct (fast)", + "cost_per_1m_in": 0.25, + "cost_per_1m_out": 0.75, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "name": "Meta-Llama-3.1-8B-Instruct", + "cost_per_1m_in": 0.02, + "cost_per_1m_out": 0.06, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "meta-llama/Meta-Llama-3.1-8B-Instruct-fast", + "name": "Meta-Llama-3.1-8B-Instruct (fast)", + "cost_per_1m_in": 0.03, + "cost_per_1m_out": 0.09, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "MiniMaxAI/MiniMax-M2.1", + "name": "MiniMax-M2.1", + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 1.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 196608, + "default_max_tokens": 19660, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "MiniMaxAI/MiniMax-M2.5", + "name": "MiniMax-M2.5", + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 1.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 196608, + "default_max_tokens": 19660, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B", + "name": "Nemotron-3-Nano-30B-A3B", + "cost_per_1m_in": 0.06, + "cost_per_1m_out": 0.24, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "nvidia/nemotron-3-super-120b-a12b", + "name": "Nemotron-3-Super-120b-a12b", + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 0.9, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "name": "Qwen3-235B-A22B-Instruct-2507", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 0.6, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "name": "Qwen3-235B-A22B-Thinking-2507", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 0.8, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-235B-A22B-Thinking-2507-fast", + "name": "Qwen3-235B-A22B-Thinking-2507 (fast)", + "cost_per_1m_in": 0.5, + "cost_per_1m_out": 2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 8000, + "default_max_tokens": 800, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "name": "Qwen3-30B-A3B-Instruct-2507", + "cost_per_1m_in": 0.1, + "cost_per_1m_out": 0.3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-30B-A3B-Thinking-2507", + "name": "Qwen3-30B-A3B-Thinking-2507", + "cost_per_1m_in": 0.1, + "cost_per_1m_out": 0.3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-32B", + "name": "Qwen3-32B", + "cost_per_1m_in": 0.1, + "cost_per_1m_out": 0.3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 40960, + "default_max_tokens": 4096, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-32B-fast", + "name": "Qwen3-32B (fast)", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 0.6, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 40960, + "default_max_tokens": 4096, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "name": "Qwen3-Coder-30B-A3B-Instruct", + "cost_per_1m_in": 0.1, + "cost_per_1m_out": 0.3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct", + "name": "Qwen3-Coder-480B-A35B-Instruct", + "cost_per_1m_in": 0.4, + "cost_per_1m_out": 1.8, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-Next-80B-A3B-Thinking", + "name": "Qwen3-Next-80B-A3B-Thinking", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 1.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 12800, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-Next-80B-A3B-Thinking-fast", + "name": "Qwen3-Next-80B-A3B-Thinking (fast)", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 1.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 8000, + "default_max_tokens": 800, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3.5-397B-A17B", + "name": "Qwen3.5-397B-A17B", + "cost_per_1m_in": 0.6, + "cost_per_1m_out": 3.6, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3.5-397B-A17B-fast", + "name": "Qwen3.5-397B-A17B (fast)", + "cost_per_1m_in": 0.6, + "cost_per_1m_out": 3.6, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 8000, + "default_max_tokens": 800, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "openai/gpt-oss-120b", + "name": "gpt-oss-120b", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 0.6, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "openai/gpt-oss-120b-fast", + "name": "gpt-oss-120b (fast)", + "cost_per_1m_in": 0.1, + "cost_per_1m_out": 0.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 8000, + "default_max_tokens": 800, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "openai/gpt-oss-20b", + "name": "gpt-oss-20b", + "cost_per_1m_in": 0.05, + "cost_per_1m_out": 0.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + } + ] +} diff --git a/internal/providers/providers.go b/internal/providers/providers.go index bab8a7db4880ba38679c4e9c7a5193ba4e5a1a98..524b338ca76d08d00b6b913894371c3e770bf279 100644 --- a/internal/providers/providers.go +++ b/internal/providers/providers.go @@ -42,6 +42,9 @@ var zhipuConfig []byte //go:embed configs/zhipu-coding.json var zhipuCodingConfig []byte +//go:embed configs/nebius.json +var nebiusConfig []byte + //go:embed configs/bedrock.json var bedrockConfig []byte @@ -125,6 +128,7 @@ var providerRegistry = []ProviderFunc{ ioNetProvider, qiniuCloudProvider, avianProvider, + nebiusProvider, } // GetAll returns all registered providers. @@ -253,6 +257,10 @@ func qiniuCloudProvider() catwalk.Provider { return loadProviderFromConfig(qiniuCloudConfig) } +func nebiusProvider() catwalk.Provider { + return loadProviderFromConfig(nebiusConfig) +} + func avianProvider() catwalk.Provider { return loadProviderFromConfig(avianConfig) } diff --git a/pkg/catwalk/provider.go b/pkg/catwalk/provider.go index aa9726757712695a28bd4997426f350bc34f9c28..27c0bc6680211d9a376ad00f70275b70553987fc 100644 --- a/pkg/catwalk/provider.go +++ b/pkg/catwalk/provider.go @@ -48,6 +48,7 @@ const ( InferenceProviderIoNet InferenceProvider = "ionet" InferenceProviderQiniuCloud InferenceProvider = "qiniucloud" InferenceProviderAvian InferenceProvider = "avian" + InferenceProviderNebius InferenceProvider = "nebius" ) // Provider represents an AI provider configuration. @@ -119,6 +120,7 @@ func KnownProviders() []InferenceProvider { InferenceProviderMiniMaxChina, InferenceProviderQiniuCloud, InferenceProviderAvian, + InferenceProviderNebius, } }