diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 45c8b7acdbd9b32c30e3fbbf253dc04993d5bf8e..3d6d0a6527940b25b16c60e48f8aba3ffed7f8d7 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -24,6 +24,7 @@ jobs: go run ./cmd/ionet/main.go go run ./cmd/openrouter/main.go go run ./cmd/synthetic/main.go + go run ./cmd/venice/main.go go run ./cmd/vercel/main.go - uses: stefanzweifel/git-auto-commit-action@04702edda442b2e678b25b537cec683a1493fcb9 # v5 with: diff --git a/Taskfile.yaml b/Taskfile.yaml index 1ed85a75e3fdd4a63a09daaf7912e8a5ce4db6b7..fe1035f8be2abf4b2c5f63b72dbfce8dd5759131 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -63,6 +63,7 @@ tasks: - task: gen:ionet - task: gen:openrouter - task: gen:synthetic + - task: gen:venice - task: gen:vercel gen:aihubmix: @@ -95,6 +96,11 @@ tasks: cmds: - go run cmd/synthetic/main.go + gen:venice: + desc: Generate venice provider configurations + cmds: + - go run cmd/venice/main.go + gen:vercel: desc: Generate vercel provider configurations cmds:
diff --git a/cmd/venice/main.go b/cmd/venice/main.go
new file mode 100644
index 0000000000000000000000000000000000000000..d875bcd07452b64bbf4388544593f5397c467f8f
--- /dev/null
+++ b/cmd/venice/main.go
@@ -0,0 +1,294 @@
+// Package main provides a command-line tool to fetch models from Venice
+// and generate a configuration file for the provider.
+package main
+
+import (
+	"cmp"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"math"
+	"net/http"
+	"os"
+	"slices"
+	"strings"
+	"time"
+
+	"charm.land/catwalk/pkg/catwalk"
+)
+
+const (
+	// outputPath is the file the generated provider configuration is written
+	// to; the path is relative, so it resolves against the working directory.
+	outputPath = "internal/providers/configs/venice.json"
+
+	// maxErrorBodyBytes caps how much of a non-200 response body is copied
+	// into the returned error message.
+	maxErrorBodyBytes = 4 << 10
+)
+
+// ModelsResponse is the top-level payload decoded from the models endpoint.
+type ModelsResponse struct {
+	Data []VeniceModel `json:"data"`
+}
+
+// VeniceModel is a single entry of the models listing.
+type VeniceModel struct {
+	Created   int64           `json:"created"`
+	ID        string          `json:"id"`
+	ModelSpec VeniceModelSpec `json:"model_spec"`
+	Object    string          `json:"object"`
+	OwnedBy   string          `json:"owned_by"`
+	Type      string          `json:"type"`
+}
+
+// VeniceModelSpec carries the per-model details this tool reads to build a
+// catwalk.Model.
+type VeniceModelSpec struct {
+	AvailableContextTokens int64                   `json:"availableContextTokens"`
+	Capabilities           VeniceModelCapabilities `json:"capabilities"`
+	Constraints            VeniceModelConstraints  `json:"constraints"`
+	Name                   string                  `json:"name"`
+	ModelSource            string                  `json:"modelSource"`
+	Offline                bool                    `json:"offline"`
+	Pricing                VeniceModelPricing      `json:"pricing"`
+	Traits                 []string                `json:"traits"`
+	Beta                   bool                    `json:"beta"`
+}
+
+// VeniceModelCapabilities lists the feature flags reported for a model.
+type VeniceModelCapabilities struct {
+	OptimizedForCode        bool   `json:"optimizedForCode"`
+	Quantization            string `json:"quantization"`
+	SupportsFunctionCalling bool   `json:"supportsFunctionCalling"`
+	SupportsReasoning       bool   `json:"supportsReasoning"`
+	SupportsResponseSchema  bool   `json:"supportsResponseSchema"`
+	SupportsVision          bool   `json:"supportsVision"`
+	SupportsWebSearch       bool   `json:"supportsWebSearch"`
+	SupportsLogProbs        bool   `json:"supportsLogProbs"`
+}
+
+// VeniceModelConstraints holds optional sampling defaults. A nil field means
+// the response omitted that constraint or set it to null.
+type VeniceModelConstraints struct {
+	Temperature *VeniceDefaultFloat `json:"temperature"`
+	TopP        *VeniceDefaultFloat `json:"top_p"`
+}
+
+// VeniceDefaultFloat wraps the default value of a numeric constraint.
+type VeniceDefaultFloat struct {
+	Default float64 `json:"default"`
+}
+
+// VeniceModelPricing holds the input and output prices of a model.
+type VeniceModelPricing struct {
+	Input  VeniceModelPricingValue `json:"input"`
+	Output VeniceModelPricingValue `json:"output"`
+}
+
+// VeniceModelPricingValue holds a price as the usd and diem amounts decoded
+// from the response. Only USD is read by this tool.
+type VeniceModelPricingValue struct {
+	USD  float64 `json:"usd"`
+	Diem float64 `json:"diem"`
+}
+
+// fetchVeniceModels requests apiEndpoint + "/models" and decodes the response.
+//
+// When VENICE_API_KEY holds a non-empty value that does not start with "$",
+// it is sent as a bearer token; otherwise the request carries no credentials.
+// A non-200 response is returned as an error containing the status code and
+// the beginning of the response body.
+func fetchVeniceModels(apiEndpoint string) (*ModelsResponse, error) {
+	client := &http.Client{Timeout: 30 * time.Second}
+	url := strings.TrimRight(apiEndpoint, "/") + "/models"
+
+	// Request construction fails on a malformed endpoint and then returns a
+	// nil request, so the error must be checked before req is touched.
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, url, nil)
+	if err != nil {
+		return nil, fmt.Errorf("building request for %q: %w", url, err)
+	}
+	req.Header.Set("User-Agent", "Crush-Client/1.0")
+
+	// NOTE(review): a value starting with "$" is presumably an unexpanded
+	// placeholder rather than a real key, so it is not sent.
+	if apiKey := strings.TrimSpace(os.Getenv("VENICE_API_KEY")); apiKey != "" && !strings.HasPrefix(apiKey, "$") {
+		req.Header.Set("Authorization", "Bearer "+apiKey)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err //nolint:wrapcheck
+	}
+	defer resp.Body.Close() //nolint:errcheck
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: the body only enriches the error message, and the read
+		// is capped so an oversized error page cannot bloat it.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBodyBytes))
+		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, body)
+	}
+
+	var mr ModelsResponse
+	if err := json.NewDecoder(resp.Body).Decode(&mr); err != nil {
+		return nil, err //nolint:wrapcheck
+	}
+	return &mr, nil
+}
+
+// minInt64 returns the smaller of a and b.
+func minInt64(a, b int64) int64 {
+	return min(a, b)
+}
+
+// maxInt64 returns the larger of a and b.
+func maxInt64(a, b int64) int64 {
+	return max(a, b)
+}
+
+// compareModels orders models by combined input and output price, breaking
+// ties by context window. The result is negative when a orders before b,
+// positive when a orders after b, and zero when both keys are equal.
+func compareModels(a, b catwalk.Model) int {
+	if c := cmp.Compare(a.CostPer1MIn+a.CostPer1MOut, b.CostPer1MIn+b.CostPer1MOut); c != 0 {
+		return c
+	}
+	return cmp.Compare(a.ContextWindow, b.ContextWindow)
+}
+
+// bestLargeModelID returns the ID of the most expensive model, preferring the
+// larger context window on equal price. If several models tie on both keys
+// the earliest one in models wins. It returns "" when models is empty.
+func bestLargeModelID(models []catwalk.Model) string {
+	if len(models) == 0 {
+		return ""
+	}
+	return slices.MaxFunc(models, compareModels).ID
+}
+
+// bestSmallModelID returns the ID of the cheapest model, preferring the
+// smaller context window on equal price. If several models tie on both keys
+// the earliest one in models wins. It returns "" when models is empty.
+func bestSmallModelID(models []catwalk.Model) string {
+	if len(models) == 0 {
+		return ""
+	}
+	return slices.MinFunc(models, compareModels).ID
+}
+
+// toCatwalkModel converts a Venice model into its catwalk representation.
+// The boolean result is false for models that are skipped: anything that is
+// not a text model, is offline or in beta, lacks function calling, or reports
+// a non-positive context window.
+func toCatwalkModel(model VeniceModel) (catwalk.Model, bool) {
+	spec := model.ModelSpec
+	caps := spec.Capabilities
+	switch {
+	case !strings.EqualFold(model.Type, "text"),
+		spec.Offline,
+		spec.Beta,
+		!caps.SupportsFunctionCalling,
+		spec.AvailableContextTokens <= 0:
+		return catwalk.Model{}, false
+	}
+
+	// Default output budget: a quarter of the context window, kept within
+	// [2048, 32768].
+	contextWindow := spec.AvailableContextTokens
+	defaultMaxTokens := maxInt64(minInt64(contextWindow/4, 32768), 2048)
+
+	var reasoningLevels []string
+	var defaultReasoning string
+	if caps.SupportsReasoning {
+		reasoningLevels = []string{"low", "medium", "high"}
+		defaultReasoning = "medium"
+	}
+
+	// Copy each default into a local so the option does not alias the
+	// decoded response.
+	options := catwalk.ModelOptions{}
+	if t := spec.Constraints.Temperature; t != nil && !math.IsNaN(t.Default) {
+		v := t.Default
+		options.Temperature = &v
+	}
+	if p := spec.Constraints.TopP; p != nil && !math.IsNaN(p.Default) {
+		v := p.Default
+		options.TopP = &v
+	}
+
+	return catwalk.Model{
+		ID:                     model.ID,
+		Name:                   spec.Name,
+		CostPer1MIn:            spec.Pricing.Input.USD,
+		CostPer1MOut:           spec.Pricing.Output.USD,
+		CostPer1MInCached:      0,
+		CostPer1MOutCached:     0,
+		ContextWindow:          contextWindow,
+		DefaultMaxTokens:       defaultMaxTokens,
+		CanReason:              caps.SupportsReasoning,
+		ReasoningLevels:        reasoningLevels,
+		DefaultReasoningEffort: defaultReasoning,
+		SupportsImages:         caps.SupportsVision,
+		Options:                options,
+	}, true
+}
+
+// main fetches the Venice model list, converts the usable models, picks the
+// default large and small models, and writes the provider configuration to
+// outputPath. Any failure terminates the program through log.Fatalf.
+func main() {
+	veniceProvider := catwalk.Provider{
+		Name:        "Venice AI",
+		ID:          catwalk.InferenceProviderVenice,
+		APIKey:      "$VENICE_API_KEY",
+		APIEndpoint: "https://api.venice.ai/api/v1",
+		Type:        catwalk.TypeOpenAICompat,
+		Models:      []catwalk.Model{},
+	}
+
+	modelsResp, err := fetchVeniceModels(veniceProvider.APIEndpoint)
+	if err != nil {
+		log.Fatalf("Error fetching Venice models: %v", err)
+	}
+
+	var codeOptimizedModels []catwalk.Model
+	for _, model := range modelsResp.Data {
+		m, ok := toCatwalkModel(model)
+		if !ok {
+			continue
+		}
+		veniceProvider.Models = append(veniceProvider.Models, m)
+		if model.ModelSpec.Capabilities.OptimizedForCode {
+			codeOptimizedModels = append(codeOptimizedModels, m)
+		}
+	}
+
+	// Defaults are chosen among the code-optimized models when there are any,
+	// and among all listed models otherwise.
+	candidateModels := veniceProvider.Models
+	if len(codeOptimizedModels) > 0 {
+		candidateModels = codeOptimizedModels
+	}
+	veniceProvider.DefaultLargeModelID = bestLargeModelID(candidateModels)
+	veniceProvider.DefaultSmallModelID = bestSmallModelID(candidateModels)
+
+	slices.SortFunc(veniceProvider.Models, func(a, b catwalk.Model) int {
+		return strings.Compare(a.Name, b.Name)
+	})
+
+	// NOTE(review): the indent string is reproduced as it appears in the
+	// reviewed source; confirm it matches the other generators.
+	data, err := json.MarshalIndent(veniceProvider, "", " ")
+	if err != nil {
+		log.Fatalf("Error marshaling Venice provider: %v", err)
+	}
+
+	if err := os.WriteFile(outputPath, data, 0o600); err != nil {
+		log.Fatalf("Error writing Venice provider config: %v", err)
+	}
+
+	fmt.Printf("Generated venice.json with %d models\n", len(veniceProvider.Models))
+}
diff --git a/internal/providers/configs/venice.json b/internal/providers/configs/venice.json index 145d965d1dab819e4b99663b22fda51865a32fb4..f6457af0687885b591d6a4dd1ef3fe48c7d113b8 100644 --- a/internal/providers/configs/venice.json +++ b/internal/providers/configs/venice.json @@ -1,47 +1,405 @@ { "name": "Venice AI", "id": "venice", - "type": "openai-compat", "api_key": "$VENICE_API_KEY", "api_endpoint": "https://api.venice.ai/api/v1", - "default_large_model_id": "qwen3-235b:strip_thinking_response=true", - "default_small_model_id": "mistral-31-24b", + "type": "openai-compat", + "default_large_model_id": "claude-opus-4-6", + "default_small_model_id": "qwen3-5-35b-a3b", "models": [ { - "id": "qwen3-235b:strip_thinking_response=true", - "name": "Venice Large (qwen3-235b)", - "cost_per_1m_in": 1.5, - "cost_per_1m_out": 6, + "id": "claude-opus-4-5", + "name": "Claude Opus 4.5", + "cost_per_1m_in": 6, + "cost_per_1m_out": 30, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 131072, - "default_max_tokens": 50000, + "context_window": 198000, + "default_max_tokens": 32768, "can_reason": true, - "supports_attachments": false + "reasoning_levels": [ + "low", + "medium", + "high" + ], +
"default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} }, { - "id": "qwen3-4b:strip_thinking_response=true", - "name": "Venice Small (qwen3-4b)", - "cost_per_1m_in": 0.15, - "cost_per_1m_out": 0.6, + "id": "claude-opus-4-6", + "name": "Claude Opus 4.6", + "cost_per_1m_in": 6, + "cost_per_1m_out": 30, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 32768, - "default_max_tokens": 25000, + "context_window": 1000000, + "default_max_tokens": 32768, "can_reason": true, - "supports_attachments": false + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} }, { - "id": "mistral-31-24b", - "name": "Venice Medium (mistral-31-24b)", - "cost_per_1m_in": 0.5, - "cost_per_1m_out": 2, + "id": "claude-sonnet-4-5", + "name": "Claude Sonnet 4.5", + "cost_per_1m_in": 3.75, + "cost_per_1m_out": 18.75, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 198000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} + }, + { + "id": "claude-sonnet-4-6", + "name": "Claude Sonnet 4.6", + "cost_per_1m_in": 3.6, + "cost_per_1m_out": 18, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} + }, + { + "id": "zai-org-glm-4.6", + "name": "GLM 4.6", + "cost_per_1m_in": 0.85, + "cost_per_1m_out": 2.75, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 198000, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "zai-org-glm-4.7", + "name": "GLM 
4.7", + "cost_per_1m_in": 0.55, + "cost_per_1m_out": 2.65, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 198000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "zai-org-glm-4.7-flash", + "name": "GLM 4.7 Flash", + "cost_per_1m_in": 0.125, + "cost_per_1m_out": 0.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 32000, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "olafangensan-glm-4.7-flash-heretic", + "name": "GLM 4.7 Flash Heretic", + "cost_per_1m_in": 0.14, + "cost_per_1m_out": 0.8, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 131072, - "default_max_tokens": 50000, + "context_window": 128000, + "default_max_tokens": 32000, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "zai-org-glm-5", + "name": "GLM 5", + "cost_per_1m_in": 1, + "cost_per_1m_out": 3.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 198000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "openai-gpt-4o-2024-11-20", + "name": "GPT-4o", + "cost_per_1m_in": 3.125, + "cost_per_1m_out": 12.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 32000, "can_reason": false, - "supports_attachments": true + "supports_attachments": true, + "options": {} + }, + { + "id": 
"openai-gpt-4o-mini-2024-07-18", + "name": "GPT-4o Mini", + "cost_per_1m_in": 0.1875, + "cost_per_1m_out": 0.75, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 32000, + "can_reason": false, + "supports_attachments": true, + "options": {} + }, + { + "id": "openai-gpt-52", + "name": "GPT-5.2", + "cost_per_1m_in": 2.19, + "cost_per_1m_out": 17.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "openai-gpt-52-codex", + "name": "GPT-5.2 Codex", + "cost_per_1m_in": 2.19, + "cost_per_1m_out": 17.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} + }, + { + "id": "openai-gpt-53-codex", + "name": "GPT-5.3 Codex", + "cost_per_1m_in": 2.19, + "cost_per_1m_out": 17.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 400000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} + }, + { + "id": "gemini-3-flash-preview", + "name": "Gemini 3 Flash Preview", + "cost_per_1m_in": 0.7, + "cost_per_1m_out": 3.75, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} + }, + { + "id": "gemini-3-pro-preview", + "name": "Gemini 
3 Pro Preview", + "cost_per_1m_in": 2.5, + "cost_per_1m_out": 15, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 198000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} + }, + { + "id": "gemini-3-1-pro-preview", + "name": "Gemini 3.1 Pro Preview", + "cost_per_1m_in": 2.5, + "cost_per_1m_out": 15, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} + }, + { + "id": "google-gemma-3-27b-it", + "name": "Google Gemma 3 27B Instruct", + "cost_per_1m_in": 0.12, + "cost_per_1m_out": 0.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 198000, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": true, + "options": {} + }, + { + "id": "grok-41-fast", + "name": "Grok 4.1 Fast", + "cost_per_1m_in": 0.25, + "cost_per_1m_out": 0.625, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 1000000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} + }, + { + "id": "grok-code-fast-1", + "name": "Grok Code Fast 1", + "cost_per_1m_in": 0.25, + "cost_per_1m_out": 1.87, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "kimi-k2-thinking", + "name": "Kimi K2 Thinking", + 
"cost_per_1m_in": 0.75, + "cost_per_1m_out": 3.2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "kimi-k2-5", + "name": "Kimi K2.5", + "cost_per_1m_in": 0.75, + "cost_per_1m_out": 3.75, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": {} }, { "id": "llama-3.2-3b", @@ -50,10 +408,11 @@ "cost_per_1m_out": 0.6, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 131072, - "default_max_tokens": 25000, + "context_window": 128000, + "default_max_tokens": 32000, "can_reason": false, - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "llama-3.3-70b", @@ -62,10 +421,213 @@ "cost_per_1m_out": 2.8, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0, - "context_window": 65536, + "context_window": 128000, + "default_max_tokens": 32000, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "minimax-m21", + "name": "MiniMax M2.1", + "cost_per_1m_in": 0.4, + "cost_per_1m_out": 1.6, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 198000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "minimax-m25", + "name": "MiniMax M2.5", + "cost_per_1m_in": 0.4, + "cost_per_1m_out": 1.6, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 198000, + "default_max_tokens": 
32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "nvidia-nemotron-3-nano-30b-a3b", + "name": "NVIDIA Nemotron 3 Nano 30B", + "cost_per_1m_in": 0.075, + "cost_per_1m_out": 0.3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 32000, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "openai-gpt-oss-120b", + "name": "OpenAI GPT OSS 120B", + "cost_per_1m_in": 0.07, + "cost_per_1m_out": 0.3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 32000, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "qwen3-235b-a22b-instruct-2507", + "name": "Qwen 3 235B A22B Instruct 2507", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 0.75, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, "default_max_tokens": 32000, "can_reason": false, - "supports_attachments": false + "supports_attachments": false, + "options": {} + }, + { + "id": "qwen3-235b-a22b-thinking-2507", + "name": "Qwen 3 235B A22B Thinking 2507", + "cost_per_1m_in": 0.45, + "cost_per_1m_out": 3.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 32000, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "qwen3-coder-480b-a35b-instruct-turbo", + "name": "Qwen 3 Coder 480B Turbo", + "cost_per_1m_in": 0.35, + "cost_per_1m_out": 1.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": 
"qwen3-coder-480b-a35b-instruct", + "name": "Qwen 3 Coder 480b", + "cost_per_1m_in": 0.75, + "cost_per_1m_out": 3, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "qwen3-next-80b", + "name": "Qwen 3 Next 80b", + "cost_per_1m_in": 0.35, + "cost_per_1m_out": 1.9, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "qwen3-5-35b-a3b", + "name": "Qwen 3.5 35B A3B", + "cost_per_1m_in": 0.3125, + "cost_per_1m_out": 1.25, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": true, + "options": { + "temperature": 1, + "top_p": 0.95 + } + }, + { + "id": "qwen3-vl-235b-a22b", + "name": "Qwen3 VL 235B", + "cost_per_1m_in": 0.25, + "cost_per_1m_out": 1.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": true, + "options": {} + }, + { + "id": "mistral-31-24b", + "name": "Venice Medium", + "cost_per_1m_in": 0.5, + "cost_per_1m_out": 2, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 32000, + "can_reason": false, + "supports_attachments": true, + "options": {} + }, + { + "id": "qwen3-4b", + "name": "Venice Small", + "cost_per_1m_in": 0.05, + "cost_per_1m_out": 0.15, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 32000, + "default_max_tokens": 8000, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + 
"default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} } ] -} +} \ No newline at end of file