diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index c4e34451f1af4e1ad5a7a634c8cf8760c2094ead..382347530897131b48bd7ef0c2bb7722aba7191e 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -25,6 +25,9 @@ jobs: - run: go run ./cmd/avian/main.go continue-on-error: true + - run: go run ./cmd/chutes/main.go + continue-on-error: true + - run: go run ./cmd/cortecs/main.go continue-on-error: true diff --git a/Taskfile.yaml b/Taskfile.yaml index 1fde70ae786efdae23725505ed3942668850658e..4f53ebd31914a6d59569aec1fa1ecf178bc3e657 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -59,6 +59,7 @@ tasks: cmds: - task: gen:aihubmix - task: gen:avian + - task: gen:chutes - task: gen:copilot - task: gen:cortecs - task: gen:huggingface @@ -79,6 +80,11 @@ tasks: cmds: - go run cmd/avian/main.go + gen:chutes: + desc: Generate chutes provider configurations + cmds: + - go run cmd/chutes/main.go + gen:copilot: desc: Generate copilot provider configurations cmds: diff --git a/cmd/chutes/main.go b/cmd/chutes/main.go new file mode 100644 index 0000000000000000000000000000000000000000..7eca645cc5df5b0e6cec526c66d736380913b950 --- /dev/null +++ b/cmd/chutes/main.go @@ -0,0 +1,155 @@ +// Package main provides a command-line tool to fetch models from Chutes +// and generate a configuration file for the provider. +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log" + "math" + "net/http" + "os" + "slices" + "strings" + "time" + + "charm.land/catwalk/pkg/catwalk" +) + +type ChutesModel struct { + ID string `json:"id"` + ContextLength int64 `json:"context_length"` + MaxOutputLength int64 `json:"max_output_length"` + InputModalities []string `json:"input_modalities"` + OutputModalities []string `json:"output_modalities"` + SupportedFeatures []string `json:"supported_features"` + Pricing Pricing `json:"pricing"` +} + +type Pricing struct { + Prompt float64 `json:"prompt"` + Completion float64 `json:"completion"` + InputCacheRead float64 `json:"input_cache_read"` +} + +type ModelsResponse struct { + Data []ChutesModel `json:"data"` +} + +func roundCost(v float64) float64 { + return math.Round(v*1e5) / 1e5 +} + +func hasFeature(m ChutesModel, feature string) bool { + return slices.Contains(m.SupportedFeatures, feature) +} + +func hasModality(m ChutesModel, modality string) bool { + return slices.Contains(m.InputModalities, modality) +} + +func modelDisplayName(id string) string { + return strings.SplitN(id, "/", 2)[1] +} + +func main() { + client := &http.Client{Timeout: 30 * time.Second} + req, _ := http.NewRequestWithContext( + context.Background(), + "GET", + "https://llm.chutes.ai/v1/models", + nil, + ) + req.Header.Set("User-Agent", "Crush-Client/1.0") + + resp, err := client.Do(req) + if err != nil { + log.Fatal("Error fetching Chutes models:", err) + } + defer resp.Body.Close() //nolint:errcheck + + body, err := io.ReadAll(resp.Body) + if err != nil { + log.Fatal("Error reading Chutes models response:", err) + } + + if resp.StatusCode != http.StatusOK { + log.Fatalf("Error fetching Chutes models: status %d: %s", resp.StatusCode, body) + } + + _ = os.MkdirAll("tmp", 0o700) + _ = os.WriteFile("tmp/chutes-response.json", body, 0o600) + + var modelsResp ModelsResponse + if err := json.Unmarshal(body, &modelsResp); err != nil { + log.Fatal("Error parsing Chutes models response:", err) + } + + var models []catwalk.Model + for _, m := range modelsResp.Data { + if !hasFeature(m, "tools") { + continue + } + if !hasModality(m, "text") { + continue + } + if !slices.Contains(m.OutputModalities, "text") { + continue + } + + var ( + canReason = hasFeature(m, "reasoning") + reasoningLevels []string + defaultReasoning string + ) + if canReason { + reasoningLevels = []string{"low", "medium", "high"} + defaultReasoning = "medium" + } + + model := catwalk.Model{ + ID: m.ID, + Name: modelDisplayName(m.ID), + CostPer1MIn: roundCost(m.Pricing.Prompt), + CostPer1MOut: roundCost(m.Pricing.Completion), + CostPer1MInCached: roundCost(m.Pricing.InputCacheRead), + ContextWindow: m.ContextLength, + DefaultMaxTokens: m.MaxOutputLength, + CanReason: canReason, + DefaultReasoningEffort: defaultReasoning, + ReasoningLevels: reasoningLevels, + SupportsImages: hasModality(m, "image"), + } + models = append(models, model) + fmt.Printf("Added model %s\n", m.ID) + } + + slices.SortFunc(models, func(a, b catwalk.Model) int { + return strings.Compare(a.Name, b.Name) + }) + + chutesProvider := catwalk.Provider{ + Name: "Chutes", + ID: "chutes", + APIKey: "$CHUTES_API_KEY", + APIEndpoint: "https://llm.chutes.ai/v1", + Type: catwalk.TypeOpenAICompat, + DefaultLargeModelID: "zai-org/GLM-5-TEE", + DefaultSmallModelID: "zai-org/GLM-5-Turbo", + Models: models, + } + + data, err := json.MarshalIndent(chutesProvider, "", " ") + if err != nil { + log.Fatal("Error marshaling Chutes provider:", err) + } + data = append(data, '\n') + + if err := os.WriteFile("./internal/providers/configs/chutes.json", data, 0o600); err != nil { + log.Fatal("Error writing Chutes provider config:", err) + } + + fmt.Println("Chutes provider configuration generated successfully!") +} diff --git a/internal/providers/configs/chutes.json b/internal/providers/configs/chutes.json index ccaff676ae0f423a0015e0b29203659ac52045b6..f836e90ade1e248112347e3bab8321452cd2e2b4 100644 --- a/internal/providers/configs/chutes.json +++ b/internal/providers/configs/chutes.json @@ -1,19 +1,33 @@ { "name": "Chutes", "id": "chutes", - "type": "openai-compat", "api_key": "$CHUTES_API_KEY", "api_endpoint": "https://llm.chutes.ai/v1", - "default_large_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", - "default_small_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "type": "openai-compat", + "default_large_model_id": "zai-org/GLM-5-TEE", + "default_small_model_id": "zai-org/GLM-5-Turbo", "models": [ { - "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", - "name": "Qwen3 Coder 480B A35B Instruct (FP8)", - "cost_per_1m_in": 0.2, - "cost_per_1m_out": 0.8, - "context_window": 262000, + "id": "NousResearch/DeepHermes-3-Mistral-24B-Preview", + "name": "DeepHermes-3-Mistral-24B-Preview", + "cost_per_1m_in": 0.0245, + "cost_per_1m_out": 0.0978, + "cost_per_1m_in_cached": 0.01225, + "cost_per_1m_out_cached": 0, + "context_window": 32768, "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528-TEE", + "name": "DeepSeek-R1-0528-TEE", + "cost_per_1m_in": 0.45, + "cost_per_1m_out": 2.15, + "cost_per_1m_in_cached": 0.225, + "cost_per_1m_out_cached": 0, + "context_window": 163840, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -21,15 +35,17 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": true + "supports_attachments": false }, { - "id": "zai-org/GLM-4.5-FP8", - "name": "GLM 4.5 FP8", - "cost_per_1m_in": 0.0, - "cost_per_1m_out": 0.0, - "context_window": 98000, - "default_max_tokens": 32768, + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "name": "DeepSeek-R1-Distill-Llama-70B", + "cost_per_1m_in": 0.0272, + "cost_per_1m_out": 0.1087, + "cost_per_1m_in_cached": 0.0136, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 131072, "can_reason": true, "reasoning_levels": [ "low", @@ -37,15 +53,17 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": true + "supports_attachments": false }, { - "id": "moonshotai/Kimi-K2-Instruct-75k", - "name": "Kimi K2 Instruct", - "cost_per_1m_in": 0.15, - "cost_per_1m_out": 0.59, - "context_window": 75000, - "default_max_tokens": 32768, + "id": "tngtech/DeepSeek-TNG-R1T2-Chimera-TEE", + "name": "DeepSeek-TNG-R1T2-Chimera-TEE", + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 1.1, + "cost_per_1m_in_cached": 0.15, + "cost_per_1m_out_cached": 0, + "context_window": 163840, + "default_max_tokens": 163840, "can_reason": true, "reasoning_levels": [ "low", @@ -53,15 +71,29 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": true + "supports_attachments": false }, { - "id": "deepseek-ai/DeepSeek-R1-0528", - "name": "DeepSeek R1 0528", - "cost_per_1m_in": 0.18, - "cost_per_1m_out": 0.72, - "context_window": 75000, - "default_max_tokens": 32768, + "id": "deepseek-ai/DeepSeek-V3-0324-TEE", + "name": "DeepSeek-V3-0324-TEE", + "cost_per_1m_in": 0.25, + "cost_per_1m_out": 1, + "cost_per_1m_in_cached": 0.125, + "cost_per_1m_out_cached": 0, + "context_window": 163840, + "default_max_tokens": 65536, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "deepseek-ai/DeepSeek-V3.1-TEE", + "name": "DeepSeek-V3.1-TEE", + "cost_per_1m_in": 0.27, + "cost_per_1m_out": 1, + "cost_per_1m_in_cached": 0.135, + "cost_per_1m_out_cached": 0, + "context_window": 163840, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -69,37 +101,53 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": true + "supports_attachments": false }, { - "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", - "name": "DeepSeek R1 0528 Qwen3 8B", - "cost_per_1m_in": 0.02, - "cost_per_1m_out": 0.07, - "context_window": 32768, - "default_max_tokens": 8192, - "can_reason": false, - "has_reasoning_efforts": false, - "supports_attachments": true + "id": "deepseek-ai/DeepSeek-V3.1-Terminus-TEE", + "name": "DeepSeek-V3.1-Terminus-TEE", + "cost_per_1m_in": 0.27, + "cost_per_1m_out": 1, + "cost_per_1m_in_cached": 0.135, + "cost_per_1m_out_cached": 0, + "context_window": 163840, + "default_max_tokens": 65536, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false }, { - "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", - "name": "DeepSeek R1 Distill Llama 70B", - "cost_per_1m_in": 0.03, - "cost_per_1m_out": 0.14, - "context_window": 65536, - "default_max_tokens": 8192, - "can_reason": false, - "has_reasoning_efforts": false, - "supports_attachments": true + "id": "deepseek-ai/DeepSeek-V3.2-TEE", + "name": "DeepSeek-V3.2-TEE", + "cost_per_1m_in": 0.28, + "cost_per_1m_out": 0.42, + "cost_per_1m_in_cached": 0.14, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 65536, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false }, { - "id": "tngtech/DeepSeek-R1T-Chimera", - "name": "DeepSeek R1T Chimera", - "cost_per_1m_in": 0.18, - "cost_per_1m_out": 0.72, - "context_window": 131072, - "default_max_tokens": 32768, + "id": "zai-org/GLM-4.6-FP8", + "name": "GLM-4.6-FP8", + "cost_per_1m_in": 0.2989, + "cost_per_1m_out": 1.1957, + "cost_per_1m_in_cached": 0.14945, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 65535, "can_reason": true, "reasoning_levels": [ "low", @@ -107,14 +155,16 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": true + "supports_attachments": false }, { - "id": "tngtech/DeepSeek-TNG-R1T2-Chimera", - "name": "DeepSeek TNG R1T2 Chimera", - "cost_per_1m_in": 0.2, - "cost_per_1m_out": 0.8, - "context_window": 262144, + "id": "zai-org/GLM-4.6-TEE", + "name": "GLM-4.6-TEE", + "cost_per_1m_in": 0.4, + "cost_per_1m_out": 1.7, + "cost_per_1m_in_cached": 0.2, + "cost_per_1m_out_cached": 0, + "context_window": 202752, "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ @@ -123,15 +173,17 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": true + "supports_attachments": false }, { - "id": "deepseek-ai/DeepSeek-V3-0324", - "name": "DeepSeek V3 0324", - "cost_per_1m_in": 0.18, - "cost_per_1m_out": 0.72, - "context_window": 75000, - "default_max_tokens": 32768, + "id": "zai-org/GLM-4.6V", + "name": "GLM-4.6V", + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 0.9, + "cost_per_1m_in_cached": 0.15, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -142,23 +194,32 @@ "supports_attachments": true }, { - "id": "chutesai/Devstral-Small-2505", - "name": "Devstral Small 2505", - "cost_per_1m_in": 0.02, - "cost_per_1m_out": 0.08, - "context_window": 32768, - "default_max_tokens": 8192, - "can_reason": false, - "has_reasoning_efforts": false, - "supports_attachments": true + "id": "zai-org/GLM-4.7-FP8", + "name": "GLM-4.7-FP8", + "cost_per_1m_in": 0.2989, + "cost_per_1m_out": 1.1957, + "cost_per_1m_in_cached": 0.14945, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 65535, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false }, { - "id": "zai-org/GLM-4.5-Air", - "name": "GLM 4.5 Air", - "cost_per_1m_in": 0.0, - "cost_per_1m_out": 0.0, - "context_window": 131072, - "default_max_tokens": 32768, + "id": "zai-org/GLM-4.7-TEE", + "name": "GLM-4.7-TEE", + "cost_per_1m_in": 0.39, + "cost_per_1m_out": 1.75, + "cost_per_1m_in_cached": 0.195, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 65535, "can_reason": true, "reasoning_levels": [ "low", @@ -166,15 +227,71 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": true + "supports_attachments": false }, { - "id": "openai/gpt-oss-120b", - "name": "GPT OSS 120B", - "cost_per_1m_in": 0.1, - "cost_per_1m_out": 0.41, - "context_window": 131072, - "default_max_tokens": 32768, + "id": "zai-org/GLM-5-TEE", + "name": "GLM-5-TEE", + "cost_per_1m_in": 0.95, + "cost_per_1m_out": 3.15, + "cost_per_1m_in_cached": 0.475, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 65535, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "zai-org/GLM-5-Turbo", + "name": "GLM-5-Turbo", + "cost_per_1m_in": 0.4891, + "cost_per_1m_out": 1.9565, + "cost_per_1m_in_cached": 0.24455, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 65535, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "NousResearch/Hermes-4-14B", + "name": "Hermes-4-14B", + "cost_per_1m_in": 0.0136, + "cost_per_1m_out": 0.0543, + "cost_per_1m_in_cached": 0.0068, + "cost_per_1m_out_cached": 0, + "context_window": 40960, + "default_max_tokens": 40960, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "moonshotai/Kimi-K2.5-TEE", + "name": "Kimi-K2.5-TEE", + "cost_per_1m_in": 0.3827, + "cost_per_1m_out": 1.72, + "cost_per_1m_in_cached": 0.19135, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 65535, "can_reason": true, "reasoning_levels": [ "low", @@ -185,45 +302,140 @@ "supports_attachments": true }, { - "id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", - "name": "Mistral Small 3.2 24B Instruct 2506", - "cost_per_1m_in": 0.02, - "cost_per_1m_out": 0.08, + "id": "XiaomiMiMo/MiMo-V2-Flash-TEE", + "name": "MiMo-V2-Flash-TEE", + "cost_per_1m_in": 0.09, + "cost_per_1m_out": 0.29, + "cost_per_1m_in_cached": 0.045, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 65536, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "MiniMaxAI/MiniMax-M2.5-TEE", + "name": "MiniMax-M2.5-TEE", + "cost_per_1m_in": 0.118, + "cost_per_1m_out": 0.99, + "cost_per_1m_in_cached": 0.059, + "cost_per_1m_out_cached": 0, + "context_window": 196608, + "default_max_tokens": 65536, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "unsloth/Mistral-Small-24B-Instruct-2501", + "name": "Mistral-Small-24B-Instruct-2501", + "cost_per_1m_in": 0.0747, + "cost_per_1m_out": 0.2989, + "cost_per_1m_in_cached": 0.03735, + "cost_per_1m_out_cached": 0, "context_window": 32768, - "default_max_tokens": 8192, + "default_max_tokens": 32768, "can_reason": false, - "has_reasoning_efforts": false, "supports_attachments": true }, { - "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", - "name": "Qwen3 235B A22B Instruct 2507", - "cost_per_1m_in": 0.08, - "cost_per_1m_out": 0.31, - "context_window": 32768, - "default_max_tokens": 8192, + "id": "chutesai/Mistral-Small-3.1-24B-Instruct-2503-TEE", + "name": "Mistral-Small-3.1-24B-Instruct-2503-TEE", + "cost_per_1m_in": 0.03, + "cost_per_1m_out": 0.11, + "cost_per_1m_in_cached": 0.015, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 131072, "can_reason": false, - "has_reasoning_efforts": false, "supports_attachments": true }, { - "id": "Qwen/Qwen3-30B-A3B", - "name": "Qwen3 30B A3B", - "cost_per_1m_in": 0.02, - "cost_per_1m_out": 0.08, - "context_window": 32768, - "default_max_tokens": 8192, + "id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", + "name": "Mistral-Small-3.2-24B-Instruct-2506", + "cost_per_1m_in": 0.06, + "cost_per_1m_out": 0.18, + "cost_per_1m_in_cached": 0.03, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 131072, "can_reason": false, - "has_reasoning_efforts": false, "supports_attachments": true }, + { + "id": "Qwen/Qwen2.5-72B-Instruct", + "name": "Qwen2.5-72B-Instruct", + "cost_per_1m_in": 0.2989, + "cost_per_1m_out": 1.1957, + "cost_per_1m_in_cached": 0.14945, + "cost_per_1m_out_cached": 0, + "context_window": 32768, + "default_max_tokens": 32768, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-235B-A22B-Instruct-2507-TEE", + "name": "Qwen3-235B-A22B-Instruct-2507-TEE", + "cost_per_1m_in": 0.1, + "cost_per_1m_out": 0.6, + "cost_per_1m_in_cached": 0.05, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 65536, + "can_reason": false, + "supports_attachments": false + }, { "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", - "name": "Qwen3 235B A22B Thinking 2507", + "name": "Qwen3-235B-A22B-Thinking-2507", + "cost_per_1m_in": 0.11, + "cost_per_1m_out": 0.6, + "cost_per_1m_in_cached": 0.055, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 262144, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-30B-A3B", + "name": "Qwen3-30B-A3B", + "cost_per_1m_in": 0.06, + "cost_per_1m_out": 0.22, + "cost_per_1m_in_cached": 0.03, + "cost_per_1m_out_cached": 0, + "context_window": 40960, + "default_max_tokens": 40960, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3-32B-TEE", + "name": "Qwen3-32B-TEE", "cost_per_1m_in": 0.08, - "cost_per_1m_out": 0.31, - "context_window": 32768, - "default_max_tokens": 8192, + "cost_per_1m_out": 0.24, + "cost_per_1m_in_cached": 0.04, + "cost_per_1m_out_cached": 0, + "context_window": 40960, + "default_max_tokens": 40960, "can_reason": true, "reasoning_levels": [ "low", @@ -231,26 +443,41 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": true + "supports_attachments": false }, { - "id": "deepseek-ai/DeepSeek-V3.1", - "name": "DeepSeek V3.1", - "cost_per_1m_in": 0.2, - "cost_per_1m_out": 0.8, - "context_window": 163840, - "default_max_tokens": 32768, + "id": "Qwen/Qwen3-Coder-Next-TEE", + "name": "Qwen3-Coder-Next-TEE", + "cost_per_1m_in": 0.12, + "cost_per_1m_out": 0.75, + "cost_per_1m_in_cached": 0.06, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 65536, "can_reason": false, - "has_reasoning_efforts": false, - "supports_attachments": true + "supports_attachments": false }, { - "id": "deepseek-ai/DeepSeek-V3.1:THINKING", - "name": "DeepSeek V3.1 Reasoning", - "cost_per_1m_in": 0.2, + "id": "Qwen/Qwen3-Next-80B-A3B-Instruct", + "name": "Qwen3-Next-80B-A3B-Instruct", + "cost_per_1m_in": 0.1, "cost_per_1m_out": 0.8, - "context_window": 163840, - "default_max_tokens": 32768, + "cost_per_1m_in_cached": 0.05, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 262144, + "can_reason": false, + "supports_attachments": false + }, + { + "id": "Qwen/Qwen3.5-397B-A17B-TEE", + "name": "Qwen3.5-397B-A17B-TEE", + "cost_per_1m_in": 0.39, + "cost_per_1m_out": 2.34, + "cost_per_1m_in_cached": 0.195, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 65536, "can_reason": true, "reasoning_levels": [ "low", @@ -261,26 +488,52 @@ "supports_attachments": true }, { - "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", - "name": "Qwen3 30B A3B Instruct 2507", - "cost_per_1m_in": 0.05, - "cost_per_1m_out": 0.2, - "context_window": 262144, - "default_max_tokens": 32768, + "id": "unsloth/gemma-3-27b-it", + "name": "gemma-3-27b-it", + "cost_per_1m_in": 0.0272, + "cost_per_1m_out": 0.1087, + "cost_per_1m_in_cached": 0.0136, + "cost_per_1m_out_cached": 0, + "context_window": 128000, + "default_max_tokens": 65536, "can_reason": false, - "has_reasoning_efforts": false, "supports_attachments": true }, { - "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", - "name": "Qwen3 Coder 30B A3B Instruct", - "cost_per_1m_in": 0.0, - "cost_per_1m_out": 0.0, - "context_window": 262144, - "default_max_tokens": 32768, - "can_reason": false, - "has_reasoning_efforts": false, - "supports_attachments": true + "id": "openai/gpt-oss-120b-TEE", + "name": "gpt-oss-120b-TEE", + "cost_per_1m_in": 0.09, + "cost_per_1m_out": 0.36, + "cost_per_1m_in_cached": 0.045, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 65536, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false + }, + { + "id": "openai/gpt-oss-20b-TEE", + "name": "gpt-oss-20b-TEE", + "cost_per_1m_in": 0.03, + "cost_per_1m_out": 0.11, + "cost_per_1m_in_cached": 0.015, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 131072, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false } ] }