diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml
index 8a27401939fd05ab60584e45dced47d9b1390565..6dfc44aa01982d04f754bd60e9ce83e7a91cc22c 100644
--- a/.github/workflows/update.yml
+++ b/.github/workflows/update.yml
@@ -17,9 +17,12 @@ jobs:
       - uses: actions/setup-go@v6
         with:
           go-version-file: go.mod
-      - run: go run ./cmd/openrouter/main.go
-      # we need to add this back when we know that the providers/models all work
-      # - run: go run ./cmd/huggingface/main.go
+      - name: Generate provider configurations
+        run: |
+          go run ./cmd/openrouter/main.go
+          go run ./cmd/synthetic/main.go
+          # we need to add this back when we know that the providers/models all work
+          # go run ./cmd/huggingface/main.go
       - uses: stefanzweifel/git-auto-commit-action@28e16e81777b558cc906c8750092100bbb34c5e3 # v5
         with:
           commit_message: "chore: auto-update generated files"
diff --git a/Taskfile.yaml b/Taskfile.yaml
index d056ed669fa5dccf97e712aeb9cb7341c76345e8..ff759160eaf173906576b05c7cba58ee6feeea44 100644
--- a/Taskfile.yaml
+++ b/Taskfile.yaml
@@ -2,10 +2,11 @@ version: '3'
 
 tasks:
   generate:
-    desc: Generate OpenRouter models
+    desc: Generate provider configurations
     aliases: [gen]
     cmds:
       - go run cmd/openrouter/main.go
+      - go run cmd/synthetic/main.go
 
   lint:
     desc: Run linters
diff --git a/cmd/synthetic/main.go b/cmd/synthetic/main.go
new file mode 100644
index 0000000000000000000000000000000000000000..9ee078019a03546c6a066844027a72c9fd5a703b
--- /dev/null
+++ b/cmd/synthetic/main.go
@@ -0,0 +1,277 @@
+// Package main provides a command-line tool to fetch models from Synthetic
+// and generate a configuration file for the provider.
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"slices"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/charmbracelet/catwalk/pkg/catwalk"
+)
+
+// Model represents a model from the Synthetic API.
+type Model struct {
+	ID                string   `json:"id"`
+	Name              string   `json:"name"`
+	InputModalities   []string `json:"input_modalities"`
+	OutputModalities  []string `json:"output_modalities"`
+	ContextLength     int64    `json:"context_length"`
+	MaxOutputLength   int64    `json:"max_output_length,omitempty"`
+	Pricing           Pricing  `json:"pricing"`
+	SupportedFeatures []string `json:"supported_features,omitempty"`
+}
+
+// Pricing contains the pricing information for different operations.
+type Pricing struct {
+	Prompt           string `json:"prompt"`
+	Completion       string `json:"completion"`
+	Image            string `json:"image"`
+	Request          string `json:"request"`
+	InputCacheReads  string `json:"input_cache_reads"`
+	InputCacheWrites string `json:"input_cache_writes"`
+}
+
+// ModelsResponse is the response structure for the Synthetic models API.
+type ModelsResponse struct {
+	Data []Model `json:"data"`
+}
+
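+// Synthetic reports prices as per-token decimal strings (for example, a
+// prompt price of "0.00000055"), while catwalk stores per-million-token
+// floats, so getPricing multiplies each parsed value by 1,000,000, giving
+// 0.55; the sample figure is illustrative, not a real Synthetic price.
+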
+// ModelPricing is the pricing structure for a model, detailing costs per
+// million tokens for input and output, both cached and uncached.
+type ModelPricing struct {
+	CostPer1MIn        float64 `json:"cost_per_1m_in"`
+	CostPer1MOut       float64 `json:"cost_per_1m_out"`
+	CostPer1MInCached  float64 `json:"cost_per_1m_in_cached"`
+	CostPer1MOutCached float64 `json:"cost_per_1m_out_cached"`
+}
+
+func getPricing(model Model) ModelPricing {
+	pricing := ModelPricing{}
+	costPrompt, err := strconv.ParseFloat(model.Pricing.Prompt, 64)
+	if err != nil {
+		costPrompt = 0.0
+	}
+	pricing.CostPer1MIn = costPrompt * 1_000_000
+	costCompletion, err := strconv.ParseFloat(model.Pricing.Completion, 64)
+	if err != nil {
+		costCompletion = 0.0
+	}
+	pricing.CostPer1MOut = costCompletion * 1_000_000
+
+	costPromptCached, err := strconv.ParseFloat(model.Pricing.InputCacheWrites, 64)
+	if err != nil {
+		costPromptCached = 0.0
+	}
+	pricing.CostPer1MInCached = costPromptCached * 1_000_000
+	costCompletionCached, err := strconv.ParseFloat(model.Pricing.InputCacheReads, 64)
+	if err != nil {
+		costCompletionCached = 0.0
+	}
+	pricing.CostPer1MOutCached = costCompletionCached * 1_000_000
+	return pricing
+}
+
+// applyModelOverrides sets supported_features for models where Synthetic
+// omits this metadata.
+// TODO: Remove this when they add the missing metadata.
+func applyModelOverrides(model *Model) {
+	switch {
+	// All Llama models support tools; none do reasoning yet.
+	case strings.HasPrefix(model.ID, "hf:meta-llama/Llama-"):
+		model.SupportedFeatures = []string{"tools"}
+
+	case strings.HasPrefix(model.ID, "hf:deepseek-ai/DeepSeek-R1"):
+		model.SupportedFeatures = []string{"tools", "reasoning"}
+
+	case strings.HasPrefix(model.ID, "hf:deepseek-ai/DeepSeek-V3.1"):
+		model.SupportedFeatures = []string{"tools", "reasoning"}
+
+	case strings.HasPrefix(model.ID, "hf:deepseek-ai/DeepSeek-V3"):
+		model.SupportedFeatures = []string{"tools"}
+
+	case strings.HasPrefix(model.ID, "hf:Qwen/Qwen3-235B-A22B-Thinking"):
+		model.SupportedFeatures = []string{"tools", "reasoning"}
+
+	case strings.HasPrefix(model.ID, "hf:Qwen/Qwen3-235B-A22B-Instruct"):
+		model.SupportedFeatures = []string{"tools", "reasoning"}
+
+	// The rest of the Qwen3 family supports tools but not reasoning.
+	case strings.HasPrefix(model.ID, "hf:Qwen/Qwen3"):
+		model.SupportedFeatures = []string{"tools"}
+
+	// Has correct metadata already, but the Kimi-K2 matcher (next) would
+	// override it to omit reasoning.
+	case strings.HasPrefix(model.ID, "hf:moonshotai/Kimi-K2-Thinking"):
+		model.SupportedFeatures = []string{"tools", "reasoning"}
+
+	case strings.HasPrefix(model.ID, "hf:moonshotai/Kimi-K2"):
+		model.SupportedFeatures = []string{"tools"}
+
+	case strings.HasPrefix(model.ID, "hf:zai-org/GLM-4.5"):
+		model.SupportedFeatures = []string{"tools"}
+
+	case strings.HasPrefix(model.ID, "hf:openai/gpt-oss"):
+		model.SupportedFeatures = []string{"tools"}
+	}
+}
+
+func fetchSyntheticModels(apiEndpoint string) (*ModelsResponse, error) {
+	client := &http.Client{Timeout: 30 * time.Second}
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, apiEndpoint+"/models", nil)
+	if err != nil {
+		return nil, err //nolint:wrapcheck
+	}
+	req.Header.Set("User-Agent", "Crush-Client/1.0")
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, err //nolint:wrapcheck
+	}
+	defer resp.Body.Close() //nolint:errcheck
+	if resp.StatusCode != http.StatusOK {
+		body, _ := io.ReadAll(resp.Body)
+		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, body)
+	}
+	var mr ModelsResponse
+	if err := json.NewDecoder(resp.Body).Decode(&mr); err != nil {
+		return nil, err //nolint:wrapcheck
+	}
+	return &mr, nil
+}
+
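+// For reference, fetchSyntheticModels expects GET /models to return a body
+// roughly shaped like the following (values are illustrative; where pricing
+// is absent or unparsable, getPricing falls back to zero):
+//
+//	{"data": [{
+//	  "id": "hf:zai-org/GLM-4.6",
+//	  "name": "zai-org/GLM-4.6",
+//	  "input_modalities": ["text"],
+//	  "output_modalities": ["text"],
+//	  "context_length": 202752,
+//	  "pricing": {"prompt": "0.00000055", "completion": "0.00000219"},
+//	  "supported_features": ["tools", "reasoning"]
+//	}]}
+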
+// main fetches the model list and regenerates the synthetic.json config file.
+func main() {
+	syntheticProvider := catwalk.Provider{
+		Name:                "Synthetic",
+		ID:                  "synthetic",
+		APIKey:              "$SYNTHETIC_API_KEY",
+		APIEndpoint:         "https://api.synthetic.new/openai/v1",
+		Type:                catwalk.TypeOpenAICompat,
+		DefaultLargeModelID: "hf:zai-org/GLM-4.6",
+		DefaultSmallModelID: "hf:deepseek-ai/DeepSeek-V3.1-Terminus",
+		Models:              []catwalk.Model{},
+	}
+
+	modelsResp, err := fetchSyntheticModels(syntheticProvider.APIEndpoint)
+	if err != nil {
+		log.Fatal("Error fetching Synthetic models:", err)
+	}
+
+	// Apply overrides for models missing supported_features metadata.
+	for i := range modelsResp.Data {
+		applyModelOverrides(&modelsResp.Data[i])
+	}
+
+	for _, model := range modelsResp.Data {
+		// Skip models with small context windows.
+		if model.ContextLength < 20000 {
+			continue
+		}
+
+		// Skip non-text models.
+		if !slices.Contains(model.InputModalities, "text") ||
+			!slices.Contains(model.OutputModalities, "text") {
+			continue
+		}
+
+		// Skip models that don't support tools.
+		supportsTools := slices.Contains(model.SupportedFeatures, "tools")
+		if !supportsTools {
+			continue
+		}
+
+		pricing := getPricing(model)
+		supportsImages := slices.Contains(model.InputModalities, "image")
+
+		// Check if the model supports reasoning.
+		canReason := slices.Contains(model.SupportedFeatures, "reasoning")
+		var reasoningLevels []string
+		var defaultReasoning string
+		if canReason {
+			reasoningLevels = []string{"low", "medium", "high"}
+			defaultReasoning = "medium"
+		}
+
+		// Strip everything before the first / for a cleaner name.
+		modelName := model.Name
+		if idx := strings.Index(model.Name, "/"); idx != -1 {
+			modelName = model.Name[idx+1:]
+		}
+		// Replace hyphens with spaces.
+		modelName = strings.ReplaceAll(modelName, "-", " ")
+
+		m := catwalk.Model{
+			ID:                     model.ID,
+			Name:                   modelName,
+			CostPer1MIn:            pricing.CostPer1MIn,
+			CostPer1MOut:           pricing.CostPer1MOut,
+			CostPer1MInCached:      pricing.CostPer1MInCached,
+			CostPer1MOutCached:     pricing.CostPer1MOutCached,
+			ContextWindow:          model.ContextLength,
+			CanReason:              canReason,
+			DefaultReasoningEffort: defaultReasoning,
+			ReasoningLevels:        reasoningLevels,
+			SupportsImages:         supportsImages,
+		}
+
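+		// Worked example: a model reporting a 202752-token context and no
+		// usable max_output_length falls to the 10% branch below and gets
+		// 202752/10 = 20275, the value GLM-4.6 ends up with in the output.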
"gofumpt": true, + "codelenses": { + "gc_details": true, + "generate": true, + "run_govulncheck": true, + "test": true, + "tidy": true, + "upgrade_dependency": true + }, + "hints": { + "assignVariableTypes": true, + "compositeLiteralFields": true, + "compositeLiteralTypes": true, + "constantValues": true, + "functionTypeParameters": true, + "parameterNames": true, + "rangeVariableTypes": true + }, + "analyses": { + "nilness": true, + "unusedparams": true, + "unusedvariable": true, + "unusedwrite": true, + "useany": true + }, + "staticcheck": true, + "directoryFilters": [ + "-.git", + "-node_modules" + ], + "semanticTokens": true + } + } + } +} diff --git a/internal/providers/configs/synthetic.json b/internal/providers/configs/synthetic.json index 94cae92395092036b310a0914acd12c80ba3a7d6..024c706a13bf2bc7747e1ba0f265c6832dcce5fa 100644 --- a/internal/providers/configs/synthetic.json +++ b/internal/providers/configs/synthetic.json @@ -1,21 +1,21 @@ { "name": "Synthetic", "id": "synthetic", - "type": "openai-compat", "api_key": "$SYNTHETIC_API_KEY", "api_endpoint": "https://api.synthetic.new/openai/v1", + "type": "openai-compat", "default_large_model_id": "hf:zai-org/GLM-4.6", - "default_small_model_id": "hf:openai/gpt-oss-120b", + "default_small_model_id": "hf:deepseek-ai/DeepSeek-V3.1-Terminus", "models": [ { "id": "hf:deepseek-ai/DeepSeek-R1", "name": "DeepSeek R1", - "cost_per_1m_in": 0.55, - "cost_per_1m_out": 2.19, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 0.14, - "context_window": 128000, - "default_max_tokens": 65536, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, "can_reason": true, "reasoning_levels": [ "low", @@ -23,17 +23,18 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:deepseek-ai/DeepSeek-R1-0528", "name": "DeepSeek R1 0528", - "cost_per_1m_in": 3.0, - "cost_per_1m_out": 8.0, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 0.14, - "context_window": 128000, - "default_max_tokens": 65536, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, "can_reason": true, "reasoning_levels": [ "low", @@ -41,41 +42,44 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:deepseek-ai/DeepSeek-V3", "name": "DeepSeek V3", - "cost_per_1m_in": 1.25, - "cost_per_1m_out": 1.25, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 0.14, - "context_window": 128000, - "default_max_tokens": 8192, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, "can_reason": false, - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:deepseek-ai/DeepSeek-V3-0324", "name": "DeepSeek V3 0324", - "cost_per_1m_in": 1.2, - "cost_per_1m_out": 1.2, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 0.14, - "context_window": 128000, - "default_max_tokens": 8192, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, "can_reason": false, - "supports_attachments": false + 
"supports_attachments": false, + "options": {} }, { "id": "hf:deepseek-ai/DeepSeek-V3.1", "name": "DeepSeek V3.1", - "cost_per_1m_in": 0.56, - "cost_per_1m_out": 1.68, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 0.14, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 8192, + "default_max_tokens": 13107, "can_reason": true, "reasoning_levels": [ "low", @@ -83,17 +87,18 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:deepseek-ai/DeepSeek-V3.1-Terminus", "name": "DeepSeek V3.1 Terminus", - "cost_per_1m_in": 1.2, - "cost_per_1m_out": 1.2, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 0.14, - "context_window": 128000, - "default_max_tokens": 8192, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, "can_reason": true, "reasoning_levels": [ "low", @@ -101,17 +106,31 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { - "id": "hf:meta-llama/Llama-3.1-405B-Instruct", - "name": "Llama 3.1 405B Instruct", - "cost_per_1m_in": 3.0, - "cost_per_1m_out": 3.0, - "cost_per_1m_in_cached": 0.27, - "cost_per_1m_out_cached": 0.55, + "id": "hf:zai-org/GLM-4.5", + "name": "GLM 4.5", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 4096, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "hf:zai-org/GLM-4.6", + "name": "GLM 4.6", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 202752, + "default_max_tokens": 20275, "can_reason": true, "reasoning_levels": [ "low", @@ -119,77 +138,141 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": false + "supports_attachments": false, + "options": {} + }, + { + "id": "hf:moonshotai/Kimi-K2-Instruct", + "name": "Kimi K2 Instruct", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "hf:moonshotai/Kimi-K2-Instruct-0905", + "name": "Kimi K2 Instruct 0905", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "hf:moonshotai/Kimi-K2-Thinking", + "name": "Kimi K2 Thinking", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 32768, + "can_reason": true, + "reasoning_levels": [ + "low", + "medium", + "high" + ], + "default_reasoning_effort": "medium", + "supports_attachments": false, + "options": {} + }, + { + "id": "hf:meta-llama/Llama-3.1-405B-Instruct", + "name": "Llama 3.1 405B Instruct", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + 
"can_reason": false, + "supports_attachments": false, + "options": {} }, { "id": "hf:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B Instruct", - "cost_per_1m_in": 0.9, - "cost_per_1m_out": 0.9, - "cost_per_1m_in_cached": 0.59, - "cost_per_1m_out_cached": 1.1, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, "context_window": 131072, - "default_max_tokens": 4096, + "default_max_tokens": 13107, "can_reason": false, - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:meta-llama/Llama-3.1-8B-Instruct", "name": "Llama 3.1 8B Instruct", - "cost_per_1m_in": 0.2, - "cost_per_1m_out": 0.2, - "cost_per_1m_in_cached": 0.07, - "cost_per_1m_out_cached": 0.2, - "context_window": 128000, - "default_max_tokens": 4096, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, "can_reason": false, - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:meta-llama/Llama-3.3-70B-Instruct", "name": "Llama 3.3 70B Instruct", - "cost_per_1m_in": 0.9, - "cost_per_1m_out": 0.9, - "cost_per_1m_in_cached": 0.59, - "cost_per_1m_out_cached": 1.1, - "context_window": 128000, - "default_max_tokens": 4096, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, "can_reason": false, - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8", "name": "Llama 4 Maverick 17B 128E Instruct FP8", - "cost_per_1m_in": 0.22, - "cost_per_1m_out": 0.88, - "cost_per_1m_in_cached": 0.14, - "cost_per_1m_out_cached": 0.55, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, "context_window": 536576, - "default_max_tokens": 4096, + "default_max_tokens": 53657, "can_reason": false, - "supports_attachments": true + "supports_attachments": true, + "options": {} }, { "id": "hf:meta-llama/Llama-4-Scout-17B-16E-Instruct", "name": "Llama 4 Scout 17B 16E Instruct", - "cost_per_1m_in": 0.15, - "cost_per_1m_out": 0.6, - "cost_per_1m_in_cached": 0.14, - "cost_per_1m_out_cached": 0.55, - "context_window": 328000, - "default_max_tokens": 8192, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 335872, + "default_max_tokens": 33587, "can_reason": false, - "supports_attachments": true + "supports_attachments": true, + "options": {} }, { "id": "hf:MiniMaxAI/MiniMax-M2", "name": "MiniMax M2", - "cost_per_1m_in": 0.55, - "cost_per_1m_out": 2.19, - "cost_per_1m_in_cached": 0.27, - "cost_per_1m_out_cached": 0.55, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, "context_window": 196608, - "default_max_tokens": 65536, + "default_max_tokens": 19660, "can_reason": true, "reasoning_levels": [ "low", @@ -197,95 +280,18 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": false - }, - { - "id": "hf:moonshotai/Kimi-K2-Instruct", - "name": "Kimi K2 Instruct", - "cost_per_1m_in": 0.6, - "cost_per_1m_out": 2.5, - "cost_per_1m_in_cached": 0.27, - "cost_per_1m_out_cached": 0.55, - "context_window": 128000, - "default_max_tokens": 131072, - "can_reason": false, - "supports_attachments": false - }, - { - 
"id": "hf:moonshotai/Kimi-K2-Thinking", - "name": "Kimi K2 Thinking", - "cost_per_1m_in": 0.55, - "cost_per_1m_out": 2.19, - "cost_per_1m_in_cached": 0.55, - "cost_per_1m_out_cached": 2.19, - "context_window": 196608, - "default_max_tokens": 65536, - "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", - "supports_attachments": false - }, - { - "id": "hf:moonshotai/Kimi-K2-Instruct-0905", - "name": "Kimi K2 Instruct 0905", - "cost_per_1m_in": 1.2, - "cost_per_1m_out": 1.2, - "cost_per_1m_in_cached": 0.55, - "cost_per_1m_out_cached": 1.1, - "context_window": 262144, - "default_max_tokens": 262144, - "can_reason": false, - "supports_attachments": false - }, - { - "id": "hf:openai/gpt-oss-120b", - "name": "GPT-OSS 120B", - "cost_per_1m_in": 0.1, - "cost_per_1m_out": 0.1, - "cost_per_1m_in_cached": 0.55, - "cost_per_1m_out_cached": 1.1, - "context_window": 128000, - "default_max_tokens": 65536, - "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", - "supports_attachments": false - }, - { - "id": "hf:Qwen/Qwen2.5-Coder-32B-Instruct", - "name": "Qwen2.5 Coder 32B Instruct", - "cost_per_1m_in": 0.14, - "cost_per_1m_out": 0.55, - "cost_per_1m_in_cached": 0.14, - "cost_per_1m_out_cached": 0.55, - "context_window": 32768, - "default_max_tokens": 32768, - "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:Qwen/Qwen3-235B-A22B-Instruct-2507", "name": "Qwen3 235B A22B Instruct 2507", - "cost_per_1m_in": 0.22, - "cost_per_1m_out": 0.88, - "cost_per_1m_in_cached": 0.55, - "cost_per_1m_out_cached": 1.1, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, "context_window": 262144, - "default_max_tokens": 6912, + "default_max_tokens": 26214, "can_reason": true, "reasoning_levels": [ "low", @@ -293,17 +299,18 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:Qwen/Qwen3-235B-A22B-Thinking-2507", "name": "Qwen3 235B A22B Thinking 2507", - "cost_per_1m_in": 0.65, - "cost_per_1m_out": 3.0, - "cost_per_1m_in_cached": 0.55, - "cost_per_1m_out_cached": 1.1, - "context_window": 256000, - "default_max_tokens": 81920, + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, "can_reason": true, "reasoning_levels": [ "low", @@ -311,61 +318,47 @@ "high" ], "default_reasoning_effort": "medium", - "supports_attachments": false + "supports_attachments": false, + "options": {} }, { "id": "hf:Qwen/Qwen3-Coder-480B-A35B-Instruct", "name": "Qwen3 Coder 480B A35B Instruct", - "cost_per_1m_in": 0.45, - "cost_per_1m_out": 1.8, - "cost_per_1m_in_cached": 0.82, - "cost_per_1m_out_cached": 1.65, - "context_window": 256000, - "default_max_tokens": 262144, - "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", - "supports_attachments": false + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 262144, + "default_max_tokens": 26214, + "can_reason": false, + "supports_attachments": false, + "options": {} }, { - "id": "hf:zai-org/GLM-4.5", - "name": 
"GLM-4.5", - "cost_per_1m_in": 0.55, - "cost_per_1m_out": 2.19, - "cost_per_1m_in_cached": 0.14, - "cost_per_1m_out_cached": 0.55, - "context_window": 128000, - "default_max_tokens": 98304, - "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", - "supports_attachments": false + "id": "hf:Qwen/Qwen3-VL-235B-A22B-Instruct", + "name": "Qwen3 VL 235B A22B Instruct", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 256000, + "default_max_tokens": 25600, + "can_reason": false, + "supports_attachments": true, + "options": {} }, { - "id": "hf:zai-org/GLM-4.6", - "name": "GLM-4.6", - "cost_per_1m_in": 0.55, - "cost_per_1m_out": 0.55, - "cost_per_1m_in_cached": 0.27, - "cost_per_1m_out_cached": 0.55, - "context_window": 202752, - "default_max_tokens": 65536, - "can_reason": true, - "reasoning_levels": [ - "low", - "medium", - "high" - ], - "default_reasoning_effort": "medium", - "supports_attachments": false - } + "id": "hf:openai/gpt-oss-120b", + "name": "gpt oss 120b", + "cost_per_1m_in": 0, + "cost_per_1m_out": 0, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false, + "options": {} + } ] -} +} \ No newline at end of file