diff --git a/.github/workflows/update.yml b/.github/workflows/update.yml index 3053f8e2d9b9e40074d2535593cbb0f5d5a803c3..9cbd0988a93726e053085ac02188f54dbe997402 100644 --- a/.github/workflows/update.yml +++ b/.github/workflows/update.yml @@ -18,6 +18,8 @@ jobs: with: go-version-file: go.mod - name: Generate provider configurations + env: + XAI_API_KEY: ${{ secrets.XAI_API_KEY }} run: | go run ./cmd/aihubmix/main.go go run ./cmd/avian/main.go @@ -27,6 +29,7 @@ jobs: go run ./cmd/synthetic/main.go go run ./cmd/venice/main.go go run ./cmd/vercel/main.go + go run ./cmd/xai/main.go - uses: stefanzweifel/git-auto-commit-action@04702edda442b2e678b25b537cec683a1493fcb9 # v5 with: commit_message: "chore: auto-update generated files" diff --git a/Taskfile.yaml b/Taskfile.yaml index a7848ee88faa15afce685d7e9c5f2627fce51c9d..f18acb4bb7e9167f594fb95f981e71971cc355df 100644 --- a/Taskfile.yaml +++ b/Taskfile.yaml @@ -111,6 +111,11 @@ tasks: cmds: - go run cmd/vercel/main.go + gen:xai: + desc: Generate xAI provider configurations + cmds: + - go run cmd/xai/main.go + update: desc: Trigger the update workflow on GitHub cmds: diff --git a/cmd/xai/main.go b/cmd/xai/main.go new file mode 100644 index 0000000000000000000000000000000000000000..d39c3cfea0fc6d4450bf5660961b8aa563cecb20 --- /dev/null +++ b/cmd/xai/main.go @@ -0,0 +1,189 @@ +// Package main provides a command-line tool to fetch models from xAI +// and generate a configuration file for the provider. 
// ModelsResponse is the JSON envelope decoded from the xAI
// language-models endpoint.
type ModelsResponse struct {
	Models []XAIModel `json:"models"`
}

// XAIModel is a single entry of the xAI language-models listing.
//
// The three price fields are raw integers as reported by the API;
// priceToDollarsPerMillion interprets them as cents per hundred million
// tokens.
type XAIModel struct {
	ID                       string   `json:"id"`
	Aliases                  []string `json:"aliases"`
	InputModalities          []string `json:"input_modalities"`
	OutputModalities         []string `json:"output_modalities"`
	PromptTextTokenPrice     int64    `json:"prompt_text_token_price"`
	CompletionTextTokenPrice int64    `json:"completion_text_token_price"`
	CachedPromptTextTokenPrc int64    `json:"cached_prompt_text_token_price"`
}

// shortestAlias returns the shortest name the model is known by: the first
// alias that is strictly shorter than every other candidate, or the model ID
// when no alias is shorter than it.
//
// Empty aliases are ignored; otherwise a blank entry in the API response
// would win the "shortest" comparison and produce a model with an empty ID.
func shortestAlias(model XAIModel) string {
	best := model.ID
	for _, alias := range model.Aliases {
		if alias == "" {
			continue
		}
		// Strict comparison keeps the earliest candidate on ties, so the
		// model ID wins over an alias of the same length.
		if len(alias) < len(best) {
			best = alias
		}
	}
	return best
}

// prettyNames maps generated model IDs to their human-readable display names.
var prettyNames = map[string]string{
	"grok-3":                      "Grok 3",
	"grok-3-mini":                 "Grok 3 Mini",
	"grok-4":                      "Grok 4",
	"grok-4-fast":                 "Grok 4 Fast",
	"grok-4-fast-non-reasoning":   "Grok 4 Fast Non-Reasoning",
	"grok-4-1-fast":               "Grok 4.1 Fast",
	"grok-4-1-fast-non-reasoning": "Grok 4.1 Fast Non-Reasoning",
	"grok-4.20":                   "Grok 4.20",
	"grok-4.20-non-reasoning":     "Grok 4.20 Non-Reasoning",
	"grok-4.20-multi-agent":       "Grok 4.20 Multi-Agent",
	"grok-code-fast":              "Grok Code Fast",
}

// prettyName returns the display name registered for id in prettyNames, or
// id itself when no display name is registered.
func prettyName(id string) string {
	if name, ok := prettyNames[id]; ok {
		return name
	}
	return id
}

// contextWindow returns the context window, in tokens, assumed for modelID:
// 200,000 for any ID containing "grok-4" and 131,072 for everything else.
//
// The values are hard-coded because XAIModel carries no context-size field.
// NOTE(review): confirm these figures against xAI's published limits.
func contextWindow(modelID string) int64 {
	if strings.Contains(modelID, "grok-4") {
		return 200_000
	}
	return 131_072
}

// roundCost rounds v to five decimal places.
func roundCost(v float64) float64 {
	return math.Round(v*1e5) / 1e5
}

// priceToDollarsPerMillion converts a price expressed in cents per hundred
// million tokens into dollars per million tokens, rounded to five decimals.
func priceToDollarsPerMillion(centsPerHundredMillion int64) float64 {
	return roundCost(float64(centsPerHundredMillion) / 10_000)
}

// fetchXAIModels downloads the model listing from the xAI API.
//
// It authenticates with the XAI_API_KEY environment variable and returns an
// error when the variable is unset, the request cannot be built or sent, the
// server answers with a non-200 status, or the body is not valid JSON. As a
// debugging aid the raw response body is also written to
// tmp/xai-response.json; failing to write that copy only logs a warning.
func fetchXAIModels() (*ModelsResponse, error) {
	apiKey := os.Getenv("XAI_API_KEY")
	if apiKey == "" {
		return nil, fmt.Errorf("XAI_API_KEY environment variable is not set")
	}

	req, err := http.NewRequestWithContext(
		context.Background(),
		http.MethodGet,
		"https://api.x.ai/v1/language-models",
		nil,
	)
	if err != nil {
		// Without this check a failure here would surface as a nil
		// pointer dereference on req.Header below.
		return nil, fmt.Errorf("building xAI models request: %w", err)
	}
	req.Header.Set("User-Agent", "Crush-Client/1.0")
	req.Header.Set("Authorization", "Bearer "+apiKey)

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("requesting xAI models: %w", err)
	}
	defer resp.Body.Close() //nolint:errcheck

	// Read the body before checking the status so that error responses can
	// include the server's message.
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("unable to read response body: %w", err)
	}

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("status %d: %s", resp.StatusCode, body)
	}

	// Best-effort debug copy of the raw response: a failure here must not
	// abort generation, but it should not pass unnoticed either.
	if err := os.MkdirAll("tmp", 0o700); err != nil {
		log.Printf("warning: unable to create tmp directory: %v", err)
	} else if err := os.WriteFile("tmp/xai-response.json", body, 0o600); err != nil {
		log.Printf("warning: unable to save raw xAI response: %v", err)
	}

	var mr ModelsResponse
	if err := json.Unmarshal(body, &mr); err != nil {
		return nil, fmt.Errorf("decoding xAI models response: %w", err)
	}
	return &mr, nil
}
priceToDollarsPerMillion(model.CachedPromptTextTokenPrc), + ContextWindow: ctxWindow, + DefaultMaxTokens: defaultMaxTokens, + CanReason: canReason, + SupportsImages: supportsImages, + } + + provider.Models = append(provider.Models, m) + fmt.Printf("Added model %s (alias: %s)\n", model.ID, id) + } + + slices.SortFunc(provider.Models, func(a, b catwalk.Model) int { + return strings.Compare(a.ID, b.ID) + }) + + data, err := json.MarshalIndent(provider, "", " ") + if err != nil { + log.Fatal("Error marshaling xAI provider:", err) + } + data = append(data, '\n') + + if err := os.WriteFile("internal/providers/configs/xai.json", data, 0o600); err != nil { + log.Fatal("Error writing xAI provider config:", err) + } + + fmt.Printf("Generated xai.json with %d models\n", len(provider.Models)) +} diff --git a/internal/providers/configs/xai.json b/internal/providers/configs/xai.json index c2dd17aad486f265e18885bb9b38c5139eaa4cff..5e4144ff8c0a6cc1f3517e909d6551366bd3877c 100644 --- a/internal/providers/configs/xai.json +++ b/internal/providers/configs/xai.json @@ -4,32 +4,60 @@ "api_key": "$XAI_API_KEY", "api_endpoint": "https://api.x.ai/v1", "type": "openai-compat", - "default_large_model_id": "grok-code-fast", - "default_small_model_id": "grok-3-mini", + "default_large_model_id": "grok-4.20", + "default_small_model_id": "grok-4-1-fast", "models": [ { - "id": "grok-code-fast", - "name": "Grok Code Fast", - "cost_per_1m_in": 0.2, - "cost_per_1m_out": 1.5, + "id": "grok-3", + "name": "Grok 3", + "cost_per_1m_in": 3, + "cost_per_1m_out": 15, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.02, - "context_window": 256000, + "cost_per_1m_out_cached": 0.75, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": false, + "supports_attachments": false, + "options": {} + }, + { + "id": "grok-3-mini", + "name": "Grok 3 Mini", + "cost_per_1m_in": 0.3, + "cost_per_1m_out": 0.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0.075, + "context_window": 
131072, + "default_max_tokens": 13107, + "can_reason": true, + "supports_attachments": false, + "options": {} + }, + { + "id": "grok-4", + "name": "Grok 4", + "cost_per_1m_in": 3, + "cost_per_1m_out": 15, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0.75, + "context_window": 200000, "default_max_tokens": 20000, "can_reason": true, - "supports_attachments": false + "supports_attachments": true, + "options": {} }, { - "id": "grok-4-1-fast-reasoning", - "name": "Grok 4.1 Fast Reasoning", + "id": "grok-4-1-fast", + "name": "Grok 4.1 Fast", "cost_per_1m_in": 0.2, "cost_per_1m_out": 0.5, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0.05, - "context_window": 2000000, - "default_max_tokens": 200000, + "context_window": 200000, + "default_max_tokens": 20000, "can_reason": true, - "supports_attachments": true + "supports_attachments": true, + "options": {} }, { "id": "grok-4-1-fast-non-reasoning", @@ -38,58 +66,76 @@ "cost_per_1m_out": 0.5, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0.05, - "context_window": 2000000, - "default_max_tokens": 200000, + "context_window": 200000, + "default_max_tokens": 20000, "can_reason": false, - "supports_attachments": true + "supports_attachments": true, + "options": {} }, { - "id": "grok-4", - "name": "Grok 4", - "cost_per_1m_in": 3, - "cost_per_1m_out": 15, + "id": "grok-4-fast", + "name": "Grok 4 Fast", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 0.5, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.75, - "context_window": 256000, + "cost_per_1m_out_cached": 0.05, + "context_window": 200000, "default_max_tokens": 20000, "can_reason": true, - "supports_attachments": true + "supports_attachments": true, + "options": {} }, { - "id": "grok-4-fast", - "name": "Grok 4 Fast", + "id": "grok-4-fast-non-reasoning", + "name": "Grok 4 Fast Non-Reasoning", "cost_per_1m_in": 0.2, "cost_per_1m_out": 0.5, "cost_per_1m_in_cached": 0, "cost_per_1m_out_cached": 0.05, - "context_window": 2000000, - 
"default_max_tokens": 200000, - "can_reason": true, - "supports_attachments": true + "context_window": 200000, + "default_max_tokens": 20000, + "can_reason": false, + "supports_attachments": true, + "options": {} }, { - "id": "grok-3-mini", - "name": "Grok 3 Mini", - "cost_per_1m_in": 0.3, - "cost_per_1m_out": 0.5, + "id": "grok-4.20", + "name": "Grok 4.20", + "cost_per_1m_in": 2, + "cost_per_1m_out": 6, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.075, - "context_window": 131072, + "cost_per_1m_out_cached": 0.2, + "context_window": 200000, "default_max_tokens": 20000, "can_reason": true, - "supports_attachments": false + "supports_attachments": true, + "options": {} }, { - "id": "grok-3", - "name": "Grok 3", - "cost_per_1m_in": 3, - "cost_per_1m_out": 15, + "id": "grok-4.20-non-reasoning", + "name": "Grok 4.20 Non-Reasoning", + "cost_per_1m_in": 2, + "cost_per_1m_out": 6, "cost_per_1m_in_cached": 0, - "cost_per_1m_out_cached": 0.75, - "context_window": 131072, + "cost_per_1m_out_cached": 0.2, + "context_window": 200000, "default_max_tokens": 20000, "can_reason": false, - "supports_attachments": false + "supports_attachments": true, + "options": {} + }, + { + "id": "grok-code-fast", + "name": "Grok Code Fast", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 1.5, + "cost_per_1m_in_cached": 0, + "cost_per_1m_out_cached": 0.02, + "context_window": 131072, + "default_max_tokens": 13107, + "can_reason": true, + "supports_attachments": false, + "options": {} } ] }