From b59776d46e6e9cdce1e41709358128dc4cb181b5 Mon Sep 17 00:00:00 2001 From: Stenn Kool Date: Tue, 26 Aug 2025 17:51:40 +0200 Subject: [PATCH] Add chutes provider (#42) * feat: add Chutes AI provider with Qwen, GLM and Kimi models * feat: update model pricing and add new DeepSeek V3.1 and Qwen3 models --- internal/providers/configs/chutes.json | 242 +++++++++++++++++++++++++ internal/providers/providers.go | 8 + pkg/catwalk/provider.go | 2 + 3 files changed, 252 insertions(+) create mode 100644 internal/providers/configs/chutes.json diff --git a/internal/providers/configs/chutes.json b/internal/providers/configs/chutes.json new file mode 100644 index 0000000000000000000000000000000000000000..e5c146dd981551c238f23b8ae30872856f5f3a76 --- /dev/null +++ b/internal/providers/configs/chutes.json @@ -0,0 +1,242 @@ +{ + "name": "Chutes", + "id": "chutes", + "type": "openai", + "api_key": "$CHUTES_API_KEY", + "api_endpoint": "https://llm.chutes.ai/v1", + "default_large_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "default_small_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "models": [ + { + "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8", + "name": "Qwen3 Coder 480B A35B Instruct (FP8)", + "cost_per_1m_in": 0.2, + "cost_per_1m_out": 0.8, + "context_window": 262000, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "zai-org/GLM-4.5-FP8", + "name": "GLM 4.5 FP8", + "cost_per_1m_in": 0.0, + "cost_per_1m_out": 0.0, + "context_window": 98000, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "moonshotai/Kimi-K2-Instruct-75k", + "name": "Kimi K2 Instruct", + "cost_per_1m_in": 0.15, + "cost_per_1m_out": 0.59, + "context_window": 75000, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528", + "name": "DeepSeek R1 0528", + "cost_per_1m_in": 0.18, + "cost_per_1m_out": 0.72, + "context_window": 75000, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B", + "name": "DeepSeek R1 0528 Qwen3 8B", + "cost_per_1m_in": 0.02, + "cost_per_1m_out": 0.07, + "context_window": 32768, + "default_max_tokens": 8192, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + }, + { + "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "name": "DeepSeek R1 Distill Llama 70B", + "cost_per_1m_in": 0.03, + "cost_per_1m_out": 0.14, + "context_window": 65536, + "default_max_tokens": 8192, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + }, + { + "id": "tngtech/DeepSeek-R1T-Chimera", + "name": "DeepSeek R1T Chimera", + "cost_per_1m_in": 0.18, + "cost_per_1m_out": 0.72, + "context_window": 131072, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "tngtech/DeepSeek-TNG-R1T2-Chimera", + "name": "DeepSeek TNG R1T2 Chimera", + "cost_per_1m_in": 0.20, + "cost_per_1m_out": 0.80, + "context_window": 262144, + "default_max_tokens": 65536, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "high", + "supports_attachments": true + }, + { + "id": "deepseek-ai/DeepSeek-V3-0324", + "name": "DeepSeek V3 0324", + "cost_per_1m_in": 0.18, + "cost_per_1m_out": 0.72, + "context_window": 75000, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "chutesai/Devstral-Small-2505", + "name": "Devstral Small 2505", + "cost_per_1m_in": 0.02, + "cost_per_1m_out": 0.08, + "context_window": 32768, + "default_max_tokens": 8192, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + }, + { + "id": "zai-org/GLM-4.5-Air", + "name": "GLM 4.5 Air", + "cost_per_1m_in": 0.0, + "cost_per_1m_out": 0.0, + "context_window": 131072, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "openai/gpt-oss-120b", + "name": "GPT OSS 120B", + "cost_per_1m_in": 0.10, + "cost_per_1m_out": 0.41, + "context_window": 131072, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506", + "name": "Mistral Small 3.2 24B Instruct 2506", + "cost_per_1m_in": 0.02, + "cost_per_1m_out": 0.08, + "context_window": 32768, + "default_max_tokens": 8192, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + }, + { + "id": "Qwen/Qwen3-235B-A22B-Instruct-2507", + "name": "Qwen3 235B A22B Instruct 2507", + "cost_per_1m_in": 0.08, + "cost_per_1m_out": 0.31, + "context_window": 32768, + "default_max_tokens": 8192, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + }, + { + "id": "Qwen/Qwen3-30B-A3B", + "name": "Qwen3 30B A3B", + "cost_per_1m_in": 0.02, + "cost_per_1m_out": 0.08, + "context_window": 32768, + "default_max_tokens": 8192, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + }, + { + "id": "Qwen/Qwen3-235B-A22B-Thinking-2507", + "name": "Qwen3 235B A22B Thinking 2507", + "cost_per_1m_in": 0.08, + "cost_per_1m_out": 0.31, + "context_window": 32768, + "default_max_tokens": 8192, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "high", + "supports_attachments": true + }, + { + "id": "deepseek-ai/DeepSeek-V3.1", + "name": "DeepSeek V3.1", + "cost_per_1m_in": 0.20, + "cost_per_1m_out": 0.80, + "context_window": 163840, + "default_max_tokens": 32768, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + }, + { + "id": "deepseek-ai/DeepSeek-V3.1:THINKING", + "name": "DeepSeek V3.1 Reasoning", + "cost_per_1m_in": 0.20, + "cost_per_1m_out": 0.80, + "context_window": 163840, + "default_max_tokens": 32768, + "can_reason": true, + "has_reasoning_efforts": true, + "default_reasoning_efforts": "medium", + "supports_attachments": true + }, + { + "id": "Qwen/Qwen3-30B-A3B-Instruct-2507", + "name": "Qwen3 30B A3B Instruct 2507", + "cost_per_1m_in": 0.05, + "cost_per_1m_out": 0.20, + "context_window": 262144, + "default_max_tokens": 32768, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + }, + { + "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct", + "name": "Qwen3 Coder 30B A3B Instruct", + "cost_per_1m_in": 0.00, + "cost_per_1m_out": 0.00, + "context_window": 262144, + "default_max_tokens": 32768, + "can_reason": false, + "has_reasoning_efforts": false, + "supports_attachments": true + } + ] +} \ No newline at end of file diff --git a/internal/providers/providers.go b/internal/providers/providers.go index 0953764dc2ec68d3c305199d0408b8164ba3d27e..76a1200f64e9d544629e5ef74d2f2eb1f73ef07d 100644 --- a/internal/providers/providers.go +++ b/internal/providers/providers.go @@ -48,6 +48,9 @@ var cerebrasConfig []byte //go:embed configs/venice.json var veniceConfig []byte +//go:embed configs/chutes.json +var chutesConfig []byte + //go:embed configs/deepseek.json var deepSeekConfig []byte @@ -68,6 +71,7 @@ var providerRegistry = []ProviderFunc{ lambdaProvider, cerebrasProvider, veniceProvider, + chutesProvider, deepSeekProvider, } @@ -141,6 +145,10 @@ func veniceProvider() catwalk.Provider { return loadProviderFromConfig(veniceConfig) } +func chutesProvider() catwalk.Provider { + return loadProviderFromConfig(chutesConfig) +} + func deepSeekProvider() catwalk.Provider { return loadProviderFromConfig(deepSeekConfig) } diff --git a/pkg/catwalk/provider.go b/pkg/catwalk/provider.go index 6640d0c9bd79429e0ff245b1b3459c79d606c936..589d0ff6704ca1f0c0f2fde57bffb832e47198d9 100644 --- a/pkg/catwalk/provider.go +++ b/pkg/catwalk/provider.go @@ -31,6 +31,7 @@ const ( InferenceProviderLambda InferenceProvider = "lambda" InferenceProviderCerebras InferenceProvider = "cerebras" InferenceProviderVenice InferenceProvider = "venice" + InferenceProviderChutes InferenceProvider = "chutes" ) // Provider represents an AI provider configuration. @@ -78,5 +79,6 @@ func KnownProviders() []InferenceProvider { InferenceProviderLambda, InferenceProviderCerebras, InferenceProviderVenice, + InferenceProviderChutes, } }