From be334ac0804ffdf58ff3776bcdbd5f32c7c15868 Mon Sep 17 00:00:00 2001
From: "Agusti F." <6601142+agustif@users.noreply.github.com>
Date: Sat, 2 Aug 2025 04:42:25 +0200
Subject: [PATCH 1/3] feat: add cerebras.ai provider

---
 internal/providers/configs/cerebras.json | 110 +++++++++++++++++++++++
 internal/providers/providers.go          |   8 ++
 2 files changed, 118 insertions(+)
 create mode 100644 internal/providers/configs/cerebras.json

diff --git a/internal/providers/configs/cerebras.json b/internal/providers/configs/cerebras.json
new file mode 100644
index 0000000000000000000000000000000000000000..c73ca67401ba4f31fd3d6d05305dd01a8a70e367
--- /dev/null
+++ b/internal/providers/configs/cerebras.json
@@ -0,0 +1,110 @@
+{
+  "name": "Cerebras",
+  "id": "cerebras",
+  "type": "openai",
+  "api_key": "$CEREBRAS_API_KEY",
+  "api_endpoint": "https://api.cerebras.ai/v1",
+  "default_large_model_id": "qwen-3-235b-a22b-instruct-2507",
+  "default_small_model_id": "qwen-3-32b",
+  "models": [
+    {
+      "id": "llama-4-scout-17b-16e-instruct",
+      "name": "Llama 4 Scout",
+      "parameters": "109B",
+      "speed_tokens_per_s": 2600,
+      "cost_per_1m_in": 0.65,
+      "cost_per_1m_out": 0.85,
+      "context_window": 32768,
+      "default_max_tokens": 4000,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.1-8b",
+      "name": "Llama 3.1 8B",
+      "parameters": "8B",
+      "speed_tokens_per_s": 2200,
+      "cost_per_1m_in": 0.10,
+      "cost_per_1m_out": 0.10,
+      "context_window": 32768,
+      "default_max_tokens": 4000,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama-3.3-70b",
+      "name": "Llama 3.3 70B",
+      "parameters": "70B",
+      "speed_tokens_per_s": 2100,
+      "cost_per_1m_in": 0.85,
+      "cost_per_1m_out": 1.20,
+      "context_window": 128000,
+      "default_max_tokens": 4000,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen-3-32b",
+      "name": "Qwen 3 32B",
+      "parameters": "32B",
+      "speed_tokens_per_s": 2600,
+      "cost_per_1m_in": 0.40,
+      "cost_per_1m_out": 0.80,
+      "context_window": 128000,
+      "default_max_tokens": 32768,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama-4-maverick-17b-128e-instruct",
+      "name": "Llama 4 Maverick",
+      "parameters": "17B",
+      "speed_tokens_per_s": 1500,
+      "cost_per_1m_in": 0.20,
+      "cost_per_1m_out": 0.60,
+      "context_window": 32768,
+      "default_max_tokens": 4000,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen-3-235b-a22b-instruct-2507",
+      "name": "Qwen 3 235B Instruct",
+      "parameters": "235B",
+      "speed_tokens_per_s": 1400,
+      "cost_per_1m_in": 0.60,
+      "cost_per_1m_out": 1.20,
+      "context_window": 131072,
+      "default_max_tokens": 16384,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen-3-235b-a22b-thinking-2507",
+      "name": "Qwen 3 235B Thinking",
+      "parameters": "235B",
+      "speed_tokens_per_s": 1700,
+      "cost_per_1m_in": 0.60,
+      "cost_per_1m_out": 1.20,
+      "context_window": 128000,
+      "default_max_tokens": 32768,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen-3-coder-480b",
+      "name": "Qwen 3 480B Coder",
+      "parameters": "480B",
+      "speed_tokens_per_s": 2000,
+      "cost_per_1m_in": 2.00,
+      "cost_per_1m_out": 2.00,
+      "context_window": 131072,
+      "default_max_tokens": 65536,
+      "can_reason": true,
+      "supports_attachments": false
+    }
+  ],
+  "default_headers": {
+    "User-Agent": "Crush-Client/1.0"
+  }
+}
\ No newline at end of file
diff --git a/internal/providers/providers.go b/internal/providers/providers.go
index 0d89ba16950e0687a2af03c14df53a452d4b29a8..a3dcc303bbff85b8d01e51ab95ce11ebef19113b 100644
--- a/internal/providers/providers.go
+++ b/internal/providers/providers.go
@@ -36,6 +36,9 @@ var bedrockConfig []byte
 //go:embed configs/groq.json
 var groqConfig []byte
 
+//go:embed configs/cerebras.json
+var cerebrasConfig []byte
+
 // ProviderFunc is a function that returns a Provider.
 type ProviderFunc func() catwalk.Provider
 
@@ -48,6 +51,7 @@ var providerRegistry = []ProviderFunc{
 	vertexAIProvider,
 	xAIProvider,
 	groqProvider,
+	cerebrasProvider,
 	openRouterProvider,
 }
 
@@ -104,3 +108,7 @@ func openRouterProvider() catwalk.Provider {
 func groqProvider() catwalk.Provider {
 	return loadProviderFromConfig(groqConfig)
 }
+
+func cerebrasProvider() catwalk.Provider {
+	return loadProviderFromConfig(cerebrasConfig)
+}

From ac8893f5aaf8a863a59eb98d84e119f998c7b6ab Mon Sep 17 00:00:00 2001
From: Kujtim Hoxha
Date: Sat, 2 Aug 2025 10:46:57 +0200
Subject: [PATCH 2/3] chore: change default models

---
 internal/providers/configs/cerebras.json | 51 ++++++++----------------
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/internal/providers/configs/cerebras.json b/internal/providers/configs/cerebras.json
index c73ca67401ba4f31fd3d6d05305dd01a8a70e367..30a79396a0705fa6ad064d4484fa07d582694141 100644
--- a/internal/providers/configs/cerebras.json
+++ b/internal/providers/configs/cerebras.json
@@ -4,14 +4,12 @@
   "type": "openai",
   "api_key": "$CEREBRAS_API_KEY",
   "api_endpoint": "https://api.cerebras.ai/v1",
-  "default_large_model_id": "qwen-3-235b-a22b-instruct-2507",
+  "default_large_model_id": "qwen-3-coder-480b",
   "default_small_model_id": "qwen-3-32b",
   "models": [
     {
       "id": "llama-4-scout-17b-16e-instruct",
       "name": "Llama 4 Scout",
-      "parameters": "109B",
-      "speed_tokens_per_s": 2600,
       "cost_per_1m_in": 0.65,
       "cost_per_1m_out": 0.85,
       "context_window": 32768,
@@ -22,10 +20,8 @@
     {
       "id": "llama3.1-8b",
       "name": "Llama 3.1 8B",
-      "parameters": "8B",
-      "speed_tokens_per_s": 2200,
-      "cost_per_1m_in": 0.10,
-      "cost_per_1m_out": 0.10,
+      "cost_per_1m_in": 0.1,
+      "cost_per_1m_out": 0.1,
       "context_window": 32768,
       "default_max_tokens": 4000,
       "can_reason": true,
@@ -34,10 +30,8 @@
     {
       "id": "llama-3.3-70b",
       "name": "Llama 3.3 70B",
-      "parameters": "70B",
-      "speed_tokens_per_s": 2100,
       "cost_per_1m_in": 0.85,
-      "cost_per_1m_out": 1.20,
+      "cost_per_1m_out": 1.2,
       "context_window": 128000,
       "default_max_tokens": 4000,
       "can_reason": true,
@@ -46,10 +40,8 @@
     {
       "id": "qwen-3-32b",
       "name": "Qwen 3 32B",
-      "parameters": "32B",
-      "speed_tokens_per_s": 2600,
-      "cost_per_1m_in": 0.40,
-      "cost_per_1m_out": 0.80,
+      "cost_per_1m_in": 0.4,
+      "cost_per_1m_out": 0.8,
       "context_window": 128000,
       "default_max_tokens": 32768,
       "can_reason": true,
@@ -58,10 +50,8 @@
     {
       "id": "llama-4-maverick-17b-128e-instruct",
       "name": "Llama 4 Maverick",
-      "parameters": "17B",
-      "speed_tokens_per_s": 1500,
-      "cost_per_1m_in": 0.20,
-      "cost_per_1m_out": 0.60,
+      "cost_per_1m_in": 0.2,
+      "cost_per_1m_out": 0.6,
       "context_window": 32768,
       "default_max_tokens": 4000,
       "can_reason": true,
@@ -70,10 +60,8 @@
     {
       "id": "qwen-3-235b-a22b-instruct-2507",
       "name": "Qwen 3 235B Instruct",
-      "parameters": "235B",
-      "speed_tokens_per_s": 1400,
-      "cost_per_1m_in": 0.60,
-      "cost_per_1m_out": 1.20,
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 1.2,
       "context_window": 131072,
       "default_max_tokens": 16384,
       "can_reason": true,
@@ -82,10 +70,8 @@
     {
       "id": "qwen-3-235b-a22b-thinking-2507",
       "name": "Qwen 3 235B Thinking",
-      "parameters": "235B",
-      "speed_tokens_per_s": 1700,
-      "cost_per_1m_in": 0.60,
-      "cost_per_1m_out": 1.20,
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 1.2,
       "context_window": 128000,
       "default_max_tokens": 32768,
       "can_reason": true,
@@ -94,17 +80,12 @@
     {
       "id": "qwen-3-coder-480b",
       "name": "Qwen 3 480B Coder",
-      "parameters": "480B",
-      "speed_tokens_per_s": 2000,
-      "cost_per_1m_in": 2.00,
-      "cost_per_1m_out": 2.00,
+      "cost_per_1m_in": 2.0,
+      "cost_per_1m_out": 2.0,
       "context_window": 131072,
       "default_max_tokens": 65536,
       "can_reason": true,
       "supports_attachments": false
     }
-  ],
-  "default_headers": {
-    "User-Agent": "Crush-Client/1.0"
-  }
-}
\ No newline at end of file
+  ]
+}

From 1a2d41db48ddeac1e5db1e1e0c2897919fdda66c Mon Sep 17 00:00:00 2001
From: Kujtim Hoxha
Date: Sat, 2 Aug 2025 12:44:59 +0200
Subject: [PATCH 3/3] chore: add to providers

---
 internal/providers/providers.go | 2 +-
 pkg/catwalk/provider.go         | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/internal/providers/providers.go b/internal/providers/providers.go
index bd4e48db3014773351cc180ec58a49a31dd47384..8c6754954dda6bd8478fbf1b7ba43da467bbccc5 100644
--- a/internal/providers/providers.go
+++ b/internal/providers/providers.go
@@ -54,9 +54,9 @@ var providerRegistry = []ProviderFunc{
 	vertexAIProvider,
 	xAIProvider,
 	groqProvider,
-	cerebrasProvider,
 	openRouterProvider,
 	lambdaProvider,
+	cerebrasProvider,
 }
 
 // GetAll returns all registered providers.
diff --git a/pkg/catwalk/provider.go b/pkg/catwalk/provider.go
index fa0094bb07a0d476b35e1858bf4ab79dc64b0647..e72dbdb9f8d7966b621fd4e4031bec4a63215c33 100644
--- a/pkg/catwalk/provider.go
+++ b/pkg/catwalk/provider.go
@@ -28,6 +28,7 @@ const (
 	InferenceProviderGROQ       InferenceProvider = "groq"
 	InferenceProviderOpenRouter InferenceProvider = "openrouter"
 	InferenceProviderLambda     InferenceProvider = "lambda"
+	InferenceProviderCerebras   InferenceProvider = "cerebras"
 )
 
 // Provider represents an AI provider configuration.
@@ -72,5 +73,6 @@ func KnownProviders() []InferenceProvider {
 		InferenceProviderGROQ,
 		InferenceProviderOpenRouter,
 		InferenceProviderLambda,
+		InferenceProviderCerebras,
 	}
 }