diff --git a/internal/providers/configs/cerebras.json b/internal/providers/configs/cerebras.json
new file mode 100644
index 0000000000000000000000000000000000000000..30a79396a0705fa6ad064d4484fa07d582694141
--- /dev/null
+++ b/internal/providers/configs/cerebras.json
@@ -0,0 +1,91 @@
+{
+  "name": "Cerebras",
+  "id": "cerebras",
+  "type": "openai",
+  "api_key": "$CEREBRAS_API_KEY",
+  "api_endpoint": "https://api.cerebras.ai/v1",
+  "default_large_model_id": "qwen-3-coder-480b",
+  "default_small_model_id": "qwen-3-32b",
+  "models": [
+    {
+      "id": "llama-4-scout-17b-16e-instruct",
+      "name": "Llama 4 Scout",
+      "cost_per_1m_in": 0.65,
+      "cost_per_1m_out": 0.85,
+      "context_window": 32768,
+      "default_max_tokens": 4000,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama3.1-8b",
+      "name": "Llama 3.1 8B",
+      "cost_per_1m_in": 0.1,
+      "cost_per_1m_out": 0.1,
+      "context_window": 32768,
+      "default_max_tokens": 4000,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama-3.3-70b",
+      "name": "Llama 3.3 70B",
+      "cost_per_1m_in": 0.85,
+      "cost_per_1m_out": 1.2,
+      "context_window": 128000,
+      "default_max_tokens": 4000,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen-3-32b",
+      "name": "Qwen 3 32B",
+      "cost_per_1m_in": 0.4,
+      "cost_per_1m_out": 0.8,
+      "context_window": 128000,
+      "default_max_tokens": 32768,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "llama-4-maverick-17b-128e-instruct",
+      "name": "Llama 4 Maverick",
+      "cost_per_1m_in": 0.2,
+      "cost_per_1m_out": 0.6,
+      "context_window": 32768,
+      "default_max_tokens": 4000,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen-3-235b-a22b-instruct-2507",
+      "name": "Qwen 3 235B Instruct",
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 1.2,
+      "context_window": 131072,
+      "default_max_tokens": 16384,
+      "can_reason": false,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen-3-235b-a22b-thinking-2507",
+      "name": "Qwen 3 235B Thinking",
+      "cost_per_1m_in": 0.6,
+      "cost_per_1m_out": 1.2,
+      "context_window": 128000,
+      "default_max_tokens": 32768,
+      "can_reason": true,
+      "supports_attachments": false
+    },
+    {
+      "id": "qwen-3-coder-480b",
+      "name": "Qwen 3 480B Coder",
+      "cost_per_1m_in": 2.0,
+      "cost_per_1m_out": 2.0,
+      "context_window": 131072,
+      "default_max_tokens": 65536,
+      "can_reason": false,
+      "supports_attachments": false
+    }
+  ]
+}
diff --git a/internal/providers/providers.go b/internal/providers/providers.go
index a3634474452f634b81f9aa1b3604269fc5d7ed9e..8c6754954dda6bd8478fbf1b7ba43da467bbccc5 100644
--- a/internal/providers/providers.go
+++ b/internal/providers/providers.go
@@ -39,6 +39,9 @@ var groqConfig []byte
 //go:embed configs/lambda.json
 var lambdaConfig []byte
 
+//go:embed configs/cerebras.json
+var cerebrasConfig []byte
+
 // ProviderFunc is a function that returns a Provider.
 type ProviderFunc func() catwalk.Provider
 
@@ -53,6 +56,7 @@ var providerRegistry = []ProviderFunc{
 	groqProvider,
 	openRouterProvider,
 	lambdaProvider,
+	cerebrasProvider,
 }
 
 // GetAll returns all registered providers.
@@ -112,3 +116,7 @@ func groqProvider() catwalk.Provider {
 func lambdaProvider() catwalk.Provider {
 	return loadProviderFromConfig(lambdaConfig)
 }
+
+func cerebrasProvider() catwalk.Provider {
+	return loadProviderFromConfig(cerebrasConfig)
+}
diff --git a/pkg/catwalk/provider.go b/pkg/catwalk/provider.go
index fa0094bb07a0d476b35e1858bf4ab79dc64b0647..e72dbdb9f8d7966b621fd4e4031bec4a63215c33 100644
--- a/pkg/catwalk/provider.go
+++ b/pkg/catwalk/provider.go
@@ -28,6 +28,7 @@ const (
 	InferenceProviderGROQ       InferenceProvider = "groq"
 	InferenceProviderOpenRouter InferenceProvider = "openrouter"
 	InferenceProviderLambda     InferenceProvider = "lambda"
+	InferenceProviderCerebras   InferenceProvider = "cerebras"
 )
 
 // Provider represents an AI provider configuration.
@@ -72,5 +73,6 @@ func KnownProviders() []InferenceProvider {
 		InferenceProviderGROQ,
 		InferenceProviderOpenRouter,
 		InferenceProviderLambda,
+		InferenceProviderCerebras,
 	}
 }