From 58ef5687c36559f911325747b0da1440c1a560f1 Mon Sep 17 00:00:00 2001
From: Seb Duerr
Date: Mon, 8 Dec 2025 10:28:10 -0800
Subject: [PATCH] fix(cerebras): update models to match current api and increase max tokens (#114)

---
 internal/deprecated/configs/cerebras.json |  72 ++++-----------
 internal/providers/configs/cerebras.json  | 105 +++++-----------------
 2 files changed, 36 insertions(+), 141 deletions(-)

diff --git a/internal/deprecated/configs/cerebras.json b/internal/deprecated/configs/cerebras.json
index 618e5761f0b84a6ba2ab7966d738f1dd47a2c30b..a35726957261059d00152176108b79fb52083e78 100644
--- a/internal/deprecated/configs/cerebras.json
+++ b/internal/deprecated/configs/cerebras.json
@@ -4,46 +4,26 @@
   "type": "openai",
   "api_key": "$CEREBRAS_API_KEY",
   "api_endpoint": "https://api.cerebras.ai/v1",
-  "default_large_model_id": "qwen-3-coder-480b",
+  "default_large_model_id": "zai-glm-4.6",
   "default_small_model_id": "qwen-3-32b",
   "models": [
-    {
-      "id": "llama-4-scout-17b-16e-instruct",
-      "name": "Llama 4 Scout",
-      "cost_per_1m_in": 0.65,
-      "cost_per_1m_out": 0.85,
-      "context_window": 32768,
-      "default_max_tokens": 4000,
-      "can_reason": false,
-      "supports_attachments": false
-    },
-    {
-      "id": "llama3.1-8b",
-      "name": "Llama 3.1 8B",
-      "cost_per_1m_in": 0.1,
-      "cost_per_1m_out": 0.1,
-      "context_window": 32768,
-      "default_max_tokens": 4000,
-      "can_reason": false,
-      "supports_attachments": false
-    },
     {
       "id": "llama-3.3-70b",
       "name": "Llama 3.3 70B",
       "cost_per_1m_in": 0.85,
       "cost_per_1m_out": 1.2,
-      "context_window": 128000,
-      "default_max_tokens": 4000,
+      "context_window": 131072,
+      "default_max_tokens": 25000,
       "can_reason": false,
       "supports_attachments": false
     },
     {
       "id": "gpt-oss-120b",
-      "name": "gpt-oss-120b",
-      "cost_per_1m_in": 0.4,
-      "cost_per_1m_out": 0.8,
-      "context_window": 128000,
-      "default_max_tokens": 65536,
+      "name": "OpenAI GPT OSS",
+      "cost_per_1m_in": 0.35,
+      "cost_per_1m_out": 0.75,
+      "context_window": 131072,
+      "default_max_tokens": 25000,
       "can_reason": true,
       "has_reasoning_efforts": true,
       "default_reasoning_efforts": "medium",
@@ -54,18 +34,8 @@
       "name": "Qwen 3 32B",
       "cost_per_1m_in": 0.4,
       "cost_per_1m_out": 0.8,
-      "context_window": 128000,
-      "default_max_tokens": 32768,
-      "can_reason": false,
-      "supports_attachments": false
-    },
-    {
-      "id": "llama-4-maverick-17b-128e-instruct",
-      "name": "Llama 4 Maverick",
-      "cost_per_1m_in": 0.2,
-      "cost_per_1m_out": 0.6,
-      "context_window": 32768,
-      "default_max_tokens": 4000,
+      "context_window": 131072,
+      "default_max_tokens": 25000,
       "can_reason": false,
       "supports_attachments": false
     },
@@ -75,27 +45,17 @@
       "cost_per_1m_in": 0.6,
       "cost_per_1m_out": 1.2,
       "context_window": 131072,
-      "default_max_tokens": 16384,
-      "can_reason": false,
-      "supports_attachments": false
-    },
-    {
-      "id": "qwen-3-235b-a22b-thinking-2507",
-      "name": "Qwen 3 235B Thinking",
-      "cost_per_1m_in": 0.6,
-      "cost_per_1m_out": 1.2,
-      "context_window": 128000,
-      "default_max_tokens": 32768,
+      "default_max_tokens": 25000,
       "can_reason": false,
       "supports_attachments": false
     },
     {
-      "id": "qwen-3-coder-480b",
-      "name": "Qwen 3 480B Coder",
-      "cost_per_1m_in": 2.0,
-      "cost_per_1m_out": 2.0,
+      "id": "zai-glm-4.6",
+      "name": "Z.ai GLM 4.6",
+      "cost_per_1m_in": 2.25,
+      "cost_per_1m_out": 2.75,
       "context_window": 131072,
-      "default_max_tokens": 65536,
+      "default_max_tokens": 25000,
       "can_reason": false,
       "supports_attachments": false
     }
diff --git a/internal/providers/configs/cerebras.json b/internal/providers/configs/cerebras.json
index 1ce1b858d943b43119947632d59198ece7cc5884..d6ae695f9606ca914ff39ec307d21847f5255423 100644
--- a/internal/providers/configs/cerebras.json
+++ b/internal/providers/configs/cerebras.json
@@ -6,54 +6,27 @@
   "api_endpoint": "https://api.cerebras.ai/v1",
   "default_large_model_id": "gpt-oss-120b",
   "default_small_model_id": "qwen-3-32b",
+  "default_headers": {
+    "X-Cerebras-3rd-Party-Integration": "crush"
+  },
   "models": [
-    {
-      "id": "zai-glm-4.6",
-      "name": "Z.AI GLM 4.6",
-      "cost_per_1m_in": 0,
-      "cost_per_1m_out": 0,
-      "context_window": 131072,
-      "default_max_tokens": 40960,
-      "can_reason": false,
-      "supports_attachments": false
-    },
-    {
-      "id": "llama-4-scout-17b-16e-instruct",
-      "name": "Llama 4 Scout",
-      "cost_per_1m_in": 0.65,
-      "cost_per_1m_out": 0.85,
-      "context_window": 32768,
-      "default_max_tokens": 4000,
-      "can_reason": false,
-      "supports_attachments": false
-    },
-    {
-      "id": "llama3.1-8b",
-      "name": "Llama 3.1 8B",
-      "cost_per_1m_in": 0.1,
-      "cost_per_1m_out": 0.1,
-      "context_window": 32768,
-      "default_max_tokens": 4000,
-      "can_reason": false,
-      "supports_attachments": false
-    },
     {
       "id": "llama-3.3-70b",
       "name": "Llama 3.3 70B",
       "cost_per_1m_in": 0.85,
       "cost_per_1m_out": 1.2,
-      "context_window": 128000,
-      "default_max_tokens": 4000,
+      "context_window": 131072,
+      "default_max_tokens": 25000,
       "can_reason": false,
       "supports_attachments": false
     },
     {
       "id": "gpt-oss-120b",
-      "name": "gpt-oss-120b",
-      "cost_per_1m_in": 0.4,
-      "cost_per_1m_out": 0.8,
-      "context_window": 128000,
-      "default_max_tokens": 65536,
+      "name": "OpenAI GPT OSS",
+      "cost_per_1m_in": 0.35,
+      "cost_per_1m_out": 0.75,
+      "context_window": 131072,
+      "default_max_tokens": 25000,
       "can_reason": true,
       "reasoning_levels": [
         "low",
@@ -68,18 +41,8 @@
       "name": "Qwen 3 32B",
       "cost_per_1m_in": 0.4,
       "cost_per_1m_out": 0.8,
-      "context_window": 128000,
-      "default_max_tokens": 32768,
-      "can_reason": false,
-      "supports_attachments": false
-    },
-    {
-      "id": "llama-4-maverick-17b-128e-instruct",
-      "name": "Llama 4 Maverick",
-      "cost_per_1m_in": 0.2,
-      "cost_per_1m_out": 0.6,
-      "context_window": 32768,
-      "default_max_tokens": 4000,
+      "context_window": 131072,
+      "default_max_tokens": 25000,
       "can_reason": false,
       "supports_attachments": false
     },
@@ -89,46 +52,18 @@
       "cost_per_1m_in": 0.6,
       "cost_per_1m_out": 1.2,
       "context_window": 131072,
-      "default_max_tokens": 16384,
-      "can_reason": true,
-      "reasoning_levels": [
-        "low",
-        "medium",
-        "high"
-      ],
-      "default_reasoning_efforts": "medium",
-      "supports_attachments": false
-    },
-    {
-      "id": "qwen-3-235b-a22b-thinking-2507",
-      "name": "Qwen 3 235B Thinking",
-      "cost_per_1m_in": 0.6,
-      "cost_per_1m_out": 1.2,
-      "context_window": 128000,
-      "default_max_tokens": 32768,
-      "can_reason": true,
-      "reasoning_levels": [
-        "low",
-        "medium",
-        "high"
-      ],
-      "default_reasoning_efforts": "medium",
+      "default_max_tokens": 25000,
+      "can_reason": false,
       "supports_attachments": false
     },
     {
-      "id": "qwen-3-coder-480b",
-      "name": "Qwen 3 480B Coder",
-      "cost_per_1m_in": 2.0,
-      "cost_per_1m_out": 2.0,
+      "id": "zai-glm-4.6",
+      "name": "Z.ai GLM 4.6",
+      "cost_per_1m_in": 2.25,
+      "cost_per_1m_out": 2.75,
       "context_window": 131072,
-      "default_max_tokens": 65536,
-      "can_reason": true,
-      "reasoning_levels": [
-        "low",
-        "medium",
-        "high"
-      ],
-      "default_reasoning_efforts": "medium",
+      "default_max_tokens": 25000,
+      "can_reason": false,
       "supports_attachments": false
     }
   ]