@@ -4,46 +4,26 @@
"type": "openai",
"api_key": "$CEREBRAS_API_KEY",
"api_endpoint": "https://api.cerebras.ai/v1",
- "default_large_model_id": "qwen-3-coder-480b",
+ "default_large_model_id": "zai-glm-4.6",
"default_small_model_id": "qwen-3-32b",
"models": [
- {
- "id": "llama-4-scout-17b-16e-instruct",
- "name": "Llama 4 Scout",
- "cost_per_1m_in": 0.65,
- "cost_per_1m_out": 0.85,
- "context_window": 32768,
- "default_max_tokens": 4000,
- "can_reason": false,
- "supports_attachments": false
- },
- {
- "id": "llama3.1-8b",
- "name": "Llama 3.1 8B",
- "cost_per_1m_in": 0.1,
- "cost_per_1m_out": 0.1,
- "context_window": 32768,
- "default_max_tokens": 4000,
- "can_reason": false,
- "supports_attachments": false
- },
{
"id": "llama-3.3-70b",
"name": "Llama 3.3 70B",
"cost_per_1m_in": 0.85,
"cost_per_1m_out": 1.2,
- "context_window": 128000,
- "default_max_tokens": 4000,
+ "context_window": 131072,
+ "default_max_tokens": 25000,
"can_reason": false,
"supports_attachments": false
},
{
"id": "gpt-oss-120b",
- "name": "gpt-oss-120b",
- "cost_per_1m_in": 0.4,
- "cost_per_1m_out": 0.8,
- "context_window": 128000,
- "default_max_tokens": 65536,
+ "name": "OpenAI GPT OSS",
+ "cost_per_1m_in": 0.35,
+ "cost_per_1m_out": 0.75,
+ "context_window": 131072,
+ "default_max_tokens": 25000,
"can_reason": true,
"has_reasoning_efforts": true,
"default_reasoning_efforts": "medium",
@@ -54,18 +34,8 @@
"name": "Qwen 3 32B",
"cost_per_1m_in": 0.4,
"cost_per_1m_out": 0.8,
- "context_window": 128000,
- "default_max_tokens": 32768,
- "can_reason": false,
- "supports_attachments": false
- },
- {
- "id": "llama-4-maverick-17b-128e-instruct",
- "name": "Llama 4 Maverick",
- "cost_per_1m_in": 0.2,
- "cost_per_1m_out": 0.6,
- "context_window": 32768,
- "default_max_tokens": 4000,
+ "context_window": 131072,
+ "default_max_tokens": 25000,
"can_reason": false,
"supports_attachments": false
},
@@ -75,27 +45,17 @@
"cost_per_1m_in": 0.6,
"cost_per_1m_out": 1.2,
"context_window": 131072,
- "default_max_tokens": 16384,
- "can_reason": false,
- "supports_attachments": false
- },
- {
- "id": "qwen-3-235b-a22b-thinking-2507",
- "name": "Qwen 3 235B Thinking",
- "cost_per_1m_in": 0.6,
- "cost_per_1m_out": 1.2,
- "context_window": 128000,
- "default_max_tokens": 32768,
+ "default_max_tokens": 25000,
"can_reason": false,
"supports_attachments": false
},
{
- "id": "qwen-3-coder-480b",
- "name": "Qwen 3 480B Coder",
- "cost_per_1m_in": 2.0,
- "cost_per_1m_out": 2.0,
+ "id": "zai-glm-4.6",
+ "name": "Z.ai GLM 4.6",
+ "cost_per_1m_in": 2.25,
+ "cost_per_1m_out": 2.75,
"context_window": 131072,
- "default_max_tokens": 65536,
+ "default_max_tokens": 25000,
"can_reason": false,
"supports_attachments": false
}
@@ -6,54 +6,27 @@
"api_endpoint": "https://api.cerebras.ai/v1",
"default_large_model_id": "gpt-oss-120b",
"default_small_model_id": "qwen-3-32b",
+ "default_headers": {
+ "X-Cerebras-3rd-Party-Integration": "crush"
+ },
"models": [
- {
- "id": "zai-glm-4.6",
- "name": "Z.AI GLM 4.6",
- "cost_per_1m_in": 0,
- "cost_per_1m_out": 0,
- "context_window": 131072,
- "default_max_tokens": 40960,
- "can_reason": false,
- "supports_attachments": false
- },
- {
- "id": "llama-4-scout-17b-16e-instruct",
- "name": "Llama 4 Scout",
- "cost_per_1m_in": 0.65,
- "cost_per_1m_out": 0.85,
- "context_window": 32768,
- "default_max_tokens": 4000,
- "can_reason": false,
- "supports_attachments": false
- },
- {
- "id": "llama3.1-8b",
- "name": "Llama 3.1 8B",
- "cost_per_1m_in": 0.1,
- "cost_per_1m_out": 0.1,
- "context_window": 32768,
- "default_max_tokens": 4000,
- "can_reason": false,
- "supports_attachments": false
- },
{
"id": "llama-3.3-70b",
"name": "Llama 3.3 70B",
"cost_per_1m_in": 0.85,
"cost_per_1m_out": 1.2,
- "context_window": 128000,
- "default_max_tokens": 4000,
+ "context_window": 131072,
+ "default_max_tokens": 25000,
"can_reason": false,
"supports_attachments": false
},
{
"id": "gpt-oss-120b",
- "name": "gpt-oss-120b",
- "cost_per_1m_in": 0.4,
- "cost_per_1m_out": 0.8,
- "context_window": 128000,
- "default_max_tokens": 65536,
+ "name": "OpenAI GPT OSS",
+ "cost_per_1m_in": 0.35,
+ "cost_per_1m_out": 0.75,
+ "context_window": 131072,
+ "default_max_tokens": 25000,
"can_reason": true,
"reasoning_levels": [
"low",
@@ -68,18 +41,8 @@
"name": "Qwen 3 32B",
"cost_per_1m_in": 0.4,
"cost_per_1m_out": 0.8,
- "context_window": 128000,
- "default_max_tokens": 32768,
- "can_reason": false,
- "supports_attachments": false
- },
- {
- "id": "llama-4-maverick-17b-128e-instruct",
- "name": "Llama 4 Maverick",
- "cost_per_1m_in": 0.2,
- "cost_per_1m_out": 0.6,
- "context_window": 32768,
- "default_max_tokens": 4000,
+ "context_window": 131072,
+ "default_max_tokens": 25000,
"can_reason": false,
"supports_attachments": false
},
@@ -89,46 +52,18 @@
"cost_per_1m_in": 0.6,
"cost_per_1m_out": 1.2,
"context_window": 131072,
- "default_max_tokens": 16384,
- "can_reason": true,
- "reasoning_levels": [
- "low",
- "medium",
- "high"
- ],
- "default_reasoning_efforts": "medium",
- "supports_attachments": false
- },
- {
- "id": "qwen-3-235b-a22b-thinking-2507",
- "name": "Qwen 3 235B Thinking",
- "cost_per_1m_in": 0.6,
- "cost_per_1m_out": 1.2,
- "context_window": 128000,
- "default_max_tokens": 32768,
- "can_reason": true,
- "reasoning_levels": [
- "low",
- "medium",
- "high"
- ],
- "default_reasoning_efforts": "medium",
+ "default_max_tokens": 25000,
+ "can_reason": false,
"supports_attachments": false
},
{
- "id": "qwen-3-coder-480b",
- "name": "Qwen 3 480B Coder",
- "cost_per_1m_in": 2.0,
- "cost_per_1m_out": 2.0,
+ "id": "zai-glm-4.6",
+ "name": "Z.ai GLM 4.6",
+ "cost_per_1m_in": 2.25,
+ "cost_per_1m_out": 2.75,
"context_window": 131072,
- "default_max_tokens": 65536,
- "can_reason": true,
- "reasoning_levels": [
- "low",
- "medium",
- "high"
- ],
- "default_reasoning_efforts": "medium",
+ "default_max_tokens": 25000,
+ "can_reason": false,
"supports_attachments": false
}
]
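For reference, a minimal sketch of the provider block these hunks converge on, assembled only from fields visible in the diff above (the model list is abridged here to the newly added Z.ai GLM 4.6 entry; the full files also keep Llama 3.3 70B, GPT OSS, Qwen 3 32B, and Qwen 3 235B Instruct):

```json
{
  "type": "openai",
  "api_key": "$CEREBRAS_API_KEY",
  "api_endpoint": "https://api.cerebras.ai/v1",
  "default_large_model_id": "gpt-oss-120b",
  "default_small_model_id": "qwen-3-32b",
  "default_headers": {
    "X-Cerebras-3rd-Party-Integration": "crush"
  },
  "models": [
    {
      "id": "zai-glm-4.6",
      "name": "Z.ai GLM 4.6",
      "cost_per_1m_in": 2.25,
      "cost_per_1m_out": 2.75,
      "context_window": 131072,
      "default_max_tokens": 25000,
      "can_reason": false,
      "supports_attachments": false
    }
  ]
}
```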