chutes.json

  1{
  2  "name": "Chutes",
  3  "id": "chutes",
  4  "type": "openai-compat",
  5  "api_key": "$CHUTES_API_KEY",
  6  "api_endpoint": "https://llm.chutes.ai/v1",
  7  "default_large_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
  8  "default_small_model_id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
  9  "models": [
 10    {
 11      "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8",
 12      "name": "Qwen3 Coder 480B A35B Instruct (FP8)",
 13      "cost_per_1m_in": 0.2,
 14      "cost_per_1m_out": 0.8,
 15      "context_window": 262000,
 16      "default_max_tokens": 32768,
 17      "can_reason": true,
 18      "reasoning_levels": [
 19        "low",
 20        "medium",
 21        "high"
 22      ],
 23      "default_reasoning_efforts": "medium",
 24      "supports_attachments": true
 25    },
 26    {
 27      "id": "zai-org/GLM-4.5-FP8",
 28      "name": "GLM 4.5 FP8",
 29      "cost_per_1m_in": 0.0,
 30      "cost_per_1m_out": 0.0,
 31      "context_window": 98000,
 32      "default_max_tokens": 32768,
 33      "can_reason": true,
 34      "reasoning_levels": [
 35        "low",
 36        "medium",
 37        "high"
 38      ],
 39      "default_reasoning_efforts": "medium",
 40      "supports_attachments": true
 41    },
 42    {
 43      "id": "moonshotai/Kimi-K2-Instruct-75k",
 44      "name": "Kimi K2 Instruct",
 45      "cost_per_1m_in": 0.15,
 46      "cost_per_1m_out": 0.59,
 47      "context_window": 75000,
 48      "default_max_tokens": 32768,
 49      "can_reason": true,
 50      "reasoning_levels": [
 51        "low",
 52        "medium",
 53        "high"
 54      ],
 55      "default_reasoning_efforts": "medium",
 56      "supports_attachments": true
 57    },
 58    {
 59      "id": "deepseek-ai/DeepSeek-R1-0528",
 60      "name": "DeepSeek R1 0528",
 61      "cost_per_1m_in": 0.18,
 62      "cost_per_1m_out": 0.72,
 63      "context_window": 75000,
 64      "default_max_tokens": 32768,
 65      "can_reason": true,
 66      "reasoning_levels": [
 67        "low",
 68        "medium",
 69        "high"
 70      ],
 71      "default_reasoning_efforts": "medium",
 72      "supports_attachments": true
 73    },
 74    {
 75      "id": "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
 76      "name": "DeepSeek R1 0528 Qwen3 8B",
 77      "cost_per_1m_in": 0.02,
 78      "cost_per_1m_out": 0.07,
 79      "context_window": 32768,
 80      "default_max_tokens": 8192,
 81      "can_reason": false,
 82      "has_reasoning_efforts": false,
 83      "supports_attachments": true
 84    },
 85    {
 86      "id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B",
 87      "name": "DeepSeek R1 Distill Llama 70B",
 88      "cost_per_1m_in": 0.03,
 89      "cost_per_1m_out": 0.14,
 90      "context_window": 65536,
 91      "default_max_tokens": 8192,
 92      "can_reason": false,
 93      "has_reasoning_efforts": false,
 94      "supports_attachments": true
 95    },
 96    {
 97      "id": "tngtech/DeepSeek-R1T-Chimera",
 98      "name": "DeepSeek R1T Chimera",
 99      "cost_per_1m_in": 0.18,
100      "cost_per_1m_out": 0.72,
101      "context_window": 131072,
102      "default_max_tokens": 32768,
103      "can_reason": true,
104      "reasoning_levels": [
105        "low",
106        "medium",
107        "high"
108      ],
109      "default_reasoning_efforts": "medium",
110      "supports_attachments": true
111    },
112    {
113      "id": "tngtech/DeepSeek-TNG-R1T2-Chimera",
114      "name": "DeepSeek TNG R1T2 Chimera",
115      "cost_per_1m_in": 0.2,
116      "cost_per_1m_out": 0.8,
117      "context_window": 262144,
118      "default_max_tokens": 65536,
119      "can_reason": true,
120      "reasoning_levels": [
121        "low",
122        "medium",
123        "high"
124      ],
125      "default_reasoning_efforts": "medium",
126      "supports_attachments": true
127    },
128    {
129      "id": "deepseek-ai/DeepSeek-V3-0324",
130      "name": "DeepSeek V3 0324",
131      "cost_per_1m_in": 0.18,
132      "cost_per_1m_out": 0.72,
133      "context_window": 75000,
134      "default_max_tokens": 32768,
135      "can_reason": true,
136      "reasoning_levels": [
137        "low",
138        "medium",
139        "high"
140      ],
141      "default_reasoning_efforts": "medium",
142      "supports_attachments": true
143    },
144    {
145      "id": "chutesai/Devstral-Small-2505",
146      "name": "Devstral Small 2505",
147      "cost_per_1m_in": 0.02,
148      "cost_per_1m_out": 0.08,
149      "context_window": 32768,
150      "default_max_tokens": 8192,
151      "can_reason": false,
152      "has_reasoning_efforts": false,
153      "supports_attachments": true
154    },
155    {
156      "id": "zai-org/GLM-4.5-Air",
157      "name": "GLM 4.5 Air",
158      "cost_per_1m_in": 0.0,
159      "cost_per_1m_out": 0.0,
160      "context_window": 131072,
161      "default_max_tokens": 32768,
162      "can_reason": true,
163      "reasoning_levels": [
164        "low",
165        "medium",
166        "high"
167      ],
168      "default_reasoning_efforts": "medium",
169      "supports_attachments": true
170    },
171    {
172      "id": "openai/gpt-oss-120b",
173      "name": "GPT OSS 120B",
174      "cost_per_1m_in": 0.1,
175      "cost_per_1m_out": 0.41,
176      "context_window": 131072,
177      "default_max_tokens": 32768,
178      "can_reason": true,
179      "reasoning_levels": [
180        "low",
181        "medium",
182        "high"
183      ],
184      "default_reasoning_efforts": "medium",
185      "supports_attachments": true
186    },
187    {
188      "id": "chutesai/Mistral-Small-3.2-24B-Instruct-2506",
189      "name": "Mistral Small 3.2 24B Instruct 2506",
190      "cost_per_1m_in": 0.02,
191      "cost_per_1m_out": 0.08,
192      "context_window": 32768,
193      "default_max_tokens": 8192,
194      "can_reason": false,
195      "has_reasoning_efforts": false,
196      "supports_attachments": true
197    },
198    {
199      "id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
200      "name": "Qwen3 235B A22B Instruct 2507",
201      "cost_per_1m_in": 0.08,
202      "cost_per_1m_out": 0.31,
203      "context_window": 32768,
204      "default_max_tokens": 8192,
205      "can_reason": false,
206      "has_reasoning_efforts": false,
207      "supports_attachments": true
208    },
209    {
210      "id": "Qwen/Qwen3-30B-A3B",
211      "name": "Qwen3 30B A3B",
212      "cost_per_1m_in": 0.02,
213      "cost_per_1m_out": 0.08,
214      "context_window": 32768,
215      "default_max_tokens": 8192,
216      "can_reason": false,
217      "has_reasoning_efforts": false,
218      "supports_attachments": true
219    },
220    {
221      "id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
222      "name": "Qwen3 235B A22B Thinking 2507",
223      "cost_per_1m_in": 0.08,
224      "cost_per_1m_out": 0.31,
225      "context_window": 32768,
226      "default_max_tokens": 8192,
227      "can_reason": true,
228      "reasoning_levels": [
229        "low",
230        "medium",
231        "high"
232      ],
233      "default_reasoning_efforts": "medium",
234      "supports_attachments": true
235    },
236    {
237      "id": "deepseek-ai/DeepSeek-V3.1",
238      "name": "DeepSeek V3.1",
239      "cost_per_1m_in": 0.2,
240      "cost_per_1m_out": 0.8,
241      "context_window": 163840,
242      "default_max_tokens": 32768,
243      "can_reason": false,
244      "has_reasoning_efforts": false,
245      "supports_attachments": true
246    },
247    {
248      "id": "deepseek-ai/DeepSeek-V3.1:THINKING",
249      "name": "DeepSeek V3.1 Reasoning",
250      "cost_per_1m_in": 0.2,
251      "cost_per_1m_out": 0.8,
252      "context_window": 163840,
253      "default_max_tokens": 32768,
254      "can_reason": true,
255      "reasoning_levels": [
256        "low",
257        "medium",
258        "high"
259      ],
260      "default_reasoning_efforts": "medium",
261      "supports_attachments": true
262    },
263    {
264      "id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
265      "name": "Qwen3 30B A3B Instruct 2507",
266      "cost_per_1m_in": 0.05,
267      "cost_per_1m_out": 0.2,
268      "context_window": 262144,
269      "default_max_tokens": 32768,
270      "can_reason": false,
271      "has_reasoning_efforts": false,
272      "supports_attachments": true
273    },
274    {
275      "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
276      "name": "Qwen3 Coder 30B A3B Instruct",
277      "cost_per_1m_in": 0.0,
278      "cost_per_1m_out": 0.0,
279      "context_window": 262144,
280      "default_max_tokens": 32768,
281      "can_reason": false,
282      "has_reasoning_efforts": false,
283      "supports_attachments": true
284    }
285  ]
286}