nebius.json

  1{
  2  "name": "Nebius Token Factory",
  3  "id": "nebius",
  4  "api_key": "$NEBIUS_API_KEY",
  5  "api_endpoint": "https://api.tokenfactory.nebius.com/v1",
  6  "type": "openai-compat",
  7  "default_large_model_id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
  8  "default_small_model_id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B",
  9  "models": [
 10    {
 11      "id": "deepseek-ai/DeepSeek-R1-0528",
 12      "name": "DeepSeek-R1-0528",
 13      "cost_per_1m_in": 0.8,
 14      "cost_per_1m_out": 2.4,
 15      "cost_per_1m_in_cached": 0,
 16      "cost_per_1m_out_cached": 0,
 17      "context_window": 163840,
 18      "default_max_tokens": 16384,
 19      "can_reason": true,
 20      "reasoning_levels": [
 21        "low",
 22        "medium",
 23        "high"
 24      ],
 25      "default_reasoning_effort": "medium",
 26      "supports_attachments": false
 27    },
 28    {
 29      "id": "deepseek-ai/DeepSeek-V3-0324",
 30      "name": "DeepSeek-V3-0324",
 31      "cost_per_1m_in": 0.5,
 32      "cost_per_1m_out": 1.5,
 33      "cost_per_1m_in_cached": 0,
 34      "cost_per_1m_out_cached": 0,
 35      "context_window": 163840,
 36      "default_max_tokens": 16384,
 37      "can_reason": false,
 38      "supports_attachments": false
 39    },
 40    {
 41      "id": "deepseek-ai/DeepSeek-V3.2",
 42      "name": "DeepSeek-V3.2",
 43      "cost_per_1m_in": 0.3,
 44      "cost_per_1m_out": 0.45,
 45      "cost_per_1m_in_cached": 0,
 46      "cost_per_1m_out_cached": 0,
 47      "context_window": 163840,
 48      "default_max_tokens": 16384,
 49      "can_reason": true,
 50      "reasoning_levels": [
 51        "low",
 52        "medium",
 53        "high"
 54      ],
 55      "default_reasoning_effort": "medium",
 56      "supports_attachments": false
 57    },
 58    {
 59      "id": "deepseek-ai/DeepSeek-V3.2-fast",
 60      "name": "DeepSeek-V3.2 (fast)",
 61      "cost_per_1m_in": 0.4,
 62      "cost_per_1m_out": 2,
 63      "cost_per_1m_in_cached": 0,
 64      "cost_per_1m_out_cached": 0,
 65      "context_window": 8000,
 66      "default_max_tokens": 800,
 67      "can_reason": true,
 68      "reasoning_levels": [
 69        "low",
 70        "medium",
 71        "high"
 72      ],
 73      "default_reasoning_effort": "medium",
 74      "supports_attachments": false
 75    },
 76    {
 77      "id": "zai-org/GLM-4.5",
 78      "name": "GLM-4.5",
 79      "cost_per_1m_in": 0.6,
 80      "cost_per_1m_out": 2.2,
 81      "cost_per_1m_in_cached": 0,
 82      "cost_per_1m_out_cached": 0,
 83      "context_window": 131072,
 84      "default_max_tokens": 13107,
 85      "can_reason": true,
 86      "reasoning_levels": [
 87        "low",
 88        "medium",
 89        "high"
 90      ],
 91      "default_reasoning_effort": "medium",
 92      "supports_attachments": false
 93    },
 94    {
 95      "id": "zai-org/GLM-4.5-Air",
 96      "name": "GLM-4.5-Air",
 97      "cost_per_1m_in": 0.2,
 98      "cost_per_1m_out": 1.2,
 99      "cost_per_1m_in_cached": 0,
100      "cost_per_1m_out_cached": 0,
101      "context_window": 131072,
102      "default_max_tokens": 13107,
103      "can_reason": true,
104      "reasoning_levels": [
105        "low",
106        "medium",
107        "high"
108      ],
109      "default_reasoning_effort": "medium",
110      "supports_attachments": false
111    },
112    {
113      "id": "zai-org/GLM-4.7-FP8",
114      "name": "GLM-4.7",
115      "cost_per_1m_in": 0.4,
116      "cost_per_1m_out": 2,
117      "cost_per_1m_in_cached": 0,
118      "cost_per_1m_out_cached": 0,
119      "context_window": 202752,
120      "default_max_tokens": 20275,
121      "can_reason": true,
122      "reasoning_levels": [
123        "low",
124        "medium",
125        "high"
126      ],
127      "default_reasoning_effort": "medium",
128      "supports_attachments": false
129    },
130    {
131      "id": "zai-org/GLM-5",
132      "name": "GLM-5",
133      "cost_per_1m_in": 1,
134      "cost_per_1m_out": 3.2,
135      "cost_per_1m_in_cached": 0,
136      "cost_per_1m_out_cached": 0,
137      "context_window": 202752,
138      "default_max_tokens": 20275,
139      "can_reason": true,
140      "reasoning_levels": [
141        "low",
142        "medium",
143        "high"
144      ],
145      "default_reasoning_effort": "medium",
146      "supports_attachments": false
147    },
148    {
149      "id": "NousResearch/Hermes-4-405B",
150      "name": "Hermes-4-405B",
151      "cost_per_1m_in": 1,
152      "cost_per_1m_out": 3,
153      "cost_per_1m_in_cached": 0,
154      "cost_per_1m_out_cached": 0,
155      "context_window": 131072,
156      "default_max_tokens": 13107,
157      "can_reason": true,
158      "reasoning_levels": [
159        "low",
160        "medium",
161        "high"
162      ],
163      "default_reasoning_effort": "medium",
164      "supports_attachments": false
165    },
166    {
167      "id": "NousResearch/Hermes-4-70B",
168      "name": "Hermes-4-70B",
169      "cost_per_1m_in": 0.13,
170      "cost_per_1m_out": 0.4,
171      "cost_per_1m_in_cached": 0,
172      "cost_per_1m_out_cached": 0,
173      "context_window": 131072,
174      "default_max_tokens": 13107,
175      "can_reason": true,
176      "reasoning_levels": [
177        "low",
178        "medium",
179        "high"
180      ],
181      "default_reasoning_effort": "medium",
182      "supports_attachments": false
183    },
184    {
185      "id": "PrimeIntellect/INTELLECT-3",
186      "name": "INTELLECT-3",
187      "cost_per_1m_in": 0.2,
188      "cost_per_1m_out": 1.1,
189      "cost_per_1m_in_cached": 0,
190      "cost_per_1m_out_cached": 0,
191      "context_window": 131072,
192      "default_max_tokens": 13107,
193      "can_reason": true,
194      "reasoning_levels": [
195        "low",
196        "medium",
197        "high"
198      ],
199      "default_reasoning_effort": "medium",
200      "supports_attachments": false
201    },
202    {
203      "id": "moonshotai/Kimi-K2-Instruct",
204      "name": "Kimi-K2-Instruct",
205      "cost_per_1m_in": 0.5,
206      "cost_per_1m_out": 2.4,
207      "cost_per_1m_in_cached": 0,
208      "cost_per_1m_out_cached": 0,
209      "context_window": 131072,
210      "default_max_tokens": 13107,
211      "can_reason": false,
212      "supports_attachments": false
213    },
214    {
215      "id": "moonshotai/Kimi-K2-Thinking",
216      "name": "Kimi-K2-Thinking",
217      "cost_per_1m_in": 0.6,
218      "cost_per_1m_out": 2.5,
219      "cost_per_1m_in_cached": 0,
220      "cost_per_1m_out_cached": 0,
221      "context_window": 262144,
222      "default_max_tokens": 26214,
223      "can_reason": true,
224      "reasoning_levels": [
225        "low",
226        "medium",
227        "high"
228      ],
229      "default_reasoning_effort": "medium",
230      "supports_attachments": false
231    },
232    {
233      "id": "moonshotai/Kimi-K2.5",
234      "name": "Kimi-K2.5",
235      "cost_per_1m_in": 0.5,
236      "cost_per_1m_out": 2.5,
237      "cost_per_1m_in_cached": 0,
238      "cost_per_1m_out_cached": 0,
239      "context_window": 262144,
240      "default_max_tokens": 26214,
241      "can_reason": true,
242      "reasoning_levels": [
243        "low",
244        "medium",
245        "high"
246      ],
247      "default_reasoning_effort": "medium",
248      "supports_attachments": false
249    },
250    {
251      "id": "moonshotai/Kimi-K2.5-fast",
252      "name": "Kimi-K2.5 (fast)",
253      "cost_per_1m_in": 0.5,
254      "cost_per_1m_out": 2.5,
255      "cost_per_1m_in_cached": 0,
256      "cost_per_1m_out_cached": 0,
257      "context_window": 8000,
258      "default_max_tokens": 800,
259      "can_reason": true,
260      "reasoning_levels": [
261        "low",
262        "medium",
263        "high"
264      ],
265      "default_reasoning_effort": "medium",
266      "supports_attachments": false
267    },
268    {
269      "id": "meta-llama/Llama-3.3-70B-Instruct",
270      "name": "Llama-3.3-70B-Instruct",
271      "cost_per_1m_in": 0.13,
272      "cost_per_1m_out": 0.4,
273      "cost_per_1m_in_cached": 0,
274      "cost_per_1m_out_cached": 0,
275      "context_window": 131072,
276      "default_max_tokens": 13107,
277      "can_reason": false,
278      "supports_attachments": false
279    },
280    {
281      "id": "meta-llama/Llama-3.3-70B-Instruct-fast",
282      "name": "Llama-3.3-70B-Instruct (fast)",
283      "cost_per_1m_in": 0.25,
284      "cost_per_1m_out": 0.75,
285      "cost_per_1m_in_cached": 0,
286      "cost_per_1m_out_cached": 0,
287      "context_window": 131072,
288      "default_max_tokens": 13107,
289      "can_reason": false,
290      "supports_attachments": false
291    },
292    {
293      "id": "meta-llama/Meta-Llama-3.1-8B-Instruct",
294      "name": "Meta-Llama-3.1-8B-Instruct",
295      "cost_per_1m_in": 0.02,
296      "cost_per_1m_out": 0.06,
297      "cost_per_1m_in_cached": 0,
298      "cost_per_1m_out_cached": 0,
299      "context_window": 131072,
300      "default_max_tokens": 13107,
301      "can_reason": false,
302      "supports_attachments": false
303    },
304    {
305      "id": "meta-llama/Meta-Llama-3.1-8B-Instruct-fast",
306      "name": "Meta-Llama-3.1-8B-Instruct (fast)",
307      "cost_per_1m_in": 0.03,
308      "cost_per_1m_out": 0.09,
309      "cost_per_1m_in_cached": 0,
310      "cost_per_1m_out_cached": 0,
311      "context_window": 131072,
312      "default_max_tokens": 13107,
313      "can_reason": false,
314      "supports_attachments": false
315    },
316    {
317      "id": "MiniMaxAI/MiniMax-M2.1",
318      "name": "MiniMax-M2.1",
319      "cost_per_1m_in": 0.3,
320      "cost_per_1m_out": 1.2,
321      "cost_per_1m_in_cached": 0,
322      "cost_per_1m_out_cached": 0,
323      "context_window": 196608,
324      "default_max_tokens": 19660,
325      "can_reason": true,
326      "reasoning_levels": [
327        "low",
328        "medium",
329        "high"
330      ],
331      "default_reasoning_effort": "medium",
332      "supports_attachments": false
333    },
334    {
335      "id": "MiniMaxAI/MiniMax-M2.5",
336      "name": "MiniMax-M2.5",
337      "cost_per_1m_in": 0.3,
338      "cost_per_1m_out": 1.2,
339      "cost_per_1m_in_cached": 0,
340      "cost_per_1m_out_cached": 0,
341      "context_window": 196608,
342      "default_max_tokens": 19660,
343      "can_reason": true,
344      "reasoning_levels": [
345        "low",
346        "medium",
347        "high"
348      ],
349      "default_reasoning_effort": "medium",
350      "supports_attachments": false
351    },
352    {
353      "id": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B",
354      "name": "Nemotron-3-Nano-30B-A3B",
355      "cost_per_1m_in": 0.06,
356      "cost_per_1m_out": 0.24,
357      "cost_per_1m_in_cached": 0,
358      "cost_per_1m_out_cached": 0,
359      "context_window": 262144,
360      "default_max_tokens": 26214,
361      "can_reason": true,
362      "reasoning_levels": [
363        "low",
364        "medium",
365        "high"
366      ],
367      "default_reasoning_effort": "medium",
368      "supports_attachments": false
369    },
370    {
371      "id": "nvidia/nemotron-3-super-120b-a12b",
372      "name": "Nemotron-3-Super-120B-A12B",
373      "cost_per_1m_in": 0.3,
374      "cost_per_1m_out": 0.9,
375      "cost_per_1m_in_cached": 0,
376      "cost_per_1m_out_cached": 0,
377      "context_window": 262144,
378      "default_max_tokens": 26214,
379      "can_reason": true,
380      "reasoning_levels": [
381        "low",
382        "medium",
383        "high"
384      ],
385      "default_reasoning_effort": "medium",
386      "supports_attachments": false
387    },
388    {
389      "id": "Qwen/Qwen3-235B-A22B-Instruct-2507",
390      "name": "Qwen3-235B-A22B-Instruct-2507",
391      "cost_per_1m_in": 0.2,
392      "cost_per_1m_out": 0.6,
393      "cost_per_1m_in_cached": 0,
394      "cost_per_1m_out_cached": 0,
395      "context_window": 262144,
396      "default_max_tokens": 26214,
397      "can_reason": false,
398      "supports_attachments": false
399    },
400    {
401      "id": "Qwen/Qwen3-235B-A22B-Thinking-2507",
402      "name": "Qwen3-235B-A22B-Thinking-2507",
403      "cost_per_1m_in": 0.2,
404      "cost_per_1m_out": 0.8,
405      "cost_per_1m_in_cached": 0,
406      "cost_per_1m_out_cached": 0,
407      "context_window": 262144,
408      "default_max_tokens": 26214,
409      "can_reason": true,
410      "reasoning_levels": [
411        "low",
412        "medium",
413        "high"
414      ],
415      "default_reasoning_effort": "medium",
416      "supports_attachments": false
417    },
418    {
419      "id": "Qwen/Qwen3-235B-A22B-Thinking-2507-fast",
420      "name": "Qwen3-235B-A22B-Thinking-2507 (fast)",
421      "cost_per_1m_in": 0.5,
422      "cost_per_1m_out": 2,
423      "cost_per_1m_in_cached": 0,
424      "cost_per_1m_out_cached": 0,
425      "context_window": 8000,
426      "default_max_tokens": 800,
427      "can_reason": true,
428      "reasoning_levels": [
429        "low",
430        "medium",
431        "high"
432      ],
433      "default_reasoning_effort": "medium",
434      "supports_attachments": false
435    },
436    {
437      "id": "Qwen/Qwen3-30B-A3B-Instruct-2507",
438      "name": "Qwen3-30B-A3B-Instruct-2507",
439      "cost_per_1m_in": 0.1,
440      "cost_per_1m_out": 0.3,
441      "cost_per_1m_in_cached": 0,
442      "cost_per_1m_out_cached": 0,
443      "context_window": 262144,
444      "default_max_tokens": 26214,
445      "can_reason": false,
446      "supports_attachments": false
447    },
448    {
449      "id": "Qwen/Qwen3-30B-A3B-Thinking-2507",
450      "name": "Qwen3-30B-A3B-Thinking-2507",
451      "cost_per_1m_in": 0.1,
452      "cost_per_1m_out": 0.3,
453      "cost_per_1m_in_cached": 0,
454      "cost_per_1m_out_cached": 0,
455      "context_window": 262144,
456      "default_max_tokens": 26214,
457      "can_reason": true,
458      "reasoning_levels": [
459        "low",
460        "medium",
461        "high"
462      ],
463      "default_reasoning_effort": "medium",
464      "supports_attachments": false
465    },
466    {
467      "id": "Qwen/Qwen3-32B",
468      "name": "Qwen3-32B",
469      "cost_per_1m_in": 0.1,
470      "cost_per_1m_out": 0.3,
471      "cost_per_1m_in_cached": 0,
472      "cost_per_1m_out_cached": 0,
473      "context_window": 40960,
474      "default_max_tokens": 4096,
475      "can_reason": true,
476      "reasoning_levels": [
477        "low",
478        "medium",
479        "high"
480      ],
481      "default_reasoning_effort": "medium",
482      "supports_attachments": false
483    },
484    {
485      "id": "Qwen/Qwen3-32B-fast",
486      "name": "Qwen3-32B (fast)",
487      "cost_per_1m_in": 0.2,
488      "cost_per_1m_out": 0.6,
489      "cost_per_1m_in_cached": 0,
490      "cost_per_1m_out_cached": 0,
491      "context_window": 40960,
492      "default_max_tokens": 4096,
493      "can_reason": true,
494      "reasoning_levels": [
495        "low",
496        "medium",
497        "high"
498      ],
499      "default_reasoning_effort": "medium",
500      "supports_attachments": false
501    },
502    {
503      "id": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
504      "name": "Qwen3-Coder-30B-A3B-Instruct",
505      "cost_per_1m_in": 0.1,
506      "cost_per_1m_out": 0.3,
507      "cost_per_1m_in_cached": 0,
508      "cost_per_1m_out_cached": 0,
509      "context_window": 262144,
510      "default_max_tokens": 26214,
511      "can_reason": false,
512      "supports_attachments": false
513    },
514    {
515      "id": "Qwen/Qwen3-Coder-480B-A35B-Instruct",
516      "name": "Qwen3-Coder-480B-A35B-Instruct",
517      "cost_per_1m_in": 0.4,
518      "cost_per_1m_out": 1.8,
519      "cost_per_1m_in_cached": 0,
520      "cost_per_1m_out_cached": 0,
521      "context_window": 262144,
522      "default_max_tokens": 26214,
523      "can_reason": false,
524      "supports_attachments": false
525    },
526    {
527      "id": "Qwen/Qwen3-Next-80B-A3B-Thinking",
528      "name": "Qwen3-Next-80B-A3B-Thinking",
529      "cost_per_1m_in": 0.15,
530      "cost_per_1m_out": 1.2,
531      "cost_per_1m_in_cached": 0,
532      "cost_per_1m_out_cached": 0,
533      "context_window": 128000,
534      "default_max_tokens": 12800,
535      "can_reason": true,
536      "reasoning_levels": [
537        "low",
538        "medium",
539        "high"
540      ],
541      "default_reasoning_effort": "medium",
542      "supports_attachments": false
543    },
544    {
545      "id": "Qwen/Qwen3-Next-80B-A3B-Thinking-fast",
546      "name": "Qwen3-Next-80B-A3B-Thinking (fast)",
547      "cost_per_1m_in": 0.15,
548      "cost_per_1m_out": 1.2,
549      "cost_per_1m_in_cached": 0,
550      "cost_per_1m_out_cached": 0,
551      "context_window": 8000,
552      "default_max_tokens": 800,
553      "can_reason": true,
554      "reasoning_levels": [
555        "low",
556        "medium",
557        "high"
558      ],
559      "default_reasoning_effort": "medium",
560      "supports_attachments": false
561    },
562    {
563      "id": "Qwen/Qwen3.5-397B-A17B",
564      "name": "Qwen3.5-397B-A17B",
565      "cost_per_1m_in": 0.6,
566      "cost_per_1m_out": 3.6,
567      "cost_per_1m_in_cached": 0,
568      "cost_per_1m_out_cached": 0,
569      "context_window": 262144,
570      "default_max_tokens": 26214,
571      "can_reason": true,
572      "reasoning_levels": [
573        "low",
574        "medium",
575        "high"
576      ],
577      "default_reasoning_effort": "medium",
578      "supports_attachments": false
579    },
580    {
581      "id": "Qwen/Qwen3.5-397B-A17B-fast",
582      "name": "Qwen3.5-397B-A17B (fast)",
583      "cost_per_1m_in": 0.6,
584      "cost_per_1m_out": 3.6,
585      "cost_per_1m_in_cached": 0,
586      "cost_per_1m_out_cached": 0,
587      "context_window": 8000,
588      "default_max_tokens": 800,
589      "can_reason": true,
590      "reasoning_levels": [
591        "low",
592        "medium",
593        "high"
594      ],
595      "default_reasoning_effort": "medium",
596      "supports_attachments": false
597    },
598    {
599      "id": "openai/gpt-oss-120b",
600      "name": "gpt-oss-120b",
601      "cost_per_1m_in": 0.15,
602      "cost_per_1m_out": 0.6,
603      "cost_per_1m_in_cached": 0,
604      "cost_per_1m_out_cached": 0,
605      "context_window": 131072,
606      "default_max_tokens": 13107,
607      "can_reason": true,
608      "reasoning_levels": [
609        "low",
610        "medium",
611        "high"
612      ],
613      "default_reasoning_effort": "medium",
614      "supports_attachments": false
615    },
616    {
617      "id": "openai/gpt-oss-120b-fast",
618      "name": "gpt-oss-120b (fast)",
619      "cost_per_1m_in": 0.1,
620      "cost_per_1m_out": 0.5,
621      "cost_per_1m_in_cached": 0,
622      "cost_per_1m_out_cached": 0,
623      "context_window": 8000,
624      "default_max_tokens": 800,
625      "can_reason": true,
626      "reasoning_levels": [
627        "low",
628        "medium",
629        "high"
630      ],
631      "default_reasoning_effort": "medium",
632      "supports_attachments": false
633    },
634    {
635      "id": "openai/gpt-oss-20b",
636      "name": "gpt-oss-20b",
637      "cost_per_1m_in": 0.05,
638      "cost_per_1m_out": 0.2,
639      "cost_per_1m_in_cached": 0,
640      "cost_per_1m_out_cached": 0,
641      "context_window": 131072,
642      "default_max_tokens": 13107,
643      "can_reason": true,
644      "reasoning_levels": [
645        "low",
646        "medium",
647        "high"
648      ],
649      "default_reasoning_effort": "medium",
650      "supports_attachments": false
651    }
652  ]
653}