# config.example.toml
# Example custom models configuration for rumilo
# Copy relevant sections to your $XDG_CONFIG_HOME/rumilo/config.toml

[defaults]
model = "anthropic:claude-sonnet-4-20250514"
cleanup = true
# kagi_session_token = "your-kagi-token"
# tabstack_api_key = "your-tabstack-key"

[web]
model = "anthropic:claude-sonnet-4-20250514"
# system_prompt_path = "~/.config/rumilo/web-prompt.txt"

[repo]
model = "anthropic:claude-sonnet-4-20250514"
default_depth = 1
blob_limit = "5m"
# system_prompt_path = "~/.config/rumilo/repo-prompt.txt"

# Custom models allow you to use any OpenAI-compatible endpoint
# Reference them with: rumilo web "query" --model custom:ollama

[custom_models.ollama]
# Ollama running locally
provider = "ollama"
api = "openai-completions"
base_url = "http://localhost:11434/v1"
id = "ollama/llama3.2"
name = "Llama 3.2 (Ollama)"
reasoning = false
input = ["text"]
cost = { input = 0, output = 0 }
context_window = 128000
max_tokens = 4096

[custom_models.vllm]
# vLLM inference server
provider = "vllm"
api = "openai-completions"
base_url = "http://localhost:8000/v1"
id = "meta-llama/Llama-3.3-70B-Instruct"
name = "Llama 3.3 70B (vLLM)"
reasoning = false
input = ["text"]
cost = { input = 0, output = 0 }
context_window = 131072
max_tokens = 8192

[custom_models.groq]
# Groq (built-in to pi-ai, but shown here as example)
provider = "groq"
api = "openai-completions"
base_url = "https://api.groq.com/openai/v1"
id = "groq/llama-3.3-70b-versatile"
name = "Llama 3.3 70B Versatile (Groq)"
reasoning = false
input = ["text"]
cost = { input = 0.59, output = 0.79 }
context_window = 131072
max_tokens = 8192

[custom_models.openrouter]
# OpenRouter API (model aggregation)
provider = "openrouter"
api = "openai-completions"
base_url = "https://openrouter.ai/api/v1"
id = "openrouter/zai/glm-4.5v"
name = "GLM-4.5V (OpenRouter)"
reasoning = true
input = ["text", "image"]
cost = { input = 0.5, output = 1.5 }
context_window = 128000
max_tokens = 4096