Detailed changes
@@ -28,7 +28,7 @@ stdin → garble.gleam → provider dispatch → stdout
↓
providers.gleam (fetches provider list from catwalk.charm.sh)
↓
- openai/anthropic/gemini via starlet, or openai_compat.gleam for custom endpoints
+ openai/anthropic/gemini via starlet, or starlet/openai_compat for custom endpoints
```
**Flow:**
@@ -43,7 +43,7 @@ stdin → garble.gleam → provider dispatch → stdout
- `config.gleam` β TOML config loading from XDG paths, CLI/config merging
- `providers.gleam` β Fetches provider/model list from remote API, validation
- `prompts.gleam` β System prompt, user message construction, code block extraction
-- `openai_compat.gleam` β Manual HTTP client for OpenAI-compatible endpoints (when starlet doesn't apply)
+- `clients.gleam` β Provider-specific client wrappers (starlet-based)
## Key Libraries
@@ -77,4 +77,4 @@ The `prompts_test.gleam` demonstrates the pattern: test public functions, use `s
- **Provider list is remote:** `providers.gleam` fetches from `https://catwalk.charm.sh/v2/providers` at runtime—network errors are possible.
- **Code block extraction:** The system prompt instructs models to wrap output in fenced code blocks; `prompts.extract_code_block` strips them. If the model doesn't comply, raw output passes through.
- **API key resolution order:** `api_key_cmd` (shell command) → `api_key` (literal) → environment variable from provider config
-- **Custom OpenAI-compat client:** We use our own `openai_compat.gleam` instead of starlet's `openai.with_url` because most OpenAI-compatible providers don't implement the `/responses` endpoint that starlet expects—they only support the traditional `/chat/completions` endpoint.
+- **OpenAI-compat dialects:** For openai-compat providers, the `dialect` config field maps to `starlet/openai_compat/thinking.Dialect` variants. This controls how reasoning requests are encoded and responses decoded. The default `Generic` dialect works for most providers; provider-specific dialects (Together, Groq, etc.) optimize for their particular APIs.
@@ -55,6 +55,7 @@ All flags are optional if configured in `~/.config/garble/config.toml`.
| `--provider` | Provider ID | `openai`, `anthropic`, `google` |
| `--model` | Model ID | `gpt-4o`, `claude-3-opus`, `gemini-1.5-pro` |
| `--directions` | Instructions for how to transform the input | `"fix grammar and spelling"` |
+| `--reasoning` | Reasoning effort for thinking models | `low`, `medium`, `high` |
Valid provider and model IDs are listed at https://catwalk-fe.secluded.site (an unofficial frontend for Charm's [Catwalk](https://github.com/charmbracelet/catwalk) service).
@@ -75,6 +76,28 @@ api_key_cmd = "op read 'op://Private/Anthropic/credential'"
# api_key = "sk-..."
```
+#### OpenAI-compatible providers
+
+For `openai-compat` providers, you can optionally specify a `dialect` to control how reasoning requests are encoded:
+
+```toml
+provider = "openai-compat"
+endpoint = "https://api.together.xyz/v1"
+model = "deepseek-ai/DeepSeek-R1"
+dialect = "together" # Optional: generic (default), together, groq, cerebras, llamacpp, tags
+```
+
+Available dialects:
+
+| Dialect | Provider |
+| ---------- | --------------------------------- |
+| `generic` | Most OpenAI-compatible providers |
+| `together` | Together AI |
+| `groq` | Groq |
+| `cerebras` | Cerebras |
+| `llamacpp` | Local llama.cpp / vLLM instances |
+| `tags` | Providers using `<think>` tags |
+
If neither `api_key_cmd` nor `api_key` is set, garble falls back to the
provider's environment variable (e.g. `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`).
@@ -34,7 +34,8 @@ pub fn send_openai(
|> starlet.user(user_prompt)
let chat = case reasoning {
- Some(config.ReasoningLow) -> openai.with_reasoning(chat, openai.ReasoningLow)
+ Some(config.ReasoningLow) ->
+ openai.with_reasoning(chat, openai.ReasoningLow)
Some(config.ReasoningMedium) ->
openai.with_reasoning(chat, openai.ReasoningMedium)
Some(config.ReasoningHigh) ->
@@ -94,7 +95,10 @@ pub fn reasoning_budget(effort: config.ReasoningEffort) -> #(Int, Int) {
config.ReasoningHigh -> 0.8
}
let budget =
- int.max(int.min(float.truncate(int.to_float(base_max) *. ratio), 128_000), 1024)
+ int.max(
+ int.min(float.truncate(int.to_float(base_max) *. ratio), 128_000),
+ 1024,
+ )
let max_tokens = budget + 16_384
#(budget, max_tokens)
}
@@ -130,6 +134,18 @@ pub fn send_gemini(
|> result.map_error(format_starlet_error)
}
+/// Map config Dialect to starlet's thinking.Dialect.
+fn map_dialect(dialect: config.Dialect) -> thinking.Dialect {
+ case dialect {
+ config.DialectGeneric -> thinking.Generic
+ config.DialectTogether -> thinking.Together
+ config.DialectGroq -> thinking.Groq
+ config.DialectCerebras -> thinking.Cerebras
+ config.DialectLlamaCpp -> thinking.LlamaCpp
+ config.DialectTags -> thinking.Tags
+ }
+}
+
pub fn send_openai_compat(
api_key: String,
base_url: String,
@@ -137,10 +153,9 @@ pub fn send_openai_compat(
system_prompt: String,
user_prompt: String,
reasoning: Option(config.ReasoningEffort),
+ dialect: config.Dialect,
) -> Result(String, String) {
- // Default to Generic dialect - this is the most widely compatible option
- // Provider-specific dialects can be configured via garble's config file
- let client = openai_compat.new(base_url, api_key, thinking.Generic)
+ let client = openai_compat.new(base_url, api_key, map_dialect(dialect))
let chat =
starlet.chat(client, model)
@@ -148,9 +163,12 @@ pub fn send_openai_compat(
|> starlet.user(user_prompt)
let chat = case reasoning {
- Some(config.ReasoningLow) -> openai_compat.with_reasoning(chat, thinking.EffortLow)
- Some(config.ReasoningMedium) -> openai_compat.with_reasoning(chat, thinking.EffortMedium)
- Some(config.ReasoningHigh) -> openai_compat.with_reasoning(chat, thinking.EffortHigh)
+ Some(config.ReasoningLow) ->
+ openai_compat.with_reasoning(chat, thinking.EffortLow)
+ Some(config.ReasoningMedium) ->
+ openai_compat.with_reasoning(chat, thinking.EffortMedium)
+ Some(config.ReasoningHigh) ->
+ openai_compat.with_reasoning(chat, thinking.EffortHigh)
None -> chat
}
@@ -178,7 +196,8 @@ pub fn send_ollama(
|> starlet.user(user_prompt)
let chat = case reasoning {
- Some(config.ReasoningLow) -> ollama.with_thinking(chat, ollama.ThinkingLow)
+ Some(config.ReasoningLow) ->
+ ollama.with_thinking(chat, ollama.ThinkingLow)
Some(config.ReasoningMedium) ->
ollama.with_thinking(chat, ollama.ThinkingMedium)
Some(config.ReasoningHigh) ->
@@ -46,6 +46,41 @@ pub fn reasoning_to_string(effort: ReasoningEffort) -> String {
}
}
+/// OpenAI-compatible provider dialect for reasoning support.
+/// Each dialect determines how reasoning requests are encoded and responses decoded.
+pub type Dialect {
+ DialectGeneric
+ DialectTogether
+ DialectGroq
+ DialectCerebras
+ DialectLlamaCpp
+ DialectTags
+}
+
+/// Parse a dialect string into a Dialect type.
+pub fn parse_dialect(value: String) -> Dialect {
+ case string.lowercase(value) {
+ "together" -> DialectTogether
+ "groq" -> DialectGroq
+ "cerebras" -> DialectCerebras
+ "llamacpp" | "llama.cpp" -> DialectLlamaCpp
+ "tags" -> DialectTags
+ _ -> DialectGeneric
+ }
+}
+
+/// Convert Dialect to the API string representation.
+pub fn dialect_to_string(dialect: Dialect) -> String {
+ case dialect {
+ DialectGeneric -> "generic"
+ DialectTogether -> "together"
+ DialectGroq -> "groq"
+ DialectCerebras -> "cerebras"
+ DialectLlamaCpp -> "llamacpp"
+ DialectTags -> "tags"
+ }
+}
+
pub type Config {
Config(
provider: String,
@@ -55,6 +90,7 @@ pub type Config {
directions: String,
reasoning: ReasoningSetting,
endpoint: String,
+ dialect: Dialect,
)
}
@@ -67,6 +103,7 @@ pub fn default() -> Config {
directions: "",
reasoning: ReasoningNotSet,
endpoint: "",
+ dialect: DialectGeneric,
)
}
@@ -117,6 +154,7 @@ fn parse_config(parsed: Dict(String, Toml)) -> Config {
directions: get_string(parsed, "directions"),
reasoning: parse_reasoning(get_string(parsed, "reasoning")),
endpoint: get_string(parsed, "endpoint"),
+ dialect: parse_dialect(get_string(parsed, "dialect")),
)
}
@@ -142,6 +180,7 @@ pub fn merge(
directions: prefer_nonempty(cli_directions, cfg.directions),
reasoning: prefer_reasoning(cli_reasoning, cfg.reasoning),
endpoint: prefer_nonempty(cli_endpoint, cfg.endpoint),
+ dialect: cfg.dialect,
)
}
@@ -47,12 +47,16 @@ fn garble_command() -> glint.Command(Nil) {
use reasoning <- glint.flag(
glint.string_flag("reasoning")
|> glint.flag_default("")
- |> glint.flag_help("Reasoning effort for thinking models (low, medium, high)"),
+ |> glint.flag_help(
+ "Reasoning effort for thinking models (low, medium, high)",
+ ),
)
use endpoint <- glint.flag(
glint.string_flag("endpoint")
|> glint.flag_default("")
- |> glint.flag_help("API endpoint URL (required for ollama, e.g. http://localhost:11434)"),
+ |> glint.flag_help(
+ "API endpoint URL (required for ollama, e.g. http://localhost:11434)",
+ ),
)
use _, _args, flags <- glint.command()
@@ -119,11 +123,18 @@ fn garble_command() -> glint.Command(Nil) {
}
}
}
- send_request(provider_info, merged, effective_reasoning, prompts.system(), user_message)
+ send_request(
+ provider_info,
+ merged,
+ effective_reasoning,
+ prompts.system(),
+ user_message,
+ )
}
Error(providers.FetchError(msg)) ->
Error("Error fetching providers: " <> msg)
- Error(providers.ProviderNotFound(id)) -> Error("Unknown provider: " <> id)
+ Error(providers.ProviderNotFound(id)) ->
+ Error("Unknown provider: " <> id)
Error(providers.ModelNotFound(provider, model)) ->
Error(
"Unknown model '" <> model <> "' for provider '" <> provider <> "'",
@@ -153,13 +164,28 @@ fn send_request(
"openai" ->
clients.send_openai(key, None, cfg.model, system, user_prompt, reasoning)
"anthropic" ->
- clients.send_anthropic(key, None, cfg.model, system, user_prompt, reasoning)
+ clients.send_anthropic(
+ key,
+ None,
+ cfg.model,
+ system,
+ user_prompt,
+ reasoning,
+ )
"google" ->
clients.send_gemini(key, cfg.model, system, user_prompt, reasoning)
"openai-compat" -> {
case provider.api_endpoint {
Some(endpoint) ->
- clients.send_openai_compat(key, endpoint, cfg.model, system, user_prompt, reasoning)
+ clients.send_openai_compat(
+ key,
+ endpoint,
+ cfg.model,
+ system,
+ user_prompt,
+ reasoning,
+ cfg.dialect,
+ )
None -> Error("No endpoint configured for " <> provider.id)
}
}
@@ -76,7 +76,12 @@ fn parse_providers(body: String) -> Result(List(Provider), ValidationError) {
None,
decode.string |> decode.map(Some),
)
- decode.success(Model(id:, can_reason:, reasoning_levels:, default_reasoning_effort:))
+ decode.success(Model(
+ id:,
+ can_reason:,
+ reasoning_levels:,
+ default_reasoning_effort:,
+ ))
}
let provider_decoder = {
@@ -126,7 +131,10 @@ fn find_model(
}
/// Get a model by ID from a provider
-pub fn get_model(provider: Provider, model_id: String) -> Result(Model, ValidationError) {
+pub fn get_model(
+ provider: Provider,
+ model_id: String,
+) -> Result(Model, ValidationError) {
provider.models
|> list.find(fn(m) { m.id == model_id })
|> result.map_error(fn(_) { ModelNotFound(provider.id, model_id) })