From 5eba5d4ae94a6d709e5a7ae4e0bcd76ea334005d Mon Sep 17 00:00:00 2001 From: Amolith Date: Tue, 10 Feb 2026 20:58:07 -0700 Subject: [PATCH] feat(openai-compat): add dialect support Add Dialect type and parsing to support provider-specific request/response handling for reasoning/thinking content. - Map config dialects to starlet Dialect variants - Support: generic, together, groq, cerebras, llamacpp, tags - Remove old manual HTTP client code (now fully migrated) The Generic dialect remains the default for most providers. --- AGENTS.md | 6 +++--- README.md | 23 +++++++++++++++++++++++ src/clients.gleam | 37 ++++++++++++++++++++++++++++--------- src/config.gleam | 39 +++++++++++++++++++++++++++++++++++++++ src/garble.gleam | 38 ++++++++++++++++++++++++++++++++------ src/providers.gleam | 12 ++++++++++-- 6 files changed, 135 insertions(+), 20 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index a0a9bb15615c9520835db4fa86fe5bccbb03e22f..6abd79ee98d22fc5891cfe12426d29e9bd514060 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -28,7 +28,7 @@ stdin → garble.gleam → provider dispatch → stdout ↓ providers.gleam (fetches provider list from catwalk.charm.sh) ↓ - openai/anthropic/gemini via starlet, or openai_compat.gleam for custom endpoints + openai/anthropic/gemini via starlet, or starlet/openai_compat for custom endpoints ``` **Flow:** @@ -43,7 +43,7 @@ stdin → garble.gleam → provider dispatch → stdout - `config.gleam` — TOML config loading from XDG paths, CLI/config merging - `providers.gleam` — Fetches provider/model list from remote API, validation - `prompts.gleam` — System prompt, user message construction, code block extraction -- `openai_compat.gleam` — Manual HTTP client for OpenAI-compatible endpoints (when starlet doesn't apply) +- `clients.gleam` — Provider-specific client wrappers (starlet-based) ## Key Libraries @@ -77,4 +77,4 @@ The `prompts_test.gleam` demonstrates the pattern: test public functions, use `s - **Provider list is remote:** `providers.gleam` fetches 
from `https://catwalk.charm.sh/v2/providers` at runtime—network errors are possible. - **Code block extraction:** The system prompt instructs models to wrap output in fenced code blocks; `prompts.extract_code_block` strips them. If the model doesn't comply, raw output passes through. - **API key resolution order:** `api_key_cmd` (shell command) → `api_key` (literal) → environment variable from provider config -- **Custom OpenAI-compat client:** We use our own `openai_compat.gleam` instead of starlet's `openai.with_url` because most OpenAI-compatible providers don't implement the `/responses` endpoint that starlet expects—they only support the traditional `/chat/completions` endpoint. +- **OpenAI-compat dialects:** For openai-compat providers, the `dialect` config field maps to `starlet/openai_compat/thinking.Dialect` variants. This controls how reasoning requests are encoded and responses decoded. The default `Generic` dialect works for most providers; provider-specific dialects (Together, Groq, etc.) optimize for their particular APIs. diff --git a/README.md b/README.md index 9aecf93b977b1570d5d3d8a5f14c8f154a3dbac2..b87d05ae9fceb61d6f36f8176721a25e5dc52d3b 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ All flags are optional if configured in `~/.config/garble/config.toml`. | `--provider` | Provider ID | `openai`, `anthropic`, `google` | | `--model` | Model ID | `gpt-4o`, `claude-3-opus`, `gemini-1.5-pro` | | `--directions` | Instructions for how to transform the input | `"fix grammar and spelling"` | +| `--reasoning` | Reasoning effort for thinking models | `low`, `medium`, `high` | Valid provider and model IDs are listed at https://catwalk-fe.secluded.site (an unofficial frontend for Charm's [Catwalk](https://github.com/charmbracelet/catwalk) service). @@ -75,6 +76,28 @@ api_key_cmd = "op read 'op://Private/Anthropic/credential'" # api_key = "sk-..." 
``` +#### OpenAI-compatible providers + +For `openai-compat` providers, you can optionally specify a `dialect` to control how reasoning requests are encoded: + +```toml +provider = "openai-compat" +endpoint = "https://api.together.xyz/v1" +model = "deepseek-ai/DeepSeek-R1" +dialect = "together" # Optional: generic (default), together, groq, cerebras, llamacpp, tags +``` + +Available dialects: + +| Dialect | Provider | +| ---------- | --------------------------------- | +| `generic` | Most OpenAI-compatible providers | +| `together` | Together AI | +| `groq` | Groq | +| `cerebras` | Cerebras | +| `llamacpp` | Local llama.cpp / vLLM instances | +| `tags` | Providers using `<think>` tags | + If neither `api_key_cmd` nor `api_key` is set, garble falls back to the provider's environment variable (e.g. `ANTHROPIC_API_KEY`, `OPENAI_API_KEY`). diff --git a/src/clients.gleam b/src/clients.gleam index 70bc389e19aef8d680f72fd569852e5a4466603a..2243e8afbd20fb869d0fa6dfacbf10ea22145e4e 100644 --- a/src/clients.gleam +++ b/src/clients.gleam @@ -34,7 +34,8 @@ pub fn send_openai( |> starlet.user(user_prompt) let chat = case reasoning { - Some(config.ReasoningLow) -> openai.with_reasoning(chat, openai.ReasoningLow) + Some(config.ReasoningLow) -> + openai.with_reasoning(chat, openai.ReasoningLow) Some(config.ReasoningMedium) -> openai.with_reasoning(chat, openai.ReasoningMedium) Some(config.ReasoningHigh) -> @@ -94,7 +95,10 @@ pub fn reasoning_budget(effort: config.ReasoningEffort) -> #(Int, Int) { config.ReasoningHigh -> 0.8 } let budget = - int.max(int.min(float.truncate(int.to_float(base_max) *. ratio), 128_000), 1024) + int.max( + int.min(float.truncate(int.to_float(base_max) *. ratio), 128_000), + 1024, + ) let max_tokens = budget + 16_384 #(budget, max_tokens) } @@ -130,6 +134,18 @@ pub fn send_gemini( |> result.map_error(format_starlet_error) } +/// Map config Dialect to starlet's thinking.Dialect.
+fn map_dialect(dialect: config.Dialect) -> thinking.Dialect { + case dialect { + config.DialectGeneric -> thinking.Generic + config.DialectTogether -> thinking.Together + config.DialectGroq -> thinking.Groq + config.DialectCerebras -> thinking.Cerebras + config.DialectLlamaCpp -> thinking.LlamaCpp + config.DialectTags -> thinking.Tags + } +} + pub fn send_openai_compat( api_key: String, base_url: String, @@ -137,10 +153,9 @@ pub fn send_openai_compat( system_prompt: String, user_prompt: String, reasoning: Option(config.ReasoningEffort), + dialect: config.Dialect, ) -> Result(String, String) { - // Default to Generic dialect - this is the most widely compatible option - // Provider-specific dialects can be configured via garble's config file - let client = openai_compat.new(base_url, api_key, thinking.Generic) + let client = openai_compat.new(base_url, api_key, map_dialect(dialect)) let chat = starlet.chat(client, model) @@ -148,9 +163,12 @@ pub fn send_openai_compat( |> starlet.user(user_prompt) let chat = case reasoning { - Some(config.ReasoningLow) -> openai_compat.with_reasoning(chat, thinking.EffortLow) - Some(config.ReasoningMedium) -> openai_compat.with_reasoning(chat, thinking.EffortMedium) - Some(config.ReasoningHigh) -> openai_compat.with_reasoning(chat, thinking.EffortHigh) + Some(config.ReasoningLow) -> + openai_compat.with_reasoning(chat, thinking.EffortLow) + Some(config.ReasoningMedium) -> + openai_compat.with_reasoning(chat, thinking.EffortMedium) + Some(config.ReasoningHigh) -> + openai_compat.with_reasoning(chat, thinking.EffortHigh) None -> chat } @@ -178,7 +196,8 @@ pub fn send_ollama( |> starlet.user(user_prompt) let chat = case reasoning { - Some(config.ReasoningLow) -> ollama.with_thinking(chat, ollama.ThinkingLow) + Some(config.ReasoningLow) -> + ollama.with_thinking(chat, ollama.ThinkingLow) Some(config.ReasoningMedium) -> ollama.with_thinking(chat, ollama.ThinkingMedium) Some(config.ReasoningHigh) -> diff --git a/src/config.gleam 
b/src/config.gleam index 3e2b80308fb583b3f6bcc634f74fbbd51c76e377..2d6c6ef7e10d580d569e63b06a01226104ba9ffa 100644 --- a/src/config.gleam +++ b/src/config.gleam @@ -46,6 +46,41 @@ pub fn reasoning_to_string(effort: ReasoningEffort) -> String { } } +/// OpenAI-compatible provider dialect for reasoning support. +/// Each dialect determines how reasoning requests are encoded and responses decoded. +pub type Dialect { + DialectGeneric + DialectTogether + DialectGroq + DialectCerebras + DialectLlamaCpp + DialectTags +} + +/// Parse a dialect string into a Dialect type. +pub fn parse_dialect(value: String) -> Dialect { + case string.lowercase(value) { + "together" -> DialectTogether + "groq" -> DialectGroq + "cerebras" -> DialectCerebras + "llamacpp" | "llama.cpp" -> DialectLlamaCpp + "tags" -> DialectTags + _ -> DialectGeneric + } +} + +/// Convert Dialect to the API string representation. +pub fn dialect_to_string(dialect: Dialect) -> String { + case dialect { + DialectGeneric -> "generic" + DialectTogether -> "together" + DialectGroq -> "groq" + DialectCerebras -> "cerebras" + DialectLlamaCpp -> "llamacpp" + DialectTags -> "tags" + } +} + pub type Config { Config( provider: String, @@ -55,6 +90,7 @@ pub type Config { directions: String, reasoning: ReasoningSetting, endpoint: String, + dialect: Dialect, ) } @@ -67,6 +103,7 @@ pub fn default() -> Config { directions: "", reasoning: ReasoningNotSet, endpoint: "", + dialect: DialectGeneric, ) } @@ -117,6 +154,7 @@ fn parse_config(parsed: Dict(String, Toml)) -> Config { directions: get_string(parsed, "directions"), reasoning: parse_reasoning(get_string(parsed, "reasoning")), endpoint: get_string(parsed, "endpoint"), + dialect: parse_dialect(get_string(parsed, "dialect")), ) } @@ -142,6 +180,7 @@ pub fn merge( directions: prefer_nonempty(cli_directions, cfg.directions), reasoning: prefer_reasoning(cli_reasoning, cfg.reasoning), endpoint: prefer_nonempty(cli_endpoint, cfg.endpoint), + dialect: cfg.dialect, ) } diff --git 
a/src/garble.gleam b/src/garble.gleam index b53b3018b221a18f3286c7febfed7f68e1246068..e8819b3e7362e4637ce1d7f969db4610be8cf5b9 100644 --- a/src/garble.gleam +++ b/src/garble.gleam @@ -47,12 +47,16 @@ fn garble_command() -> glint.Command(Nil) { use reasoning <- glint.flag( glint.string_flag("reasoning") |> glint.flag_default("") - |> glint.flag_help("Reasoning effort for thinking models (low, medium, high)"), + |> glint.flag_help( + "Reasoning effort for thinking models (low, medium, high)", + ), ) use endpoint <- glint.flag( glint.string_flag("endpoint") |> glint.flag_default("") - |> glint.flag_help("API endpoint URL (required for ollama, e.g. http://localhost:11434)"), + |> glint.flag_help( + "API endpoint URL (required for ollama, e.g. http://localhost:11434)", + ), ) use _, _args, flags <- glint.command() @@ -119,11 +123,18 @@ fn garble_command() -> glint.Command(Nil) { } } } - send_request(provider_info, merged, effective_reasoning, prompts.system(), user_message) + send_request( + provider_info, + merged, + effective_reasoning, + prompts.system(), + user_message, + ) } Error(providers.FetchError(msg)) -> Error("Error fetching providers: " <> msg) - Error(providers.ProviderNotFound(id)) -> Error("Unknown provider: " <> id) + Error(providers.ProviderNotFound(id)) -> + Error("Unknown provider: " <> id) Error(providers.ModelNotFound(provider, model)) -> Error( "Unknown model '" <> model <> "' for provider '" <> provider <> "'", @@ -153,13 +164,28 @@ fn send_request( "openai" -> clients.send_openai(key, None, cfg.model, system, user_prompt, reasoning) "anthropic" -> - clients.send_anthropic(key, None, cfg.model, system, user_prompt, reasoning) + clients.send_anthropic( + key, + None, + cfg.model, + system, + user_prompt, + reasoning, + ) "google" -> clients.send_gemini(key, cfg.model, system, user_prompt, reasoning) "openai-compat" -> { case provider.api_endpoint { Some(endpoint) -> - clients.send_openai_compat(key, endpoint, cfg.model, system, user_prompt, 
reasoning) + clients.send_openai_compat( + key, + endpoint, + cfg.model, + system, + user_prompt, + reasoning, + cfg.dialect, + ) None -> Error("No endpoint configured for " <> provider.id) } } diff --git a/src/providers.gleam b/src/providers.gleam index 62ef33d6211ea51621072ab71c1021d122addbdb..b80c8ed918dff0be050eeb0bb77cd1fa0028f5de 100644 --- a/src/providers.gleam +++ b/src/providers.gleam @@ -76,7 +76,12 @@ fn parse_providers(body: String) -> Result(List(Provider), ValidationError) { None, decode.string |> decode.map(Some), ) - decode.success(Model(id:, can_reason:, reasoning_levels:, default_reasoning_effort:)) + decode.success(Model( + id:, + can_reason:, + reasoning_levels:, + default_reasoning_effort:, + )) } let provider_decoder = { @@ -126,7 +131,10 @@ fn find_model( } /// Get a model by ID from a provider -pub fn get_model(provider: Provider, model_id: String) -> Result(Model, ValidationError) { +pub fn get_model( + provider: Provider, + model_id: String, +) -> Result(Model, ValidationError) { provider.models |> list.find(fn(m) { m.id == model_id }) |> result.map_error(fn(_) { ModelNotFound(provider.id, model_id) })