From b48618b8e88f434bde851b02a4c2663216bb010b Mon Sep 17 00:00:00 2001 From: Amolith Date: Tue, 13 Jan 2026 18:02:02 -0700 Subject: [PATCH] refactor: modularize API key and client logic - Add api_key.gleam for API key resolution (cmd, literal, env) - Add clients.gleam for provider-specific client logic (openai, anthropic, gemini, ollama) - Add reasoning support with --reasoning flag (low, medium, high) - Add --endpoint flag for ollama and custom endpoints - Parse can_reason, reasoning_levels, default_reasoning_effort from provider API - Update openai_compat to support reasoning_effort parameter Assisted-by: Claude Opus 4.5 via Amp --- AGENTS.md | 1 + src/api_key.gleam | 49 +++++++++ src/clients.gleam | 180 ++++++++++++++++++++++++++++++++ src/config.gleam | 62 ++++++++++- src/garble.gleam | 220 ++++++++++++++-------------------------- src/openai_compat.gleam | 21 ++-- src/providers.gleam | 27 ++++- 7 files changed, 407 insertions(+), 153 deletions(-) create mode 100644 src/api_key.gleam create mode 100644 src/clients.gleam diff --git a/AGENTS.md b/AGENTS.md index acf7ecf81d22d905761f520f764a28b6c863c624..a0a9bb15615c9520835db4fa86fe5bccbb03e22f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -77,3 +77,4 @@ The `prompts_test.gleam` demonstrates the pattern: test public functions, use `s - **Provider list is remote:** `providers.gleam` fetches from `https://catwalk.charm.sh/v2/providers` at runtime—network errors are possible. - **Code block extraction:** The system prompt instructs models to wrap output in fenced code blocks; `prompts.extract_code_block` strips them. If the model doesn't comply, raw output passes through. - **API key resolution order:** `api_key_cmd` (shell command) → `api_key` (literal) → environment variable from provider config +- **Custom OpenAI-compat client:** We use our own `openai_compat.gleam` instead of starlet's `openai.with_url` because most OpenAI-compatible providers don't implement the `/responses` endpoint that starlet expects—they only support the traditional `/chat/completions` endpoint. 
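A minimal sketch of the key-resolution precedence documented in the AGENTS.md hunk above (api_key_cmd, then api_key, then the provider's environment variable). The config values here are hypothetical; only `config.default()`, the field names, and `api_key.get` come from this change:

    // Hypothetical values, for illustration only.
    let cfg =
      config.Config(
        ..config.default(),
        api_key_cmd: "pass show openai",
        // literal key, consulted only when api_key_cmd == ""
        api_key: "sk-literal",
      )
    // api_key.get(provider, cfg) shells out to `pass show openai` here.
    // With api_key_cmd set to "", it returns "sk-literal" instead.
    // With both empty, it falls back to provider.api_key_env.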
diff --git a/src/api_key.gleam b/src/api_key.gleam new file mode 100644 index 0000000000000000000000000000000000000000..bd2b442d4e35a69594e231ed8678558dfe416ede --- /dev/null +++ b/src/api_key.gleam @@ -0,0 +1,49 @@ +// SPDX-FileCopyrightText: Amolith +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +import config +import envoy +import gleam/option.{None, Some} +import gleam/string +import providers.{type Provider} +import shellout + +pub fn get(provider: Provider, cfg: config.Config) -> Result(String, String) { + // Precedence: api_key_cmd > api_key > environment variable + case cfg.api_key_cmd { + "" -> + case cfg.api_key { + "" -> get_from_env(provider) + key -> Ok(key) + } + cmd -> run_cmd(cmd) + } +} + +fn run_cmd(cmd: String) -> Result(String, String) { + case shellout.command(run: "sh", with: ["-c", cmd], in: ".", opt: []) { + Ok(output) -> Ok(string.trim(output)) + Error(#(_status, msg)) -> Error("api_key_cmd failed: " <> msg) + } +} + +fn get_from_env(provider: Provider) -> Result(String, String) { + case provider.api_key_env { + Some(env_ref) -> { + let env_var = resolve_env_var(env_ref) + case envoy.get(env_var) { + Ok(key) -> Ok(key) + Error(_) -> Error("Missing environment variable: " <> env_var) + } + } + None -> Error("No API key configured for provider: " <> provider.id) + } +} + +fn resolve_env_var(value: String) -> String { + case value { + "$" <> rest -> rest + other -> other + } +} diff --git a/src/clients.gleam b/src/clients.gleam new file mode 100644 index 0000000000000000000000000000000000000000..0464bf81af5d9d47be10e11070edb3be394e1b54 --- /dev/null +++ b/src/clients.gleam @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: Amolith +// +// SPDX-License-Identifier: AGPL-3.0-or-later + +import config +import gleam/float +import gleam/int +import gleam/option.{type Option, None, Some} +import gleam/result +import starlet +import starlet/anthropic +import starlet/gemini +import starlet/ollama +import starlet/openai + +pub fn send_openai( + api_key: String, + base_url: Option(String), + model: String, + system_prompt: String, + user_prompt: String, + reasoning: Option(config.ReasoningEffort), +) -> Result(String, String) { + let client = case base_url { + Some(url) -> openai.new_with_base_url(api_key, url) + None -> openai.new(api_key) + } + + let chat = + starlet.chat(client, model) + |> starlet.system(system_prompt) + |> starlet.user(user_prompt) + + let chat = case reasoning { + Some(config.ReasoningLow) -> openai.with_reasoning(chat, openai.ReasoningLow) + Some(config.ReasoningMedium) -> + openai.with_reasoning(chat, openai.ReasoningMedium) + Some(config.ReasoningHigh) -> + openai.with_reasoning(chat, openai.ReasoningHigh) + None -> chat + } + + chat + |> starlet.send() + |> result.map(fn(resp) { starlet.text(resp.1) }) + |> result.map_error(format_starlet_error) +} + +pub fn send_anthropic( + api_key: String, + base_url: Option(String), + model: String, + system_prompt: String, + user_prompt: String, + reasoning: Option(config.ReasoningEffort), +) -> Result(String, String) { + let client = case base_url { + Some(url) -> anthropic.new_with_base_url(api_key, url) + None -> anthropic.new(api_key) + } + + let chat = + starlet.chat(client, model) + |> starlet.system(system_prompt) + |> starlet.user(user_prompt) + + let chat = case reasoning { + Some(effort) -> { + let #(budget, max_tokens) = reasoning_budget(effort) + case anthropic.with_thinking(chat, budget) { + Ok(c) -> c |> starlet.max_tokens(max_tokens) + Error(_) -> chat + } + } + None -> chat + } + + chat + |> 
starlet.send() + |> result.map(fn(resp) { starlet.text(resp.1) }) + |> result.map_error(format_starlet_error) +} + +/// Calculate reasoning budget using OpenRouter's formula: +/// budget_tokens = max(min(max_tokens * effort_ratio, 128000), 1024) +/// Returns (budget, max_tokens) where max_tokens > budget +pub fn reasoning_budget(effort: config.ReasoningEffort) -> #(Int, Int) { + let base_max = 64_000 + let ratio = case effort { + config.ReasoningLow -> 0.2 + config.ReasoningMedium -> 0.5 + config.ReasoningHigh -> 0.8 + } + let budget = + int.max(int.min(float.truncate(int.to_float(base_max) *. ratio), 128_000), 1024) + let max_tokens = budget + 16_384 + #(budget, max_tokens) +} + +pub fn send_gemini( + api_key: String, + model: String, + system_prompt: String, + user_prompt: String, + reasoning: Option(config.ReasoningEffort), +) -> Result(String, String) { + let client = gemini.new(api_key) + + let chat = + starlet.chat(client, model) + |> starlet.system(system_prompt) + |> starlet.user(user_prompt) + + let chat = case reasoning { + Some(effort) -> { + let #(budget, _) = reasoning_budget(effort) + case gemini.with_thinking(chat, gemini.ThinkingFixed(budget)) { + Ok(c) -> c + Error(_) -> chat + } + } + None -> chat + } + + chat + |> starlet.send() + |> result.map(fn(resp) { starlet.text(resp.1) }) + |> result.map_error(format_starlet_error) +} + +pub fn send_ollama( + endpoint: String, + model: String, + system_prompt: String, + user_prompt: String, + reasoning: Option(config.ReasoningEffort), +) -> Result(String, String) { + case endpoint { + "" -> Error("Ollama requires --endpoint (e.g. http://localhost:11434)") + base_url -> { + let client = ollama.new(base_url) + + let chat = + starlet.chat(client, model) + |> starlet.system(system_prompt) + |> starlet.user(user_prompt) + + let chat = case reasoning { + Some(config.ReasoningLow) -> ollama.with_thinking(chat, ollama.ThinkingLow) + Some(config.ReasoningMedium) -> + ollama.with_thinking(chat, ollama.ThinkingMedium) + Some(config.ReasoningHigh) -> + ollama.with_thinking(chat, ollama.ThinkingHigh) + None -> chat + } + + chat + |> starlet.send() + |> result.map(fn(resp) { starlet.text(resp.1) }) + |> result.map_error(format_starlet_error) + } + } +} + +pub fn format_starlet_error(err: starlet.StarletError) -> String { + case err { + starlet.Transport(msg) -> "Network error: " <> msg + starlet.Http(status, body) -> + "HTTP " <> int.to_string(status) <> ": " <> body + starlet.Decode(msg) -> "Parse error: " <> msg + starlet.Provider(name, msg, _) -> name <> " error: " <> msg + starlet.Tool(_error) -> "Tool error" + starlet.RateLimited(retry_after) -> { + case retry_after { + Some(secs) -> "Rate limited, retry after " <> int.to_string(secs) <> "s" + None -> "Rate limited" + } + } + } +} diff --git a/src/config.gleam b/src/config.gleam index b6aabfaef846915f82eb624fe652ec4d509cbbf7..3e2b80308fb583b3f6bcc634f74fbbd51c76e377 100644 --- a/src/config.gleam +++ b/src/config.gleam @@ -5,10 +5,47 @@ import envoy import filepath import gleam/dict.{type Dict} + import gleam/result +import gleam/string import simplifile import tom.{type Toml} +/// Reasoning effort level for thinking models. +pub type ReasoningEffort { + ReasoningLow + ReasoningMedium + ReasoningHigh +} + +/// User's reasoning preference: not specified, explicitly disabled, or a specific level. +pub type ReasoningSetting { + ReasoningNotSet + ReasoningDisabled + ReasoningEnabled(ReasoningEffort) +} + +/// Parse a reasoning effort string into a ReasoningSetting. 
+pub fn parse_reasoning(value: String) -> ReasoningSetting { + case string.lowercase(value) { + "" -> ReasoningNotSet + "none" | "off" | "disabled" -> ReasoningDisabled + "low" -> ReasoningEnabled(ReasoningLow) + "medium" -> ReasoningEnabled(ReasoningMedium) + "high" -> ReasoningEnabled(ReasoningHigh) + _ -> ReasoningNotSet + } +} + +/// Convert ReasoningEffort to the API string representation. +pub fn reasoning_to_string(effort: ReasoningEffort) -> String { + case effort { + ReasoningLow -> "low" + ReasoningMedium -> "medium" + ReasoningHigh -> "high" + } +} + pub type Config { Config( provider: String, @@ -16,11 +53,21 @@ pub type Config { api_key: String, api_key_cmd: String, directions: String, + reasoning: ReasoningSetting, + endpoint: String, ) } pub fn default() -> Config { - Config(provider: "", model: "", api_key: "", api_key_cmd: "", directions: "") + Config( + provider: "", + model: "", + api_key: "", + api_key_cmd: "", + directions: "", + reasoning: ReasoningNotSet, + endpoint: "", + ) } /// Load config from XDG_CONFIG_HOME/garble/config.toml or ~/.config/garble/config.toml @@ -68,6 +115,8 @@ fn parse_config(parsed: Dict(String, Toml)) -> Config { api_key: get_string(parsed, "api_key"), api_key_cmd: get_string(parsed, "api_key_cmd"), directions: get_string(parsed, "directions"), + reasoning: parse_reasoning(get_string(parsed, "reasoning")), + endpoint: get_string(parsed, "endpoint"), ) } @@ -82,6 +131,8 @@ pub fn merge( cli_provider cli_provider: String, cli_model cli_model: String, cli_directions cli_directions: String, + cli_reasoning cli_reasoning: String, + cli_endpoint cli_endpoint: String, ) -> Config { Config( provider: prefer_nonempty(cli_provider, cfg.provider), @@ -89,9 +140,18 @@ pub fn merge( api_key: cfg.api_key, api_key_cmd: cfg.api_key_cmd, directions: prefer_nonempty(cli_directions, cfg.directions), + reasoning: prefer_reasoning(cli_reasoning, cfg.reasoning), + endpoint: prefer_nonempty(cli_endpoint, cfg.endpoint), ) } +fn prefer_reasoning(cli: String, fallback: ReasoningSetting) -> ReasoningSetting { + case parse_reasoning(cli) { + ReasoningNotSet -> fallback + setting -> setting + } +} + fn prefer_nonempty(cli: String, fallback: String) -> String { case cli { "" -> fallback diff --git a/src/garble.gleam b/src/garble.gleam index daa036862fdc7c1fe144689236f6a6400715ad09..95b00ed248ead42e05b2b2afa2d87aa732630b32 100644 --- a/src/garble.gleam +++ b/src/garble.gleam @@ -2,10 +2,10 @@ // // SPDX-License-Identifier: AGPL-3.0-or-later +import api_key import argv +import clients import config -import envoy -import gleam/int import gleam/io import gleam/option.{None, Some} import gleam/result @@ -15,11 +15,6 @@ import glint import openai_compat import prompts import providers.{type Provider} -import shellout -import starlet -import starlet/anthropic -import starlet/gemini -import starlet/openai import stdin @external(erlang, "erlang", "halt") @@ -50,6 +45,16 @@ fn garble_command() -> glint.Command(Nil) { |> glint.flag_default("") |> glint.flag_help("Provider (e.g. openai, anthropic)"), ) + use reasoning <- glint.flag( + glint.string_flag("reasoning") + |> glint.flag_default("") + |> glint.flag_help("Reasoning effort for thinking models (low, medium, high)"), + ) + use endpoint <- glint.flag( + glint.string_flag("endpoint") + |> glint.flag_default("") + |> glint.flag_help("API endpoint URL (required for ollama, e.g. 
http://localhost:11434)"), + ) use _, _args, flags <- glint.command() // Load config file (if present) and merge with CLI flags @@ -57,12 +62,16 @@ fn garble_command() -> glint.Command(Nil) { let assert Ok(directions_cli) = directions(flags) let assert Ok(model_cli) = model(flags) let assert Ok(provider_cli) = provider(flags) + let assert Ok(reasoning_cli) = reasoning(flags) + let assert Ok(endpoint_cli) = endpoint(flags) let merged = config.merge( cfg, cli_provider: provider_cli, cli_model: model_cli, cli_directions: directions_cli, + cli_reasoning: reasoning_cli, + cli_endpoint: endpoint_cli, ) // Read all stdin into a single string @@ -74,28 +83,59 @@ fn garble_command() -> glint.Command(Nil) { // Build the user message with raw input and directions let user_message = prompts.build_user_message(input, merged.directions) - case providers.get_provider(merged.provider) { - Ok(provider_info) -> { - case send_request(provider_info, merged, prompts.system(), user_message) { - Ok(response) -> io.print(prompts.extract_code_block(response)) - Error(msg) -> { - io.println_error(msg) - halt(1) + let ollama_reasoning = case merged.reasoning { + config.ReasoningEnabled(effort) -> Some(effort) + _ -> None + } + + let result = case merged.provider { + "ollama" -> + clients.send_ollama( + merged.endpoint, + merged.model, + prompts.system(), + user_message, + ollama_reasoning, + ) + _ -> + case providers.get_provider(merged.provider) { + Ok(provider_info) -> { + // Resolve reasoning: explicit setting wins, then model default, then none + let effective_reasoning = case merged.reasoning { + config.ReasoningEnabled(effort) -> Some(effort) + config.ReasoningDisabled -> None + config.ReasoningNotSet -> { + let model_info = providers.get_model(provider_info, merged.model) + case model_info { + Ok(m) -> + case m.default_reasoning_effort { + Some(default) -> + case config.parse_reasoning(default) { + config.ReasoningEnabled(effort) -> Some(effort) + _ -> None + } + None -> None + } + Error(_) -> None + } + } + } + send_request(provider_info, merged, effective_reasoning, prompts.system(), user_message) } + Error(providers.FetchError(msg)) -> + Error("Error fetching providers: " <> msg) + Error(providers.ProviderNotFound(id)) -> Error("Unknown provider: " <> id) + Error(providers.ModelNotFound(provider, model)) -> + Error( + "Unknown model '" <> model <> "' for provider '" <> provider <> "'", + ) } - } - Error(providers.FetchError(msg)) -> { - io.println_error("Error fetching providers: " <> msg) - halt(1) - } - Error(providers.ProviderNotFound(id)) -> { - io.println_error("Unknown provider: " <> id) - halt(1) - } - Error(providers.ModelNotFound(provider, model)) -> { - io.println_error( - "Unknown model '" <> model <> "' for provider '" <> provider <> "'", - ) + } + + case result { + Ok(response) -> io.print(prompts.extract_code_block(response)) + Error(msg) -> { + io.println_error(msg) halt(1) } } @@ -104,134 +144,26 @@ fn garble_command() -> glint.Command(Nil) { fn send_request( provider: Provider, cfg: config.Config, + reasoning: option.Option(config.ReasoningEffort), system: String, user_prompt: String, ) -> Result(String, String) { - use api_key <- result.try(get_api_key(provider, cfg)) + use key <- result.try(api_key.get(provider, cfg)) case provider.provider_type { - "openai" -> send_openai(api_key, None, cfg.model, system, user_prompt) - "anthropic" -> send_anthropic(api_key, None, cfg.model, system, user_prompt) - "google" -> send_gemini(api_key, cfg.model, system, user_prompt) + "openai" -> + 
clients.send_openai(key, None, cfg.model, system, user_prompt, reasoning) + "anthropic" -> + clients.send_anthropic(key, None, cfg.model, system, user_prompt, reasoning) + "google" -> + clients.send_gemini(key, cfg.model, system, user_prompt, reasoning) "openai-compat" -> { case provider.api_endpoint { Some(endpoint) -> - openai_compat.send(endpoint, api_key, cfg.model, system, user_prompt) + openai_compat.send(endpoint, key, cfg.model, system, user_prompt, reasoning) None -> Error("No endpoint configured for " <> provider.id) } } other -> Error("Unsupported provider type: " <> other) } } - -fn send_openai( - api_key: String, - base_url: option.Option(String), - model: String, - system_prompt: String, - user_prompt: String, -) -> Result(String, String) { - let client = case base_url { - Some(url) -> openai.new_with_base_url(api_key, url) - None -> openai.new(api_key) - } - - starlet.chat(client, model) - |> starlet.system(system_prompt) - |> starlet.user(user_prompt) - |> starlet.send() - |> result.map(fn(resp) { starlet.text(resp.1) }) - |> result.map_error(format_starlet_error) -} - -fn send_anthropic( - api_key: String, - base_url: option.Option(String), - model: String, - system_prompt: String, - user_prompt: String, -) -> Result(String, String) { - let client = case base_url { - Some(url) -> anthropic.new_with_base_url(api_key, url) - None -> anthropic.new(api_key) - } - - starlet.chat(client, model) - |> starlet.system(system_prompt) - |> starlet.user(user_prompt) - |> starlet.send() - |> result.map(fn(resp) { starlet.text(resp.1) }) - |> result.map_error(format_starlet_error) -} - -fn send_gemini( - api_key: String, - model: String, - system_prompt: String, - user_prompt: String, -) -> Result(String, String) { - let client = gemini.new(api_key) - - starlet.chat(client, model) - |> starlet.system(system_prompt) - |> starlet.user(user_prompt) - |> starlet.send() - |> result.map(fn(resp) { starlet.text(resp.1) }) - |> result.map_error(format_starlet_error) -} - -fn format_starlet_error(err: starlet.StarletError) -> String { - case err { - starlet.Transport(msg) -> "Network error: " <> msg - starlet.Http(status, body) -> - "HTTP " <> int.to_string(status) <> ": " <> body - starlet.Decode(msg) -> "Parse error: " <> msg - starlet.Provider(name, msg, _) -> name <> " error: " <> msg - starlet.Tool(_error) -> "Tool error" - starlet.RateLimited(retry_after) -> { - case retry_after { - Some(secs) -> "Rate limited, retry after " <> int.to_string(secs) <> "s" - None -> "Rate limited" - } - } - } -} - -fn get_api_key(provider: Provider, cfg: config.Config) -> Result(String, String) { - // Precedence: api_key_cmd > api_key > environment variable - case cfg.api_key_cmd { - "" -> - case cfg.api_key { - "" -> get_api_key_from_env(provider) - key -> Ok(key) - } - cmd -> run_api_key_cmd(cmd) - } -} - -fn run_api_key_cmd(cmd: String) -> Result(String, String) { - case shellout.command(run: "sh", with: ["-c", cmd], in: ".", opt: []) { - Ok(output) -> Ok(string.trim(output)) - Error(#(_status, msg)) -> Error("api_key_cmd failed: " <> msg) - } -} - -fn get_api_key_from_env(provider: Provider) -> Result(String, String) { - case provider.api_key_env { - Some(env_ref) -> { - let env_var = resolve_env_var(env_ref) - case envoy.get(env_var) { - Ok(key) -> Ok(key) - Error(_) -> Error("Missing environment variable: " <> env_var) - } - } - None -> Error("No API key configured for provider: " <> provider.id) - } -} - -fn resolve_env_var(value: String) -> String { - case value { - "$" <> rest -> rest - other -> 
other - } -} diff --git a/src/openai_compat.gleam b/src/openai_compat.gleam index fb44bb9b7586d77b1fae40d0e5e9c0420529a817..d3041d10916cbe6e723b668d635756185f172771 100644 --- a/src/openai_compat.gleam +++ b/src/openai_compat.gleam @@ -2,12 +2,14 @@ // // SPDX-License-Identifier: AGPL-3.0-or-later +import config.{type ReasoningEffort} import gleam/dynamic/decode import gleam/http import gleam/http/request import gleam/httpc import gleam/json import gleam/list +import gleam/option.{type Option, None, Some} pub fn send( endpoint: String, @@ -15,14 +17,21 @@ pub fn send( model: String, system_prompt: String, user_prompt: String, + reasoning: Option(ReasoningEffort), ) -> Result(String, String) { let messages = build_messages(system_prompt, user_prompt) - let body = - json.object([ - #("model", json.string(model)), - #("messages", json.array(messages, fn(m) { m })), - ]) - |> json.to_string + let base_fields = [ + #("model", json.string(model)), + #("messages", json.array(messages, fn(m) { m })), + ] + let fields = case reasoning { + Some(effort) -> + list.append(base_fields, [ + #("reasoning_effort", json.string(config.reasoning_to_string(effort))), + ]) + None -> base_fields + } + let body = json.object(fields) |> json.to_string let url = endpoint <> "/chat/completions" diff --git a/src/providers.gleam b/src/providers.gleam index e69b3dad8db6b215c8da0ddb57c7f7c6647d1c1f..62ef33d6211ea51621072ab71c1021d122addbdb 100644 --- a/src/providers.gleam +++ b/src/providers.gleam @@ -24,7 +24,12 @@ pub type Provider { } pub type Model { - Model(id: String) + Model( + id: String, + can_reason: Bool, + reasoning_levels: List(String), + default_reasoning_effort: Option(String), + ) } pub type ValidationError { @@ -60,7 +65,18 @@ fn fetch_providers() -> Result(List(Provider), ValidationError) { fn parse_providers(body: String) -> Result(List(Provider), ValidationError) { let model_decoder = { use id <- decode.field("id", decode.string) - decode.success(Model(id:)) + use can_reason <- decode.field("can_reason", decode.bool) + use reasoning_levels <- decode.optional_field( + "reasoning_levels", + [], + decode.list(decode.string), + ) + use default_reasoning_effort <- decode.optional_field( + "default_reasoning_effort", + None, + decode.string |> decode.map(Some), + ) + decode.success(Model(id:, can_reason:, reasoning_levels:, default_reasoning_effort:)) } let provider_decoder = { @@ -109,6 +125,13 @@ fn find_model( |> result.map_error(fn(_) { ModelNotFound(provider.id, model_id) }) } +/// Get a model by ID from a provider +pub fn get_model(provider: Provider, model_id: String) -> Result(Model, ValidationError) { + provider.models + |> list.find(fn(m) { m.id == model_id }) + |> result.map_error(fn(_) { ModelNotFound(provider.id, model_id) }) +} + /// Resolve an environment variable reference like "$OPENAI_API_KEY" to just "OPENAI_API_KEY" pub fn resolve_env_var_name(value: String) -> Option(String) { case value {
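A worked pass through `reasoning_budget` above, using the fixed base of 64_000 hard-coded in this patch (plain arithmetic from the code, no assumptions):

    // effort           budget = max(min(64_000 *. ratio, 128_000), 1024)    max_tokens = budget + 16_384
    // ReasoningLow     12_800                                               29_184
    // ReasoningMedium  32_000                                               48_384
    // ReasoningHigh    51_200                                               67_584

And for openai-compat providers, the body built by `openai_compat.send` with `Some(config.ReasoningMedium)` serializes to the following shape (the model id and messages are placeholders; the `reasoning_effort` field name is exactly what the patch emits):

    {"model": "<model-id>", "messages": [...], "reasoning_effort": "medium"}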