fix: split-on-first-delimiter bugs, -u flag guard, web_search errors, git_log validation

Amolith and Shelley created 1 month ago

- #5: modelString.split(':') now splits on first colon only, preserving
  segments after second colon (e.g. 'openrouter:google/gemini:free')
- #6: --key=value parsing splits on first '=' only, preserving values
  containing '=' (e.g. '--header=Authorization=Bearer token')
- #7: -u short flag guards against swallowing next arg when it starts
  with '-' (e.g. '-u --verbose' no longer sets uri='--verbose')
- #10: loadConfig already rethrows ConfigError directly (verified with
  test); TOML parse errors wrapped as ConfigError with original message
- #11: web_search errors from kagi API now wrapped as FetchError with
  query context, consistent with web_fetch error handling
- #12: git_log author/since/until validation rewritten: check
  '!== undefined' then validate, so empty strings are now caught
  (previously slipped through due to falsy short-circuit)

Also: extract parseArgs to src/cli/parse-args.ts for testability,
remove unused ToolInputError import from runner.ts.

Co-authored-by: Shelley <shelley@exe.dev>

Change summary

src/agent/model-resolver.ts     |  8 +++
src/agent/runner.ts             |  1 
src/agent/tools/git/log.ts      | 24 +++++++----
src/agent/tools/web-search.ts   | 19 ++++++---
src/cli/index.ts                | 45 +---------------------
src/cli/parse-args.ts           | 51 +++++++++++++++++++++++++
test/cli-parser.test.ts         | 44 ++++++++++++++++++++++
test/config-loader.test.ts      | 55 +++++++++++++++++++++++++++
test/git-log-validation.test.ts | 56 ++++++++++++++++++++++++++++
test/model-resolver.test.ts     | 70 +++++++++++++++++++++++++++++++++++
test/web-search.test.ts         | 22 +++++++++++
11 files changed, 335 insertions(+), 60 deletions(-)

Detailed changes

src/agent/model-resolver.ts 🔗

@@ -9,7 +9,13 @@ export function resolveModel(
   modelString: string,
   config: RumiloConfig,
 ): Model<any> {
-  const [provider, modelName] = modelString.split(":");
+  const colonIndex = modelString.indexOf(":");
+  if (colonIndex === -1) {
+    throw new ConfigError("Model must be in provider:model format");
+  }
+
+  const provider = modelString.slice(0, colonIndex);
+  const modelName = modelString.slice(colonIndex + 1);
 
   if (!provider || !modelName) {
     throw new ConfigError("Model must be in provider:model format");

src/agent/runner.ts 🔗

@@ -1,6 +1,5 @@
 import { Agent, ProviderTransport, type AgentEvent } from "@mariozechner/pi-agent";
 import { type AgentTool } from "@mariozechner/pi-ai";
-import { ToolInputError } from "../util/errors.js";
 import type { RumiloConfig } from "../config/schema.js";
 import { resolveModel } from "./model-resolver.js";

src/agent/tools/git/log.ts 🔗

@@ -29,18 +29,24 @@ export const createGitLogTool = (workspacePath: string): AgentTool => ({
     }
     options.push("-n", String(Math.floor(limit)));
     if (params.oneline) options.push("--oneline");
-    if (params.author && !String(params.author).trim()) {
-      throw new ToolInputError("author must be a non-empty string");
+    if (params.author !== undefined) {
+      if (!String(params.author).trim()) {
+        throw new ToolInputError("author must be a non-empty string");
+      }
+      options.push(`--author=${params.author}`);
     }
-    if (params.author) options.push(`--author=${params.author}`);
-    if (params.since && !String(params.since).trim()) {
-      throw new ToolInputError("since must be a non-empty string");
+    if (params.since !== undefined) {
+      if (!String(params.since).trim()) {
+        throw new ToolInputError("since must be a non-empty string");
+      }
+      options.push(`--since=${params.since}`);
     }
-    if (params.since) options.push(`--since=${params.since}`);
-    if (params.until && !String(params.until).trim()) {
-      throw new ToolInputError("until must be a non-empty string");
+    if (params.until !== undefined) {
+      if (!String(params.until).trim()) {
+        throw new ToolInputError("until must be a non-empty string");
+      }
+      options.push(`--until=${params.until}`);
     }
-    if (params.until) options.push(`--until=${params.until}`);
 
     const result = await git.log(options.concat(params.path ? ["--", params.path] : []));

src/agent/tools/web-search.ts 🔗

@@ -1,7 +1,7 @@
 import { Type } from "@sinclair/typebox";
 import type { AgentTool } from "@mariozechner/pi-ai";
 import { search } from "kagi-ken";
-import { ToolInputError } from "../../util/errors.js";
+import { FetchError, ToolInputError } from "../../util/errors.js";
 
 const SearchSchema = Type.Object({
   query: Type.String({ description: "Search query" }),
@@ -17,10 +17,17 @@ export const createWebSearchTool = (sessionToken: string): AgentTool => ({
       throw new ToolInputError("Missing Kagi session token");
     }
 
-    const result = await search(params.query, sessionToken);
-    return {
-      content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
-      details: { query: params.query, resultCount: result?.data?.length ?? 0 },
-    };
+    try {
+      const result = await search(params.query, sessionToken);
+      return {
+        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        details: { query: params.query, resultCount: result?.data?.length ?? 0 },
+      };
+    } catch (error: any) {
+      throw new FetchError(
+        `kagi:search?q=${encodeURIComponent(params.query)}`,
+        error?.message ?? String(error),
+      );
+    }
   },
 });

src/cli/index.ts 🔗

@@ -3,49 +3,8 @@ import { runWebCommand } from "./commands/web.js";
 import { runRepoCommand } from "./commands/repo.js";
 import { RumiloError } from "../util/errors.js";
 
-interface ParsedArgs {
-  command?: string;
-  options: Record<string, string | boolean>;
-  positional: string[];
-}
-
-function parseArgs(args: string[]): ParsedArgs {
-  const [, , command, ...rest] = args;
-  const options: Record<string, string | boolean> = {};
-  const positional: string[] = [];
-
-  for (let i = 0; i < rest.length; i += 1) {
-    const arg = rest[i];
-    if (!arg) continue;
-
-    if (arg.startsWith("--")) {
-      const [key, value] = arg.slice(2).split("=");
-      if (!key) continue;
-      if (value !== undefined) {
-        options[key] = value;
-      } else if (rest[i + 1] && !rest[i + 1]?.startsWith("-")) {
-        options[key] = rest[i + 1] as string;
-        i += 1;
-      } else {
-        options[key] = true;
-      }
-    } else if (arg.startsWith("-")) {
-      const short = arg.slice(1);
-      if (short === "u" && rest[i + 1]) {
-        options["uri"] = rest[i + 1] as string;
-        i += 1;
-      } else if (short === "f") {
-        options["full"] = true;
-      } else {
-        options[short] = true;
-      }
-    } else {
-      positional.push(arg);
-    }
-  }
-
-  return { command, options, positional };
-}
+import { parseArgs } from "./parse-args.js";
+export { parseArgs };
 
 async function main() {
   const { command, options, positional } = parseArgs(process.argv);

src/cli/parse-args.ts 🔗

@@ -0,0 +1,51 @@
+export interface ParsedArgs {
+  command?: string;
+  options: Record<string, string | boolean>;
+  positional: string[];
+}
+
+export function parseArgs(args: string[]): ParsedArgs {
+  const [, , command, ...rest] = args;
+  const options: Record<string, string | boolean> = {};
+  const positional: string[] = [];
+
+  for (let i = 0; i < rest.length; i += 1) {
+    const arg = rest[i];
+    if (!arg) continue;
+
+    if (arg.startsWith("--")) {
+      const eqIndex = arg.indexOf("=", 2);
+      let key: string;
+      let value: string | undefined;
+      if (eqIndex !== -1) {
+        key = arg.slice(2, eqIndex);
+        value = arg.slice(eqIndex + 1);
+      } else {
+        key = arg.slice(2);
+      }
+      if (!key) continue;
+      if (value !== undefined) {
+        options[key] = value;
+      } else if (rest[i + 1] && !rest[i + 1]?.startsWith("-")) {
+        options[key] = rest[i + 1] as string;
+        i += 1;
+      } else {
+        options[key] = true;
+      }
+    } else if (arg.startsWith("-")) {
+      const short = arg.slice(1);
+      if (short === "u" && rest[i + 1] && !rest[i + 1]!.startsWith("-")) {
+        options["uri"] = rest[i + 1] as string;
+        i += 1;
+      } else if (short === "f") {
+        options["full"] = true;
+      } else {
+        options[short] = true;
+      }
+    } else {
+      positional.push(arg);
+    }
+  }
+
+  return { command, options, positional };
+}

test/cli-parser.test.ts 🔗

@@ -0,0 +1,44 @@
+import { describe, test, expect } from "bun:test";
+import { parseArgs } from "../src/cli/parse-args.js";
+
+describe("CLI --key=value parsing (issue #6)", () => {
+  test("--key=value with '=' in value preserves full value", () => {
+    const result = parseArgs(["node", "script", "web", "--key=a=b=c"]);
+    expect(result.options["key"]).toBe("a=b=c");
+  });
+
+  test("--key=value without extra '=' still works", () => {
+    const result = parseArgs(["node", "script", "web", "--model=openai:gpt-4"]);
+    expect(result.options["model"]).toBe("openai:gpt-4");
+  });
+
+  test("--flag without value is boolean true", () => {
+    const result = parseArgs(["node", "script", "web", "--verbose"]);
+    expect(result.options["verbose"]).toBe(true);
+  });
+
+  test("--key value (space-separated) works", () => {
+    const result = parseArgs(["node", "script", "web", "--model", "openai:gpt-4"]);
+    expect(result.options["model"]).toBe("openai:gpt-4");
+  });
+});
+
+describe("CLI -u short flag (issue #7)", () => {
+  test("-u does not swallow a following flag as its value", () => {
+    const result = parseArgs(["node", "script", "web", "-u", "--verbose"]);
+    expect(result.options["uri"]).not.toBe("--verbose");
+    expect(result.options["verbose"]).toBe(true);
+  });
+
+  test("-u with valid URL works normally", () => {
+    const result = parseArgs(["node", "script", "web", "-u", "https://example.com"]);
+    expect(result.options["uri"]).toBe("https://example.com");
+  });
+
+  test("-u swallowing -f as value is prevented", () => {
+    const result = parseArgs(["node", "script", "repo", "-u", "-f"]);
+    expect(result.options["uri"]).not.toBe("-f");
+    // -u becomes boolean-like (true), -f should be parsed as full flag
+    expect(result.options["full"]).toBe(true);
+  });
+});

test/config-loader.test.ts 🔗

@@ -0,0 +1,55 @@
+import { describe, test, expect, beforeEach, afterEach } from "bun:test";
+import { mkdtempSync, writeFileSync, rmSync, mkdirSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { ConfigError } from "../src/util/errors.js";
+import { loadConfig } from "../src/config/loader.js";
+
+describe("loadConfig - ConfigError rethrown directly (issue #10)", () => {
+  let configDir: string;
+  let configPath: string;
+  const originalEnv = { ...process.env };
+
+  beforeEach(() => {
+    configDir = mkdtempSync(join(tmpdir(), "rumilo-cfg-test10-"));
+    const xdgBase = join(configDir, "xdg");
+    const rumiloDir = join(xdgBase, "rumilo");
+    mkdirSync(rumiloDir, { recursive: true });
+    configPath = join(rumiloDir, "config.toml");
+    process.env["XDG_CONFIG_HOME"] = xdgBase;
+  });
+
+  afterEach(() => {
+    process.env = { ...originalEnv };
+    try {
+      rmSync(configDir, { recursive: true, force: true });
+    } catch {}
+  });
+
+  test("ConfigError from validation is rethrown with original message and stack", async () => {
+    // Write invalid config that triggers ConfigError from validatePartialConfig
+    writeFileSync(configPath, `[defaults]\nmodel = 42\n`);
+    try {
+      await loadConfig();
+      throw new Error("should have thrown");
+    } catch (e: any) {
+      expect(e).toBeInstanceOf(ConfigError);
+      // The original message should include the validation details, not be re-wrapped
+      expect(e.message).toContain("/defaults/model");
+      // Stack should reference the validation function, not be a generic re-wrap
+      expect(e.stack).toBeDefined();
+    }
+  });
+
+  test("TOML parse error is wrapped as ConfigError with original message", async () => {
+    writeFileSync(configPath, `[invalid toml !!!`);
+    try {
+      await loadConfig();
+      throw new Error("should have thrown");
+    } catch (e: any) {
+      expect(e).toBeInstanceOf(ConfigError);
+      // Should contain the original TOML parse error message
+      expect(e.message.length).toBeGreaterThan(0);
+    }
+  });
+});

test/git-log-validation.test.ts 🔗

@@ -0,0 +1,56 @@
+import { describe, test, expect, beforeAll, afterAll } from "bun:test";
+import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import simpleGit from "simple-git";
+import { createGitLogTool } from "../src/agent/tools/git/log.js";
+import { ToolInputError } from "../src/util/errors.js";
+
+let workDir: string;
+let git: ReturnType<typeof simpleGit>;
+
+beforeAll(async () => {
+  workDir = mkdtempSync(join(tmpdir(), "rumilo-gitlog-test-"));
+  git = simpleGit(workDir);
+  await git.init();
+  await git.addConfig("user.name", "Test");
+  await git.addConfig("user.email", "test@test.com");
+  writeFileSync(join(workDir, "file.txt"), "hello");
+  await git.add("file.txt");
+  await git.commit("initial commit");
+});
+
+afterAll(() => {
+  try {
+    rmSync(workDir, { recursive: true, force: true });
+  } catch {}
+});
+
+describe("git_log validation - dead code fix (issue #12)", () => {
+  test("whitespace-only author throws ToolInputError", async () => {
+    const tool = createGitLogTool(workDir);
+    await expect(tool.execute("id", { author: "   " })).rejects.toThrow(ToolInputError);
+  });
+
+  test("empty-string author throws ToolInputError", async () => {
+    const tool = createGitLogTool(workDir);
+    await expect(tool.execute("id", { author: "" })).rejects.toThrow(ToolInputError);
+  });
+
+  test("empty-string since throws ToolInputError", async () => {
+    const tool = createGitLogTool(workDir);
+    await expect(tool.execute("id", { since: "  " })).rejects.toThrow(ToolInputError);
+  });
+
+  test("empty-string until throws ToolInputError", async () => {
+    const tool = createGitLogTool(workDir);
+    await expect(tool.execute("id", { until: "  " })).rejects.toThrow(ToolInputError);
+  });
+
+  test("valid author is accepted", async () => {
+    const tool = createGitLogTool(workDir);
+    // Should not throw
+    const result: any = await tool.execute("id", { author: "Test" });
+    expect(result.details.count).toBeGreaterThanOrEqual(1);
+  });
+});

test/model-resolver.test.ts 🔗

@@ -0,0 +1,70 @@
+import { describe, test, expect } from "bun:test";
+import { resolveModel } from "../src/agent/model-resolver.js";
+import type { RumiloConfig } from "../src/config/schema.js";
+import { ConfigError } from "../src/util/errors.js";
+
+// Minimal config stub for tests
+const stubConfig: RumiloConfig = {
+  defaults: { model: "test:m", cleanup: true },
+  web: { model: "test:m" },
+  repo: { model: "test:m", default_depth: 1, blob_limit: "5m" },
+  custom_models: {},
+};
+
+describe("resolveModel - colon handling (issue #5)", () => {
+  test("model string with multiple colons preserves segments after second colon", () => {
+    // e.g. "openrouter:google/gemini-2.5-pro:free" should parse as
+    // provider = "openrouter", modelName = "google/gemini-2.5-pro:free"
+    // This will throw from getModel (unknown provider) but the parsed modelName
+    // should contain the full string after the first colon.
+    // We test via custom: prefix where we can control resolution.
+    const config: RumiloConfig = {
+      ...stubConfig,
+      custom_models: {
+        "name:with:colons": {
+          id: "test-id",
+          name: "test",
+          api: "openai",
+          provider: "test",
+          base_url: "http://localhost",
+          reasoning: false,
+          input: ["text"],
+          cost: { input: 0, output: 0 },
+          context_window: 1000,
+          max_tokens: 500,
+        },
+      },
+    };
+    // "custom:name:with:colons" should split as provider="custom", modelName="name:with:colons"
+    const model = resolveModel("custom:name:with:colons", config);
+    expect(model.id).toBe("test-id");
+  });
+
+  test("simple provider:model still works", () => {
+    // This will call getModel which may throw for unknown providers,
+    // but at minimum the split should be correct. Test with custom.
+    const config: RumiloConfig = {
+      ...stubConfig,
+      custom_models: {
+        "simple": {
+          id: "simple-id",
+          name: "simple",
+          api: "openai",
+          provider: "test",
+          base_url: "http://localhost",
+          reasoning: false,
+          input: ["text"],
+          cost: { input: 0, output: 0 },
+          context_window: 1000,
+          max_tokens: 500,
+        },
+      },
+    };
+    const model = resolveModel("custom:simple", config);
+    expect(model.id).toBe("simple-id");
+  });
+
+  test("rejects model string without colon", () => {
+    expect(() => resolveModel("nocodelimiter", stubConfig)).toThrow(ConfigError);
+  });
+});

test/web-search.test.ts 🔗

@@ -0,0 +1,22 @@
+import { describe, test, expect } from "bun:test";
+import { FetchError, ToolInputError } from "../src/util/errors.js";
+import { createWebSearchTool } from "../src/agent/tools/web-search.js";
+
+describe("web_search error handling (issue #11)", () => {
+  test("missing session token throws ToolInputError", async () => {
+    const tool = createWebSearchTool("");
+    await expect(tool.execute("id", { query: "test" })).rejects.toThrow(ToolInputError);
+  });
+
+  test("search API failure is wrapped as FetchError", async () => {
+    // Use a bogus token so the kagi API call fails
+    const tool = createWebSearchTool("invalid-token-xxx");
+    try {
+      await tool.execute("id", { query: "test query" });
+      // If it somehow succeeds (unlikely), that's fine
+    } catch (e: any) {
+      // After fix, errors from the search API should be wrapped as FetchError
+      expect(e).toBeInstanceOf(FetchError);
+    }
+  });
+});