From 4819fd7977c366135a1d321bc1db7159b2756c68 Mon Sep 17 00:00:00 2001
From: Amolith <amolith@secluded.site>
Date: Sat, 7 Feb 2026 02:16:47 +0000
Subject: [PATCH] fix: wrap all post-workspace code in try/finally to prevent
 leak on early throws

Move the try/finally cleanup block to start immediately after
createWorkspace() in both web.ts and repo.ts so that early failures
(credential validation, custom prompt read, checkout) trigger cleanup.

Remove redundant manual workspace.cleanup() calls in catch blocks
(clone error in repo.ts, fetch error in web.ts) since the outer
finally now handles all paths, preventing double-cleanup.

Fixes #2

Co-authored-by: Shelley <shelley@exe.dev>
---
 src/cli/commands/repo.ts       |  78 +++++++++---------
 src/cli/commands/web.ts        |  94 +++++++++++-----------
 test/workspace-cleanup.test.ts | 139 +++++++++++++++++++++++++++++++++
 3 files changed, 225 insertions(+), 86 deletions(-)
 create mode 100644 test/workspace-cleanup.test.ts
diff --git a/src/cli/commands/repo.ts b/src/cli/commands/repo.ts
index 5e9915c79e200b28c4946b2420e1d6728a072b49..7f14cdf977afcabe6a4817cbd028be9f0e1be11f 100644
--- a/src/cli/commands/repo.ts
+++ b/src/cli/commands/repo.ts
@@ -35,52 +35,52 @@ export async function runRepoCommand(options: RepoCommandOptions): Promise<void>
   });
 
   const workspace = await createWorkspace({ cleanup: overrides.defaults.cleanup });
-  const logger = createEventLogger({ verbose: options.verbose });
 
-  let systemPrompt = REPO_SYSTEM_PROMPT;
-  const promptPath = overrides.repo.system_prompt_path;
-  if (promptPath) {
-    const home = process.env["HOME"] ?? "";
-    systemPrompt = await readFile(promptPath.replace(/^~\//, `${home}/`), "utf8");
-  }
+  try {
+    const logger = createEventLogger({ verbose: options.verbose });
 
-  const git = simpleGit();
-  const cloneArgs: string[] = [];
-  if (!options.full) {
-    const depth = overrides.repo.default_depth ?? 1;
-    const blobLimit = overrides.repo.blob_limit ?? "5m";
-    cloneArgs.push("--depth", String(depth), `--filter=blob:limit=${blobLimit}`);
-  }
+    let systemPrompt = REPO_SYSTEM_PROMPT;
+    const promptPath = overrides.repo.system_prompt_path;
+    if (promptPath) {
+      const home = process.env["HOME"] ?? "";
+      systemPrompt = await readFile(promptPath.replace(/^~\//, `${home}/`), "utf8");
+    }
 
-  try {
-    await git.clone(options.uri, workspace.path, cloneArgs);
-  } catch (error: any) {
-    await workspace.cleanup();
-    if (!overrides.defaults.cleanup) {
-      console.error(`Workspace preserved at ${workspace.path}`);
+    const git = simpleGit();
+    const cloneArgs: string[] = [];
+    if (!options.full) {
+      const depth = overrides.repo.default_depth ?? 1;
+      const blobLimit = overrides.repo.blob_limit ?? "5m";
+      cloneArgs.push("--depth", String(depth), `--filter=blob:limit=${blobLimit}`);
     }
-    throw new CloneError(options.uri, error?.message ?? String(error));
-  }
 
-  const repoGit = simpleGit(workspace.path);
-  if (options.ref) {
-    await repoGit.checkout(options.ref);
-  }
+    try {
+      await git.clone(options.uri, workspace.path, cloneArgs);
+    } catch (error: any) {
+      if (!overrides.defaults.cleanup) {
+        console.error(`Workspace preserved at ${workspace.path}`);
+      }
+      throw new CloneError(options.uri, error?.message ?? String(error));
+    }
 
-  const tools = [
-    createReadTool(workspace.path),
-    createGrepTool(workspace.path),
-    createLsTool(workspace.path),
-    createFindTool(workspace.path),
-    createGitLogTool(workspace.path),
-    createGitShowTool(workspace.path),
-    createGitBlameTool(workspace.path),
-    createGitDiffTool(workspace.path),
-    createGitRefsTool(workspace.path),
-    createGitCheckoutTool(workspace.path),
-  ];
+    const repoGit = simpleGit(workspace.path);
+    if (options.ref) {
+      await repoGit.checkout(options.ref);
+    }
+
+    const tools = [
+      createReadTool(workspace.path),
+      createGrepTool(workspace.path),
+      createLsTool(workspace.path),
+      createFindTool(workspace.path),
+      createGitLogTool(workspace.path),
+      createGitShowTool(workspace.path),
+      createGitBlameTool(workspace.path),
+      createGitDiffTool(workspace.path),
+      createGitRefsTool(workspace.path),
+      createGitCheckoutTool(workspace.path),
+    ];
 
-  try {
     const result = await runAgent(options.query, {
       model: overrides.repo.model ?? overrides.defaults.model,
       systemPrompt,
diff --git a/src/cli/commands/web.ts b/src/cli/commands/web.ts
index 13abe98ba9c5c453c7b6df922592517819589e53..ea887ab6fa55ee6f1c0a607cee393406f31fc684 100644
--- a/src/cli/commands/web.ts
+++ b/src/cli/commands/web.ts
@@ -32,62 +32,62 @@ export async function runWebCommand(options: WebCommandOptions): Promise<void> {
   });
 
   const workspace = await createWorkspace({ cleanup: overrides.defaults.cleanup });
-  const logger = createEventLogger({ verbose: options.verbose });
 
-  const kagiSession =
-    overrides.web.kagi_session_token ?? overrides.defaults.kagi_session_token ?? process.env["KAGI_SESSION_TOKEN"];
-  const tabstackKey =
-    overrides.web.tabstack_api_key ??
-    overrides.defaults.tabstack_api_key ??
-    process.env["TABSTACK_API_KEY"];
+  try {
+    const logger = createEventLogger({ verbose: options.verbose });
 
-  if (!kagiSession) {
-    throw new ToolInputError("Missing Kagi session token (set KAGI_SESSION_TOKEN or config)");
-  }
-  if (!tabstackKey) {
-    throw new ToolInputError("Missing Tabstack API key (set TABSTACK_API_KEY or config)");
-  }
+    const kagiSession =
+      overrides.web.kagi_session_token ?? overrides.defaults.kagi_session_token ?? process.env["KAGI_SESSION_TOKEN"];
+    const tabstackKey =
+      overrides.web.tabstack_api_key ??
+      overrides.defaults.tabstack_api_key ??
+      process.env["TABSTACK_API_KEY"];
 
-  let systemPrompt = WEB_SYSTEM_PROMPT;
-  const promptPath = overrides.web.system_prompt_path;
-  if (promptPath) {
-    const home = process.env["HOME"] ?? "";
-    systemPrompt = await readFile(promptPath.replace(/^~\//, `${home}/`), "utf8");
-  }
+    if (!kagiSession) {
+      throw new ToolInputError("Missing Kagi session token (set KAGI_SESSION_TOKEN or config)");
+    }
+    if (!tabstackKey) {
+      throw new ToolInputError("Missing Tabstack API key (set TABSTACK_API_KEY or config)");
+    }
+
+    let systemPrompt = WEB_SYSTEM_PROMPT;
+    const promptPath = overrides.web.system_prompt_path;
+    if (promptPath) {
+      const home = process.env["HOME"] ?? "";
+      systemPrompt = await readFile(promptPath.replace(/^~\//, `${home}/`), "utf8");
+    }
 
-  const tools = [
-    createWebSearchTool(kagiSession),
-    createWebFetchTool(tabstackKey),
-    createReadTool(workspace.path),
-    createGrepTool(workspace.path),
-    createLsTool(workspace.path),
-    createFindTool(workspace.path),
-  ];
+    const tools = [
+      createWebSearchTool(kagiSession),
+      createWebFetchTool(tabstackKey),
+      createReadTool(workspace.path),
+      createGrepTool(workspace.path),
+      createLsTool(workspace.path),
+      createFindTool(workspace.path),
+    ];
 
-  let seededContext = "";
-  if (options.url) {
-    const fetchTool = createWebFetchTool(tabstackKey);
-    try {
-      const result = await fetchTool.execute("prefetch", { url: options.url, nocache: false });
-      const text = result.content
-        .map((block) => (block.type === "text" ? block.text ?? "" : ""))
-        .join("");
-      if (text.length <= INJECT_THRESHOLD) {
-        seededContext = text;
-      } else {
-        const filename = `web/${basename(new URL(options.url).pathname) || "index"}.md`;
-        await writeWorkspaceFile(workspace.path, filename, text);
-        seededContext = `Fetched content stored at ${filename}`;
+    let seededContext = "";
+    if (options.url) {
+      const fetchTool = createWebFetchTool(tabstackKey);
+      try {
+        const result = await fetchTool.execute("prefetch", { url: options.url, nocache: false });
+        const text = result.content
+          .map((block) => (block.type === "text" ? block.text ?? "" : ""))
+          .join("");
+        if (text.length <= INJECT_THRESHOLD) {
+          seededContext = text;
+        } else {
+          const filename = `web/${basename(new URL(options.url).pathname) || "index"}.md`;
+          await writeWorkspaceFile(workspace.path, filename, text);
+          seededContext = `Fetched content stored at ${filename}`;
+        }
+      } catch (error: any) {
+        throw new FetchError(options.url, error?.message ?? String(error));
       }
-    } catch (error: any) {
-      await workspace.cleanup();
-      throw new FetchError(options.url, error?.message ?? String(error));
     }
-  }
 
-  const query = seededContext ? `${options.query}\n\n${seededContext}` : options.query;
+    const query = seededContext ? `${options.query}\n\n${seededContext}` : options.query;
 
-  try {
     const result = await runAgent(query, {
       model: overrides.web.model ?? overrides.defaults.model,
       systemPrompt,
diff --git a/test/workspace-cleanup.test.ts b/test/workspace-cleanup.test.ts
new file mode 100644
index 0000000000000000000000000000000000000000..0e9c4c6c7616f983c77c912cb1b0cfcf6c5c12cd
--- /dev/null
+++ b/test/workspace-cleanup.test.ts
@@ -0,0 +1,139 @@
+import { describe, test, expect, beforeEach, afterEach } from "bun:test";
+import { readdirSync, mkdtempSync } from "node:fs";
+import { rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { execSync } from "node:child_process";
+import { ToolInputError } from "../src/util/errors.js";
+
+/**
+ * Snapshot rumilo-* dirs in tmpdir so we can detect leaks.
+ */
+function rumiloTmpDirs(): Set<string> {
+  return new Set(readdirSync(tmpdir()).filter((n) => n.startsWith("rumilo-")));
+}
+
+function leakedDirs(before: Set<string>, after: Set<string>): string[] {
+  return [...after].filter((d) => !before.has(d));
+}
+
+async function cleanupLeaked(leaked: string[]): Promise<void> {
+  for (const d of leaked) {
+    await rm(join(tmpdir(), d), { recursive: true, force: true });
+  }
+}
+
+// ─── web command: workspace leaked on missing credentials ───────────
+
+describe("web command – workspace cleanup on early failure", () => {
+  const origEnv = { ...process.env };
+
+  beforeEach(() => {
+    // Ensure credential env vars are absent so validation throws.
+    delete process.env["KAGI_SESSION_TOKEN"];
+    delete process.env["TABSTACK_API_KEY"];
+  });
+
+  afterEach(() => {
+    process.env = { ...origEnv };
+  });
+
+  test("workspace dir is removed when credential validation throws", async () => {
+    const before = rumiloTmpDirs();
+
+    const { runWebCommand } = await import("../src/cli/commands/web.js");
+
+    try {
+      await runWebCommand({
+        query: "test",
+        verbose: false,
+        cleanup: true,
+      });
+    } catch (e: any) {
+      expect(e).toBeInstanceOf(ToolInputError);
+    }
+
+    const after = rumiloTmpDirs();
+    const leaked = leakedDirs(before, after);
+
+    // Safety: clean up any leaked dirs so the test doesn't pollute.
+    await cleanupLeaked(leaked);
+
+    // If this fails, the workspace was created but not cleaned up – a leak.
+    expect(leaked).toEqual([]);
+  });
+});
+
+// ─── repo command: workspace leaked on checkout failure ─────────────
+
+describe("repo command – workspace cleanup on early failure", () => {
+  const origEnv = { ...process.env };
+  let localRepo: string;
+
+  beforeEach(() => {
+    // Create a small local bare git repo so clone succeeds without network.
+    localRepo = mkdtempSync(join(tmpdir(), "rumilo-test-bare-"));
+    execSync("git init --bare", { cwd: localRepo, stdio: "ignore" });
+    // Create a temporary work clone to add a commit (bare repos need content)
+    const workClone = mkdtempSync(join(tmpdir(), "rumilo-test-work-"));
+    execSync(`git clone ${localRepo} work`, { cwd: workClone, stdio: "ignore" });
+    const workDir = join(workClone, "work");
+    execSync("git config user.email test@test.com && git config user.name Test", { cwd: workDir, stdio: "ignore" });
+    execSync("echo hello > README.md && git add . && git commit -m init", { cwd: workDir, stdio: "ignore" });
+    execSync("git push", { cwd: workDir, stdio: "ignore" });
+    // Clean up work clone
+    execSync(`rm -rf ${workClone}`, { stdio: "ignore" });
+  });
+
+  afterEach(async () => {
+    process.env = { ...origEnv };
+    await rm(localRepo, { recursive: true, force: true });
+  });
+
+  test("workspace dir is removed when clone fails", async () => {
+    const before = rumiloTmpDirs();
+
+    const { runRepoCommand } = await import("../src/cli/commands/repo.js");
+
+    try {
+      await runRepoCommand({
+        query: "test",
+        uri: "file:///nonexistent-path/repo.git",
+        full: false,
+        verbose: false,
+        cleanup: true,
+      });
+    } catch {
+      // expected – clone will fail
+    }
+
+    const after = rumiloTmpDirs();
+    const leaked = leakedDirs(before, after);
+    await cleanupLeaked(leaked);
+    expect(leaked).toEqual([]);
+  });
+
+  test("workspace dir is removed when ref checkout fails after clone", async () => {
+    const before = rumiloTmpDirs();
+
+    const { runRepoCommand } = await import("../src/cli/commands/repo.js");
+
+    try {
+      await runRepoCommand({
+        query: "test",
+        uri: localRepo,
+        ref: "nonexistent-ref-abc123",
+        full: true,  // full clone for local bare repo compatibility
+        verbose: false,
+        cleanup: true,
+      });
+    } catch {
+      // expected – checkout of bad ref will fail
+    }
+
+    const after = rumiloTmpDirs();
+    const leaked = leakedDirs(before, after);
+    await cleanupLeaked(leaked);
+    expect(leaked).toEqual([]);
+  });
+});