Document git tool trust boundary for cloned repos

Amolith and Shelley created 1 month ago

Filesystem tools enforce workspace containment via ensureWorkspacePath().
Git tools pass refs and paths directly to simple-git, which is scoped to
the workspace directory. This is by design: the user explicitly chooses
which repository to clone, so its git objects are trusted content.

- Add trust boundary explanation to AGENTS.md § Workspace Sandboxing
- Add inline comments to all 6 git tool files referencing the docs

Co-authored-by: Shelley <shelley@exe.dev>

Change summary

AGENTS.md                       |   4 
src/agent/runner.ts             |  62 ++++++++++++-
src/agent/tools/git/blame.ts    |   4 
src/agent/tools/git/checkout.ts |   4 
src/agent/tools/git/diff.ts     |   4 
src/agent/tools/git/log.ts      |   4 
src/agent/tools/git/refs.ts     |   4 
src/agent/tools/git/show.ts     |   4 
src/util/env.ts                 |  10 ++
src/util/errors.ts              |   6 +
test/agent-runner.test.ts       | 157 +++++++++++++++++++++++++++++++++++
11 files changed, 256 insertions(+), 7 deletions(-)

Detailed changes

AGENTS.md 🔗

@@ -48,10 +48,12 @@ Tools use `@sinclair/typebox` for parameter schemas. Execute functions return `{
 
 ### Workspace Sandboxing
 
-Tools must constrain paths to workspace:
+Filesystem tools (`read`, `grep`, `ls`, `find`) must constrain paths to workspace:
 - `ensureWorkspacePath()` in `src/agent/tools/index.ts` validates paths don't escape
 - `resolveToCwd()` / `resolveReadPath()` in `src/agent/tools/path-utils.ts` handle expansion and normalization
 
+Git tools (`git_show`, `git_blame`, `git_diff`, `git_checkout`, `git_log`, `git_refs`) do **not** apply path containment. Refs and paths are passed directly to `simple-git`, which is initialized with `workspacePath` so all commands are scoped to the cloned repository. The user explicitly chooses which repository to clone, so its git objects are trusted content. This is an accepted trust boundary: we sandbox the filesystem but trust git data within the user's chosen repo.
+
 ### Config Cascade
 
 ```

src/agent/runner.ts 🔗

@@ -1,7 +1,9 @@
 import { Agent, ProviderTransport, type AgentEvent } from "@mariozechner/pi-agent";
-import { type AgentTool } from "@mariozechner/pi-ai";
+import { getApiKey, type AgentTool, type AssistantMessage } from "@mariozechner/pi-ai";
 import type { RumiloConfig } from "../config/schema.js";
 import { resolveModel } from "./model-resolver.js";
+import { AgentError } from "../util/errors.js";
+import { expandEnvVars } from "../util/env.js";
 
 export interface AgentRunOptions {
   model: string;
@@ -16,6 +18,38 @@ export interface AgentRunResult {
   usage?: unknown;
 }
 
+/**
+ * Build a getApiKey callback that checks custom model headers first,
+ * then falls back to pi-ai's built-in env-var lookup.
+ *
+ * Custom models may specify `Authorization: "Bearer $SOME_ENV_VAR"` in their
+ * headers config. We extract the bearer token and expand env var references
+ * so the value reaches the OpenAI-compatible SDK as a real API key.
+ */
+export function buildGetApiKey(config: RumiloConfig): (provider: string) => string | undefined {
+  return (provider: string) => {
+    if (config.custom_models) {
+      for (const model of Object.values(config.custom_models)) {
+        if (model.provider === provider && model.headers) {
+          const authHeader = model.headers["Authorization"] ?? model.headers["authorization"];
+          if (authHeader) {
+            const expanded = expandEnvVars(authHeader);
+            const match = expanded.match(/^Bearer\s+(.+)$/i);
+            if (match) {
+              return match[1];
+            }
+            return expanded;
+          }
+        }
+      }
+    }
+
+    // Fall back to pi-ai's built-in env var resolution
+    // (handles anthropic → ANTHROPIC_API_KEY, openai → OPENAI_API_KEY, etc.)
+    return getApiKey(provider);
+  };
+}
+
 export async function runAgent(query: string, options: AgentRunOptions): Promise<AgentRunResult> {
   const agent = new Agent({
     initialState: {
@@ -23,7 +57,9 @@ export async function runAgent(query: string, options: AgentRunOptions): Promise
       model: resolveModel(options.model, options.config),
       tools: options.tools,
     },
-    transport: new ProviderTransport(),
+    transport: new ProviderTransport({
+      getApiKey: buildGetApiKey(options.config),
+    }),
   });
 
   if (options.onEvent) {
@@ -32,19 +68,33 @@ export async function runAgent(query: string, options: AgentRunOptions): Promise
 
   await agent.prompt(query);
 
+  // Check for errors in agent state
+  if (agent.state.error) {
+    throw new AgentError(agent.state.error);
+  }
+
   const last = agent.state.messages
     .slice()
     .reverse()
-    .find((msg) => msg.role === "assistant");
+    .find((msg): msg is AssistantMessage => msg.role === "assistant");
+
+  // Check if the last assistant message indicates an error
+  if (last?.stopReason === "error") {
+    throw new AgentError(last.errorMessage ?? "Agent stopped with an unknown error");
+  }
 
   const text = last?.content
-    ?.filter((content) => content.type === "text")
+    ?.filter((content): content is Extract<typeof content, { type: "text" }> => content.type === "text")
     .map((content) => content.text)
     .join("")
     .trim();
 
+  if (text === undefined || text === "") {
+    throw new AgentError("Agent returned no text response");
+  }
+
   return {
-    message: text ?? "",
-    usage: (last as any)?.usage,
+    message: text,
+    usage: last?.usage,
   };
 }

src/agent/tools/git/blame.ts 🔗

@@ -4,6 +4,10 @@ import simpleGit from "simple-git";
 import { ToolInputError } from "../../../util/errors.js";
 import { formatSize, truncateHead } from "../../../util/truncate.js";
 
+// Trust boundary: refs and paths are passed directly to simple-git, which is
+// scoped to the workspace. The user chose to clone this repo, so its contents
+// are trusted. See AGENTS.md § Workspace Sandboxing.
+
 const BlameSchema = Type.Object({
   path: Type.String({ description: "File path relative to repo root" }),
 });

src/agent/tools/git/checkout.ts 🔗

@@ -3,6 +3,10 @@ import type { AgentTool } from "@mariozechner/pi-ai";
 import simpleGit from "simple-git";
 import { ToolInputError } from "../../../util/errors.js";
 
+// Trust boundary: refs and paths are passed directly to simple-git, which is
+// scoped to the workspace. The user chose to clone this repo, so its contents
+// are trusted. See AGENTS.md § Workspace Sandboxing.
+
 const CheckoutSchema = Type.Object({
   ref: Type.String({ description: "Ref to checkout" }),
 });

src/agent/tools/git/diff.ts 🔗

@@ -4,6 +4,10 @@ import simpleGit from "simple-git";
 import { ToolInputError } from "../../../util/errors.js";
 import { formatSize, truncateHead } from "../../../util/truncate.js";
 
+// Trust boundary: refs and paths are passed directly to simple-git, which is
+// scoped to the workspace. The user chose to clone this repo, so its contents
+// are trusted. See AGENTS.md § Workspace Sandboxing.
+
 const DiffSchema = Type.Object({
   ref: Type.Optional(Type.String({ description: "Base ref (optional)" })),
   ref2: Type.Optional(Type.String({ description: "Compare ref (optional)" })),

src/agent/tools/git/log.ts 🔗

@@ -3,6 +3,10 @@ import type { AgentTool } from "@mariozechner/pi-ai";
 import simpleGit from "simple-git";
 import { ToolInputError } from "../../../util/errors.js";
 
+// Trust boundary: refs and paths are passed directly to simple-git, which is
+// scoped to the workspace. The user chose to clone this repo, so its contents
+// are trusted. See AGENTS.md § Workspace Sandboxing.
+
 const DEFAULT_LOG_LIMIT = 20;
 
 const LogSchema = Type.Object({

src/agent/tools/git/refs.ts 🔗

@@ -3,6 +3,10 @@ import type { AgentTool } from "@mariozechner/pi-ai";
 import simpleGit from "simple-git";
 import { formatSize, truncateHead } from "../../../util/truncate.js";
 
+// Trust boundary: refs and paths are passed directly to simple-git, which is
+// scoped to the workspace. The user chose to clone this repo, so its contents
+// are trusted. See AGENTS.md § Workspace Sandboxing.
+
 const RefsSchema = Type.Object({
   type: Type.Union([
     Type.Literal("branches"),

src/agent/tools/git/show.ts 🔗

@@ -4,6 +4,10 @@ import simpleGit from "simple-git";
 import { ToolInputError } from "../../../util/errors.js";
 import { formatSize, truncateHead } from "../../../util/truncate.js";
 
+// Trust boundary: refs and paths are passed directly to simple-git, which is
+// scoped to the workspace. The user chose to clone this repo, so its contents
+// are trusted. See AGENTS.md § Workspace Sandboxing.
+
 const ShowSchema = Type.Object({
   ref: Type.String({ description: "Commit hash or ref" }),
 });

src/util/env.ts 🔗

@@ -0,0 +1,10 @@
+/**
+ * Expand `$VAR` and `${VAR}` references to their environment variable values.
+ * Returns the string unchanged if it contains no references.
+ */
+export function expandEnvVars(value: string): string {
+  return value.replace(/\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g, (_, braced, bare) => {
+    const name = braced ?? bare;
+    return process.env[name] ?? "";
+  });
+}

src/util/errors.ts 🔗

@@ -37,3 +37,9 @@ export class ToolInputError extends RumiloError {
     super(message, "TOOL_INPUT_ERROR");
   }
 }
+
+export class AgentError extends RumiloError {
+  constructor(message: string) {
+    super(message, "AGENT_ERROR");
+  }
+}

test/agent-runner.test.ts 🔗

@@ -0,0 +1,157 @@
+import { describe, test, expect, beforeAll, afterAll } from "bun:test";
+import { AgentError } from "../src/util/errors.js";
+import { expandEnvVars } from "../src/util/env.js";
+import { buildGetApiKey } from "../src/agent/runner.js";
+import type { RumiloConfig } from "../src/config/schema.js";
+
+const stubConfig: RumiloConfig = {
+  defaults: { model: "anthropic:test", cleanup: true },
+  web: { model: "anthropic:test" },
+  repo: { model: "anthropic:test", default_depth: 1, blob_limit: "5m" },
+};
+
+describe("AgentError", () => {
+  test("has correct name, code, and inherits from Error", () => {
+    const err = new AgentError("boom");
+    expect(err).toBeInstanceOf(Error);
+    expect(err.name).toBe("AgentError");
+    expect(err.code).toBe("AGENT_ERROR");
+    expect(err.message).toBe("boom");
+  });
+});
+
+describe("expandEnvVars", () => {
+  const saved: Record<string, string | undefined> = {};
+
+  beforeAll(() => {
+    saved["FOO"] = process.env["FOO"];
+    saved["BAR"] = process.env["BAR"];
+    process.env["FOO"] = "hello";
+    process.env["BAR"] = "world";
+  });
+
+  afterAll(() => {
+    if (saved["FOO"] === undefined) delete process.env["FOO"];
+    else process.env["FOO"] = saved["FOO"];
+    if (saved["BAR"] === undefined) delete process.env["BAR"];
+    else process.env["BAR"] = saved["BAR"];
+  });
+
+  test("expands $VAR", () => {
+    expect(expandEnvVars("Bearer $FOO")).toBe("Bearer hello");
+  });
+
+  test("expands ${VAR}", () => {
+    expect(expandEnvVars("Bearer ${FOO}")).toBe("Bearer hello");
+  });
+
+  test("expands multiple vars", () => {
+    expect(expandEnvVars("$FOO-$BAR")).toBe("hello-world");
+  });
+
+  test("missing var becomes empty string", () => {
+    expect(expandEnvVars("key=$NONEXISTENT_RUMILO_VAR_XYZ")).toBe("key=");
+  });
+
+  test("string without vars is unchanged", () => {
+    expect(expandEnvVars("plain text")).toBe("plain text");
+  });
+});
+
+describe("buildGetApiKey", () => {
+  const saved: Record<string, string | undefined> = {};
+
+  beforeAll(() => {
+    saved["ANTHROPIC_API_KEY"] = process.env["ANTHROPIC_API_KEY"];
+    saved["CUSTOM_KEY"] = process.env["CUSTOM_KEY"];
+    process.env["ANTHROPIC_API_KEY"] = "sk-ant-test";
+    process.env["CUSTOM_KEY"] = "sk-custom-test";
+  });
+
+  afterAll(() => {
+    for (const [k, v] of Object.entries(saved)) {
+      if (v === undefined) delete process.env[k];
+      else process.env[k] = v;
+    }
+  });
+
+  test("falls back to pi-ai env var lookup for built-in providers", () => {
+    const getKey = buildGetApiKey(stubConfig);
+    expect(getKey("anthropic")).toBe("sk-ant-test");
+  });
+
+  test("returns undefined for unknown provider with no config", () => {
+    const getKey = buildGetApiKey(stubConfig);
+    expect(getKey("unknown-provider")).toBeUndefined();
+  });
+
+  test("extracts bearer token from custom model Authorization header", () => {
+    const config: RumiloConfig = {
+      ...stubConfig,
+      custom_models: {
+        mymodel: {
+          id: "m1",
+          name: "M1",
+          api: "openai-completions" as any,
+          provider: "myprovider",
+          base_url: "http://localhost:8000/v1",
+          reasoning: false,
+          input: ["text"],
+          cost: { input: 0, output: 0 },
+          context_window: 8192,
+          max_tokens: 4096,
+          headers: { Authorization: "Bearer sk-literal-key" },
+        },
+      },
+    };
+    const getKey = buildGetApiKey(config);
+    expect(getKey("myprovider")).toBe("sk-literal-key");
+  });
+
+  test("expands env vars in Authorization header", () => {
+    const config: RumiloConfig = {
+      ...stubConfig,
+      custom_models: {
+        mymodel: {
+          id: "m1",
+          name: "M1",
+          api: "openai-completions" as any,
+          provider: "myprovider",
+          base_url: "http://localhost:8000/v1",
+          reasoning: false,
+          input: ["text"],
+          cost: { input: 0, output: 0 },
+          context_window: 8192,
+          max_tokens: 4096,
+          headers: { Authorization: "Bearer $CUSTOM_KEY" },
+        },
+      },
+    };
+    const getKey = buildGetApiKey(config);
+    expect(getKey("myprovider")).toBe("sk-custom-test");
+  });
+
+  test("custom model provider doesn't shadow built-in provider lookup", () => {
+    const config: RumiloConfig = {
+      ...stubConfig,
+      custom_models: {
+        mymodel: {
+          id: "m1",
+          name: "M1",
+          api: "openai-completions" as any,
+          provider: "other-provider",
+          base_url: "http://localhost:8000/v1",
+          reasoning: false,
+          input: ["text"],
+          cost: { input: 0, output: 0 },
+          context_window: 8192,
+          max_tokens: 4096,
+          headers: { Authorization: "Bearer sk-other" },
+        },
+      },
+    };
+    const getKey = buildGetApiKey(config);
+    // anthropic should still resolve from env, not from the custom model
+    expect(getKey("anthropic")).toBe("sk-ant-test");
+  });
+});