fix(config): validate parsed TOML against ConfigSchema at runtime

Amolith and Shelley created

Export ConfigSchema and PartialConfigSchema from schema.ts.
Use TypeBox Value.Check/Value.Errors in loader.ts to validate
parsed TOML before merging with defaults, and validate the
merged result against the full schema. Invalid config now throws
ConfigError with path, expected type, and actual value for each
violation.

Closes review issue #3.

Co-authored-by: Shelley <shelley@exe.dev>

Change summary

src/config/loader.ts           |  40 ++++++++--
src/config/schema.ts           |  20 +++++
test/config-validation.test.ts | 128 ++++++++++++++++++++++++++++++++++++
3 files changed, 177 insertions(+), 11 deletions(-)

Detailed changes

src/config/loader.ts 🔗

@@ -1,6 +1,8 @@
 import { readFile } from "node:fs/promises";
 import { resolve } from "node:path";
+import { Value } from "@sinclair/typebox/value";
 import { defaultConfig } from "./defaults.js";
+import { ConfigSchema, PartialConfigSchema } from "./schema.js";
 import type { RumiloConfig } from "./schema.js";
 import { ConfigError } from "../util/errors.js";
 import toml from "toml";
@@ -31,12 +33,27 @@ function mergeConfig(base: RumiloConfig, override: Partial<RumiloConfig>): Rumil
   };
 }
 
-function validateConfig(config: RumiloConfig): void {
-  if (!config.defaults.model) {
-    throw new ConfigError("defaults.model is required");
+function validatePartialConfig(parsed: unknown): asserts parsed is Partial<RumiloConfig> {
+  if (!Value.Check(PartialConfigSchema, parsed)) {
+    const errors = [...Value.Errors(PartialConfigSchema, parsed)];
+    const details = errors
+      .map((e) => `  ${e.path}: ${e.message} (got ${JSON.stringify(e.value)})`)
+      .join("\n");
+    throw new ConfigError(
+      `Invalid config:\n${details}`,
+    );
   }
-  if (typeof config.defaults.cleanup !== "boolean") {
-    throw new ConfigError("defaults.cleanup must be a boolean");
+}
+
+function validateFullConfig(config: unknown): asserts config is RumiloConfig {
+  if (!Value.Check(ConfigSchema, config)) {
+    const errors = [...Value.Errors(ConfigSchema, config)];
+    const details = errors
+      .map((e) => `  ${e.path}: ${e.message} (got ${JSON.stringify(e.value)})`)
+      .join("\n");
+    throw new ConfigError(
+      `Invalid merged config:\n${details}`,
+    );
   }
 }
 
@@ -47,16 +64,21 @@ export async function loadConfig(): Promise<LoadedConfig> {
 
   try {
     const raw = await readFile(configPath, "utf8");
-    const parsed = toml.parse(raw) as Partial<RumiloConfig>;
+    const parsed: unknown = toml.parse(raw);
+    validatePartialConfig(parsed);
     const merged = mergeConfig(base, parsed);
-    validateConfig(merged);
+    validateFullConfig(merged);
     return { config: merged, path: configPath };
   } catch (error: any) {
     if (error?.code === "ENOENT") {
-      validateConfig(base);
+      validateFullConfig(base);
       return { config: base };
     }
 
+    if (error instanceof ConfigError) {
+      throw error;
+    }
+
     if (error instanceof Error) {
       throw new ConfigError(error.message);
     }
@@ -70,6 +92,6 @@ export function applyConfigOverrides(
   overrides: Partial<RumiloConfig>,
 ): RumiloConfig {
   const merged = mergeConfig(config, overrides);
-  validateConfig(merged);
+  validateFullConfig(merged);
   return merged;
 }

src/config/schema.ts 🔗

@@ -1,4 +1,4 @@
-import { Type, type Static } from "@sinclair/typebox";
+import { Type, type Static, type TObject, type TProperties } from "@sinclair/typebox";
 
 const CustomModelSchema = Type.Object({
   provider: Type.String(),
@@ -33,7 +33,7 @@ const CustomModelSchema = Type.Object({
   ),
 });
 
-const ConfigSchema = Type.Object({
+export const ConfigSchema = Type.Object({
   defaults: Type.Object({
     model: Type.String(),
     cleanup: Type.Boolean(),
@@ -55,5 +55,21 @@ const ConfigSchema = Type.Object({
   custom_models: Type.Optional(Type.Record(Type.String(), CustomModelSchema)),
 });
 
+/** Deep-partial version of ConfigSchema for validating TOML override files. */
+function partialObject<T extends TProperties>(schema: TObject<T>) {
+  const partial: Record<string, unknown> = {};
+  for (const [key, value] of Object.entries(schema.properties)) {
+    partial[key] = Type.Optional(value as any);
+  }
+  return Type.Object(partial as any);
+}
+
+export const PartialConfigSchema = Type.Object({
+  defaults: Type.Optional(partialObject(ConfigSchema.properties.defaults)),
+  web: Type.Optional(partialObject(ConfigSchema.properties.web)),
+  repo: Type.Optional(partialObject(ConfigSchema.properties.repo)),
+  custom_models: Type.Optional(Type.Record(Type.String(), CustomModelSchema)),
+});
+
 export type RumiloConfig = Static<typeof ConfigSchema>;
 export type CustomModelConfig = Static<typeof CustomModelSchema>;

test/config-validation.test.ts 🔗

@@ -0,0 +1,128 @@
+import { describe, test, expect, beforeEach, afterEach } from "bun:test";
+import { mkdtempSync, writeFileSync, rmSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { ConfigError } from "../src/util/errors.js";
+import { loadConfig } from "../src/config/loader.js";
+
+describe("config validation", () => {
+  let configDir: string;
+  let configPath: string;
+  const originalEnv = { ...process.env };
+
+  beforeEach(() => {
+    configDir = mkdtempSync(join(tmpdir(), "rumilo-cfg-test-"));
+    configPath = join(configDir, "config.toml");
+    process.env["XDG_CONFIG_HOME"] = join(configDir, "..");
+    // loadConfig looks for <XDG_CONFIG_HOME>/rumilo/config.toml
+    // So we need the dir structure to match
+    const rumiloDir = join(configDir, "..", "rumilo");
+    require("node:fs").mkdirSync(rumiloDir, { recursive: true });
+    configPath = join(rumiloDir, "config.toml");
+  });
+
+  afterEach(() => {
+    process.env = { ...originalEnv };
+    try {
+      rmSync(configDir, { recursive: true, force: true });
+      // Also clean up the rumilo dir we created
+      const rumiloDir = join(configDir, "..", "rumilo");
+      rmSync(rumiloDir, { recursive: true, force: true });
+    } catch {}
+  });
+
+  test("rejects defaults.model with wrong type (number instead of string)", async () => {
+    writeFileSync(
+      configPath,
+      `[defaults]\nmodel = 42\ncleanup = true\n`,
+    );
+    await expect(loadConfig()).rejects.toThrow(ConfigError);
+    await expect(loadConfig()).rejects.toThrow(/defaults\/model/);
+  });
+
+  test("rejects defaults.cleanup with wrong type (string instead of boolean)", async () => {
+    writeFileSync(
+      configPath,
+      `[defaults]\nmodel = "anthropic:claude-sonnet-4-20250514"\ncleanup = "yes"\n`,
+    );
+    await expect(loadConfig()).rejects.toThrow(ConfigError);
+    await expect(loadConfig()).rejects.toThrow(/defaults\/cleanup/);
+  });
+
+  test("rejects repo.default_depth with wrong type (string instead of number)", async () => {
+    writeFileSync(
+      configPath,
+      `[repo]\ndefault_depth = "deep"\n`,
+    );
+    await expect(loadConfig()).rejects.toThrow(ConfigError);
+    await expect(loadConfig()).rejects.toThrow(/repo\/default_depth/);
+  });
+
+  test("rejects repo.default_depth below minimum (0)", async () => {
+    writeFileSync(
+      configPath,
+      `[repo]\ndefault_depth = 0\n`,
+    );
+    await expect(loadConfig()).rejects.toThrow(ConfigError);
+    await expect(loadConfig()).rejects.toThrow(/default_depth/);
+  });
+
+  test("rejects unknown top-level section type (number instead of object)", async () => {
+    // web should be an object but we pass a string value at top level
+    writeFileSync(
+      configPath,
+      `[defaults]\nmodel = "x"\ncleanup = true\n[web]\nmodel = 123\n`,
+    );
+    await expect(loadConfig()).rejects.toThrow(ConfigError);
+  });
+
+  test("accepts valid partial config (only [repo] section)", async () => {
+    writeFileSync(
+      configPath,
+      `[repo]\nmodel = "anthropic:claude-sonnet-4-20250514"\ndefault_depth = 5\n`,
+    );
+    const { config } = await loadConfig();
+    expect(config.repo.model).toBe("anthropic:claude-sonnet-4-20250514");
+    expect(config.repo.default_depth).toBe(5);
+    // defaults should come from defaultConfig
+    expect(config.defaults.model).toBe("anthropic:claude-sonnet-4-20250514");
+  });
+
+  test("accepts valid complete config", async () => {
+    writeFileSync(
+      configPath,
+      [
+        `[defaults]`,
+        `model = "openai:gpt-4"`,
+        `cleanup = false`,
+        ``,
+        `[web]`,
+        `model = "openai:gpt-4"`,
+        ``,
+        `[repo]`,
+        `model = "openai:gpt-4"`,
+        `default_depth = 3`,
+        `blob_limit = "10m"`,
+      ].join("\n"),
+    );
+    const { config } = await loadConfig();
+    expect(config.defaults.model).toBe("openai:gpt-4");
+    expect(config.defaults.cleanup).toBe(false);
+    expect(config.repo.default_depth).toBe(3);
+  });
+
+  test("error message includes path and expected type for diagnostics", async () => {
+    writeFileSync(
+      configPath,
+      `[defaults]\nmodel = 42\ncleanup = true\n`,
+    );
+    try {
+      await loadConfig();
+      throw new Error("should have thrown");
+    } catch (e: any) {
+      expect(e).toBeInstanceOf(ConfigError);
+      expect(e.message).toContain("/defaults/model");
+      expect(e.message).toMatch(/string/i);
+    }
+  });
+});