diff --git a/internal/hooks/hooks_test.go b/internal/hooks/hooks_test.go index 4c7353aed49fbf98ca6d9bf00b372d8a63e0a2ab..2eee4dc8e145ef84ab8b9a620d033019e0202b14 100644 --- a/internal/hooks/hooks_test.go +++ b/internal/hooks/hooks_test.go @@ -2,11 +2,14 @@ package hooks import ( "context" + "io" "strings" + "sync" "testing" "time" "github.com/charmbracelet/crush/internal/config" + "github.com/charmbracelet/crush/internal/shell" "github.com/stretchr/testify/require" ) @@ -568,6 +571,63 @@ func TestAggregationUpdatedInput(t *testing.T) { }) } +// TestRunnerAbandonRaceSafety verifies that if a hook's shell execution +// does not yield to ctx cancellation within abandonGrace, runOne returns +// promptly and never touches the shared stdout/stderr buffers again — +// even while the abandoned goroutine continues to write to them. +// +// The substitute shell executor ignores ctx entirely, writes to Stdout +// both before and after the abandon deadline, and only then returns. +// Under -race this catches any code path in runOne that reads those +// buffers after returning the DecisionNone abandon result. +func TestRunnerAbandonRaceSafety(t *testing.T) { + origRunShell := runShell + t.Cleanup(func() { runShell = origRunShell }) + + // Synchronize shutdown with the abandoned goroutine so the test + // exits cleanly even under -race. + var wg sync.WaitGroup + release := make(chan struct{}) + t.Cleanup(func() { + close(release) + wg.Wait() + }) + + runShell = func(_ context.Context, opts shell.RunOptions) error { + wg.Add(1) + defer wg.Done() + // Write before the caller observes ctx.Done(); the caller will + // not read the buffer while we still own it. + _, _ = io.WriteString(opts.Stdout, "before\n") + // Hold past ctx deadline + abandonGrace so the caller takes + // the abandon branch, then continue writing. If the caller + // reads these buffers after abandoning, -race will flag it. + select { + case <-time.After(5 * time.Second): + case <-release: + } + _, _ = io.WriteString(opts.Stdout, "after\n") + return nil + } + + hookCfg := config.HookConfig{ + Command: "# irrelevant; runShell is stubbed", + Timeout: 1, + } + r := NewRunner([]config.HookConfig{hookCfg}, t.TempDir(), t.TempDir()) + + start := time.Now() + result, err := r.Run(context.Background(), EventPreToolUse, "sess", "bash", `{}`) + elapsed := time.Since(start) + + require.NoError(t, err) + require.Equal(t, DecisionNone, result.Decision) + // Abandon must happen at ~timeout + abandonGrace. Allow generous + // slack so CI noise doesn't flake the test. + require.Less(t, elapsed, 3500*time.Millisecond, + "runOne should return within timeout+abandonGrace+slack") +} + func TestRunnerUpdatedInput(t *testing.T) { t.Parallel() hookCfg := config.HookConfig{ diff --git a/internal/hooks/runner.go b/internal/hooks/runner.go index e3b462b39f721620522d4f2a6d61a114a07d0fa6..e3bf6ca29c36ef35524d6b4d645b7112118c79e5 100644 --- a/internal/hooks/runner.go +++ b/internal/hooks/runner.go @@ -4,14 +4,26 @@ import ( "bytes" "context" "log/slog" - "os/exec" "strings" "sync" "time" "github.com/charmbracelet/crush/internal/config" + "github.com/charmbracelet/crush/internal/shell" ) +// abandonGrace is how long runOne waits after ctx cancellation for the +// shell goroutine to yield before returning control to the caller and +// letting the goroutine finish on its own. Mirrors the historical +// cmd.WaitDelay = time.Second behavior of the previous os/exec path. +const abandonGrace = time.Second + +// runShell is the shell executor used by runOne. It is a package-level +// variable so tests can substitute a blocking or non-yielding +// implementation to exercise the abandon-on-timeout path without +// depending on the scheduling behavior of the real interpreter. +var runShell = shell.Run + // Runner executes hook commands and aggregates their results. type Runner struct { hooks []config.HookConfig @@ -102,24 +114,59 @@ func (r *Runner) matchingHooks(toolName string) []config.HookConfig { } // runOne executes a single hook command and returns its result. +// +// Execution goes through Crush's embedded POSIX shell (shell.Run) so the +// same interpreter, builtins, and coreutils are visible to hooks as to +// the bash tool. BlockFuncs are intentionally omitted: hooks are +// user-authored config that carry the same trust as a shell alias. +// +// A hook that fails to yield after its deadline has passed is abandoned +// after abandonGrace so the caller never blocks longer than +// timeout + abandonGrace. Ownership of the stdout and stderr buffers is +// strictly single-goroutine: +// - before receiving from `done`, only the goroutine writes to them; +// - after `done` delivers a value, the goroutine is finished and the +// outer frame reads them; +// - on the abandon path, the goroutine may still be writing and the +// outer frame must not touch them again. func (r *Runner) runOne(parentCtx context.Context, hook config.HookConfig, envVars []string, payload []byte) HookResult { timeout := hook.TimeoutDuration() ctx, cancel := context.WithTimeout(parentCtx, timeout) defer cancel() - cmd := exec.CommandContext(ctx, "sh", "-c", hook.Command) - cmd.WaitDelay = time.Second - cmd.Env = envVars - cmd.Dir = r.cwd - cmd.Stdin = bytes.NewReader(payload) - var stdout, stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() + done := make(chan error, 1) + go func() { + done <- runShell(ctx, shell.RunOptions{ + Command: hook.Command, + Cwd: r.cwd, + Env: envVars, + Stdin: bytes.NewReader(payload), + Stdout: &stdout, + Stderr: &stderr, + }) + }() + + var err error + select { + case err = <-done: + // Normal path: goroutine has finished, buffers are safe to read. + case <-ctx.Done(): + select { + case err = <-done: + // Interpreter yielded within the grace period; safe to read. + case <-time.After(abandonGrace): + slog.Warn("Hook did not yield after cancel; abandoning goroutine", + "command", hook.Command, + "timeout", timeout, + ) + // The goroutine may still be writing to stdout/stderr; do + // not read either buffer below this point. + return HookResult{Decision: DecisionNone} + } + } - if ctx.Err() != nil { + if shell.IsInterrupt(err) { // Distinguish timeout from parent cancellation. if parentCtx.Err() != nil { slog.Debug("Hook cancelled by parent context", "command", hook.Command) @@ -130,10 +177,7 @@ func (r *Runner) runOne(parentCtx context.Context, hook config.HookConfig, envVa } if err != nil { - exitCode := -1 - if cmd.ProcessState != nil { - exitCode = cmd.ProcessState.ExitCode() - } + exitCode := shell.ExitCode(err) switch exitCode { case 2: // Exit code 2 = block this tool call. Stderr is the reason. @@ -162,6 +206,7 @@ func (r *Runner) runOne(parentCtx context.Context, hook config.HookConfig, envVa "command", hook.Command, "exit_code", exitCode, "stderr", strings.TrimSpace(stderr.String()), + "error", err, ) return HookResult{Decision: DecisionNone} }