feat(hooks): run via shell.Run instead of sh -c

Christian Rocha created

Change summary

internal/hooks/hooks_test.go | 60 +++++++++++++++++++++++++++++
internal/hooks/runner.go     | 77 ++++++++++++++++++++++++++++++-------
2 files changed, 121 insertions(+), 16 deletions(-)

Detailed changes

internal/hooks/hooks_test.go 🔗

@@ -2,11 +2,14 @@ package hooks
 
 import (
 	"context"
+	"io"
 	"strings"
+	"sync"
 	"testing"
 	"time"
 
 	"github.com/charmbracelet/crush/internal/config"
+	"github.com/charmbracelet/crush/internal/shell"
 	"github.com/stretchr/testify/require"
 )
 
@@ -568,6 +571,63 @@ func TestAggregationUpdatedInput(t *testing.T) {
 	})
 }
 
+// TestRunnerAbandonRaceSafety verifies that if a hook's shell execution
+// does not yield to ctx cancellation within abandonGrace, runOne returns
+// promptly and never touches the shared stdout/stderr buffers again —
+// even while the abandoned goroutine continues to write to them.
+//
+// The substitute shell executor ignores ctx entirely, writes to Stdout
+// both before and after the abandon deadline, and only then returns.
+// Under -race this catches any code path in runOne that reads those
+// buffers after returning the DecisionNone abandon result.
+func TestRunnerAbandonRaceSafety(t *testing.T) {
+	origRunShell := runShell
+	t.Cleanup(func() { runShell = origRunShell })
+
+	// Synchronize shutdown with the abandoned goroutine so the test
+	// exits cleanly even under -race.
+	var wg sync.WaitGroup
+	release := make(chan struct{})
+	t.Cleanup(func() {
+		close(release)
+		wg.Wait()
+	})
+
+	runShell = func(_ context.Context, opts shell.RunOptions) error {
+		wg.Add(1)
+		defer wg.Done()
+		// Write before the caller observes ctx.Done(); the caller will
+		// not read the buffer while we still own it.
+		_, _ = io.WriteString(opts.Stdout, "before\n")
+		// Hold past ctx deadline + abandonGrace so the caller takes
+		// the abandon branch, then continue writing. If the caller
+		// reads these buffers after abandoning, -race will flag it.
+		select {
+		case <-time.After(5 * time.Second):
+		case <-release:
+		}
+		_, _ = io.WriteString(opts.Stdout, "after\n")
+		return nil
+	}
+
+	hookCfg := config.HookConfig{
+		Command: "# irrelevant; runShell is stubbed",
+		Timeout: 1,
+	}
+	r := NewRunner([]config.HookConfig{hookCfg}, t.TempDir(), t.TempDir())
+
+	start := time.Now()
+	result, err := r.Run(context.Background(), EventPreToolUse, "sess", "bash", `{}`)
+	elapsed := time.Since(start)
+
+	require.NoError(t, err)
+	require.Equal(t, DecisionNone, result.Decision)
+	// Abandon must happen at ~timeout + abandonGrace. Allow generous
+	// slack so CI noise doesn't flake the test.
+	require.Less(t, elapsed, 3500*time.Millisecond,
+		"runOne should return within timeout+abandonGrace+slack")
+}
+
 func TestRunnerUpdatedInput(t *testing.T) {
 	t.Parallel()
 	hookCfg := config.HookConfig{

internal/hooks/runner.go 🔗

@@ -4,14 +4,26 @@ import (
 	"bytes"
 	"context"
 	"log/slog"
-	"os/exec"
 	"strings"
 	"sync"
 	"time"
 
 	"github.com/charmbracelet/crush/internal/config"
+	"github.com/charmbracelet/crush/internal/shell"
 )
 
+// abandonGrace is how long runOne waits after ctx cancellation for the
+// shell goroutine to yield before returning control to the caller and
+// letting the goroutine finish on its own. Mirrors the historical
+// cmd.WaitDelay = time.Second behavior of the previous os/exec path.
+const abandonGrace = time.Second
+
+// runShell is the shell executor used by runOne. It is a package-level
+// variable so tests can substitute a blocking or non-yielding
+// implementation to exercise the abandon-on-timeout path without
+// depending on the scheduling behavior of the real interpreter.
+var runShell = shell.Run
+
 // Runner executes hook commands and aggregates their results.
 type Runner struct {
 	hooks      []config.HookConfig
@@ -102,24 +114,59 @@ func (r *Runner) matchingHooks(toolName string) []config.HookConfig {
 }
 
 // runOne executes a single hook command and returns its result.
+//
+// Execution goes through Crush's embedded POSIX shell (shell.Run) so the
+// same interpreter, builtins, and coreutils are visible to hooks as to
+// the bash tool. BlockFuncs are intentionally omitted: hooks are
+// user-authored config that carry the same trust as a shell alias.
+//
+// A hook that fails to yield after its deadline has passed is abandoned
+// after abandonGrace so the caller never blocks longer than
+// timeout + abandonGrace. Ownership of the stdout and stderr buffers is
+// strictly single-goroutine:
+//   - before receiving from `done`, only the goroutine writes to them;
+//   - after `done` delivers a value, the goroutine is finished and the
+//     outer frame reads them;
+//   - on the abandon path, the goroutine may still be writing and the
+//     outer frame must not touch them again.
 func (r *Runner) runOne(parentCtx context.Context, hook config.HookConfig, envVars []string, payload []byte) HookResult {
 	timeout := hook.TimeoutDuration()
 	ctx, cancel := context.WithTimeout(parentCtx, timeout)
 	defer cancel()
 
-	cmd := exec.CommandContext(ctx, "sh", "-c", hook.Command)
-	cmd.WaitDelay = time.Second
-	cmd.Env = envVars
-	cmd.Dir = r.cwd
-	cmd.Stdin = bytes.NewReader(payload)
-
 	var stdout, stderr bytes.Buffer
-	cmd.Stdout = &stdout
-	cmd.Stderr = &stderr
-
-	err := cmd.Run()
+	done := make(chan error, 1)
+	go func() {
+		done <- runShell(ctx, shell.RunOptions{
+			Command: hook.Command,
+			Cwd:     r.cwd,
+			Env:     envVars,
+			Stdin:   bytes.NewReader(payload),
+			Stdout:  &stdout,
+			Stderr:  &stderr,
+		})
+	}()
+
+	var err error
+	select {
+	case err = <-done:
+		// Normal path: goroutine has finished, buffers are safe to read.
+	case <-ctx.Done():
+		select {
+		case err = <-done:
+			// Interpreter yielded within the grace period; safe to read.
+		case <-time.After(abandonGrace):
+			slog.Warn("Hook did not yield after cancel; abandoning goroutine",
+				"command", hook.Command,
+				"timeout", timeout,
+			)
+			// The goroutine may still be writing to stdout/stderr; do
+			// not read either buffer below this point.
+			return HookResult{Decision: DecisionNone}
+		}
+	}
 
-	if ctx.Err() != nil {
+	if shell.IsInterrupt(err) {
 		// Distinguish timeout from parent cancellation.
 		if parentCtx.Err() != nil {
 			slog.Debug("Hook cancelled by parent context", "command", hook.Command)
@@ -130,10 +177,7 @@ func (r *Runner) runOne(parentCtx context.Context, hook config.HookConfig, envVa
 	}
 
 	if err != nil {
-		exitCode := -1
-		if cmd.ProcessState != nil {
-			exitCode = cmd.ProcessState.ExitCode()
-		}
+		exitCode := shell.ExitCode(err)
 		switch exitCode {
 		case 2:
 			// Exit code 2 = block this tool call. Stderr is the reason.
@@ -162,6 +206,7 @@ func (r *Runner) runOne(parentCtx context.Context, hook config.HookConfig, envVa
 				"command", hook.Command,
 				"exit_code", exitCode,
 				"stderr", strings.TrimSpace(stderr.String()),
+				"error", err,
 			)
 			return HookResult{Decision: DecisionNone}
 		}