feat(shell): shebang/binary/in-process dispatch handler

Christian Rocha created 6 days ago

Change summary

internal/shell/dispatch.go              | 426 +++++++++++++++++++
internal/shell/dispatch_test.go         | 594 +++++++++++++++++++++++++++
internal/shell/dispatch_windows_test.go |  40 +
internal/shell/run.go                   |  12 
4 files changed, 1,069 insertions(+), 3 deletions(-)

Detailed changes

internal/shell/dispatch.go 🔗

@@ -0,0 +1,426 @@
+package shell
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"fmt"
+	"io"
+	"io/fs"
+	"log/slog"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"runtime"
+	"strings"
+
+	"mvdan.cc/sh/v3/expand"
+	"mvdan.cc/sh/v3/interp"
+	"mvdan.cc/sh/v3/syntax"
+)
+
+// probeWindow is how many bytes we read from the head of a file to decide
+// how to dispatch it. 128 is plenty for a shebang line and for magic-byte
+// inspection, while small enough to make the probe cheap for users whose
+// hooks invoke many scripts.
+//
+// Classification and shebang parsing only ever see this prefix: a shebang
+// line that does not end within the window is parsed from its truncated
+// head.
+const probeWindow = 128
+
+// scriptDispatchHandler builds exec-handler middleware that takes over
+// whenever argv[0] is a path reference (./foo.sh, /opt/bin/tool,
+// C:\foo\bar.exe) and picks an execution strategy from the first bytes of
+// the referenced file:
+//
+//   - `#!` at the very start: the shebang's interpreter is run through
+//     os/exec (see dispatchShebang).
+//   - binary content (known magic number or a NUL byte in the probe): the
+//     call is forwarded untouched to the next handler in the chain.
+//   - anything else: the file is treated as shell source and executed
+//     in-process by a nested runner (see runShellSource).
+//
+// Empty argv and bare command names such as `echo` or `jq` are forwarded to
+// next without touching the filesystem.
+//
+// blockFuncs is handed to the nested runner in the shell-source case so the
+// same deny rules apply to commands started from inside such a script.
+func scriptDispatchHandler(blockFuncs []BlockFunc) func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
+	return func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
+		return func(ctx context.Context, args []string) error {
+			// Nothing to inspect for an empty argv.
+			if len(args) == 0 {
+				return next(ctx, args)
+			}
+			// Bare names are resolved by the rest of the chain.
+			if !isPathPrefixed(args[0]) {
+				return next(ctx, args)
+			}
+
+			// Relative paths are anchored at the interpreter's current
+			// directory rather than the process cwd, because the shell
+			// may have changed directory before this exec.
+			target := args[0]
+			if !filepath.IsAbs(target) {
+				target = filepath.Join(interp.HandlerCtx(ctx).Dir, target)
+			}
+
+			head, err := probeFile(target)
+			if err != nil {
+				return err
+			}
+
+			if hasShebang(head) {
+				return dispatchShebang(ctx, target, head, args)
+			}
+			if isBinary(head) {
+				return next(ctx, args)
+			}
+			return runShellSource(ctx, target, args, blockFuncs)
+		}
+	}
+}
+
+// isPathPrefixed tells whether arg names a file by path rather than a bare
+// command that should be looked up on PATH. On every platform the accepted
+// prefixes are `./`, `../` and `/`. On Windows a leading backslash and a
+// drive-letter prefix (`C:\` or `C:/`) count as well.
+//
+// The check is purely textual: no expansion or filesystem access happens
+// here.
+func isPathPrefixed(arg string) bool {
+	for _, prefix := range []string{"./", "../", "/"} {
+		if strings.HasPrefix(arg, prefix) {
+			return true
+		}
+	}
+	if runtime.GOOS != "windows" {
+		return false
+	}
+	// Backslash-rooted and UNC-style paths.
+	if strings.HasPrefix(arg, "\\") {
+		return true
+	}
+	// Drive-letter paths need at least "X:" plus one separator, so a lone
+	// "C:" is not treated as a path.
+	return len(arg) >= 3 &&
+		isDriveLetter(arg[0]) &&
+		arg[1] == ':' &&
+		(arg[2] == '\\' || arg[2] == '/')
+}
+
+// isDriveLetter reports whether b is an ASCII letter, upper or lower case.
+func isDriveLetter(b byte) bool {
+	switch {
+	case 'A' <= b && b <= 'Z', 'a' <= b && b <= 'z':
+		return true
+	}
+	return false
+}
+
+// probeFile returns up to probeWindow bytes from the start of the file at
+// path. Shorter files yield a shorter slice and an empty file yields an
+// empty one. The whole file is never read here, which keeps the cost
+// bounded no matter how large the target is.
+//
+// Errors from opening or stat-ing the file are returned as-is so the
+// caller sees the underlying cause. A directory is rejected with an
+// "is a directory" error.
+func probeFile(path string) ([]byte, error) {
+	file, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer file.Close()
+
+	info, err := file.Stat()
+	switch {
+	case err != nil:
+		return nil, err
+	case info.IsDir():
+		return nil, fmt.Errorf("%s: is a directory", path)
+	}
+
+	head := make([]byte, probeWindow)
+	n, err := io.ReadFull(file, head)
+	switch err {
+	case nil, io.EOF, io.ErrUnexpectedEOF:
+		// A short or empty file is not an error for probing purposes.
+		return head[:n], nil
+	default:
+		return nil, err
+	}
+}
+
+// hasShebang reports whether probe begins with the two-byte `#!` marker.
+// Anything shorter than two bytes, including a lone `#`, is not a shebang.
+func hasShebang(probe []byte) bool {
+	return bytes.HasPrefix(probe, []byte("#!"))
+}
+
+// isBinary is a heuristic that decides whether probe looks like an
+// executable or some other non-text file. It returns true when probe
+// contains a NUL byte anywhere, or when it starts with one of the known
+// executable signatures (DOS/PE "MZ", ELF, the Mach-O variants, or the
+// Mach-O fat header). The signature check covers binaries whose probed
+// head happens to contain no NUL.
+func isBinary(probe []byte) bool {
+	if bytes.Contains(probe, []byte{0}) {
+		return true
+	}
+	signatures := [...]string{
+		"MZ",               // Windows PE / DOS MZ.
+		"\x7fELF",          // ELF.
+		"\xfe\xed\xfa\xce", // Mach-O 32-bit BE.
+		"\xfe\xed\xfa\xcf", // Mach-O 64-bit BE.
+		"\xcf\xfa\xed\xfe", // Mach-O 64-bit LE.
+		"\xce\xfa\xed\xfe", // Mach-O 32-bit LE.
+		"\xca\xfe\xba\xbe", // Mach-O fat binary.
+	}
+	for i := range signatures {
+		if bytes.HasPrefix(probe, []byte(signatures[i])) {
+			return true
+		}
+	}
+	return false
+}
+
+// dispatchShebang runs scriptPath through the interpreter named on its
+// shebang line. probe holds the head of the file (it must start with `#!`)
+// and args is the original argv, whose tail args[1:] is forwarded to the
+// interpreter after the script path. The child inherits the handler
+// context's working directory, exported environment and stdio.
+//
+// Results:
+//   - a malformed shebang prints a diagnostic to the handler's stderr and
+//     yields exit status 126;
+//   - an interpreter that cannot be resolved prints a diagnostic and
+//     yields exit status 127;
+//   - a non-zero interpreter exit is returned as interp.ExitStatus so the
+//     parent interpreter treats it as an ordinary failing command;
+//   - any other failure from os/exec (for example the process could not
+//     be started) is returned unchanged.
+func dispatchShebang(ctx context.Context, scriptPath string, probe []byte, args []string) error {
+	hc := interp.HandlerCtx(ctx)
+
+	sb, err := parseShebang(probe)
+	if err != nil {
+		fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
+		return interp.ExitStatus(126)
+	}
+
+	interpreter, err := resolveInterpreter(sb.interpreter)
+	if err != nil {
+		fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
+		return interp.ExitStatus(127)
+	}
+
+	// argv for the interpreter: shebang args, the script, then the
+	// caller's own arguments.
+	cmdArgs := make([]string, 0, len(sb.args)+len(args))
+	cmdArgs = append(cmdArgs, sb.args...)
+	cmdArgs = append(cmdArgs, scriptPath)
+	cmdArgs = append(cmdArgs, args[1:]...)
+
+	cmd := exec.CommandContext(ctx, interpreter, cmdArgs...)
+	cmd.Dir = hc.Dir
+	cmd.Env = execEnvList(hc.Env)
+	cmd.Stdin = hc.Stdin
+	cmd.Stdout = hc.Stdout
+	cmd.Stderr = hc.Stderr
+
+	err = cmd.Run()
+	if err == nil {
+		return nil
+	}
+	var exitErr *exec.ExitError
+	if !errors.As(err, &exitErr) {
+		return err
+	}
+	return interp.ExitStatus(shebangExitStatus(exitErr.ExitCode()))
+}
+
+// shebangExitStatus folds a process exit code into the 0-255 range of a
+// shell exit status without ever turning a failure into success. A
+// negative code (os/exec reports -1 when the process was terminated by a
+// signal or has not exited) becomes 1. A code above 255, which can occur
+// on platforms with wider exit codes, saturates at 255 instead of being
+// truncated, since truncation would map 256 to 0.
+func shebangExitStatus(code int) uint8 {
+	switch {
+	case code < 0:
+		return 1
+	case code > 255:
+		return 255
+	}
+	return uint8(code)
+}
+
+// resolveInterpreter turns the interpreter named on a shebang line into
+// something os/exec can run.
+//
+// A bare name (no `/` or `\`), which is what an env shebang unwraps to, is
+// resolved through PATH only. It is deliberately not stat'ed first: a
+// relative stat would be answered from the process working directory, so
+// an unrelated file or directory that happens to be called `bash` there
+// would short-circuit the lookup and hide a genuine "not found".
+//
+// A path is tried literally first. Only when it is genuinely missing do we
+// fall back to a PATH lookup on its basename. A path that exists but fails
+// stat for another reason (EACCES, ELOOP, etc.) surfaces the real error:
+// silently picking a different binary off PATH would hide the problem.
+//
+// The permissive fallback is what makes #!/bin/bash portable to Windows
+// boxes where Git for Windows puts bash.exe on PATH but there is no
+// /bin/bash on disk.
+func resolveInterpreter(path string) (string, error) {
+	if !strings.ContainsAny(path, `/\`) {
+		resolved, err := exec.LookPath(path)
+		if err != nil {
+			return "", fmt.Errorf("interpreter %q not found in PATH", path)
+		}
+		return resolved, nil
+	}
+
+	_, statErr := os.Stat(path)
+	if statErr == nil {
+		return path, nil
+	}
+	if !errors.Is(statErr, fs.ErrNotExist) {
+		return "", statErr
+	}
+
+	base := filepath.Base(path)
+	resolved, err := exec.LookPath(base)
+	if err != nil {
+		return "", fmt.Errorf("interpreter %q not found and %q not in PATH", path, base)
+	}
+	slog.Debug("Shebang interpreter not found; falling back to PATH",
+		"requested", path, "resolved", resolved)
+	return resolved, nil
+}
+
+// shebang is the parsed form of a `#!` line. interpreter is the program to
+// run and args are the extra arguments placed before the script path.
+// Unless `env -S` was used, args holds at most one element: the whole
+// remainder of the line, un-tokenized, which is how the kernel passes it.
+type shebang struct {
+	interpreter string
+	args        []string
+}
+
+// parseShebang extracts the interpreter invocation from probe, which must
+// start with `#!`. Only the first line is considered (or all of probe when
+// it contains no newline).
+//
+// Tolerated input:
+//   - CRLF line endings;
+//   - blanks between `#!` and the interpreter ("#! /usr/bin/env bash");
+//   - trailing spaces and tabs, which are dropped rather than leaking
+//     into the interpreter argument.
+//
+// A shebang whose interpreter's basename is `env` is unwrapped by
+// parseEnvShebang. Any other interpreter receives the rest of the line as
+// one single argument.
+//
+// Errors: "not a shebang" when the marker is missing, "empty shebang" when
+// nothing follows it, plus whatever parseEnvShebang reports.
+func parseShebang(probe []byte) (*shebang, error) {
+	if !bytes.HasPrefix(probe, []byte("#!")) {
+		return nil, errors.New("not a shebang")
+	}
+	line := probe[2:]
+	if nl := bytes.IndexByte(line, '\n'); nl >= 0 {
+		line = line[:nl]
+	}
+	// Drop the CR of CRLF-authored scripts together with trailing
+	// blanks; otherwise "#!/bin/bash -x " would hand "-x " to bash.
+	line = bytes.TrimRight(line, " \t\r")
+	line = bytes.TrimLeft(line, " \t")
+	if len(line) == 0 {
+		return nil, errors.New("empty shebang")
+	}
+
+	pathStr, rest := splitShebangField(string(line))
+	if isEnvShebang(pathStr) {
+		return parseEnvShebang(rest)
+	}
+
+	// Literal interpreter path: the remainder is a single argv[1].
+	sb := &shebang{interpreter: pathStr}
+	if rest != "" {
+		sb.args = []string{rest}
+	}
+	return sb, nil
+}
+
+// splitShebangField cuts s at its first space or tab. It returns the text
+// before the blank and the text after it with leading blanks removed. When
+// s contains no blank, field is s and rest is empty.
+func splitShebangField(s string) (field, rest string) {
+	idx := strings.IndexAny(s, " \t")
+	if idx < 0 {
+		return s, ""
+	}
+	return s[:idx], strings.TrimLeft(s[idx+1:], " \t")
+}
+
+// isEnvShebang reports whether the shebang interpreter p is `env`, judged
+// by basename. That covers /usr/bin/env and /bin/env as well as a bare
+// `env` or an env installed in some other directory.
+func isEnvShebang(p string) bool {
+	return filepath.Base(p) == "env"
+}
+
+// parseEnvShebang interprets rest, the text following `env` on a shebang
+// line, and returns the program env would have been asked to run.
+//
+//   - "NAME": run NAME with no extra arguments.
+//   - "NAME stuff": run NAME with "stuff" as one single argument.
+//   - "-S NAME a b": run NAME with the arguments split on whitespace.
+//
+// Any flag other than -S is rejected rather than forwarded, and a missing
+// program name is an error. Leading and trailing blanks in rest are
+// ignored.
+func parseEnvShebang(rest string) (*shebang, error) {
+	rest = strings.Trim(rest, " \t")
+	if rest == "" {
+		return nil, errors.New("env: missing program name")
+	}
+
+	useSplit := false
+	if strings.HasPrefix(rest, "-") {
+		flag, after := splitShebangField(rest)
+		if flag != "-S" {
+			return nil, fmt.Errorf("unsupported env flag: %s", flag)
+		}
+		if after == "" {
+			return nil, errors.New("env -S requires a program")
+		}
+		useSplit, rest = true, after
+	}
+
+	prog, remainder := splitShebangField(rest)
+	sb := &shebang{interpreter: prog}
+	switch {
+	case remainder == "":
+		// No extra arguments.
+	case useSplit:
+		sb.args = strings.Fields(remainder)
+	default:
+		sb.args = []string{remainder}
+	}
+	return sb, nil
+}
+
+// runShellSource reads the file at path in full, parses it as shell source
+// and executes it in-process on a freshly built interp.Runner.
+//
+// The nested runner is configured from the handler context carried by ctx:
+// same stdio, same environment, same working directory. It is
+// non-interactive and gets the handler stack produced by
+// standardHandlers(blockFuncs), so commands started by the script go
+// through the same middleware. args[1:], when present, become the script's
+// positional parameters.
+//
+// Read and parse failures, as well as a failure to construct the runner,
+// are returned as errors. Otherwise the result is whatever the runner's
+// Run returns.
+func runShellSource(ctx context.Context, path string, args []string, blockFuncs []BlockFunc) error {
+	src, err := os.ReadFile(path)
+	if err != nil {
+		return err
+	}
+
+	parsed, err := syntax.NewParser().Parse(bytes.NewReader(src), path)
+	if err != nil {
+		return fmt.Errorf("could not parse %s: %w", path, err)
+	}
+
+	hc := interp.HandlerCtx(ctx)
+
+	runnerOpts := make([]interp.RunnerOption, 0, 6)
+	runnerOpts = append(runnerOpts,
+		interp.StdIO(hc.Stdin, hc.Stdout, hc.Stderr),
+		interp.Interactive(false),
+		interp.Env(hc.Env),
+		interp.Dir(hc.Dir),
+		interp.ExecHandlers(standardHandlers(blockFuncs)...),
+	)
+	if len(args) > 1 {
+		// The leading "--" stops a positional argument such as "-e"
+		// from being read as a set-option.
+		positional := make([]string, 0, len(args))
+		positional = append(positional, "--")
+		positional = append(positional, args[1:]...)
+		runnerOpts = append(runnerOpts, interp.Params(positional...))
+	}
+
+	nested, err := interp.New(runnerOpts...)
+	if err != nil {
+		return fmt.Errorf("could not build runner for %s: %w", path, err)
+	}
+	return nested.Run(ctx, parsed)
+}
+
+// execEnvList converts an expand.Environ to the []string form that
+// os/exec.Cmd.Env expects. Only exported string variables are included,
+// matching what a real shell would pass to a child process.
+func execEnvList(env expand.Environ) []string {
+	var out []string
+	env.Each(func(name string, vr expand.Variable) bool {
+		if vr.Exported && vr.Kind == expand.String {
+			out = append(out, name+"="+vr.Str)
+		}
+		return true
+	})
+	return out
+}

internal/shell/dispatch_test.go 🔗

@@ -0,0 +1,594 @@
+package shell
+
+import (
+	"bytes"
+	"crypto/rand"
+	"encoding/hex"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"reflect"
+	"runtime"
+	"strings"
+	"testing"
+)
+
+// writeScript creates dir/name holding contents with mode 0o755 and
+// returns the full path. Any write failure aborts the calling test. The
+// executable bit only matters on Unix; dispatch itself decides from the
+// file's contents, not its permissions.
+func writeScript(t *testing.T, dir, name, contents string) string {
+	t.Helper()
+	target := filepath.Join(dir, name)
+	err := os.WriteFile(target, []byte(contents), 0o755)
+	if err != nil {
+		t.Fatalf("write %s: %v", name, err)
+	}
+	return target
+}
+
+// randSuffix returns a short random hex string, used to build
+// intentionally-unique paths that won't collide with anything on disk.
+func randSuffix() string {
+	var b [4]byte
+	_, _ = rand.Read(b[:])
+	return hex.EncodeToString(b[:])
+}
+
+// TestIsPathPrefixed covers the classification rules used by the dispatch
+// handler to decide whether argv[0] is a file reference.
+func TestIsPathPrefixed(t *testing.T) {
+	cases := []struct {
+		in   string
+		want bool
+	}{
+		{"./foo.sh", true},
+		{"../foo.sh", true},
+		{"/usr/bin/foo", true},
+		{"foo", false},
+		{"foo.sh", false},
+		{"jq", false},
+		{"", false},
+	}
+	for _, c := range cases {
+		if got := isPathPrefixed(c.in); got != c.want {
+			t.Errorf("isPathPrefixed(%q) = %v, want %v", c.in, got, c.want)
+		}
+	}
+
+	if runtime.GOOS == "windows" {
+		winCases := []struct {
+			in   string
+			want bool
+		}{
+			{`C:\foo\bar.exe`, true},
+			{`C:/foo/bar.exe`, true},
+			{`c:\foo`, true},
+			{`Z:/x`, true},
+			{`C:`, false}, // just a drive, no path.
+			{`\\server\share`, true},
+		}
+		for _, c := range winCases {
+			if got := isPathPrefixed(c.in); got != c.want {
+				t.Errorf("isPathPrefixed(%q) = %v, want %v", c.in, got, c.want)
+			}
+		}
+	}
+}
+
+// TestParseShebang covers the shebang grammar: literal paths, env,
+// env -S, kernel single-arg semantics, CRLF tolerance, and every
+// enumerated error case.
+func TestParseShebang(t *testing.T) {
+	type want struct {
+		interp string
+		args   []string
+		errSub string // substring expected in error message (empty → no error)
+	}
+	cases := []struct {
+		name string
+		in   string
+		want want
+	}{
+		{
+			name: "literal-no-args",
+			in:   "#!/bin/bash\necho body\n",
+			want: want{interp: "/bin/bash"},
+		},
+		{
+			name: "literal-kernel-single-arg",
+			in:   "#!/bin/bash -x -y\n",
+			want: want{interp: "/bin/bash", args: []string{"-x -y"}},
+		},
+		{
+			name: "env-basic",
+			in:   "#!/usr/bin/env bash\n",
+			want: want{interp: "bash"},
+		},
+		{
+			name: "env-kernel-single-arg",
+			in:   "#!/usr/bin/env bash -x\n",
+			want: want{interp: "bash", args: []string{"-x"}},
+		},
+		{
+			name: "env-dash-S-splits",
+			in:   "#!/usr/bin/env -S bash -x\n",
+			want: want{interp: "bash", args: []string{"-x"}},
+		},
+		{
+			name: "env-dash-S-multi-args",
+			in:   "#!/usr/bin/env -S bash -x --noprofile\n",
+			want: want{interp: "bash", args: []string{"-x", "--noprofile"}},
+		},
+		{
+			name: "leading-space",
+			in:   "#! /usr/bin/env bash\n",
+			want: want{interp: "bash"},
+		},
+		{
+			name: "crlf",
+			in:   "#!/bin/bash\r\n",
+			want: want{interp: "/bin/bash"},
+		},
+		{
+			name: "bare-env-name",
+			in:   "#!env bash\n",
+			want: want{interp: "bash"},
+		},
+		{
+			name: "empty-after-hashbang",
+			in:   "#!\n",
+			want: want{errSub: "empty shebang"},
+		},
+		{
+			name: "env-alone",
+			in:   "#!/usr/bin/env\n",
+			want: want{errSub: "missing program name"},
+		},
+		{
+			name: "env-dash-S-alone",
+			in:   "#!/usr/bin/env -S\n",
+			want: want{errSub: "env -S requires a program"},
+		},
+		{
+			name: "env-unknown-flag",
+			in:   "#!/usr/bin/env -x bash\n",
+			want: want{errSub: "unsupported env flag"},
+		},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			sb, err := parseShebang([]byte(c.in))
+			if c.want.errSub != "" {
+				if err == nil || !strings.Contains(err.Error(), c.want.errSub) {
+					t.Fatalf("expected error containing %q, got: %v", c.want.errSub, err)
+				}
+				return
+			}
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+			if sb.interpreter != c.want.interp {
+				t.Errorf("interpreter = %q, want %q", sb.interpreter, c.want.interp)
+			}
+			if !equalStringSlice(sb.args, c.want.args) {
+				t.Errorf("args = %v, want %v", sb.args, c.want.args)
+			}
+		})
+	}
+}
+
+// equalStringSlice compares two string slices element by element, with
+// one relaxation: a nil slice and an empty slice are considered equal.
+func equalStringSlice(a, b []string) bool {
+	bothEmpty := len(a) == 0 && len(b) == 0
+	return bothEmpty || reflect.DeepEqual(a, b)
+}
+
+// TestIsBinary covers the NUL-byte and magic-byte classification used to
+// keep compiled executables off the in-process shell-source path.
+func TestIsBinary(t *testing.T) {
+	cases := []struct {
+		name string
+		in   []byte
+		want bool
+	}{
+		{"shell", []byte("echo hi\n"), false},
+		{"nul", []byte("hello\x00world"), true},
+		{"elf", []byte{0x7F, 'E', 'L', 'F', 0x02, 0x01}, true},
+		{"mz", []byte("MZ\x90\x00"), true},
+		{"macho-64-le", []byte{0xCF, 0xFA, 0xED, 0xFE}, true},
+		{"short-non-binary", []byte("a"), false},
+	}
+	for _, c := range cases {
+		if got := isBinary(c.in); got != c.want {
+			t.Errorf("%s: isBinary = %v, want %v", c.name, got, c.want)
+		}
+	}
+}
+
+// TestDispatch_ShellSourceNoShebang covers the in-process branch: a script
+// with no shebang is run by a nested runner and receives argv[1:] as its
+// positional parameters.
+func TestDispatch_ShellSourceNoShebang(t *testing.T) {
+	const want = "alpha beta\n"
+
+	workDir := t.TempDir()
+	scriptPath := writeScript(t, workDir, "args.sh", `echo "$1 $2"`)
+
+	var out bytes.Buffer
+	opts := RunOptions{
+		Command: scriptPath + " alpha beta",
+		Cwd:     workDir,
+		Stdout:  &out,
+	}
+	if err := Run(t.Context(), opts); err != nil {
+		t.Fatalf("Run returned error: %v", err)
+	}
+	if got := out.String(); got != want {
+		t.Fatalf("stdout = %q, want %q", got, want)
+	}
+}
+
+// TestDispatch_EmptyFile checks that a zero-byte script is treated as
+// empty shell source: it succeeds and writes nothing to either stream.
+func TestDispatch_EmptyFile(t *testing.T) {
+	workDir := t.TempDir()
+	scriptPath := writeScript(t, workDir, "empty.sh", "")
+
+	var out, errOut bytes.Buffer
+	opts := RunOptions{
+		Command: scriptPath,
+		Cwd:     workDir,
+		Stdout:  &out,
+		Stderr:  &errOut,
+	}
+	if err := Run(t.Context(), opts); err != nil {
+		t.Fatalf("Run returned error: %v (stderr=%q)", err, errOut.String())
+	}
+	wroteSomething := out.Len() != 0 || errOut.Len() != 0
+	if wroteSomething {
+		t.Fatalf("expected empty output, got stdout=%q stderr=%q", out.String(), errOut.String())
+	}
+}
+
+// TestDispatch_ShellSourceComposesWithPipe checks that an in-process
+// script works as the left side of a pipeline, feeding its output to the
+// jq builtin on the right.
+func TestDispatch_ShellSourceComposesWithPipe(t *testing.T) {
+	const want = "value\n"
+
+	workDir := t.TempDir()
+	scriptPath := writeScript(t, workDir, "emit.sh", `printf '"value"'`)
+
+	var out bytes.Buffer
+	opts := RunOptions{
+		Command: scriptPath + ` | jq -r .`,
+		Cwd:     workDir,
+		Stdout:  &out,
+	}
+	if err := Run(t.Context(), opts); err != nil {
+		t.Fatalf("Run returned error: %v", err)
+	}
+	if got := out.String(); got != want {
+		t.Fatalf("stdout = %q, want %q", got, want)
+	}
+}
+
+// TestDispatch_MissingFile checks that invoking a path that does not exist
+// produces an error instead of succeeding silently.
+func TestDispatch_MissingFile(t *testing.T) {
+	workDir := t.TempDir()
+	opts := RunOptions{
+		Command: filepath.Join(workDir, "nope.sh"),
+		Cwd:     workDir,
+	}
+	if err := Run(t.Context(), opts); err == nil {
+		t.Fatal("expected error for missing script, got nil")
+	}
+}
+
+// TestDispatch_DirectoryNotFile checks that invoking a directory by path
+// fails with an error that says so.
+func TestDispatch_DirectoryNotFile(t *testing.T) {
+	workDir := t.TempDir()
+	if err := os.MkdirAll(filepath.Join(workDir, "adir"), 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+
+	var errOut bytes.Buffer
+	err := Run(t.Context(), RunOptions{
+		Command: "./adir",
+		Cwd:     workDir,
+		Stderr:  &errOut,
+	})
+	switch {
+	case err == nil:
+		t.Fatal("expected error when invoking a directory, got nil")
+	case !strings.Contains(err.Error(), "is a directory"):
+		t.Fatalf("expected 'is a directory' in error, got: %v", err)
+	}
+}
+
+// TestDispatch_BashShebang runs a `#!/usr/bin/env bash` script through the
+// shebang branch and checks its output. Skipped if bash isn't on PATH
+// (rare in CI, but keep the test robust).
+func TestDispatch_BashShebang(t *testing.T) {
+	if _, err := exec.LookPath("bash"); err != nil {
+		t.Skipf("bash not in PATH: %v", err)
+	}
+
+	dir := t.TempDir()
+	script := writeScript(t, dir, "bash-echo.sh", "#!/usr/bin/env bash\necho bashout\n")
+
+	var stdout, stderr bytes.Buffer
+	err := Run(t.Context(), RunOptions{
+		Command: script,
+		Cwd:     dir,
+		Stdout:  &stdout,
+		Stderr:  &stderr,
+	})
+	if err != nil {
+		t.Fatalf("Run returned error: %v (stderr=%q)", err, stderr.String())
+	}
+	if got := stdout.String(); got != "bashout\n" {
+		t.Fatalf("stdout = %q, want %q", got, "bashout\n")
+	}
+}
+
+// TestDispatch_ShebangPassesExitCode checks that the interpreter's exit
+// code reaches the caller: a script that exits 5 must produce an error
+// whose ExitCode is 5.
+func TestDispatch_ShebangPassesExitCode(t *testing.T) {
+	_, lookErr := exec.LookPath("bash")
+	if lookErr != nil {
+		t.Skipf("bash not in PATH: %v", lookErr)
+	}
+
+	workDir := t.TempDir()
+	scriptPath := writeScript(t, workDir, "fail.sh", "#!/usr/bin/env bash\nexit 5\n")
+
+	runErr := Run(t.Context(), RunOptions{Command: scriptPath, Cwd: workDir})
+	if runErr == nil {
+		t.Fatal("expected non-nil error from exit 5")
+	}
+	if code := ExitCode(runErr); code != 5 {
+		t.Fatalf("ExitCode = %d, want 5", code)
+	}
+}
+
+// TestDispatch_MissingInterpreter checks the failure mode for a shebang
+// whose interpreter exists neither at the literal path nor on PATH: the
+// run fails with a non-zero exit code and stderr mentions "not found".
+func TestDispatch_MissingInterpreter(t *testing.T) {
+	workDir := t.TempDir()
+	body := "#!/no/such/interpreter-" + randSuffix() + "\n:\n"
+	scriptPath := writeScript(t, workDir, "bad.sh", body)
+
+	var errOut bytes.Buffer
+	runErr := Run(t.Context(), RunOptions{
+		Command: scriptPath,
+		Cwd:     workDir,
+		Stderr:  &errOut,
+	})
+	if runErr == nil {
+		t.Fatal("expected error for missing interpreter, got nil")
+	}
+	if ExitCode(runErr) == 0 {
+		t.Fatalf("expected non-zero exit code, got 0")
+	}
+	if diag := errOut.String(); !strings.Contains(diag, "not found") {
+		t.Fatalf("expected 'not found' in stderr, got: %q", diag)
+	}
+}
+
+// TestDispatch_BarePathNotHandled checks that a bare command name is left
+// alone by the dispatch handler. Running `true` in an empty directory must
+// succeed; if dispatch wrongly probed bare names, the missing file would
+// surface as an error here.
+func TestDispatch_BarePathNotHandled(t *testing.T) {
+	opts := RunOptions{
+		Command: "true",
+		Cwd:     t.TempDir(),
+	}
+	if err := Run(t.Context(), opts); err != nil {
+		t.Fatalf("bare `true` should not trigger dispatch: %v", err)
+	}
+}
+
+// TestDispatch_ProbeWindowClassifiesByHead checks that only the first
+// probeWindow bytes drive classification. The file starts with shell
+// source and carries NUL bytes beyond the window, so it must still run as
+// shell source rather than being treated as binary.
+func TestDispatch_ProbeWindowClassifiesByHead(t *testing.T) {
+	workDir := t.TempDir()
+
+	// Shell head, blank padding past the probe window, then NULs.
+	var body strings.Builder
+	body.WriteString("echo prefix\n")
+	body.WriteString(strings.Repeat(" ", probeWindow))
+	body.WriteString("\x00\x00\x00")
+	scriptPath := writeScript(t, workDir, "long.sh", body.String())
+
+	var out bytes.Buffer
+	opts := RunOptions{
+		Command: scriptPath,
+		Cwd:     workDir,
+		Stdout:  &out,
+	}
+	if err := Run(t.Context(), opts); err != nil {
+		t.Fatalf("Run returned error: %v", err)
+	}
+	if got := out.String(); !strings.HasPrefix(got, "prefix\n") {
+		t.Fatalf("stdout = %q, want prefix %q", got, "prefix\n")
+	}
+}
+
+// TestDispatch_BinaryPassthroughExecutes copies the `true` binary found on
+// PATH into a tempdir, invokes the copy via a path-prefixed argv[0], and
+// verifies it ran — i.e. the binary branch correctly returns through
+// `next` to the default exec handler.
+//
+// The copy keeps the name `true` on purpose: on systems where `true` is a
+// multi-call binary (busybox, single-binary coreutils) the program picks
+// its behavior from argv[0], so a copy under any other name would fail
+// even though dispatch did its job.
+//
+// Skipped on Windows, where there is no stock `true` binary, and whenever
+// `true` is not on PATH.
+func TestDispatch_BinaryPassthroughExecutes(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("relies on a Unix-style PATH binary")
+	}
+	src, err := exec.LookPath("true")
+	if err != nil {
+		t.Skipf("no `true` binary on PATH: %v", err)
+	}
+	data, err := os.ReadFile(src)
+	if err != nil {
+		t.Fatalf("read %s: %v", src, err)
+	}
+	dir := t.TempDir()
+	dst := filepath.Join(dir, "true")
+	if err := os.WriteFile(dst, data, 0o755); err != nil {
+		t.Fatalf("write %s: %v", dst, err)
+	}
+
+	runErr := Run(t.Context(), RunOptions{
+		Command: dst,
+		Cwd:     dir,
+		// Default handler needs PATH to resolve dynamic linker / loader
+		// helpers on some systems; inherit the process env so the copy
+		// can actually start.
+		Env: os.Environ(),
+	})
+	if runErr != nil {
+		t.Fatalf("expected copy of /bin/true to exit 0, got: %v", runErr)
+	}
+}
+
+// TestDispatch_UnreadableFile checks that a script the current user cannot
+// read fails with a permission error rather than being skipped or run.
+// POSIX-only, and skipped for root, who is not subject to file modes.
+func TestDispatch_UnreadableFile(t *testing.T) {
+	switch {
+	case runtime.GOOS == "windows":
+		t.Skip("POSIX permission model")
+	case os.Geteuid() == 0:
+		t.Skip("root bypasses file mode permission checks")
+	}
+
+	workDir := t.TempDir()
+	scriptPath := writeScript(t, workDir, "unreadable.sh", "echo nope\n")
+	if err := os.Chmod(scriptPath, 0o000); err != nil {
+		t.Fatalf("chmod: %v", err)
+	}
+	// Restore a readable mode so the tempdir can be cleaned up.
+	t.Cleanup(func() { _ = os.Chmod(scriptPath, 0o644) })
+
+	runErr := Run(t.Context(), RunOptions{Command: scriptPath, Cwd: workDir})
+	if runErr == nil {
+		t.Fatal("expected permission error, got nil")
+	}
+	if msg := runErr.Error(); !strings.Contains(msg, "permission") {
+		t.Fatalf("expected 'permission' in error, got: %v", runErr)
+	}
+}
+
+// TestDispatch_SymlinkLoop checks that invoking a path caught in a symlink
+// cycle (a → b → a) fails with a loop-flavored error. Skipped on Windows,
+// where creating symlinks needs privileges CI may not have.
+func TestDispatch_SymlinkLoop(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("symlink creation requires special privileges on Windows")
+	}
+	workDir := t.TempDir()
+	linkA := filepath.Join(workDir, "a")
+	linkB := filepath.Join(workDir, "b")
+	if err := os.Symlink(linkB, linkA); err != nil {
+		t.Fatalf("symlink a→b: %v", err)
+	}
+	if err := os.Symlink(linkA, linkB); err != nil {
+		t.Fatalf("symlink b→a: %v", err)
+	}
+
+	runErr := Run(t.Context(), RunOptions{Command: linkA, Cwd: workDir})
+	if runErr == nil {
+		t.Fatal("expected loop error, got nil")
+	}
+
+	// The wording differs between operating systems; any one of these
+	// fragments is accepted as evidence the cycle was detected.
+	msg := runErr.Error()
+	looksLikeLoop := strings.Contains(msg, "too many") ||
+		strings.Contains(msg, "loop") ||
+		strings.Contains(msg, "level")
+	if !looksLikeLoop {
+		t.Fatalf("expected symlink-loop-ish error, got: %v", runErr)
+	}
+}
+
+// TestResolveInterpreter_PermissiveFallback checks the portability
+// fallback: when the literal interpreter path is missing, the basename is
+// looked up on PATH. The test plants its own `bash` in a tempdir and
+// points PATH at it, so the outcome does not depend on the host.
+func TestResolveInterpreter_PermissiveFallback(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		// LookPath on Windows only accepts files with a recognized
+		// extension; the Unix run covers the logic under test.
+		t.Skip("Windows PATH lookup requires an extension-matched binary")
+	}
+	binDir := t.TempDir()
+	planted := filepath.Join(binDir, "bash")
+	if err := os.WriteFile(planted, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+		t.Fatalf("write fake bash: %v", err)
+	}
+	t.Setenv("PATH", binDir)
+
+	// Same basename as the planted file, under a directory that does not
+	// exist, so the literal stat reports "not found".
+	request := filepath.Join(binDir, "definitely-not-here-"+randSuffix(), "bash")
+	got, err := resolveInterpreter(request)
+	if err != nil {
+		t.Fatalf("expected fallback to succeed, got: %v", err)
+	}
+	if got != planted {
+		t.Fatalf("resolved = %q, want %q", got, planted)
+	}
+}
+
+// TestResolveInterpreter_NonENOENTErrorsSurface checks that a stat failure
+// other than "not found" is reported instead of triggering the PATH
+// fallback. The interpreter is placed in a directory whose permissions
+// are then removed, so stat-ing it fails with a permission error, and
+// that error must come back. POSIX-only, and skipped for root.
+func TestResolveInterpreter_NonENOENTErrorsSurface(t *testing.T) {
+	switch {
+	case runtime.GOOS == "windows":
+		t.Skip("POSIX permission model")
+	case os.Geteuid() == 0:
+		t.Skip("root bypasses dir mode permission checks")
+	}
+
+	root := t.TempDir()
+	lockedDir := filepath.Join(root, "private")
+	if err := os.Mkdir(lockedDir, 0o755); err != nil {
+		t.Fatalf("mkdir: %v", err)
+	}
+	candidate := filepath.Join(lockedDir, "bash")
+	if err := os.WriteFile(candidate, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+		t.Fatalf("write interpreter: %v", err)
+	}
+	// Without search permission on the directory, stat on the file
+	// inside it is denied.
+	if err := os.Chmod(lockedDir, 0o000); err != nil {
+		t.Fatalf("chmod: %v", err)
+	}
+	// Restore access so the tempdir can be removed.
+	t.Cleanup(func() { _ = os.Chmod(lockedDir, 0o755) })
+
+	_, resolveErr := resolveInterpreter(candidate)
+	if resolveErr == nil {
+		t.Fatal("expected error for unreadable interpreter, got nil")
+	}
+	if !strings.Contains(resolveErr.Error(), "permission") {
+		t.Fatalf("expected permission-denied error to surface, got: %v", resolveErr)
+	}
+}

internal/shell/dispatch_windows_test.go 🔗

@@ -0,0 +1,40 @@
+//go:build windows
+
+package shell
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+)
+
+// TestResolveInterpreter_PermissiveFallback_Windows is the Windows
+// counterpart of the POSIX fallback test. With the literal interpreter
+// path missing, resolution must fall back to a PATH lookup on the
+// basename, and that lookup must accept a PATHEXT extension.
+//
+// A bash.bat is planted instead of a .exe because writing one needs no
+// toolchain, and the test sets PATHEXT itself so .BAT is an accepted
+// extension.
+func TestResolveInterpreter_PermissiveFallback_Windows(t *testing.T) {
+	binDir := t.TempDir()
+	planted := filepath.Join(binDir, "bash.bat")
+	script := []byte("@echo off\r\nexit /b 0\r\n")
+	if err := os.WriteFile(planted, script, 0o755); err != nil {
+		t.Fatalf("write fake bash.bat: %v", err)
+	}
+	t.Setenv("PATH", binDir)
+	t.Setenv("PATHEXT", ".BAT;.CMD;.EXE")
+
+	// The requested path lives under a directory that does not exist, so
+	// the literal stat reports "not found".
+	request := filepath.Join(binDir, "definitely-not-here-"+randSuffix(), "bash")
+	got, err := resolveInterpreter(request)
+	if err != nil {
+		t.Fatalf("expected fallback to succeed, got: %v", err)
+	}
+	if got != planted {
+		t.Fatalf("resolved = %q, want %q", got, planted)
+	}
+}

internal/shell/run.go 🔗

@@ -99,12 +99,18 @@ func newRunner(cwd string, env []string, stdin io.Reader, stdout, stderr io.Writ
 }
 
 // standardHandlers returns the exec-handler middleware chain used by both
-// [Run] and [Shell]. Order matters: builtins first (so Crush's in-process
-// jq wins over any PATH binary), then the block list, then optional Go
-// coreutils. Future middleware (shebang dispatch, etc.) inserts here.
+// [Run] and [Shell]. Order matters:
+//  1. builtins first (so Crush's in-process jq wins over any PATH binary);
+//  2. script dispatch (shebang / binary / shell-source for path-prefixed
+//     argv[0], no-op for bare commands) — runs before the block list so
+//     that deny rules see the already-resolved argv of anything the
+//     script exec's rather than the outer path-prefixed wrapper;
+//  3. block list;
+//  4. optional Go coreutils (only when useGoCoreUtils is on).
 func standardHandlers(blockFuncs []BlockFunc) []func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
 	handlers := []func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc{
 		builtinHandler(),
+		scriptDispatchHandler(blockFuncs),
 		blockHandler(blockFuncs),
 	}
 	if useGoCoreUtils {