diff --git a/internal/shell/dispatch.go b/internal/shell/dispatch.go new file mode 100644 index 0000000000000000000000000000000000000000..869970639a5d3ba597ddb59e4486ea484972fe5d --- /dev/null +++ b/internal/shell/dispatch.go @@ -0,0 +1,426 @@ +package shell + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "io/fs" + "log/slog" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + + "mvdan.cc/sh/v3/expand" + "mvdan.cc/sh/v3/interp" + "mvdan.cc/sh/v3/syntax" +) + +// probeWindow is how many bytes we read from the head of a file to decide +// how to dispatch it. 128 is plenty for a shebang line and for magic-byte +// inspection, while small enough to make the probe cheap for users whose +// hooks invoke many scripts. +const probeWindow = 128 + +// scriptDispatchHandler returns middleware that intercepts exec of a +// path-prefixed argv[0] (e.g. ./foo.sh, /opt/bin/tool, C:\foo\bar.exe) and +// dispatches based on the file's contents: +// +// 1. Shebang line (#!...) → exec the named interpreter via os/exec. The +// interpreter is resolved literally first, then via PATH on the +// basename as a permissive fallback (so #!/bin/bash works on Windows +// boxes where Git for Windows puts bash.exe on PATH). +// 2. Known binary magic (MZ, ELF, Mach-O) or a NUL byte in the probe +// window → pass through to the next handler (mvdan's default exec). +// 3. Otherwise → treat the file as shell source and run it in-process via +// a nested interp.Runner that reuses the same handler stack. +// +// Non-path-prefixed argv[0] and empty args are passed straight through; this +// handler is a no-op for ordinary commands like `echo` or `jq`. +// +// blockFuncs is the block list used when building the nested runner for the +// shell-source case, so deny rules apply recursively to commands invoked +// from in-process scripts. +func scriptDispatchHandler(blockFuncs []BlockFunc) func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc { + return func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc { + return func(ctx context.Context, args []string) error { + if len(args) == 0 || !isPathPrefixed(args[0]) { + return next(ctx, args) + } + + scriptPath := args[0] + // Resolve relative paths against the interpreter's cwd, not + // the process cwd — hook commands are authored with the hook + // Runner's cwd in mind and sub-shells can cd before an exec. + if !filepath.IsAbs(scriptPath) { + scriptPath = filepath.Join(interp.HandlerCtx(ctx).Dir, scriptPath) + } + probe, err := probeFile(scriptPath) + if err != nil { + return err + } + + switch { + case hasShebang(probe): + return dispatchShebang(ctx, scriptPath, probe, args) + case isBinary(probe): + return next(ctx, args) + default: + return runShellSource(ctx, scriptPath, args, blockFuncs) + } + } + } +} + +// isPathPrefixed reports whether argv[0] is a file reference (as opposed +// to a bare command to be resolved via PATH). A path reference starts with +// `./`, `../`, `/`, or — on Windows — a drive-letter prefix. +// +// Note: mvdan already performs tilde expansion during word expansion, so +// `~/script.sh` arrives here as an absolute path. We still call the helper +// on the raw string to stay robust if a future change ever bypasses that +// expansion; cover that path with a regression test. +func isPathPrefixed(arg string) bool { + switch { + case strings.HasPrefix(arg, "./"), + strings.HasPrefix(arg, "../"), + strings.HasPrefix(arg, "/"): + return true + } + if runtime.GOOS == "windows" { + // Drive-letter paths: C:\foo or C:/foo (length check avoids + // accidentally matching a single letter followed by a colon). + if len(arg) >= 3 && isDriveLetter(arg[0]) && arg[1] == ':' && + (arg[2] == '\\' || arg[2] == '/') { + return true + } + // Also treat backslash-prefixed UNC-like paths as path-prefixed. + if strings.HasPrefix(arg, "\\") { + return true + } + } + return false +} + +func isDriveLetter(b byte) bool { + return (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z') +} + +// probeFile reads the first probeWindow bytes of the target path. It +// deliberately does not slurp the whole file: callers that need the full +// contents (only the shell-source branch) re-open via os.ReadFile. This +// keeps memory bounded when argv[0] turns out to be a large binary. +// +// Returns errors surfaced by os.Open/os.Stat directly so callers see the +// real reason: ENOENT, EACCES, EISDIR, ELOOP, etc. +func probeFile(path string) ([]byte, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + fi, err := f.Stat() + if err != nil { + return nil, err + } + if fi.IsDir() { + return nil, fmt.Errorf("%s: is a directory", path) + } + probe := make([]byte, probeWindow) + n, err := io.ReadFull(f, probe) + if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { + return nil, err + } + return probe[:n], nil +} + +// hasShebang reports whether probe starts with the `#!` marker. A +// one-byte file that happens to be `#` is not a shebang. +func hasShebang(probe []byte) bool { + return len(probe) >= 2 && probe[0] == '#' && probe[1] == '!' +} + +// isBinary heuristically classifies probe as an executable or otherwise +// non-text file. A NUL byte in the first probeWindow bytes is the classic +// Unix-y text-vs-binary signal; we additionally recognize known magic +// numbers so we can fast-path well-formed binaries that happen to have no +// NUL in the first 128 bytes (rare but possible for small binaries). +func isBinary(probe []byte) bool { + if bytes.IndexByte(probe, 0) >= 0 { + return true + } + magics := [][]byte{ + {'M', 'Z'}, // Windows PE / DOS MZ. + {0x7F, 'E', 'L', 'F'}, // ELF. + {0xFE, 0xED, 0xFA, 0xCE}, // Mach-O 32-bit BE. + {0xFE, 0xED, 0xFA, 0xCF}, // Mach-O 64-bit BE. + {0xCF, 0xFA, 0xED, 0xFE}, // Mach-O 64-bit LE. + {0xCE, 0xFA, 0xED, 0xFE}, // Mach-O 32-bit LE. + {0xCA, 0xFE, 0xBA, 0xBE}, // Mach-O fat binary. + } + for _, m := range magics { + if bytes.HasPrefix(probe, m) { + return true + } + } + return false +} + +// dispatchShebang parses probe's shebang line and execs the resolved +// interpreter via os/exec, inheriting the parent runner's cwd, env, and +// stdio. Returns interp.ExitStatus on non-zero interpreter exit so the +// parent interpreter sees it as a normal non-zero status. +func dispatchShebang(ctx context.Context, scriptPath string, probe []byte, args []string) error { + sb, err := parseShebang(probe) + if err != nil { + hc := interp.HandlerCtx(ctx) + fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err) + return interp.ExitStatus(126) + } + + interpreter, err := resolveInterpreter(sb.interpreter) + if err != nil { + hc := interp.HandlerCtx(ctx) + fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err) + return interp.ExitStatus(127) + } + + cmdArgs := append([]string{}, sb.args...) + cmdArgs = append(cmdArgs, scriptPath) + cmdArgs = append(cmdArgs, args[1:]...) + + cmd := exec.CommandContext(ctx, interpreter, cmdArgs...) + hc := interp.HandlerCtx(ctx) + cmd.Dir = hc.Dir + cmd.Env = execEnvList(hc.Env) + cmd.Stdin = hc.Stdin + cmd.Stdout = hc.Stdout + cmd.Stderr = hc.Stderr + + if err := cmd.Run(); err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + code := exitErr.ExitCode() + if code < 0 { + code = 1 + } + return interp.ExitStatus(uint8(code)) + } + return err + } + return nil +} + +// resolveInterpreter tries the literal shebang path first, then falls back +// to PATH-lookup on its basename — but only when the literal path is +// genuinely missing. A file that exists but fails stat for another reason +// (EACCES, ELOOP, etc.) surfaces the real error: silently resolving a +// different binary off PATH in that case would hide a real problem and +// produce surprising behavior for the user. +// +// The permissive fallback is what makes #!/bin/bash portable to Windows +// boxes where Git for Windows puts bash.exe on PATH but there is no +// /bin/bash on disk. +func resolveInterpreter(path string) (string, error) { + _, statErr := os.Stat(path) + if statErr == nil { + return path, nil + } + if !errors.Is(statErr, fs.ErrNotExist) { + return "", statErr + } + + base := filepath.Base(path) + if base == "" || base == path && !strings.ContainsAny(path, `/\`) { + // Already a bare name — just do a PATH lookup. + resolved, err := exec.LookPath(path) + if err != nil { + return "", fmt.Errorf("interpreter %q not found in PATH", path) + } + return resolved, nil + } + resolved, err := exec.LookPath(base) + if err != nil { + return "", fmt.Errorf("interpreter %q not found and %q not in PATH", path, base) + } + slog.Debug("Shebang interpreter not found; falling back to PATH", + "requested", path, "resolved", resolved) + return resolved, nil +} + +// shebang captures the parsed `#!` line. interpreter is the program to +// invoke; args is the list of extra arguments to pass before the script +// path. The kernel's single-arg semantics (for literal paths and for env +// without `-S`) is encoded by returning a single-element args slice +// containing the un-tokenized remainder. +type shebang struct { + interpreter string + args []string +} + +// parseShebang extracts the interpreter invocation from probe. It tolerates +// CRLF line endings and a single leading space between `#!` and the path. +// env special-cases: `/usr/bin/env NAME [args...]` unwraps to NAME with +// kernel single-arg semantics; `-S` enables tokenized argument splitting. +func parseShebang(probe []byte) (*shebang, error) { + if !hasShebang(probe) { + return nil, errors.New("not a shebang") + } + line := probe[2:] + // Take up to the first newline. + if idx := bytes.IndexByte(line, '\n'); idx >= 0 { + line = line[:idx] + } + // Strip trailing CR (CRLF-authored scripts). + line = bytes.TrimRight(line, "\r") + // Strip leading whitespace ("#! /usr/bin/env bash" is legal). + line = bytes.TrimLeft(line, " \t") + if len(line) == 0 { + return nil, errors.New("empty shebang") + } + + var pathStr, rest string + if idx := bytes.IndexAny(line, " \t"); idx >= 0 { + pathStr = string(line[:idx]) + rest = strings.TrimLeft(string(line[idx+1:]), " \t") + } else { + pathStr = string(line) + } + + if isEnvShebang(pathStr) { + return parseEnvShebang(rest) + } + + // Literal-path shebang: kernel semantics pass the remainder as a + // single argv[1], not tokenized. + sb := &shebang{interpreter: pathStr} + if rest != "" { + sb.args = []string{rest} + } + return sb, nil +} + +// isEnvShebang reports whether the shebang path targets `env`. We accept +// both common absolute paths and a bare `env` so that unusual setups +// (NixOS, BSDs) still work. +func isEnvShebang(p string) bool { + if p == "/usr/bin/env" || p == "/bin/env" { + return true + } + return filepath.Base(p) == "env" +} + +// parseEnvShebang handles `/usr/bin/env` rewriting. Without `-S`, the +// remainder after the program name is a single argv[1] (kernel +// single-arg semantics via env, even though real env would fail to find a +// program named "bash -x"). With `-S`, the remainder is tokenized on +// whitespace. Any other `env` flag is rejected — forwarding unknown flags +// to a /usr/bin/env on disk is a subtle portability footgun we don't want. +func parseEnvShebang(rest string) (*shebang, error) { + if rest == "" { + return nil, errors.New("env: missing program name") + } + + useSplit := false + if strings.HasPrefix(rest, "-") { + var flag, after string + if idx := strings.IndexAny(rest, " \t"); idx >= 0 { + flag = rest[:idx] + after = strings.TrimLeft(rest[idx+1:], " \t") + } else { + flag = rest + after = "" + } + if flag != "-S" { + return nil, fmt.Errorf("unsupported env flag: %s", flag) + } + useSplit = true + rest = after + if rest == "" { + return nil, errors.New("env -S requires a program") + } + } + + if rest == "" { + return nil, errors.New("env: missing program name") + } + + var prog, remainder string + if idx := strings.IndexAny(rest, " \t"); idx >= 0 { + prog = rest[:idx] + remainder = strings.TrimLeft(rest[idx+1:], " \t") + } else { + prog = rest + } + + sb := &shebang{interpreter: prog} + if remainder != "" { + if useSplit { + sb.args = strings.Fields(remainder) + } else { + sb.args = []string{remainder} + } + } + return sb, nil +} + +// runShellSource parses path's contents as POSIX shell and runs it +// in-process via a nested interp.Runner. It reuses the parent runner's cwd, +// env, and stdio, and rebuilds the Crush handler stack so builtins and the +// dispatch handler itself remain available to anything the script invokes. +// Positional parameters ($1, $2, …) come from args[1:]. +// +// This is the only branch that reads the full file; probeFile keeps its +// read to probeWindow bytes so the binary/shebang paths never touch more +// than 128 bytes of I/O. +func runShellSource(ctx context.Context, path string, args []string, blockFuncs []BlockFunc) error { + data, err := os.ReadFile(path) + if err != nil { + return err + } + + file, err := syntax.NewParser().Parse(bytes.NewReader(data), path) + if err != nil { + return fmt.Errorf("could not parse %s: %w", path, err) + } + + hc := interp.HandlerCtx(ctx) + + opts := []interp.RunnerOption{ + interp.StdIO(hc.Stdin, hc.Stdout, hc.Stderr), + interp.Interactive(false), + interp.Env(hc.Env), + interp.Dir(hc.Dir), + interp.ExecHandlers(standardHandlers(blockFuncs)...), + } + if len(args) > 1 { + // Params with a leading "--" avoids any of args[1:] being + // misinterpreted as set-options (e.g. a user passing "-e" as + // a positional arg to their script). + params := append([]string{"--"}, args[1:]...) + opts = append(opts, interp.Params(params...)) + } + + runner, err := interp.New(opts...) + if err != nil { + return fmt.Errorf("could not build runner for %s: %w", path, err) + } + return runner.Run(ctx, file) +} + +// execEnvList converts an expand.Environ to the []string form that +// os/exec.Cmd.Env expects. Only exported string variables are included, +// matching what a real shell would pass to a child process. +func execEnvList(env expand.Environ) []string { + var out []string + env.Each(func(name string, vr expand.Variable) bool { + if vr.Exported && vr.Kind == expand.String { + out = append(out, name+"="+vr.Str) + } + return true + }) + return out +} diff --git a/internal/shell/dispatch_test.go b/internal/shell/dispatch_test.go new file mode 100644 index 0000000000000000000000000000000000000000..d896c211d259384bce5c1ed07fcebedf75eb3367 --- /dev/null +++ b/internal/shell/dispatch_test.go @@ -0,0 +1,594 @@ +package shell + +import ( + "bytes" + "crypto/rand" + "encoding/hex" + "os" + "os/exec" + "path/filepath" + "reflect" + "runtime" + "strings" + "testing" +) + +// writeScript is a small helper that drops a file with the given contents +// and executable mode into dir. Tests that need exec semantics rely on the +// 0o755 mode on Unix; Windows ignores file modes but doesn't need them +// because dispatch decides what to do from file contents, not permissions. +func writeScript(t *testing.T, dir, name, contents string) string { + t.Helper() + path := filepath.Join(dir, name) + if err := os.WriteFile(path, []byte(contents), 0o755); err != nil { + t.Fatalf("write %s: %v", name, err) + } + return path +} + +// randSuffix returns a short random hex string, used to build +// intentionally-unique paths that won't collide with anything on disk. +func randSuffix() string { + var b [4]byte + _, _ = rand.Read(b[:]) + return hex.EncodeToString(b[:]) +} + +// TestIsPathPrefixed covers the classification rules used by the dispatch +// handler to decide whether argv[0] is a file reference. +func TestIsPathPrefixed(t *testing.T) { + cases := []struct { + in string + want bool + }{ + {"./foo.sh", true}, + {"../foo.sh", true}, + {"/usr/bin/foo", true}, + {"foo", false}, + {"foo.sh", false}, + {"jq", false}, + {"", false}, + } + for _, c := range cases { + if got := isPathPrefixed(c.in); got != c.want { + t.Errorf("isPathPrefixed(%q) = %v, want %v", c.in, got, c.want) + } + } + + if runtime.GOOS == "windows" { + winCases := []struct { + in string + want bool + }{ + {`C:\foo\bar.exe`, true}, + {`C:/foo/bar.exe`, true}, + {`c:\foo`, true}, + {`Z:/x`, true}, + {`C:`, false}, // just a drive, no path. + {`\\server\share`, true}, + } + for _, c := range winCases { + if got := isPathPrefixed(c.in); got != c.want { + t.Errorf("isPathPrefixed(%q) = %v, want %v", c.in, got, c.want) + } + } + } +} + +// TestParseShebang covers the shebang grammar: literal paths, env, +// env -S, kernel single-arg semantics, CRLF tolerance, and every +// enumerated error case. +func TestParseShebang(t *testing.T) { + type want struct { + interp string + args []string + errSub string // substring expected in error message (empty → no error) + } + cases := []struct { + name string + in string + want want + }{ + { + name: "literal-no-args", + in: "#!/bin/bash\necho body\n", + want: want{interp: "/bin/bash"}, + }, + { + name: "literal-kernel-single-arg", + in: "#!/bin/bash -x -y\n", + want: want{interp: "/bin/bash", args: []string{"-x -y"}}, + }, + { + name: "env-basic", + in: "#!/usr/bin/env bash\n", + want: want{interp: "bash"}, + }, + { + name: "env-kernel-single-arg", + in: "#!/usr/bin/env bash -x\n", + want: want{interp: "bash", args: []string{"-x"}}, + }, + { + name: "env-dash-S-splits", + in: "#!/usr/bin/env -S bash -x\n", + want: want{interp: "bash", args: []string{"-x"}}, + }, + { + name: "env-dash-S-multi-args", + in: "#!/usr/bin/env -S bash -x --noprofile\n", + want: want{interp: "bash", args: []string{"-x", "--noprofile"}}, + }, + { + name: "leading-space", + in: "#! /usr/bin/env bash\n", + want: want{interp: "bash"}, + }, + { + name: "crlf", + in: "#!/bin/bash\r\n", + want: want{interp: "/bin/bash"}, + }, + { + name: "bare-env-name", + in: "#!env bash\n", + want: want{interp: "bash"}, + }, + { + name: "empty-after-hashbang", + in: "#!\n", + want: want{errSub: "empty shebang"}, + }, + { + name: "env-alone", + in: "#!/usr/bin/env\n", + want: want{errSub: "missing program name"}, + }, + { + name: "env-dash-S-alone", + in: "#!/usr/bin/env -S\n", + want: want{errSub: "env -S requires a program"}, + }, + { + name: "env-unknown-flag", + in: "#!/usr/bin/env -x bash\n", + want: want{errSub: "unsupported env flag"}, + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + sb, err := parseShebang([]byte(c.in)) + if c.want.errSub != "" { + if err == nil || !strings.Contains(err.Error(), c.want.errSub) { + t.Fatalf("expected error containing %q, got: %v", c.want.errSub, err) + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if sb.interpreter != c.want.interp { + t.Errorf("interpreter = %q, want %q", sb.interpreter, c.want.interp) + } + if !equalStringSlice(sb.args, c.want.args) { + t.Errorf("args = %v, want %v", sb.args, c.want.args) + } + }) + } +} + +func equalStringSlice(a, b []string) bool { + if len(a) == 0 && len(b) == 0 { + return true + } + return reflect.DeepEqual(a, b) +} + +// TestIsBinary covers the NUL-byte and magic-byte classification used to +// keep compiled executables off the in-process shell-source path. +func TestIsBinary(t *testing.T) { + cases := []struct { + name string + in []byte + want bool + }{ + {"shell", []byte("echo hi\n"), false}, + {"nul", []byte("hello\x00world"), true}, + {"elf", []byte{0x7F, 'E', 'L', 'F', 0x02, 0x01}, true}, + {"mz", []byte("MZ\x90\x00"), true}, + {"macho-64-le", []byte{0xCF, 0xFA, 0xED, 0xFE}, true}, + {"short-non-binary", []byte("a"), false}, + } + for _, c := range cases { + if got := isBinary(c.in); got != c.want { + t.Errorf("%s: isBinary = %v, want %v", c.name, got, c.want) + } + } +} + +// TestDispatch_ShellSourceNoShebang exercises the in-process shell-source +// branch: a file without a shebang runs via a nested runner and sees +// positional params from argv[1:]. +func TestDispatch_ShellSourceNoShebang(t *testing.T) { + dir := t.TempDir() + script := writeScript(t, dir, "args.sh", `echo "$1 $2"`) + + var stdout bytes.Buffer + err := Run(t.Context(), RunOptions{ + Command: script + " alpha beta", + Cwd: dir, + Stdout: &stdout, + }) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + if got := stdout.String(); got != "alpha beta\n" { + t.Fatalf("stdout = %q, want %q", got, "alpha beta\n") + } +} + +// TestDispatch_EmptyFile confirms a zero-byte script runs as empty shell +// source (exit 0, no output). +func TestDispatch_EmptyFile(t *testing.T) { + dir := t.TempDir() + script := writeScript(t, dir, "empty.sh", "") + + var stdout, stderr bytes.Buffer + err := Run(t.Context(), RunOptions{ + Command: script, + Cwd: dir, + Stdout: &stdout, + Stderr: &stderr, + }) + if err != nil { + t.Fatalf("Run returned error: %v (stderr=%q)", err, stderr.String()) + } + if stdout.Len() != 0 || stderr.Len() != 0 { + t.Fatalf("expected empty output, got stdout=%q stderr=%q", stdout.String(), stderr.String()) + } +} + +// TestDispatch_ShellSourceComposesWithPipe confirms the dispatch handler +// plays nicely with mvdan's pipeline logic: a shell-source script on the +// left feeds the jq builtin on the right. +func TestDispatch_ShellSourceComposesWithPipe(t *testing.T) { + dir := t.TempDir() + script := writeScript(t, dir, "emit.sh", `printf '"value"'`) + + var stdout bytes.Buffer + err := Run(t.Context(), RunOptions{ + Command: script + ` | jq -r .`, + Cwd: dir, + Stdout: &stdout, + }) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + if got := stdout.String(); got != "value\n" { + t.Fatalf("stdout = %q, want %q", got, "value\n") + } +} + +// TestDispatch_MissingFile returns a clean error for a non-existent path. +func TestDispatch_MissingFile(t *testing.T) { + dir := t.TempDir() + missing := filepath.Join(dir, "nope.sh") + err := Run(t.Context(), RunOptions{ + Command: missing, + Cwd: dir, + }) + if err == nil { + t.Fatal("expected error for missing script, got nil") + } +} + +// TestDispatch_DirectoryNotFile surfaces a distinct error when the path +// resolves to a directory. +func TestDispatch_DirectoryNotFile(t *testing.T) { + dir := t.TempDir() + subDir := filepath.Join(dir, "adir") + if err := os.MkdirAll(subDir, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + + var stderr bytes.Buffer + err := Run(t.Context(), RunOptions{ + Command: "./adir", + Cwd: dir, + Stderr: &stderr, + }) + if err == nil { + t.Fatal("expected error when invoking a directory, got nil") + } + if !strings.Contains(err.Error(), "is a directory") { + t.Fatalf("expected 'is a directory' in error, got: %v", err) + } +} + +// TestDispatch_BashShebang runs a #!/bin/bash script via os/exec. Skipped +// if bash isn't available (rare in CI, but keep the test robust). +func TestDispatch_BashShebang(t *testing.T) { + bash, err := exec.LookPath("bash") + if err != nil { + t.Skipf("bash not in PATH: %v", err) + } + _ = bash + + dir := t.TempDir() + script := writeScript(t, dir, "bash-echo.sh", "#!/usr/bin/env bash\necho bashout\n") + + var stdout, stderr bytes.Buffer + err = Run(t.Context(), RunOptions{ + Command: script, + Cwd: dir, + Stdout: &stdout, + Stderr: &stderr, + }) + if err != nil { + t.Fatalf("Run returned error: %v (stderr=%q)", err, stderr.String()) + } + if got := stdout.String(); got != "bashout\n" { + t.Fatalf("stdout = %q, want %q", got, "bashout\n") + } +} + +// TestDispatch_ShebangPassesExitCode maps interpreter exit codes through to +// interp.ExitStatus so the caller can inspect them with ExitCode. +func TestDispatch_ShebangPassesExitCode(t *testing.T) { + if _, err := exec.LookPath("bash"); err != nil { + t.Skipf("bash not in PATH: %v", err) + } + dir := t.TempDir() + script := writeScript(t, dir, "fail.sh", "#!/usr/bin/env bash\nexit 5\n") + + err := Run(t.Context(), RunOptions{ + Command: script, + Cwd: dir, + }) + if err == nil { + t.Fatal("expected non-nil error from exit 5") + } + if code := ExitCode(err); code != 5 { + t.Fatalf("ExitCode = %d, want 5", code) + } +} + +// TestDispatch_MissingInterpreter surfaces a clear error (and non-zero +// exit) when the shebang points to a binary that doesn't exist and has +// no PATH fallback. +func TestDispatch_MissingInterpreter(t *testing.T) { + dir := t.TempDir() + script := writeScript(t, dir, "bad.sh", "#!/no/such/interpreter-"+randSuffix()+"\n:\n") + + var stderr bytes.Buffer + err := Run(t.Context(), RunOptions{ + Command: script, + Cwd: dir, + Stderr: &stderr, + }) + if err == nil { + t.Fatal("expected error for missing interpreter, got nil") + } + if ExitCode(err) == 0 { + t.Fatalf("expected non-zero exit code, got 0") + } + if !strings.Contains(stderr.String(), "not found") { + t.Fatalf("expected 'not found' in stderr, got: %q", stderr.String()) + } +} + +// TestDispatch_BarePathNotHandled confirms the handler ignores +// non-path-prefixed argv[0] entirely: a benign bare `true` command must +// not try to open a file in cwd. If dispatch were (incorrectly) firing +// on bare commands, this test would see probeFile's ENOENT. +func TestDispatch_BarePathNotHandled(t *testing.T) { + dir := t.TempDir() + err := Run(t.Context(), RunOptions{ + Command: "true", + Cwd: dir, + }) + if err != nil { + t.Fatalf("bare `true` should not trigger dispatch: %v", err) + } +} + +// TestDispatch_ProbeWindowClassifiesByHead confirms that classification is +// done on the first probeWindow bytes even when the file is much larger; +// a file whose head is shell source but whose tail contains NUL bytes is +// classified as shell source, not binary. +func TestDispatch_ProbeWindowClassifiesByHead(t *testing.T) { + dir := t.TempDir() + head := "echo prefix\n" + // Pad past probeWindow, then append some NULs. + padding := strings.Repeat(" ", probeWindow) + contents := head + padding + "\x00\x00\x00" + script := writeScript(t, dir, "long.sh", contents) + + var stdout bytes.Buffer + err := Run(t.Context(), RunOptions{ + Command: script, + Cwd: dir, + Stdout: &stdout, + }) + if err != nil { + t.Fatalf("Run returned error: %v", err) + } + if got := stdout.String(); !strings.HasPrefix(got, "prefix\n") { + t.Fatalf("stdout = %q, want prefix %q", got, "prefix\n") + } +} + +// TestDispatch_BinaryPassthroughExecutes copies a real binary from PATH +// into a tempdir, invokes it via a path-prefixed argv[0], and verifies it +// ran — i.e. the binary branch correctly returns through `next` to the +// default exec handler. We use whichever of `true`/`echo` is available on +// PATH so the test works on any Unix-y system; it skips on Windows where +// the stock binaries don't share names and the Go test binary approach +// is heavier than this test deserves. +func TestDispatch_BinaryPassthroughExecutes(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("relies on a Unix-style PATH binary") + } + src, err := exec.LookPath("true") + if err != nil { + t.Skipf("no `true` binary on PATH: %v", err) + } + data, err := os.ReadFile(src) + if err != nil { + t.Fatalf("read %s: %v", src, err) + } + dir := t.TempDir() + dst := filepath.Join(dir, "copied-true") + if err := os.WriteFile(dst, data, 0o755); err != nil { + t.Fatalf("write %s: %v", dst, err) + } + + runErr := Run(t.Context(), RunOptions{ + Command: dst, + Cwd: dir, + // Default handler needs PATH to resolve dynamic linker / loader + // helpers on some systems; inherit the process env so the copy + // can actually start. + Env: os.Environ(), + }) + if runErr != nil { + t.Fatalf("expected copy of /bin/true to exit 0, got: %v", runErr) + } +} + +// TestDispatch_UnreadableFile confirms an EACCES on the script surfaces +// as a clean error rather than a silent fallback or a mis-classified +// shell-source attempt. POSIX-only: Windows doesn't have the same +// permission model and running as root would bypass the check anyway. +func TestDispatch_UnreadableFile(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("POSIX permission model") + } + if os.Geteuid() == 0 { + t.Skip("root bypasses file mode permission checks") + } + dir := t.TempDir() + script := writeScript(t, dir, "unreadable.sh", "echo nope\n") + if err := os.Chmod(script, 0o000); err != nil { + t.Fatalf("chmod: %v", err) + } + t.Cleanup(func() { _ = os.Chmod(script, 0o644) }) + + err := Run(t.Context(), RunOptions{ + Command: script, + Cwd: dir, + }) + if err == nil { + t.Fatal("expected permission error, got nil") + } + if !strings.Contains(err.Error(), "permission") { + t.Fatalf("expected 'permission' in error, got: %v", err) + } +} + +// TestDispatch_SymlinkLoop confirms that an ELOOP-returning path surfaces +// cleanly. POSIX-only: creating symlinks reliably on Windows requires +// elevated privileges or developer mode, and neither is guaranteed in CI. +func TestDispatch_SymlinkLoop(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("symlink creation requires special privileges on Windows") + } + dir := t.TempDir() + a := filepath.Join(dir, "a") + b := filepath.Join(dir, "b") + if err := os.Symlink(b, a); err != nil { + t.Fatalf("symlink a→b: %v", err) + } + if err := os.Symlink(a, b); err != nil { + t.Fatalf("symlink b→a: %v", err) + } + + err := Run(t.Context(), RunOptions{ + Command: a, + Cwd: dir, + }) + if err == nil { + t.Fatal("expected loop error, got nil") + } + // The exact error varies by OS; any of these message fragments is + // acceptable evidence that the loop was detected. + msg := err.Error() + if !strings.Contains(msg, "too many") && + !strings.Contains(msg, "loop") && + !strings.Contains(msg, "level") { + t.Fatalf("expected symlink-loop-ish error, got: %v", err) + } +} + +// TestResolveInterpreter_PermissiveFallback confirms the key portability +// behavior: a literal shebang path that doesn't exist falls back to a +// PATH-lookup on its basename. This is what makes #!/bin/bash work on a +// Windows box where bash.exe lives somewhere else on PATH. We construct a +// fake PATH in a tempdir rather than depending on what the host has +// installed so the test is deterministic everywhere. +func TestResolveInterpreter_PermissiveFallback(t *testing.T) { + if runtime.GOOS == "windows" { + // exec.LookPath on Windows requires a recognized extension + // (.exe/.bat/.cmd). Producing one of those without a compiler + // run is more ceremony than this smoke test deserves; the + // logic under test is exercised by the Unix run. + t.Skip("Windows PATH lookup requires an extension-matched binary") + } + dir := t.TempDir() + fake := filepath.Join(dir, "bash") + if err := os.WriteFile(fake, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatalf("write fake bash: %v", err) + } + t.Setenv("PATH", dir) + + // Basename must match the fake we planted on PATH; the directory + // prefix must not exist so the literal stat fails. + missingDir := filepath.Join(dir, "definitely-not-here-"+randSuffix()) + resolved, err := resolveInterpreter(filepath.Join(missingDir, "bash")) + if err != nil { + t.Fatalf("expected fallback to succeed, got: %v", err) + } + if resolved != fake { + t.Fatalf("resolved = %q, want %q", resolved, fake) + } +} + +// TestResolveInterpreter_NonENOENTErrorsSurface guards against silently +// falling back to PATH when stat fails for a reason other than the file +// being missing. With a directory at the shebang path, os.Stat succeeds +// (no fallback needed), but with an EACCES'd file it fails with a non- +// ENOENT error that must be surfaced — otherwise we'd silently resolve a +// different binary off PATH and hide the real problem. +func TestResolveInterpreter_NonENOENTErrorsSurface(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("POSIX permission model") + } + if os.Geteuid() == 0 { + t.Skip("root bypasses dir mode permission checks") + } + dir := t.TempDir() + // Put a candidate interpreter inside an unreadable/untraversable dir. + inner := filepath.Join(dir, "private") + if err := os.Mkdir(inner, 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + interp := filepath.Join(inner, "bash") + if err := os.WriteFile(interp, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatalf("write interpreter: %v", err) + } + // Drop search permission on inner so os.Stat(interp) returns EACCES. + if err := os.Chmod(inner, 0o000); err != nil { + t.Fatalf("chmod: %v", err) + } + t.Cleanup(func() { _ = os.Chmod(inner, 0o755) }) + + _, err := resolveInterpreter(interp) + if err == nil { + t.Fatal("expected error for unreadable interpreter, got nil") + } + // Must NOT have silently fallen back — the returned path shouldn't + // be a valid resolution; either way, the error has to surface. + if !strings.Contains(err.Error(), "permission") { + t.Fatalf("expected permission-denied error to surface, got: %v", err) + } +} diff --git a/internal/shell/dispatch_windows_test.go b/internal/shell/dispatch_windows_test.go new file mode 100644 index 0000000000000000000000000000000000000000..529945ad5a33eac5d943bdd2a562a46d8d4ff000 --- /dev/null +++ b/internal/shell/dispatch_windows_test.go @@ -0,0 +1,40 @@ +//go:build windows + +package shell + +import ( + "os" + "path/filepath" + "testing" +) + +// TestResolveInterpreter_PermissiveFallback_Windows is the Windows-native +// counterpart to the POSIX permissive-fallback test. It proves the one +// behavior that makes `#!/bin/bash` hooks work on a stock Windows box +// with Git for Windows installed: when the literal interpreter path does +// not exist, we fall back to a PATH-lookup on the basename and that +// lookup accepts any executable extension Windows honors (here, `.bat`). +// +// We plant a bash.bat in a tempdir rather than a .exe because producing +// a .exe would require a toolchain step; LookPath on Windows resolves +// PATHEXT extensions, so .bat is just as valid for the lookup codepath. +func TestResolveInterpreter_PermissiveFallback_Windows(t *testing.T) { + dir := t.TempDir() + fake := filepath.Join(dir, "bash.bat") + contents := "@echo off\r\nexit /b 0\r\n" + if err := os.WriteFile(fake, []byte(contents), 0o755); err != nil { + t.Fatalf("write fake bash.bat: %v", err) + } + t.Setenv("PATH", dir) + t.Setenv("PATHEXT", ".BAT;.CMD;.EXE") + + // Literal path must be absent so the stat fails with ENOENT. + missing := filepath.Join(dir, "definitely-not-here-"+randSuffix(), "bash") + resolved, err := resolveInterpreter(missing) + if err != nil { + t.Fatalf("expected fallback to succeed, got: %v", err) + } + if resolved != fake { + t.Fatalf("resolved = %q, want %q", resolved, fake) + } +} diff --git a/internal/shell/run.go b/internal/shell/run.go index fb785a7b04dee6a64d6ad8e704ae19ff3525561e..d0ff921e7e31a479d9e03ff33ed1fb05340e308c 100644 --- a/internal/shell/run.go +++ b/internal/shell/run.go @@ -99,12 +99,18 @@ func newRunner(cwd string, env []string, stdin io.Reader, stdout, stderr io.Writ } // standardHandlers returns the exec-handler middleware chain used by both -// [Run] and [Shell]. Order matters: builtins first (so Crush's in-process -// jq wins over any PATH binary), then the block list, then optional Go -// coreutils. Future middleware (shebang dispatch, etc.) inserts here. +// [Run] and [Shell]. Order matters: +// 1. builtins first (so Crush's in-process jq wins over any PATH binary); +// 2. script dispatch (shebang / binary / shell-source for path-prefixed +// argv[0], no-op for bare commands) — runs before the block list so +// that deny rules see the already-resolved argv of anything the +// script exec's rather than the outer path-prefixed wrapper; +// 3. block list; +// 4. optional Go coreutils (only when useGoCoreUtils is on). func standardHandlers(blockFuncs []BlockFunc) []func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc { handlers := []func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc{ builtinHandler(), + scriptDispatchHandler(blockFuncs), blockHandler(blockFuncs), } if useGoCoreUtils {