dispatch.go

  1package shell
  2
  3import (
  4	"bytes"
  5	"context"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"io/fs"
 10	"log/slog"
 11	"os"
 12	"os/exec"
 13	"path/filepath"
 14	"runtime"
 15	"strings"
 16
 17	"mvdan.cc/sh/v3/expand"
 18	"mvdan.cc/sh/v3/interp"
 19	"mvdan.cc/sh/v3/syntax"
 20)
 21
 22// probeWindow is how many bytes we read from the head of a file to decide
 23// how to dispatch it. 128 is plenty for a shebang line and for magic-byte
 24// inspection, while small enough to make the probe cheap for users whose
 25// hooks invoke many scripts.
 26const probeWindow = 128
 27
 28// scriptDispatchHandler returns middleware that intercepts exec of a
 29// path-prefixed argv[0] (e.g. ./foo.sh, /opt/bin/tool, C:\foo\bar.exe) and
 30// dispatches based on the file's contents:
 31//
 32//  1. Shebang line (#!...) → exec the named interpreter via os/exec. The
 33//     interpreter is resolved literally first, then via PATH on the
 34//     basename as a permissive fallback (so #!/bin/bash works on Windows
 35//     boxes where Git for Windows puts bash.exe on PATH).
 36//  2. Known binary magic (MZ, ELF, Mach-O) or a NUL byte in the probe
 37//     window → pass through to the next handler (mvdan's default exec).
 38//  3. Otherwise → treat the file as shell source and run it in-process via
 39//     a nested interp.Runner that reuses the same handler stack.
 40//
 41// Non-path-prefixed argv[0] and empty args are passed straight through; this
 42// handler is a no-op for ordinary commands like `echo` or `jq`.
 43//
 44// blockFuncs is the block list used when building the nested runner for the
 45// shell-source case, so deny rules apply recursively to commands invoked
 46// from in-process scripts.
 47func scriptDispatchHandler(blockFuncs []BlockFunc) func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
 48	return func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
 49		return func(ctx context.Context, args []string) error {
 50			if len(args) == 0 || !isPathPrefixed(args[0]) {
 51				return next(ctx, args)
 52			}
 53
 54			scriptPath := args[0]
 55			// Resolve relative paths against the interpreter's cwd, not
 56			// the process cwd — hook commands are authored with the hook
 57			// Runner's cwd in mind and sub-shells can cd before an exec.
 58			if !filepath.IsAbs(scriptPath) {
 59				scriptPath = filepath.Join(interp.HandlerCtx(ctx).Dir, scriptPath)
 60			}
 61			probe, err := probeFile(scriptPath)
 62			if err != nil {
 63				return err
 64			}
 65
 66			switch {
 67			case hasShebang(probe):
 68				return dispatchShebang(ctx, scriptPath, probe, args)
 69			case isBinary(probe):
 70				return next(ctx, args)
 71			default:
 72				return runShellSource(ctx, scriptPath, args, blockFuncs)
 73			}
 74		}
 75	}
 76}
 77
 78// isPathPrefixed reports whether argv[0] is a file reference (as opposed
 79// to a bare command to be resolved via PATH). A path reference starts with
 80// `./`, `../`, `/`, or — on Windows — a drive-letter prefix.
 81//
 82// Note: mvdan already performs tilde expansion during word expansion, so
 83// `~/script.sh` arrives here as an absolute path. We still call the helper
 84// on the raw string to stay robust if a future change ever bypasses that
 85// expansion; cover that path with a regression test.
 86func isPathPrefixed(arg string) bool {
 87	switch {
 88	case strings.HasPrefix(arg, "./"),
 89		strings.HasPrefix(arg, "../"),
 90		strings.HasPrefix(arg, "/"):
 91		return true
 92	}
 93	if runtime.GOOS == "windows" {
 94		// Drive-letter paths: C:\foo or C:/foo (length check avoids
 95		// accidentally matching a single letter followed by a colon).
 96		if len(arg) >= 3 && isDriveLetter(arg[0]) && arg[1] == ':' &&
 97			(arg[2] == '\\' || arg[2] == '/') {
 98			return true
 99		}
100		// Also treat backslash-prefixed UNC-like paths as path-prefixed.
101		if strings.HasPrefix(arg, "\\") {
102			return true
103		}
104	}
105	return false
106}
107
108func isDriveLetter(b byte) bool {
109	return (b >= 'A' && b <= 'Z') || (b >= 'a' && b <= 'z')
110}
111
112// probeFile reads the first probeWindow bytes of the target path. It
113// deliberately does not slurp the whole file: callers that need the full
114// contents (only the shell-source branch) re-open via os.ReadFile. This
115// keeps memory bounded when argv[0] turns out to be a large binary.
116//
117// Returns errors surfaced by os.Open/os.Stat directly so callers see the
118// real reason: ENOENT, EACCES, EISDIR, ELOOP, etc.
119func probeFile(path string) ([]byte, error) {
120	f, err := os.Open(path)
121	if err != nil {
122		return nil, err
123	}
124	defer f.Close()
125	fi, err := f.Stat()
126	if err != nil {
127		return nil, err
128	}
129	if fi.IsDir() {
130		return nil, fmt.Errorf("%s: is a directory", path)
131	}
132	probe := make([]byte, probeWindow)
133	n, err := io.ReadFull(f, probe)
134	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
135		return nil, err
136	}
137	return probe[:n], nil
138}
139
// hasShebang reports whether probe begins with the two-byte `#!` marker.
// Anything shorter than two bytes (including a lone `#`) is not a shebang.
func hasShebang(probe []byte) bool {
	if len(probe) < 2 {
		return false
	}
	return string(probe[:2]) == "#!"
}
145
// isBinary is a heuristic that reports whether probe looks like an
// executable or other non-text file. It returns true when probe contains a
// NUL byte anywhere, or when it begins with one of the recognized magic
// numbers (DOS/PE "MZ", ELF, or the Mach-O variants). The magic check
// catches binaries that happen to have no NUL within the probe.
func isBinary(probe []byte) bool {
	if bytes.IndexByte(probe, 0x00) != -1 {
		return true
	}

	startsWith := func(magic ...byte) bool {
		return bytes.HasPrefix(probe, magic)
	}
	switch {
	case startsWith('M', 'Z'), // Windows PE / DOS MZ.
		startsWith(0x7F, 'E', 'L', 'F'),    // ELF.
		startsWith(0xFE, 0xED, 0xFA, 0xCE), // Mach-O 32-bit BE.
		startsWith(0xFE, 0xED, 0xFA, 0xCF), // Mach-O 64-bit BE.
		startsWith(0xCF, 0xFA, 0xED, 0xFE), // Mach-O 64-bit LE.
		startsWith(0xCE, 0xFA, 0xED, 0xFE), // Mach-O 32-bit LE.
		startsWith(0xCA, 0xFE, 0xBA, 0xBE): // Mach-O fat binary.
		return true
	}
	return false
}
171
172// dispatchShebang parses probe's shebang line and execs the resolved
173// interpreter via os/exec, inheriting the parent runner's cwd, env, and
174// stdio. Returns interp.ExitStatus on non-zero interpreter exit so the
175// parent interpreter sees it as a normal non-zero status.
176func dispatchShebang(ctx context.Context, scriptPath string, probe []byte, args []string) error {
177	sb, err := parseShebang(probe)
178	if err != nil {
179		hc := interp.HandlerCtx(ctx)
180		fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
181		return interp.ExitStatus(126)
182	}
183
184	interpreter, err := resolveInterpreter(sb.interpreter)
185	if err != nil {
186		hc := interp.HandlerCtx(ctx)
187		fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
188		return interp.ExitStatus(127)
189	}
190
191	cmdArgs := append([]string{}, sb.args...)
192	cmdArgs = append(cmdArgs, scriptPath)
193	cmdArgs = append(cmdArgs, args[1:]...)
194
195	cmd := exec.CommandContext(ctx, interpreter, cmdArgs...)
196	hc := interp.HandlerCtx(ctx)
197	cmd.Dir = hc.Dir
198	cmd.Env = execEnvList(hc.Env)
199	cmd.Stdin = hc.Stdin
200	cmd.Stdout = hc.Stdout
201	cmd.Stderr = hc.Stderr
202
203	if err := cmd.Run(); err != nil {
204		var exitErr *exec.ExitError
205		if errors.As(err, &exitErr) {
206			code := exitErr.ExitCode()
207			if code < 0 {
208				code = 1
209			}
210			return interp.ExitStatus(uint8(code))
211		}
212		return err
213	}
214	return nil
215}
216
// resolveInterpreter turns the interpreter named on a shebang line into
// something os/exec can run.
//
// A bare name (no `/` or `\`, e.g. the "bash" left over from
// `#!/usr/bin/env bash`) is resolved purely via PATH. It is deliberately
// not stat'ed first: a stat would be relative to the process working
// directory, so an unrelated file or directory that happens to share the
// interpreter's name would be mistaken for the interpreter.
//
// A path is used literally when it exists. When it is genuinely missing,
// its basename is looked up on PATH as a permissive fallback; this is what
// makes #!/bin/bash work on Windows machines where Git for Windows puts
// bash.exe on PATH but there is no /bin/bash on disk. Any other stat
// failure (EACCES, ELOOP, ...) is returned as-is, because silently running
// a different binary from PATH would hide a real problem.
func resolveInterpreter(path string) (string, error) {
	if !strings.ContainsAny(path, `/\`) {
		// Bare name: PATH lookup only.
		resolved, err := exec.LookPath(path)
		if err != nil {
			return "", fmt.Errorf("interpreter %q not found in PATH", path)
		}
		return resolved, nil
	}

	_, statErr := os.Stat(path)
	if statErr == nil {
		return path, nil
	}
	if !errors.Is(statErr, fs.ErrNotExist) {
		return "", statErr
	}

	// The literal path does not exist; fall back to its basename on PATH.
	base := filepath.Base(path)
	resolved, err := exec.LookPath(base)
	if err != nil {
		return "", fmt.Errorf("interpreter %q not found and %q not in PATH", path, base)
	}
	slog.Debug("Shebang interpreter not found; falling back to PATH",
		"requested", path, "resolved", resolved)
	return resolved, nil
}
253
// shebang is the parsed form of a `#!` line: which program to run and
// which arguments to place before the script path.
type shebang struct {
	// interpreter is the program to invoke, as written on the shebang
	// line (or the program named after `env`).
	interpreter string

	// args holds the extra arguments passed ahead of the script path.
	// Under the kernel's single-argument semantics (literal paths, and
	// env without `-S`) it has at most one element: the un-tokenized
	// remainder of the line.
	args []string
}
263
264// parseShebang extracts the interpreter invocation from probe. It tolerates
265// CRLF line endings and a single leading space between `#!` and the path.
266// env special-cases: `/usr/bin/env NAME [args...]` unwraps to NAME with
267// kernel single-arg semantics; `-S` enables tokenized argument splitting.
268func parseShebang(probe []byte) (*shebang, error) {
269	if !hasShebang(probe) {
270		return nil, errors.New("not a shebang")
271	}
272	line := probe[2:]
273	// Take up to the first newline.
274	if idx := bytes.IndexByte(line, '\n'); idx >= 0 {
275		line = line[:idx]
276	}
277	// Strip trailing CR (CRLF-authored scripts).
278	line = bytes.TrimRight(line, "\r")
279	// Strip leading whitespace ("#! /usr/bin/env bash" is legal).
280	line = bytes.TrimLeft(line, " \t")
281	if len(line) == 0 {
282		return nil, errors.New("empty shebang")
283	}
284
285	var pathStr, rest string
286	if idx := bytes.IndexAny(line, " \t"); idx >= 0 {
287		pathStr = string(line[:idx])
288		rest = strings.TrimLeft(string(line[idx+1:]), " \t")
289	} else {
290		pathStr = string(line)
291	}
292
293	if isEnvShebang(pathStr) {
294		return parseEnvShebang(rest)
295	}
296
297	// Literal-path shebang: kernel semantics pass the remainder as a
298	// single argv[1], not tokenized.
299	sb := &shebang{interpreter: pathStr}
300	if rest != "" {
301		sb.args = []string{rest}
302	}
303	return sb, nil
304}
305
// isEnvShebang reports whether the shebang interpreter path p refers to
// `env`. The decision is made on the final path element alone, which
// covers the usual /usr/bin/env and /bin/env as well as a bare `env` or an
// env installed somewhere unusual (NixOS, BSDs).
func isEnvShebang(p string) bool {
	return filepath.Base(p) == "env"
}
315
316// parseEnvShebang handles `/usr/bin/env` rewriting. Without `-S`, the
317// remainder after the program name is a single argv[1] (kernel
318// single-arg semantics via env, even though real env would fail to find a
319// program named "bash -x"). With `-S`, the remainder is tokenized on
320// whitespace. Any other `env` flag is rejected — forwarding unknown flags
321// to a /usr/bin/env on disk is a subtle portability footgun we don't want.
322func parseEnvShebang(rest string) (*shebang, error) {
323	if rest == "" {
324		return nil, errors.New("env: missing program name")
325	}
326
327	useSplit := false
328	if strings.HasPrefix(rest, "-") {
329		var flag, after string
330		if idx := strings.IndexAny(rest, " \t"); idx >= 0 {
331			flag = rest[:idx]
332			after = strings.TrimLeft(rest[idx+1:], " \t")
333		} else {
334			flag = rest
335			after = ""
336		}
337		if flag != "-S" {
338			return nil, fmt.Errorf("unsupported env flag: %s", flag)
339		}
340		useSplit = true
341		rest = after
342		if rest == "" {
343			return nil, errors.New("env -S requires a program")
344		}
345	}
346
347	if rest == "" {
348		return nil, errors.New("env: missing program name")
349	}
350
351	var prog, remainder string
352	if idx := strings.IndexAny(rest, " \t"); idx >= 0 {
353		prog = rest[:idx]
354		remainder = strings.TrimLeft(rest[idx+1:], " \t")
355	} else {
356		prog = rest
357	}
358
359	sb := &shebang{interpreter: prog}
360	if remainder != "" {
361		if useSplit {
362			sb.args = strings.Fields(remainder)
363		} else {
364			sb.args = []string{remainder}
365		}
366	}
367	return sb, nil
368}
369
370// runShellSource parses path's contents as POSIX shell and runs it
371// in-process via a nested interp.Runner. It reuses the parent runner's cwd,
372// env, and stdio, and rebuilds the Crush handler stack so builtins and the
373// dispatch handler itself remain available to anything the script invokes.
374// Positional parameters ($1, $2, …) come from args[1:].
375//
376// This is the only branch that reads the full file; probeFile keeps its
377// read to probeWindow bytes so the binary/shebang paths never touch more
378// than 128 bytes of I/O.
379func runShellSource(ctx context.Context, path string, args []string, blockFuncs []BlockFunc) error {
380	data, err := os.ReadFile(path)
381	if err != nil {
382		return err
383	}
384
385	file, err := syntax.NewParser().Parse(bytes.NewReader(data), path)
386	if err != nil {
387		return fmt.Errorf("could not parse %s: %w", path, err)
388	}
389
390	hc := interp.HandlerCtx(ctx)
391
392	opts := []interp.RunnerOption{
393		interp.StdIO(hc.Stdin, hc.Stdout, hc.Stderr),
394		interp.Interactive(false),
395		interp.Env(hc.Env),
396		interp.Dir(hc.Dir),
397		interp.ExecHandlers(standardHandlers(blockFuncs)...),
398	}
399	if len(args) > 1 {
400		// Params with a leading "--" avoids any of args[1:] being
401		// misinterpreted as set-options (e.g. a user passing "-e" as
402		// a positional arg to their script).
403		params := append([]string{"--"}, args[1:]...)
404		opts = append(opts, interp.Params(params...))
405	}
406
407	runner, err := interp.New(opts...)
408	if err != nil {
409		return fmt.Errorf("could not build runner for %s: %w", path, err)
410	}
411	return runner.Run(ctx, file)
412}
413
414// execEnvList converts an expand.Environ to the []string form that
415// os/exec.Cmd.Env expects. Only exported string variables are included,
416// matching what a real shell would pass to a child process.
417func execEnvList(env expand.Environ) []string {
418	var out []string
419	env.Each(func(name string, vr expand.Variable) bool {
420		if vr.Exported && vr.Kind == expand.String {
421			out = append(out, name+"="+vr.Str)
422		}
423		return true
424	})
425	return out
426}