dispatch.go

  1package shell
  2
  3import (
  4	"bytes"
  5	"context"
  6	"errors"
  7	"fmt"
  8	"io"
  9	"io/fs"
 10	"log/slog"
 11	"os"
 12	"os/exec"
 13	"path/filepath"
 14	"runtime"
 15	"strings"
 16
 17	"github.com/charmbracelet/crush/internal/filepathext"
 18	"mvdan.cc/sh/v3/expand"
 19	"mvdan.cc/sh/v3/interp"
 20	"mvdan.cc/sh/v3/syntax"
 21)
 22
// probeWindow is the number of bytes read from the head of a file to decide
// how to dispatch it. 128 bytes is plenty for a typical shebang line and for
// magic-byte inspection, while small enough to keep the probe cheap for
// users whose hooks invoke many scripts.
//
// Anything that inspects the probe sees at most this many bytes, so a first
// line longer than the window is only partially visible to it.
const probeWindow = 128
 28
 29// scriptDispatchHandler returns middleware that intercepts exec of a
 30// path-prefixed argv[0] (e.g. ./foo.sh, /opt/bin/tool, C:\foo\bar.exe) and
 31// dispatches based on the file's contents:
 32//
 33//  1. Shebang line (#!...) → exec the named interpreter via os/exec. The
 34//     interpreter is resolved literally first, then via PATH on the
 35//     basename as a permissive fallback (so #!/bin/bash works on Windows
 36//     boxes where Git for Windows puts bash.exe on PATH).
 37//  2. Known binary magic (MZ, ELF, Mach-O) or a NUL byte in the probe
 38//     window → pass through to the next handler (mvdan's default exec).
 39//  3. Otherwise → treat the file as shell source and run it in-process via
 40//     a nested interp.Runner that reuses the same handler stack.
 41//
 42// Non-path-prefixed argv[0] and empty args are passed straight through; this
 43// handler is a no-op for ordinary commands like `echo` or `jq`.
 44//
 45// blockFuncs is the block list used when building the nested runner for the
 46// shell-source case, so deny rules apply recursively to commands invoked
 47// from in-process scripts.
 48func scriptDispatchHandler(blockFuncs []BlockFunc) func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
 49	return func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
 50		return func(ctx context.Context, args []string) error {
 51			if len(args) == 0 || !isPathPrefixed(args[0]) {
 52				return next(ctx, args)
 53			}
 54
 55			// Resolve relative paths against the interpreter's cwd, not
 56			// the process cwd — hook commands are authored with the hook
 57			// Runner's cwd in mind and sub-shells can cd before an exec.
 58			scriptPath := filepathext.SmartJoin(interp.HandlerCtx(ctx).Dir, args[0])
 59			probe, err := probeFile(scriptPath)
 60			if err != nil {
 61				return err
 62			}
 63
 64			switch {
 65			case hasShebang(probe):
 66				return dispatchShebang(ctx, scriptPath, probe, args)
 67			case isBinary(probe):
 68				return next(ctx, args)
 69			default:
 70				return runShellSource(ctx, scriptPath, args, blockFuncs)
 71			}
 72		}
 73	}
 74}
 75
 76// isPathPrefixed reports whether argv[0] is a file reference (as opposed
 77// to a bare command to be resolved via PATH). A path reference starts with
 78// `./`, `../`, `/`, or — on Windows — a drive-letter prefix.
 79//
 80// Note: mvdan already performs tilde expansion during word expansion, so
 81// `~/script.sh` arrives here as an absolute path. We still call the helper
 82// on the raw string to stay robust if a future change ever bypasses that
 83// expansion; cover that path with a regression test.
 84func isPathPrefixed(arg string) bool {
 85	switch {
 86	case strings.HasPrefix(arg, "./"),
 87		strings.HasPrefix(arg, "../"),
 88		strings.HasPrefix(arg, "/"):
 89		return true
 90	}
 91	if runtime.GOOS == "windows" {
 92		// Drive-letter paths: C:\foo or C:/foo (length check avoids
 93		// accidentally matching a single letter followed by a colon).
 94		if len(arg) >= 3 && isDriveLetter(arg[0]) && arg[1] == ':' &&
 95			(arg[2] == '\\' || arg[2] == '/') {
 96			return true
 97		}
 98		// Also treat backslash-prefixed UNC-like paths as path-prefixed.
 99		if strings.HasPrefix(arg, "\\") {
100			return true
101		}
102	}
103	return false
104}
105
// isDriveLetter reports whether b is an ASCII letter, upper or lower case.
func isDriveLetter(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case 'A' <= b && b <= 'Z':
		return true
	default:
		return false
	}
}
109
110// probeFile reads the first probeWindow bytes of the target path. It
111// deliberately does not slurp the whole file: callers that need the full
112// contents (only the shell-source branch) re-open via os.ReadFile. This
113// keeps memory bounded when argv[0] turns out to be a large binary.
114//
115// Returns errors surfaced by os.Open/os.Stat directly so callers see the
116// real reason: ENOENT, EACCES, EISDIR, ELOOP, etc.
117func probeFile(path string) ([]byte, error) {
118	f, err := os.Open(path)
119	if err != nil {
120		return nil, err
121	}
122	defer f.Close()
123	fi, err := f.Stat()
124	if err != nil {
125		return nil, err
126	}
127	if fi.IsDir() {
128		return nil, fmt.Errorf("%s: is a directory", path)
129	}
130	probe := make([]byte, probeWindow)
131	n, err := io.ReadFull(f, probe)
132	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
133		return nil, err
134	}
135	return probe[:n], nil
136}
137
// hasShebang reports whether probe begins with the two-byte `#!` marker.
// Inputs shorter than two bytes (including a lone `#`) never match.
func hasShebang(probe []byte) bool {
	return bytes.HasPrefix(probe, []byte("#!"))
}
143
// isBinary heuristically decides whether probe is the head of an executable
// or other non-text file. It answers true when either:
//
//   - probe contains a NUL byte anywhere (the classic text-vs-binary
//     signal), or
//   - probe starts with a recognized magic number: DOS/PE "MZ", ELF, or one
//     of the Mach-O variants (32/64-bit in either byte order, or fat).
//
// The magic check exists for well-formed binaries that happen to carry no
// NUL inside the probed bytes.
func isBinary(probe []byte) bool {
	if bytes.IndexByte(probe, 0) != -1 {
		return true
	}

	// Two-byte magic: Windows PE / DOS MZ.
	if len(probe) >= 2 && probe[0] == 'M' && probe[1] == 'Z' {
		return true
	}

	// Every remaining magic is exactly four bytes long.
	if len(probe) < 4 {
		return false
	}
	switch string(probe[:4]) {
	case "\x7fELF": // ELF.
		return true
	case "\xfe\xed\xfa\xce", "\xfe\xed\xfa\xcf": // Mach-O 32-bit / 64-bit BE.
		return true
	case "\xce\xfa\xed\xfe", "\xcf\xfa\xed\xfe": // Mach-O 32-bit / 64-bit LE.
		return true
	case "\xca\xfe\xba\xbe": // Mach-O fat binary.
		return true
	}
	return false
}
169
170// dispatchShebang parses probe's shebang line and execs the resolved
171// interpreter via os/exec, inheriting the parent runner's cwd, env, and
172// stdio. Returns interp.ExitStatus on non-zero interpreter exit so the
173// parent interpreter sees it as a normal non-zero status.
174func dispatchShebang(ctx context.Context, scriptPath string, probe []byte, args []string) error {
175	sb, err := parseShebang(probe)
176	if err != nil {
177		hc := interp.HandlerCtx(ctx)
178		fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
179		return interp.ExitStatus(126)
180	}
181
182	interpreter, err := resolveInterpreter(sb.interpreter)
183	if err != nil {
184		hc := interp.HandlerCtx(ctx)
185		fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
186		return interp.ExitStatus(127)
187	}
188
189	cmdArgs := append([]string{}, sb.args...)
190	cmdArgs = append(cmdArgs, scriptPath)
191	cmdArgs = append(cmdArgs, args[1:]...)
192
193	cmd := exec.CommandContext(ctx, interpreter, cmdArgs...)
194	hc := interp.HandlerCtx(ctx)
195	cmd.Dir = hc.Dir
196	cmd.Env = execEnvList(hc.Env)
197	cmd.Stdin = hc.Stdin
198	cmd.Stdout = hc.Stdout
199	cmd.Stderr = hc.Stderr
200
201	if err := cmd.Run(); err != nil {
202		var exitErr *exec.ExitError
203		if errors.As(err, &exitErr) {
204			code := exitErr.ExitCode()
205			if code < 0 {
206				code = 1
207			}
208			return interp.ExitStatus(uint8(code))
209		}
210		return err
211	}
212	return nil
213}
214
// resolveInterpreter turns the interpreter named on a shebang line into
// something that can be executed.
//
// The literal path wins whenever os.Stat succeeds on it. If the stat fails
// for any reason other than "does not exist" (EACCES, ELOOP, ...), that
// error is returned unchanged: quietly picking a different binary off PATH
// would hide a real problem and surprise the user.
//
// Only when the literal path is missing is PATH consulted: for a bare name
// the name itself is looked up, otherwise the path's base name is. This
// permissive fallback is what lets `#!/bin/bash` work on Windows machines
// where bash.exe is on PATH but no /bin/bash exists on disk. A successful
// base-name fallback is logged at debug level.
func resolveInterpreter(path string) (string, error) {
	switch _, err := os.Stat(path); {
	case err == nil:
		return path, nil
	case !errors.Is(err, fs.ErrNotExist):
		return "", err
	}

	name := filepath.Base(path)
	// A bare name has no separator of either flavor and is its own base.
	bare := name == "" || (name == path && !strings.ContainsAny(path, `/\`))
	if bare {
		found, err := exec.LookPath(path)
		if err != nil {
			return "", fmt.Errorf("interpreter %q not found in PATH", path)
		}
		return found, nil
	}

	found, err := exec.LookPath(name)
	if err != nil {
		return "", fmt.Errorf("interpreter %q not found and %q not in PATH", path, name)
	}
	slog.Debug("Shebang interpreter not found; falling back to PATH",
		"requested", path, "resolved", found)
	return found, nil
}
251
// shebang captures the parsed `#!` line. interpreter is the program to
// invoke; args is the list of extra arguments to pass before the script
// path. The kernel's single-arg semantics (for literal paths and for env
// without `-S`) is encoded by returning a single-element args slice
// containing the un-tokenized remainder.
type shebang struct {
	// interpreter is the program named on the shebang line, or the program
	// named after `env` when the line goes through env. It is stored as
	// written and is not resolved against the filesystem or PATH here.
	interpreter string
	// args holds the arguments placed ahead of the script path. It is nil
	// when the line has none, a single un-tokenized element for literal
	// paths and for env without `-S`, and whitespace-split fields for
	// `env -S`.
	args        []string
}
261
262// parseShebang extracts the interpreter invocation from probe. It tolerates
263// CRLF line endings and a single leading space between `#!` and the path.
264// env special-cases: `/usr/bin/env NAME [args...]` unwraps to NAME with
265// kernel single-arg semantics; `-S` enables tokenized argument splitting.
266func parseShebang(probe []byte) (*shebang, error) {
267	if !hasShebang(probe) {
268		return nil, errors.New("not a shebang")
269	}
270	line := probe[2:]
271	// Take up to the first newline.
272	if idx := bytes.IndexByte(line, '\n'); idx >= 0 {
273		line = line[:idx]
274	}
275	// Strip trailing CR (CRLF-authored scripts).
276	line = bytes.TrimRight(line, "\r")
277	// Strip leading whitespace ("#! /usr/bin/env bash" is legal).
278	line = bytes.TrimLeft(line, " \t")
279	if len(line) == 0 {
280		return nil, errors.New("empty shebang")
281	}
282
283	var pathStr, rest string
284	if idx := bytes.IndexAny(line, " \t"); idx >= 0 {
285		pathStr = string(line[:idx])
286		rest = strings.TrimLeft(string(line[idx+1:]), " \t")
287	} else {
288		pathStr = string(line)
289	}
290
291	if isEnvShebang(pathStr) {
292		return parseEnvShebang(rest)
293	}
294
295	// Literal-path shebang: kernel semantics pass the remainder as a
296	// single argv[1], not tokenized.
297	sb := &shebang{interpreter: pathStr}
298	if rest != "" {
299		sb.args = []string{rest}
300	}
301	return sb, nil
302}
303
// isEnvShebang reports whether the shebang interpreter path p refers to
// `env`. The decision is made on the final path element alone, so the
// usual /usr/bin/env and /bin/env match, as does a bare `env` or an env
// living in any other directory (unusual layouts such as NixOS or the
// BSDs).
func isEnvShebang(p string) bool {
	return filepath.Base(p) == "env"
}
313
314// parseEnvShebang handles `/usr/bin/env` rewriting. Without `-S`, the
315// remainder after the program name is a single argv[1] (kernel
316// single-arg semantics via env, even though real env would fail to find a
317// program named "bash -x"). With `-S`, the remainder is tokenized on
318// whitespace. Any other `env` flag is rejected — forwarding unknown flags
319// to a /usr/bin/env on disk is a subtle portability footgun we don't want.
320func parseEnvShebang(rest string) (*shebang, error) {
321	if rest == "" {
322		return nil, errors.New("env: missing program name")
323	}
324
325	useSplit := false
326	if strings.HasPrefix(rest, "-") {
327		var flag, after string
328		if idx := strings.IndexAny(rest, " \t"); idx >= 0 {
329			flag = rest[:idx]
330			after = strings.TrimLeft(rest[idx+1:], " \t")
331		} else {
332			flag = rest
333			after = ""
334		}
335		if flag != "-S" {
336			return nil, fmt.Errorf("unsupported env flag: %s", flag)
337		}
338		useSplit = true
339		rest = after
340		if rest == "" {
341			return nil, errors.New("env -S requires a program")
342		}
343	}
344
345	if rest == "" {
346		return nil, errors.New("env: missing program name")
347	}
348
349	var prog, remainder string
350	if idx := strings.IndexAny(rest, " \t"); idx >= 0 {
351		prog = rest[:idx]
352		remainder = strings.TrimLeft(rest[idx+1:], " \t")
353	} else {
354		prog = rest
355	}
356
357	sb := &shebang{interpreter: prog}
358	if remainder != "" {
359		if useSplit {
360			sb.args = strings.Fields(remainder)
361		} else {
362			sb.args = []string{remainder}
363		}
364	}
365	return sb, nil
366}
367
368// runShellSource parses path's contents as POSIX shell and runs it
369// in-process via a nested interp.Runner. It reuses the parent runner's cwd,
370// env, and stdio, and rebuilds the Crush handler stack so builtins and the
371// dispatch handler itself remain available to anything the script invokes.
372// Positional parameters ($1, $2, …) come from args[1:].
373//
374// This is the only branch that reads the full file; probeFile keeps its
375// read to probeWindow bytes so the binary/shebang paths never touch more
376// than 128 bytes of I/O.
377func runShellSource(ctx context.Context, path string, args []string, blockFuncs []BlockFunc) error {
378	data, err := os.ReadFile(path)
379	if err != nil {
380		return err
381	}
382
383	file, err := syntax.NewParser().Parse(bytes.NewReader(data), path)
384	if err != nil {
385		return fmt.Errorf("could not parse %s: %w", path, err)
386	}
387
388	hc := interp.HandlerCtx(ctx)
389
390	opts := []interp.RunnerOption{
391		interp.StdIO(hc.Stdin, hc.Stdout, hc.Stderr),
392		interp.Interactive(false),
393		interp.Env(hc.Env),
394		interp.Dir(hc.Dir),
395		interp.ExecHandlers(standardHandlers(blockFuncs)...),
396	}
397	if len(args) > 1 {
398		// Params with a leading "--" avoids any of args[1:] being
399		// misinterpreted as set-options (e.g. a user passing "-e" as
400		// a positional arg to their script).
401		params := append([]string{"--"}, args[1:]...)
402		opts = append(opts, interp.Params(params...))
403	}
404
405	runner, err := interp.New(opts...)
406	if err != nil {
407		return fmt.Errorf("could not build runner for %s: %w", path, err)
408	}
409	return runner.Run(ctx, file)
410}
411
412// execEnvList converts an expand.Environ to the []string form that
413// os/exec.Cmd.Env expects. Only exported string variables are included,
414// matching what a real shell would pass to a child process.
415func execEnvList(env expand.Environ) []string {
416	var out []string
417	env.Each(func(name string, vr expand.Variable) bool {
418		if vr.Exported && vr.Kind == expand.String {
419			out = append(out, name+"="+vr.Str)
420		}
421		return true
422	})
423	return out
424}