1package shell
2
import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"log/slog"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"syscall"

	"mvdan.cc/sh/v3/expand"
	"mvdan.cc/sh/v3/interp"
	"mvdan.cc/sh/v3/syntax"
)
21
// probeWindow is the number of leading bytes (128) read from a file in
// order to classify it. That is enough to hold a typical shebang line and
// every magic number checked by isBinary, and keeps the per-exec probe
// cheap when hooks launch many scripts.
const probeWindow = 1 << 7
27
28// scriptDispatchHandler returns middleware that intercepts exec of a
29// path-prefixed argv[0] (e.g. ./foo.sh, /opt/bin/tool, C:\foo\bar.exe) and
30// dispatches based on the file's contents:
31//
32// 1. Shebang line (#!...) → exec the named interpreter via os/exec. The
33// interpreter is resolved literally first, then via PATH on the
34// basename as a permissive fallback (so #!/bin/bash works on Windows
35// boxes where Git for Windows puts bash.exe on PATH).
36// 2. Known binary magic (MZ, ELF, Mach-O) or a NUL byte in the probe
37// window → pass through to the next handler (mvdan's default exec).
38// 3. Otherwise → treat the file as shell source and run it in-process via
39// a nested interp.Runner that reuses the same handler stack.
40//
41// Non-path-prefixed argv[0] and empty args are passed straight through; this
42// handler is a no-op for ordinary commands like `echo` or `jq`.
43//
44// blockFuncs is the block list used when building the nested runner for the
45// shell-source case, so deny rules apply recursively to commands invoked
46// from in-process scripts.
47func scriptDispatchHandler(blockFuncs []BlockFunc) func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
48 return func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
49 return func(ctx context.Context, args []string) error {
50 if len(args) == 0 || !isPathPrefixed(args[0]) {
51 return next(ctx, args)
52 }
53
54 scriptPath := args[0]
55 // Resolve relative paths against the interpreter's cwd, not
56 // the process cwd — hook commands are authored with the hook
57 // Runner's cwd in mind and sub-shells can cd before an exec.
58 if !filepath.IsAbs(scriptPath) {
59 scriptPath = filepath.Join(interp.HandlerCtx(ctx).Dir, scriptPath)
60 }
61 probe, err := probeFile(scriptPath)
62 if err != nil {
63 return err
64 }
65
66 switch {
67 case hasShebang(probe):
68 return dispatchShebang(ctx, scriptPath, probe, args)
69 case isBinary(probe):
70 return next(ctx, args)
71 default:
72 return runShellSource(ctx, scriptPath, args, blockFuncs)
73 }
74 }
75 }
76}
77
78// isPathPrefixed reports whether argv[0] is a file reference (as opposed
79// to a bare command to be resolved via PATH). A path reference starts with
80// `./`, `../`, `/`, or — on Windows — a drive-letter prefix.
81//
82// Note: mvdan already performs tilde expansion during word expansion, so
83// `~/script.sh` arrives here as an absolute path. We still call the helper
84// on the raw string to stay robust if a future change ever bypasses that
85// expansion; cover that path with a regression test.
86func isPathPrefixed(arg string) bool {
87 switch {
88 case strings.HasPrefix(arg, "./"),
89 strings.HasPrefix(arg, "../"),
90 strings.HasPrefix(arg, "/"):
91 return true
92 }
93 if runtime.GOOS == "windows" {
94 // Drive-letter paths: C:\foo or C:/foo (length check avoids
95 // accidentally matching a single letter followed by a colon).
96 if len(arg) >= 3 && isDriveLetter(arg[0]) && arg[1] == ':' &&
97 (arg[2] == '\\' || arg[2] == '/') {
98 return true
99 }
100 // Also treat backslash-prefixed UNC-like paths as path-prefixed.
101 if strings.HasPrefix(arg, "\\") {
102 return true
103 }
104 }
105 return false
106}
107
// isDriveLetter reports whether b is an ASCII letter (A-Z or a-z), i.e. a
// byte that can serve as a Windows drive letter.
func isDriveLetter(b byte) bool {
	// Setting bit 5 folds ASCII upper case onto lower case; no other byte
	// value lands inside 'a'..'z' after the fold.
	folded := b | 0x20
	return 'a' <= folded && folded <= 'z'
}
111
112// probeFile reads the first probeWindow bytes of the target path. It
113// deliberately does not slurp the whole file: callers that need the full
114// contents (only the shell-source branch) re-open via os.ReadFile. This
115// keeps memory bounded when argv[0] turns out to be a large binary.
116//
117// Returns errors surfaced by os.Open/os.Stat directly so callers see the
118// real reason: ENOENT, EACCES, EISDIR, ELOOP, etc.
119func probeFile(path string) ([]byte, error) {
120 f, err := os.Open(path)
121 if err != nil {
122 return nil, err
123 }
124 defer f.Close()
125 fi, err := f.Stat()
126 if err != nil {
127 return nil, err
128 }
129 if fi.IsDir() {
130 return nil, fmt.Errorf("%s: is a directory", path)
131 }
132 probe := make([]byte, probeWindow)
133 n, err := io.ReadFull(f, probe)
134 if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
135 return nil, err
136 }
137 return probe[:n], nil
138}
139
// hasShebang reports whether probe begins with the two-byte `#!` marker.
// Inputs shorter than two bytes (including a lone `#`) never qualify.
func hasShebang(probe []byte) bool {
	if len(probe) < 2 {
		return false
	}
	return string(probe[:2]) == "#!"
}
145
// isBinary is a heuristic that reports whether probe looks like an
// executable or other non-text file.
//
// Two signals are used: any NUL byte inside probe, or a leading magic
// number for a known executable format (DOS/PE "MZ", ELF, and the Mach-O
// thin and fat signatures). The magic check lets binaries whose probe
// happens to contain no NUL byte still be recognized.
func isBinary(probe []byte) bool {
	if bytes.IndexByte(probe, 0) != -1 {
		return true
	}

	// Windows PE / DOS MZ: the only two-byte signature.
	if len(probe) >= 2 && probe[0] == 'M' && probe[1] == 'Z' {
		return true
	}

	// Every remaining signature is four bytes long; compare them as one
	// big-endian word.
	if len(probe) < 4 {
		return false
	}
	word := uint32(probe[0])<<24 | uint32(probe[1])<<16 | uint32(probe[2])<<8 | uint32(probe[3])
	switch word {
	case 0x7F454C46, // "\x7fELF".
		0xFEEDFACE, // Mach-O 32-bit BE.
		0xFEEDFACF, // Mach-O 64-bit BE.
		0xCFFAEDFE, // Mach-O 64-bit LE.
		0xCEFAEDFE, // Mach-O 32-bit LE.
		0xCAFEBABE: // Mach-O fat binary.
		return true
	}
	return false
}
171
172// dispatchShebang parses probe's shebang line and execs the resolved
173// interpreter via os/exec, inheriting the parent runner's cwd, env, and
174// stdio. Returns interp.ExitStatus on non-zero interpreter exit so the
175// parent interpreter sees it as a normal non-zero status.
176func dispatchShebang(ctx context.Context, scriptPath string, probe []byte, args []string) error {
177 sb, err := parseShebang(probe)
178 if err != nil {
179 hc := interp.HandlerCtx(ctx)
180 fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
181 return interp.ExitStatus(126)
182 }
183
184 interpreter, err := resolveInterpreter(sb.interpreter)
185 if err != nil {
186 hc := interp.HandlerCtx(ctx)
187 fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
188 return interp.ExitStatus(127)
189 }
190
191 cmdArgs := append([]string{}, sb.args...)
192 cmdArgs = append(cmdArgs, scriptPath)
193 cmdArgs = append(cmdArgs, args[1:]...)
194
195 cmd := exec.CommandContext(ctx, interpreter, cmdArgs...)
196 hc := interp.HandlerCtx(ctx)
197 cmd.Dir = hc.Dir
198 cmd.Env = execEnvList(hc.Env)
199 cmd.Stdin = hc.Stdin
200 cmd.Stdout = hc.Stdout
201 cmd.Stderr = hc.Stderr
202
203 if err := cmd.Run(); err != nil {
204 var exitErr *exec.ExitError
205 if errors.As(err, &exitErr) {
206 code := exitErr.ExitCode()
207 if code < 0 {
208 code = 1
209 }
210 return interp.ExitStatus(uint8(code))
211 }
212 return err
213 }
214 return nil
215}
216
// resolveInterpreter maps a shebang interpreter reference to something that
// can be executed.
//
// If path can be stat-ed it is returned verbatim. If stat fails for any
// reason other than "does not exist" (EACCES, ELOOP, ...), that error is
// returned: quietly substituting a different binary from PATH would mask a
// real problem. Only when path is genuinely absent does resolution fall
// back to a PATH lookup of its basename, which is what lets `#!/bin/bash`
// work on machines that have bash on PATH but no /bin/bash on disk.
//
// A fallback from a path containing a separator is logged at debug level.
// A bare name (no separators) is simply looked up on PATH.
func resolveInterpreter(path string) (string, error) {
	switch _, statErr := os.Stat(path); {
	case statErr == nil:
		return path, nil
	case !errors.Is(statErr, fs.ErrNotExist):
		return "", statErr
	}

	name := filepath.Base(path)
	// bare means path was already just a command name, so looking up
	// name is the same as looking up path itself.
	bare := name == path && !strings.ContainsAny(path, `/\`)

	found, err := exec.LookPath(name)
	if err != nil {
		if bare {
			return "", fmt.Errorf("interpreter %q not found in PATH", path)
		}
		return "", fmt.Errorf("interpreter %q not found and %q not in PATH", path, name)
	}
	if !bare {
		slog.Debug("Shebang interpreter not found; falling back to PATH",
			"requested", path, "resolved", found)
	}
	return found, nil
}
253
// shebang is the parsed form of a `#!` line, as produced by parseShebang
// and parseEnvShebang.
//
// Kernel-style single-argument semantics (used for literal interpreter
// paths and for env without `-S`) are represented by args holding exactly
// one element: the un-tokenized remainder of the line.
type shebang struct {
	// interpreter is the program to invoke, as written on the shebang
	// line (or the program name that followed env).
	interpreter string
	// args are the extra arguments placed before the script path; nil
	// when the line carried none.
	args []string
}
263
264// parseShebang extracts the interpreter invocation from probe. It tolerates
265// CRLF line endings and a single leading space between `#!` and the path.
266// env special-cases: `/usr/bin/env NAME [args...]` unwraps to NAME with
267// kernel single-arg semantics; `-S` enables tokenized argument splitting.
268func parseShebang(probe []byte) (*shebang, error) {
269 if !hasShebang(probe) {
270 return nil, errors.New("not a shebang")
271 }
272 line := probe[2:]
273 // Take up to the first newline.
274 if idx := bytes.IndexByte(line, '\n'); idx >= 0 {
275 line = line[:idx]
276 }
277 // Strip trailing CR (CRLF-authored scripts).
278 line = bytes.TrimRight(line, "\r")
279 // Strip leading whitespace ("#! /usr/bin/env bash" is legal).
280 line = bytes.TrimLeft(line, " \t")
281 if len(line) == 0 {
282 return nil, errors.New("empty shebang")
283 }
284
285 var pathStr, rest string
286 if idx := bytes.IndexAny(line, " \t"); idx >= 0 {
287 pathStr = string(line[:idx])
288 rest = strings.TrimLeft(string(line[idx+1:]), " \t")
289 } else {
290 pathStr = string(line)
291 }
292
293 if isEnvShebang(pathStr) {
294 return parseEnvShebang(rest)
295 }
296
297 // Literal-path shebang: kernel semantics pass the remainder as a
298 // single argv[1], not tokenized.
299 sb := &shebang{interpreter: pathStr}
300 if rest != "" {
301 sb.args = []string{rest}
302 }
303 return sb, nil
304}
305
// isEnvShebang reports whether the shebang interpreter path p refers to
// `env`. The decision is made on the final path element only, so the usual
// /usr/bin/env and /bin/env, a bare `env`, and env installed at any other
// location are all accepted.
func isEnvShebang(p string) bool {
	return filepath.Base(p) == "env"
}
315
316// parseEnvShebang handles `/usr/bin/env` rewriting. Without `-S`, the
317// remainder after the program name is a single argv[1] (kernel
318// single-arg semantics via env, even though real env would fail to find a
319// program named "bash -x"). With `-S`, the remainder is tokenized on
320// whitespace. Any other `env` flag is rejected — forwarding unknown flags
321// to a /usr/bin/env on disk is a subtle portability footgun we don't want.
322func parseEnvShebang(rest string) (*shebang, error) {
323 if rest == "" {
324 return nil, errors.New("env: missing program name")
325 }
326
327 useSplit := false
328 if strings.HasPrefix(rest, "-") {
329 var flag, after string
330 if idx := strings.IndexAny(rest, " \t"); idx >= 0 {
331 flag = rest[:idx]
332 after = strings.TrimLeft(rest[idx+1:], " \t")
333 } else {
334 flag = rest
335 after = ""
336 }
337 if flag != "-S" {
338 return nil, fmt.Errorf("unsupported env flag: %s", flag)
339 }
340 useSplit = true
341 rest = after
342 if rest == "" {
343 return nil, errors.New("env -S requires a program")
344 }
345 }
346
347 if rest == "" {
348 return nil, errors.New("env: missing program name")
349 }
350
351 var prog, remainder string
352 if idx := strings.IndexAny(rest, " \t"); idx >= 0 {
353 prog = rest[:idx]
354 remainder = strings.TrimLeft(rest[idx+1:], " \t")
355 } else {
356 prog = rest
357 }
358
359 sb := &shebang{interpreter: prog}
360 if remainder != "" {
361 if useSplit {
362 sb.args = strings.Fields(remainder)
363 } else {
364 sb.args = []string{remainder}
365 }
366 }
367 return sb, nil
368}
369
370// runShellSource parses path's contents as POSIX shell and runs it
371// in-process via a nested interp.Runner. It reuses the parent runner's cwd,
372// env, and stdio, and rebuilds the Crush handler stack so builtins and the
373// dispatch handler itself remain available to anything the script invokes.
374// Positional parameters ($1, $2, …) come from args[1:].
375//
376// This is the only branch that reads the full file; probeFile keeps its
377// read to probeWindow bytes so the binary/shebang paths never touch more
378// than 128 bytes of I/O.
379func runShellSource(ctx context.Context, path string, args []string, blockFuncs []BlockFunc) error {
380 data, err := os.ReadFile(path)
381 if err != nil {
382 return err
383 }
384
385 file, err := syntax.NewParser().Parse(bytes.NewReader(data), path)
386 if err != nil {
387 return fmt.Errorf("could not parse %s: %w", path, err)
388 }
389
390 hc := interp.HandlerCtx(ctx)
391
392 opts := []interp.RunnerOption{
393 interp.StdIO(hc.Stdin, hc.Stdout, hc.Stderr),
394 interp.Interactive(false),
395 interp.Env(hc.Env),
396 interp.Dir(hc.Dir),
397 interp.ExecHandlers(standardHandlers(blockFuncs)...),
398 }
399 if len(args) > 1 {
400 // Params with a leading "--" avoids any of args[1:] being
401 // misinterpreted as set-options (e.g. a user passing "-e" as
402 // a positional arg to their script).
403 params := append([]string{"--"}, args[1:]...)
404 opts = append(opts, interp.Params(params...))
405 }
406
407 runner, err := interp.New(opts...)
408 if err != nil {
409 return fmt.Errorf("could not build runner for %s: %w", path, err)
410 }
411 return runner.Run(ctx, file)
412}
413
414// execEnvList converts an expand.Environ to the []string form that
415// os/exec.Cmd.Env expects. Only exported string variables are included,
416// matching what a real shell would pass to a child process.
417func execEnvList(env expand.Environ) []string {
418 var out []string
419 env.Each(func(name string, vr expand.Variable) bool {
420 if vr.Exported && vr.Kind == expand.String {
421 out = append(out, name+"="+vr.Str)
422 }
423 return true
424 })
425 return out
426}