1package shell
2
3import (
4 "bytes"
5 "context"
6 "errors"
7 "fmt"
8 "io"
9 "io/fs"
10 "log/slog"
11 "os"
12 "os/exec"
13 "path/filepath"
14 "runtime"
15 "strings"
16
17 "github.com/charmbracelet/crush/internal/filepathext"
18 "mvdan.cc/sh/v3/expand"
19 "mvdan.cc/sh/v3/interp"
20 "mvdan.cc/sh/v3/syntax"
21)
22
// probeWindow is the number of bytes read from the head of a file when
// deciding how to dispatch it. 128 bytes covers a typical shebang line and
// every magic number checked by isBinary, while keeping the probe cheap for
// users whose hooks invoke many scripts.
//
// NOTE(review): probeFile never reads past this window, so a shebang line
// that does not end within the first 128 bytes reaches parseShebang
// truncated. Confirm that this limit is acceptable for the scripts we expect.
const probeWindow = 128
28
29// scriptDispatchHandler returns middleware that intercepts exec of a
30// path-prefixed argv[0] (e.g. ./foo.sh, /opt/bin/tool, C:\foo\bar.exe) and
31// dispatches based on the file's contents:
32//
33// 1. Shebang line (#!...) → exec the named interpreter via os/exec. The
34// interpreter is resolved literally first, then via PATH on the
35// basename as a permissive fallback (so #!/bin/bash works on Windows
36// boxes where Git for Windows puts bash.exe on PATH).
37// 2. Known binary magic (MZ, ELF, Mach-O) or a NUL byte in the probe
38// window → pass through to the next handler (mvdan's default exec).
39// 3. Otherwise → treat the file as shell source and run it in-process via
40// a nested interp.Runner that reuses the same handler stack.
41//
42// Non-path-prefixed argv[0] and empty args are passed straight through; this
43// handler is a no-op for ordinary commands like `echo` or `jq`.
44//
45// blockFuncs is the block list used when building the nested runner for the
46// shell-source case, so deny rules apply recursively to commands invoked
47// from in-process scripts.
48func scriptDispatchHandler(blockFuncs []BlockFunc) func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
49 return func(next interp.ExecHandlerFunc) interp.ExecHandlerFunc {
50 return func(ctx context.Context, args []string) error {
51 if len(args) == 0 || !isPathPrefixed(args[0]) {
52 return next(ctx, args)
53 }
54
55 // Resolve relative paths against the interpreter's cwd, not
56 // the process cwd — hook commands are authored with the hook
57 // Runner's cwd in mind and sub-shells can cd before an exec.
58 scriptPath := filepathext.SmartJoin(interp.HandlerCtx(ctx).Dir, args[0])
59 probe, err := probeFile(scriptPath)
60 if err != nil {
61 return err
62 }
63
64 switch {
65 case hasShebang(probe):
66 return dispatchShebang(ctx, scriptPath, probe, args)
67 case isBinary(probe):
68 return next(ctx, args)
69 default:
70 return runShellSource(ctx, scriptPath, args, blockFuncs)
71 }
72 }
73 }
74}
75
76// isPathPrefixed reports whether argv[0] is a file reference (as opposed
77// to a bare command to be resolved via PATH). A path reference starts with
78// `./`, `../`, `/`, or — on Windows — a drive-letter prefix.
79//
80// Note: mvdan already performs tilde expansion during word expansion, so
81// `~/script.sh` arrives here as an absolute path. We still call the helper
82// on the raw string to stay robust if a future change ever bypasses that
83// expansion; cover that path with a regression test.
84func isPathPrefixed(arg string) bool {
85 switch {
86 case strings.HasPrefix(arg, "./"),
87 strings.HasPrefix(arg, "../"),
88 strings.HasPrefix(arg, "/"):
89 return true
90 }
91 if runtime.GOOS == "windows" {
92 // Drive-letter paths: C:\foo or C:/foo (length check avoids
93 // accidentally matching a single letter followed by a colon).
94 if len(arg) >= 3 && isDriveLetter(arg[0]) && arg[1] == ':' &&
95 (arg[2] == '\\' || arg[2] == '/') {
96 return true
97 }
98 // Also treat backslash-prefixed UNC-like paths as path-prefixed.
99 if strings.HasPrefix(arg, "\\") {
100 return true
101 }
102 }
103 return false
104}
105
// isDriveLetter reports whether b is an ASCII letter, i.e. a byte that can
// start a Windows drive prefix such as `C:`.
func isDriveLetter(b byte) bool {
	// Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z'; no other byte lands in
	// that range.
	lower := b | 0x20
	return 'a' <= lower && lower <= 'z'
}
109
110// probeFile reads the first probeWindow bytes of the target path. It
111// deliberately does not slurp the whole file: callers that need the full
112// contents (only the shell-source branch) re-open via os.ReadFile. This
113// keeps memory bounded when argv[0] turns out to be a large binary.
114//
115// Returns errors surfaced by os.Open/os.Stat directly so callers see the
116// real reason: ENOENT, EACCES, EISDIR, ELOOP, etc.
117func probeFile(path string) ([]byte, error) {
118 f, err := os.Open(path)
119 if err != nil {
120 return nil, err
121 }
122 defer f.Close()
123 fi, err := f.Stat()
124 if err != nil {
125 return nil, err
126 }
127 if fi.IsDir() {
128 return nil, fmt.Errorf("%s: is a directory", path)
129 }
130 probe := make([]byte, probeWindow)
131 n, err := io.ReadFull(f, probe)
132 if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
133 return nil, err
134 }
135 return probe[:n], nil
136}
137
// hasShebang reports whether probe begins with the two-byte `#!` marker.
// Anything shorter than two bytes, such as a file containing only `#`, is not
// a shebang.
func hasShebang(probe []byte) bool {
	if len(probe) < 2 {
		return false
	}
	return string(probe[:2]) == "#!"
}
143
// isBinary is a heuristic that reports whether probe looks like an executable
// or some other non-text file.
//
// Two signals are used: a NUL byte anywhere in probe (the classic Unix
// text-versus-binary test), and a known executable magic number at the very
// start. The magic check covers well-formed binaries that happen to contain
// no NUL within the probed bytes.
func isBinary(probe []byte) bool {
	if bytes.IndexByte(probe, 0) != -1 {
		return true
	}

	// Two-byte magic: Windows PE / DOS MZ.
	if len(probe) >= 2 && string(probe[:2]) == "MZ" {
		return true
	}

	// Every remaining magic is exactly four bytes long.
	if len(probe) < 4 {
		return false
	}
	switch string(probe[:4]) {
	case "\x7fELF", // ELF.
		"\xfe\xed\xfa\xce", // Mach-O 32-bit BE.
		"\xfe\xed\xfa\xcf", // Mach-O 64-bit BE.
		"\xcf\xfa\xed\xfe", // Mach-O 64-bit LE.
		"\xce\xfa\xed\xfe", // Mach-O 32-bit LE.
		"\xca\xfe\xba\xbe": // Mach-O fat binary.
		return true
	}
	return false
}
169
170// dispatchShebang parses probe's shebang line and execs the resolved
171// interpreter via os/exec, inheriting the parent runner's cwd, env, and
172// stdio. Returns interp.ExitStatus on non-zero interpreter exit so the
173// parent interpreter sees it as a normal non-zero status.
174func dispatchShebang(ctx context.Context, scriptPath string, probe []byte, args []string) error {
175 sb, err := parseShebang(probe)
176 if err != nil {
177 hc := interp.HandlerCtx(ctx)
178 fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
179 return interp.ExitStatus(126)
180 }
181
182 interpreter, err := resolveInterpreter(sb.interpreter)
183 if err != nil {
184 hc := interp.HandlerCtx(ctx)
185 fmt.Fprintf(hc.Stderr, "crush: %s: %s\n", scriptPath, err)
186 return interp.ExitStatus(127)
187 }
188
189 cmdArgs := append([]string{}, sb.args...)
190 cmdArgs = append(cmdArgs, scriptPath)
191 cmdArgs = append(cmdArgs, args[1:]...)
192
193 cmd := exec.CommandContext(ctx, interpreter, cmdArgs...)
194 hc := interp.HandlerCtx(ctx)
195 cmd.Dir = hc.Dir
196 cmd.Env = execEnvList(hc.Env)
197 cmd.Stdin = hc.Stdin
198 cmd.Stdout = hc.Stdout
199 cmd.Stderr = hc.Stderr
200
201 if err := cmd.Run(); err != nil {
202 var exitErr *exec.ExitError
203 if errors.As(err, &exitErr) {
204 code := exitErr.ExitCode()
205 if code < 0 {
206 code = 1
207 }
208 return interp.ExitStatus(uint8(code))
209 }
210 return err
211 }
212 return nil
213}
214
// resolveInterpreter turns the interpreter named on a shebang line into
// something os/exec can run.
//
// A bare name (no `/` or `\`, e.g. the `bash` in `env bash`) is only ever
// looked up on PATH. It is deliberately not stat-ed first: a relative stat
// would be resolved against the process working directory, so a stray file or
// directory that happens to share the name would be returned unresolved.
//
// A path is used literally when it exists. If it is genuinely missing, the
// basename is looked up on PATH as a permissive fallback; this is what makes
// #!/bin/bash work on Windows boxes where Git for Windows puts bash.exe on
// PATH but there is no /bin/bash on disk. Any other stat failure (EACCES,
// ELOOP, ...) is returned as-is, because silently picking a different binary
// off PATH would hide a real problem.
//
// It returns the path to execute, or an error describing why no interpreter
// could be found.
func resolveInterpreter(path string) (string, error) {
	if !strings.ContainsAny(path, `/\`) {
		// Already a bare name — just do a PATH lookup.
		resolved, err := exec.LookPath(path)
		if err != nil {
			return "", fmt.Errorf("interpreter %q not found in PATH", path)
		}
		return resolved, nil
	}

	if _, err := os.Stat(path); err == nil {
		return path, nil
	} else if !errors.Is(err, fs.ErrNotExist) {
		return "", err
	}

	base := filepath.Base(path)
	resolved, err := exec.LookPath(base)
	if err != nil {
		return "", fmt.Errorf("interpreter %q not found and %q not in PATH", path, base)
	}
	slog.Debug("Shebang interpreter not found; falling back to PATH",
		"requested", path, "resolved", resolved)
	return resolved, nil
}
251
// shebang is the parsed form of a `#!` line.
//
// The kernel's single-argument semantics (used for literal interpreter paths
// and for env without `-S`) are represented by an args slice holding exactly
// one element: the un-tokenized remainder of the line.
type shebang struct {
	// interpreter is the program to invoke, as written on the shebang line
	// (or, for env shebangs, the program named after env).
	interpreter string
	// args holds the extra arguments to pass before the script path. It is
	// nil when the line carries no arguments.
	args []string
}
261
262// parseShebang extracts the interpreter invocation from probe. It tolerates
263// CRLF line endings and a single leading space between `#!` and the path.
264// env special-cases: `/usr/bin/env NAME [args...]` unwraps to NAME with
265// kernel single-arg semantics; `-S` enables tokenized argument splitting.
266func parseShebang(probe []byte) (*shebang, error) {
267 if !hasShebang(probe) {
268 return nil, errors.New("not a shebang")
269 }
270 line := probe[2:]
271 // Take up to the first newline.
272 if idx := bytes.IndexByte(line, '\n'); idx >= 0 {
273 line = line[:idx]
274 }
275 // Strip trailing CR (CRLF-authored scripts).
276 line = bytes.TrimRight(line, "\r")
277 // Strip leading whitespace ("#! /usr/bin/env bash" is legal).
278 line = bytes.TrimLeft(line, " \t")
279 if len(line) == 0 {
280 return nil, errors.New("empty shebang")
281 }
282
283 var pathStr, rest string
284 if idx := bytes.IndexAny(line, " \t"); idx >= 0 {
285 pathStr = string(line[:idx])
286 rest = strings.TrimLeft(string(line[idx+1:]), " \t")
287 } else {
288 pathStr = string(line)
289 }
290
291 if isEnvShebang(pathStr) {
292 return parseEnvShebang(rest)
293 }
294
295 // Literal-path shebang: kernel semantics pass the remainder as a
296 // single argv[1], not tokenized.
297 sb := &shebang{interpreter: pathStr}
298 if rest != "" {
299 sb.args = []string{rest}
300 }
301 return sb, nil
302}
303
// isEnvShebang reports whether the shebang interpreter p is `env`. Only the
// final path element is compared, so the usual /usr/bin/env and /bin/env, a
// bare `env`, and env installed in an unusual location (NixOS, BSDs) are all
// accepted.
func isEnvShebang(p string) bool {
	return filepath.Base(p) == "env"
}
313
314// parseEnvShebang handles `/usr/bin/env` rewriting. Without `-S`, the
315// remainder after the program name is a single argv[1] (kernel
316// single-arg semantics via env, even though real env would fail to find a
317// program named "bash -x"). With `-S`, the remainder is tokenized on
318// whitespace. Any other `env` flag is rejected — forwarding unknown flags
319// to a /usr/bin/env on disk is a subtle portability footgun we don't want.
320func parseEnvShebang(rest string) (*shebang, error) {
321 if rest == "" {
322 return nil, errors.New("env: missing program name")
323 }
324
325 useSplit := false
326 if strings.HasPrefix(rest, "-") {
327 var flag, after string
328 if idx := strings.IndexAny(rest, " \t"); idx >= 0 {
329 flag = rest[:idx]
330 after = strings.TrimLeft(rest[idx+1:], " \t")
331 } else {
332 flag = rest
333 after = ""
334 }
335 if flag != "-S" {
336 return nil, fmt.Errorf("unsupported env flag: %s", flag)
337 }
338 useSplit = true
339 rest = after
340 if rest == "" {
341 return nil, errors.New("env -S requires a program")
342 }
343 }
344
345 if rest == "" {
346 return nil, errors.New("env: missing program name")
347 }
348
349 var prog, remainder string
350 if idx := strings.IndexAny(rest, " \t"); idx >= 0 {
351 prog = rest[:idx]
352 remainder = strings.TrimLeft(rest[idx+1:], " \t")
353 } else {
354 prog = rest
355 }
356
357 sb := &shebang{interpreter: prog}
358 if remainder != "" {
359 if useSplit {
360 sb.args = strings.Fields(remainder)
361 } else {
362 sb.args = []string{remainder}
363 }
364 }
365 return sb, nil
366}
367
368// runShellSource parses path's contents as POSIX shell and runs it
369// in-process via a nested interp.Runner. It reuses the parent runner's cwd,
370// env, and stdio, and rebuilds the Crush handler stack so builtins and the
371// dispatch handler itself remain available to anything the script invokes.
372// Positional parameters ($1, $2, …) come from args[1:].
373//
374// This is the only branch that reads the full file; probeFile keeps its
375// read to probeWindow bytes so the binary/shebang paths never touch more
376// than 128 bytes of I/O.
377func runShellSource(ctx context.Context, path string, args []string, blockFuncs []BlockFunc) error {
378 data, err := os.ReadFile(path)
379 if err != nil {
380 return err
381 }
382
383 file, err := syntax.NewParser().Parse(bytes.NewReader(data), path)
384 if err != nil {
385 return fmt.Errorf("could not parse %s: %w", path, err)
386 }
387
388 hc := interp.HandlerCtx(ctx)
389
390 opts := []interp.RunnerOption{
391 interp.StdIO(hc.Stdin, hc.Stdout, hc.Stderr),
392 interp.Interactive(false),
393 interp.Env(hc.Env),
394 interp.Dir(hc.Dir),
395 interp.ExecHandlers(standardHandlers(blockFuncs)...),
396 }
397 if len(args) > 1 {
398 // Params with a leading "--" avoids any of args[1:] being
399 // misinterpreted as set-options (e.g. a user passing "-e" as
400 // a positional arg to their script).
401 params := append([]string{"--"}, args[1:]...)
402 opts = append(opts, interp.Params(params...))
403 }
404
405 runner, err := interp.New(opts...)
406 if err != nil {
407 return fmt.Errorf("could not build runner for %s: %w", path, err)
408 }
409 return runner.Run(ctx, file)
410}
411
412// execEnvList converts an expand.Environ to the []string form that
413// os/exec.Cmd.Env expects. Only exported string variables are included,
414// matching what a real shell would pass to a child process.
415func execEnvList(env expand.Environ) []string {
416 var out []string
417 env.Each(func(name string, vr expand.Variable) bool {
418 if vr.Exported && vr.Kind == expand.String {
419 out = append(out, name+"="+vr.Str)
420 }
421 return true
422 })
423 return out
424}