// Copyright (c) 2016, Daniel Martí <mvdan@mvdan.cc>
2// See LICENSE for licensing information
3
4package syntax
5
6import (
7 "fmt"
8 "io"
9 "iter"
10 "slices"
11 "strconv"
12 "strings"
13 "unicode/utf8"
14)
15
// ParserOption is a function which can be passed to NewParser
// to alter its behavior. To apply an option to an existing Parser,
// call it directly, for example KeepComments(true)(parser).
type ParserOption func(*Parser)
20
21// KeepComments makes the parser parse comments and attach them to
22// nodes, as opposed to discarding them.
23func KeepComments(enabled bool) ParserOption {
24 return func(p *Parser) { p.keepComments = enabled }
25}
26
// LangVariant describes a shell language variant to use when tokenizing and
// parsing shell code. The zero value is [LangBash].
type LangVariant int

const (
	// LangBash corresponds to the GNU Bash language, as described in its
	// manual at https://www.gnu.org/software/bash/manual/bash.html.
	//
	// We currently follow Bash version 5.2.
	//
	// Its string representation is "bash".
	LangBash LangVariant = iota

	// LangPOSIX corresponds to the POSIX Shell language, as described at
	// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html.
	//
	// Its string representation is "posix" or "sh".
	LangPOSIX

	// LangMirBSDKorn corresponds to the MirBSD Korn Shell, also known as
	// mksh, as described at http://www.mirbsd.org/htman/i386/man1/mksh.htm.
	// Note that it shares some features with Bash, due to the shared
	// ancestry that is ksh.
	//
	// We currently follow mksh version 59.
	//
	// Its string representation is "mksh".
	LangMirBSDKorn

	// LangBats corresponds to the Bash Automated Testing System language,
	// as described at https://github.com/bats-core/bats-core. Note that
	// it's just a small extension of the Bash language.
	//
	// Its string representation is "bats".
	LangBats

	// LangAuto corresponds to automatic language detection,
	// commonly used by end-user applications like shfmt,
	// which can guess a file's language variant given its filename or shebang.
	//
	// At this time, [Variant] does not support LangAuto.
	//
	// Its string representation is "auto".
	LangAuto
)
70
71// Variant changes the shell language variant that the parser will
72// accept.
73//
74// The passed language variant must be one of the constant values defined in
75// this package.
76func Variant(l LangVariant) ParserOption {
77 switch l {
78 case LangBash, LangPOSIX, LangMirBSDKorn, LangBats:
79 case LangAuto:
80 panic("LangAuto is not supported by the parser at this time")
81 default:
82 panic(fmt.Sprintf("unknown shell language variant: %d", l))
83 }
84 return func(p *Parser) { p.lang = l }
85}
86
87func (l LangVariant) String() string {
88 switch l {
89 case LangBash:
90 return "bash"
91 case LangPOSIX:
92 return "posix"
93 case LangMirBSDKorn:
94 return "mksh"
95 case LangBats:
96 return "bats"
97 case LangAuto:
98 return "auto"
99 }
100 return "unknown shell language variant"
101}
102
103func (l *LangVariant) Set(s string) error {
104 switch s {
105 case "bash":
106 *l = LangBash
107 case "posix", "sh":
108 *l = LangPOSIX
109 case "mksh":
110 *l = LangMirBSDKorn
111 case "bats":
112 *l = LangBats
113 case "auto":
114 *l = LangAuto
115 default:
116 return fmt.Errorf("unknown shell language variant: %q", s)
117 }
118 return nil
119}
120
121func (l LangVariant) isBash() bool {
122 return l == LangBash || l == LangBats
123}
124
125// StopAt configures the lexer to stop at an arbitrary word, treating it
126// as if it were the end of the input. It can contain any characters
127// except whitespace, and cannot be over four bytes in size.
128//
129// This can be useful to embed shell code within another language, as
130// one can use a special word to mark the delimiters between the two.
131//
132// As a word, it will only apply when following whitespace or a
133// separating token. For example, StopAt("$$") will act on the inputs
134// "foo $$" and "foo;$$", but not on "foo '$$'".
135//
136// The match is done by prefix, so the example above will also act on
137// "foo $$bar".
138func StopAt(word string) ParserOption {
139 if len(word) > 4 {
140 panic("stop word can't be over four bytes in size")
141 }
142 if strings.ContainsAny(word, " \t\n\r") {
143 panic("stop word can't contain whitespace characters")
144 }
145 return func(p *Parser) { p.stopAt = []byte(word) }
146}
147
148// RecoverErrors allows the parser to skip up to a maximum number of
149// errors in the given input on a best-effort basis.
150// This can be useful to tab-complete an interactive shell prompt,
151// or when providing diagnostics on slightly incomplete shell source.
152//
153// Currently, this only helps with mandatory tokens from the shell grammar
154// which are not present in the input. They result in position fields
155// or nodes whose position report [Pos.IsRecovered] as true.
156//
157// For example, given the input
158//
159// (foo |
160//
161// the result will contain two recovered positions; first, the pipe requires
162// a statement to follow, and as [Stmt.Pos] reports, the entire node is recovered.
163// Second, the subshell needs to be closed, so [Subshell.Rparen] is recovered.
164func RecoverErrors(maximum int) ParserOption {
165 return func(p *Parser) { p.recoverErrorsMax = maximum }
166}
167
168// NewParser allocates a new [Parser] and applies any number of options.
169func NewParser(options ...ParserOption) *Parser {
170 p := &Parser{}
171 for _, opt := range options {
172 opt(p)
173 }
174 return p
175}
176
// Parse reads and parses a shell program with an optional name. It
// returns the parsed program if no issues were encountered. Otherwise,
// an error is returned. Reads from r are buffered.
//
// Note that the *File built so far is returned alongside a non-nil error.
//
// Parse can be called more than once, but not concurrently. That is, a
// Parser can be reused once it is done working.
func (p *Parser) Parse(r io.Reader, name string) (*File, error) {
	p.reset()
	p.f = &File{Name: name}
	p.src = r
	// Prime the lexer: load the first rune, then the first token.
	p.rune()
	p.next()
	p.f.Stmts, p.f.Last = p.stmtList()
	if p.err == nil {
		// EOF immediately after heredoc word so no newline to
		// trigger it
		p.doHeredocs()
	}
	return p.f, p.err
}
197
// Stmts reads and parses statements one at a time, calling a function
// each time one is parsed. If the function returns false, parsing is
// stopped and the function is not called again.
//
// The returned error is the first lexer or parser error encountered, if any.
func (p *Parser) Stmts(r io.Reader, fn func(*Stmt) bool) error {
	p.reset()
	p.f = &File{}
	p.src = r
	// Prime the lexer: load the first rune, then the first token.
	p.rune()
	p.next()
	p.stmts(fn)
	if p.err == nil {
		// EOF immediately after heredoc word so no newline to
		// trigger it
		p.doHeredocs()
	}
	return p.err
}
215
// wrappedReader is the reader used by [Parser.Interactive]. It wraps the
// user's reader so that, before each read, it can notify the callback about
// lines which did not result in complete statements.
type wrappedReader struct {
	*Parser
	io.Reader

	// lastLine is the last line number for which Read already considered
	// calling back, so that each line triggers at most one callback from Read.
	lastLine int64
	// accumulated holds the statements parsed since the last callback
	// made with complete statements.
	accumulated []*Stmt
	// fn is the user callback given to Parser.Interactive.
	fn func([]*Stmt) bool
}
224
225func (w *wrappedReader) Read(p []byte) (n int, err error) {
226 // If we lexed a newline for the first time, we just finished a line, so
227 // we may need to give a callback for the edge cases below not covered
228 // by Parser.Stmts.
229 if (w.r == '\n' || w.r == escNewl) && w.line > w.lastLine {
230 if w.Incomplete() {
231 // Incomplete statement; call back to print "> ".
232 if !w.fn(w.accumulated) {
233 return 0, io.EOF
234 }
235 } else if len(w.accumulated) == 0 {
236 // Nothing was parsed; call back to print another "$ ".
237 if !w.fn(nil) {
238 return 0, io.EOF
239 }
240 }
241 w.lastLine = w.line
242 }
243 return w.Reader.Read(p)
244}
245
// Interactive implements what is necessary to parse statements in an
// interactive shell. The parser will call the given function under the
// circumstances outlined below.
//
// If a line containing any number of statements is parsed, the function will be
// called with said statements.
//
// If a line ending in an incomplete statement is parsed, the function will be
// called with any fully parsed statements, and [Parser.Incomplete] will return true.
//
// If a line is read without any statement being parsed, the function will be
// called with a nil slice.
//
// One can imagine a simple interactive shell implementation as follows:
//
//	fmt.Fprintf(os.Stdout, "$ ")
//	parser.Interactive(os.Stdin, func(stmts []*syntax.Stmt) bool {
//		if parser.Incomplete() {
//			fmt.Fprintf(os.Stdout, "> ")
//			return true
//		}
//		run(stmts)
//		fmt.Fprintf(os.Stdout, "$ ")
//		return true
//	})
//
// If the callback function returns false, parsing is stopped and the function
// is not called again.
//
// The slice given to the callback shares its backing array with later calls,
// as it is truncated and appended to again; callbacks which need to retain
// the statements should copy the slice.
func (p *Parser) Interactive(r io.Reader, fn func([]*Stmt) bool) error {
	w := wrappedReader{Parser: p, Reader: r, fn: fn}
	return p.Stmts(&w, func(stmt *Stmt) bool {
		w.accumulated = append(w.accumulated, stmt)
		// We finished parsing a statement and we're at a newline token,
		// so we finished fully parsing a number of statements. Call
		// back to run the statements and print "$ ".
		if p.tok == _Newl {
			if !fn(w.accumulated) {
				return false
			}
			w.accumulated = w.accumulated[:0]
			// The callback above would already print "$ ", so we
			// don't want the subsequent wrappedReader.Read to cause
			// another "$ " print thinking that nothing was parsed.
			w.lastLine = w.line + 1
		}
		return true
	})
}
291
292// Words is a pre-iterators API which now wraps [Parser.WordsSeq].
293func (p *Parser) Words(r io.Reader, fn func(*Word) bool) error {
294 for w, err := range p.WordsSeq(r) {
295 if err != nil {
296 return err
297 }
298 if !fn(w) {
299 break
300 }
301 }
302 return nil
303}
304
// WordsSeq reads and parses a sequence of words alongside any error encountered.
//
// Newlines are skipped, meaning that multi-line input will work fine. If the
// parser encounters a token that isn't a word, such as a semicolon, an error
// will be returned.
//
// An error, if any, is yielded once as the final element with a nil word.
//
// Note that the parser is reset and pointed at r when WordsSeq is called,
// not when the returned sequence is iterated; reading only begins once
// iteration starts.
//
// Note that the lexer doesn't currently tokenize spaces, so it may need to read
// a non-space byte such as a newline or a letter before finishing the parsing
// of a word. This will be fixed in the future.
func (p *Parser) WordsSeq(r io.Reader) iter.Seq2[*Word, error] {
	p.reset()
	p.f = &File{}
	p.src = r
	return func(yield func(*Word, error) bool) {
		// Prime the lexer: load the first rune, then the first token.
		p.rune()
		p.next()
		for {
			p.got(_Newl)
			w := p.getWord()
			if w == nil {
				// No word was parsed; anything other than EOF is
				// a token which cannot form a word.
				if p.tok != _EOF {
					p.curErr("%s is not a valid word", p.tok)
				}
				if p.err != nil {
					yield(nil, p.err)
				}
				return
			}
			if !yield(w, nil) {
				return
			}
		}
	}
}
339
// Document parses a single here-document word. That is, it parses the input as
// if they were lines following a <<EOF redirection.
//
// In practice, this is the same as parsing the input as if it were within
// double quotes, but without having to escape all double quote characters.
// Similarly, the here-document word parsed here cannot be ended by any
// delimiter other than reaching the end of the input.
func (p *Parser) Document(r io.Reader) (*Word, error) {
	p.reset()
	p.f = &File{}
	p.src = r
	p.rune()
	// Lex the input as a heredoc body from the very first token.
	p.quote = hdocBody
	// A placeholder stop word is pushed so that the stack of heredoc stop
	// words is non-empty.
	// NOTE(review): presumably parsingDoc makes the lexer ignore this stop
	// word, per the doc comment above — confirm in the lexer.
	p.hdocStops = [][]byte{[]byte("MVDAN_CC_SH_SYNTAX_EOF")}
	p.parsingDoc = true
	p.next()
	w := p.getWord()
	return w, p.err
}
359
// Arithmetic parses a single arithmetic expression. That is, as if the input
// were within the $(( and )) tokens.
//
// The expression parsed so far is returned alongside any error encountered.
func (p *Parser) Arithmetic(r io.Reader) (ArithmExpr, error) {
	p.reset()
	p.f = &File{}
	p.src = r
	p.rune()
	// Lex the input in arithmetic expression mode from the very first token.
	p.quote = arithmExpr
	p.next()
	expr := p.arithmExpr(false)
	return expr, p.err
}
372
// Parser holds the internal state of the parsing mechanism of a
// program.
//
// A Parser can be reused across calls to its parsing methods,
// but it must not be used concurrently.
type Parser struct {
	src io.Reader
	bs  []byte // current chunk of read bytes
	bsp uint   // pos within chunk for the rune after r; uint helps eliminate bounds checks
	r   rune   // next rune
	w   int    // width of r

	f *File

	spaced bool // whether tok has whitespace on its left

	err     error // lexer/parser error
	readErr error // got a read error, but bytes left

	tok token  // current token
	val string // current value (valid if tok is _Lit*)

	// position of r, to be converted to Parser.pos later
	offs, line, col int64

	pos Pos // position of tok

	quote   quoteState // current lexer state
	eqlOffs int        // position of '=' in val (a literal)

	keepComments bool        // set via the KeepComments option
	lang         LangVariant // set via the Variant option

	stopAt []byte // set via the StopAt option

	// recoveredErrors counts the errors skipped so far in the current
	// parse, which may not exceed recoverErrorsMax as set via RecoverErrors.
	recoveredErrors  int
	recoverErrorsMax int

	// forbidNested causes expansions to be reported as errors,
	// as they are not allowed in heredoc words.
	forbidNested bool

	// list of pending heredoc bodies
	buriedHdocs int
	heredocs    []*Redirect

	hdocStops [][]byte // stack of end words for open heredocs

	parsingDoc bool // true if using Parser.Document

	// openNodes tracks how many entire statements or words we're currently parsing.
	// A non-zero number means that we require certain tokens or words before
	// reaching EOF, used for [Parser.Incomplete].
	openNodes int
	// openBquotes is how many levels of backquotes are open at the moment.
	openBquotes int

	// lastBquoteEsc is how many times the last backquote token was escaped
	lastBquoteEsc int

	rxOpenParens int
	rxFirstPart  bool

	accComs []Comment
	curComs *[]Comment

	// litBatch and wordBatch are pre-allocated batches of nodes,
	// handed out one element at a time to reduce the number of allocations.
	litBatch  []Lit
	wordBatch []wordAlloc

	readBuf [bufSize]byte
	litBuf  [bufSize]byte
	litBs   []byte // bytes of the literal being constructed, if any
}
441
442// Incomplete reports whether the parser needs more input bytes
443// to finish properly parsing a statement or word.
444//
445// It is only safe to call while the parser is blocked on a read. For an example
446// use case, see [Parser.Interactive].
447func (p *Parser) Incomplete() bool {
448 // If there are any open nodes, we need to finish them.
449 // If we're constructing a literal, we need to finish it.
450 return p.openNodes > 0 || len(p.litBs) > 0
451}
452
// bufSize is the size in bytes of the parser's fixed read and literal buffers.
const bufSize = 1 << 10
454
455func (p *Parser) reset() {
456 p.tok, p.val = illegalTok, ""
457 p.eqlOffs = 0
458 p.bs, p.bsp = nil, 0
459 p.offs, p.line, p.col = 0, 1, 1
460 p.r, p.w = 0, 0
461 p.err, p.readErr = nil, nil
462 p.quote, p.forbidNested = noState, false
463 p.openNodes = 0
464 p.recoveredErrors = 0
465 p.heredocs, p.buriedHdocs = p.heredocs[:0], 0
466 p.hdocStops = nil
467 p.parsingDoc = false
468 p.openBquotes = 0
469 p.accComs = nil
470 p.accComs, p.curComs = nil, &p.accComs
471 p.litBatch = nil
472 p.wordBatch = nil
473 p.litBs = nil
474}
475
476func (p *Parser) nextPos() Pos {
477 // Basic protection against offset overflow;
478 // note that an offset of 0 is valid, so we leave the maximum.
479 offset := min(p.offs+int64(p.bsp)-int64(p.w), offsetMax)
480 var line, col uint
481 if p.line <= lineMax {
482 line = uint(p.line)
483 }
484 if p.col <= colMax {
485 col = uint(p.col)
486 }
487 return NewPos(uint(offset), line, col)
488}
489
490func (p *Parser) lit(pos Pos, val string) *Lit {
491 if len(p.litBatch) == 0 {
492 p.litBatch = make([]Lit, 32)
493 }
494 l := &p.litBatch[0]
495 p.litBatch = p.litBatch[1:]
496 l.ValuePos = pos
497 l.ValueEnd = p.nextPos()
498 l.Value = val
499 return l
500}
501
// wordAlloc bundles a Word with room for one word part, so that a word
// made up of a single part can use the parts array as its backing storage.
type wordAlloc struct {
	word  Word
	parts [1]WordPart
}
506
507func (p *Parser) wordAnyNumber() *Word {
508 if len(p.wordBatch) == 0 {
509 p.wordBatch = make([]wordAlloc, 32)
510 }
511 alloc := &p.wordBatch[0]
512 p.wordBatch = p.wordBatch[1:]
513 w := &alloc.word
514 w.Parts = p.wordParts(alloc.parts[:0])
515 return w
516}
517
518func (p *Parser) wordOne(part WordPart) *Word {
519 if len(p.wordBatch) == 0 {
520 p.wordBatch = make([]wordAlloc, 32)
521 }
522 alloc := &p.wordBatch[0]
523 p.wordBatch = p.wordBatch[1:]
524 w := &alloc.word
525 w.Parts = alloc.parts[:1]
526 w.Parts[0] = part
527 return w
528}
529
530func (p *Parser) call(w *Word) *CallExpr {
531 var alloc struct {
532 ce CallExpr
533 ws [4]*Word
534 }
535 ce := &alloc.ce
536 ce.Args = alloc.ws[:1]
537 ce.Args[0] = w
538 return ce
539}
540
541//go:generate stringer -type=quoteState
542
// quoteState is the state the lexer is in, such as being within double
// quotes or within an arithmetic expression.
//
// Each state is a distinct bit, so that a set of states can be
// represented as a bitmask, like the "all" constants below.
type quoteState uint32

const (
	noState quoteState = 1 << iota
	subCmd
	subCmdBckquo
	dblQuotes
	hdocWord
	hdocBody
	hdocBodyTabs
	arithmExpr
	arithmExprLet
	arithmExprCmd
	arithmExprBrack
	testExpr
	testExprRegexp
	switchCase
	paramExpName
	paramExpSlice
	paramExpRepl
	paramExpExp
	arrayElems

	// The constants below are sets of the states above.
	allKeepSpaces = paramExpRepl | dblQuotes | hdocBody |
		hdocBodyTabs | paramExpExp
	allRegTokens = noState | subCmd | subCmdBckquo | hdocWord |
		switchCase | arrayElems | testExpr
	allArithmExpr = arithmExpr | arithmExprLet | arithmExprCmd |
		arithmExprBrack | paramExpSlice
	allParamReg = paramExpName | paramExpSlice
	allParamExp = allParamReg | paramExpRepl | paramExpExp | arithmExprBrack
)
575
// saveState holds the parser state which preNested replaces when entering
// a nested construct, so that postNested can restore it afterwards.
type saveState struct {
	quote       quoteState
	buriedHdocs int
}
580
581func (p *Parser) preNested(quote quoteState) (s saveState) {
582 s.quote, s.buriedHdocs = p.quote, p.buriedHdocs
583 p.buriedHdocs, p.quote = len(p.heredocs), quote
584 return
585}
586
587func (p *Parser) postNested(s saveState) {
588 p.quote, p.buriedHdocs = s.quote, s.buriedHdocs
589}
590
591func (p *Parser) unquotedWordBytes(w *Word) ([]byte, bool) {
592 buf := make([]byte, 0, 4)
593 didUnquote := false
594 for _, wp := range w.Parts {
595 buf, didUnquote = p.unquotedWordPart(buf, wp, false)
596 }
597 return buf, didUnquote
598}
599
600func (p *Parser) unquotedWordPart(buf []byte, wp WordPart, quotes bool) (_ []byte, quoted bool) {
601 switch wp := wp.(type) {
602 case *Lit:
603 for i := 0; i < len(wp.Value); i++ {
604 if b := wp.Value[i]; b == '\\' && !quotes {
605 if i++; i < len(wp.Value) {
606 buf = append(buf, wp.Value[i])
607 }
608 quoted = true
609 } else {
610 buf = append(buf, b)
611 }
612 }
613 case *SglQuoted:
614 buf = append(buf, []byte(wp.Value)...)
615 quoted = true
616 case *DblQuoted:
617 for _, wp2 := range wp.Parts {
618 buf, _ = p.unquotedWordPart(buf, wp2, true)
619 }
620 quoted = true
621 }
622 return buf, quoted
623}
624
// doHeredocs parses the bodies of the pending heredocs which are not buried,
// that is, those declared at the current nesting level, and attaches each
// body to its redirect as Redirect.Hdoc.
//
// The lexer state is restored once all the bodies have been parsed.
func (p *Parser) doHeredocs() {
	hdocs := p.heredocs[p.buriedHdocs:]
	if len(hdocs) == 0 {
		// Nothing to do; don't even issue a read.
		return
	}
	p.rune() // consume '\n', since we know p.tok == _Newl
	old := p.quote
	// The pending heredocs are now being handled, so remove them from the list.
	p.heredocs = p.heredocs[:p.buriedHdocs]
	for i, r := range hdocs {
		if p.err != nil {
			break
		}
		p.quote = hdocBody
		if r.Op == DashHdoc {
			p.quote = hdocBodyTabs
		}
		// The stop word is the heredoc word with its quoting removed;
		// a quoted heredoc word means that the body is taken literally.
		stop, quoted := p.unquotedWordBytes(r.Word)
		p.hdocStops = append(p.hdocStops, stop)
		if i > 0 && p.r == '\n' {
			p.rune()
		}
		lastLine := p.line
		if quoted {
			r.Hdoc = p.quotedHdocWord()
		} else {
			p.next()
			r.Hdoc = p.getWord()
		}
		if r.Hdoc != nil {
			lastLine = int64(r.Hdoc.End().Line())
		}
		if lastLine < p.line {
			// TODO: It seems like this triggers more often than it
			// should. Look into it.
			l := p.lit(p.nextPos(), "")
			if r.Hdoc == nil {
				r.Hdoc = p.wordOne(l)
			} else {
				r.Hdoc.Parts = append(r.Hdoc.Parts, l)
			}
		}
		// NOTE(review): presumably the lexer sets the stop word on top of
		// the stack to nil once it finds the closing delimiter, so a
		// non-nil stop word here means EOF came first — confirm in the lexer.
		if stop := p.hdocStops[len(p.hdocStops)-1]; stop != nil {
			p.posErr(r.Pos(), "unclosed here-document '%s'", stop)
		}
		p.hdocStops = p.hdocStops[:len(p.hdocStops)-1]
	}
	p.quote = old
}
674
675func (p *Parser) got(tok token) bool {
676 if p.tok == tok {
677 p.next()
678 return true
679 }
680 return false
681}
682
683func (p *Parser) gotRsrv(val string) (Pos, bool) {
684 pos := p.pos
685 if p.tok == _LitWord && p.val == val {
686 p.next()
687 return pos, true
688 }
689 return pos, false
690}
691
692func (p *Parser) recoverError() bool {
693 if p.recoveredErrors < p.recoverErrorsMax {
694 p.recoveredErrors++
695 return true
696 }
697 return false
698}
699
// readableStr returns s in a form suitable for error messages:
// strings starting with a lowercase ASCII letter, such as reserved words,
// are quoted, whereas tokens like & or } are left as they are.
func readableStr(s string) string {
	if s == "" {
		return s
	}
	if first := s[0]; first < 'a' || first > 'z' {
		return s
	}
	return strconv.Quote(s)
}
707
708func (p *Parser) followErr(pos Pos, left, right string) {
709 leftStr := readableStr(left)
710 p.posErr(pos, "%s must be followed by %s", leftStr, right)
711}
712
// followErrExp reports an error at pos stating that left
// must be followed by an expression.
func (p *Parser) followErrExp(pos Pos, left string) {
	p.followErr(pos, left, "an expression")
}
716
717func (p *Parser) follow(lpos Pos, left string, tok token) {
718 if !p.got(tok) {
719 p.followErr(lpos, left, tok.String())
720 }
721}
722
723func (p *Parser) followRsrv(lpos Pos, left, val string) Pos {
724 pos, ok := p.gotRsrv(val)
725 if !ok {
726 if p.recoverError() {
727 return recoveredPos
728 }
729 p.followErr(lpos, left, fmt.Sprintf("%q", val))
730 }
731 return pos
732}
733
734func (p *Parser) followStmts(left string, lpos Pos, stops ...string) ([]*Stmt, []Comment) {
735 if p.got(semicolon) {
736 return nil, nil
737 }
738 newLine := p.got(_Newl)
739 stmts, last := p.stmtList(stops...)
740 if len(stmts) < 1 && !newLine {
741 if p.recoverError() {
742 return []*Stmt{{Position: recoveredPos}}, nil
743 }
744 p.followErr(lpos, left, "a statement list")
745 }
746 return stmts, last
747}
748
749func (p *Parser) followWordTok(tok token, pos Pos) *Word {
750 w := p.getWord()
751 if w == nil {
752 if p.recoverError() {
753 return p.wordOne(&Lit{ValuePos: recoveredPos})
754 }
755 p.followErr(pos, tok.String(), "a word")
756 }
757 return w
758}
759
760func (p *Parser) stmtEnd(n Node, start, end string) Pos {
761 pos, ok := p.gotRsrv(end)
762 if !ok {
763 if p.recoverError() {
764 return recoveredPos
765 }
766 p.posErr(n.Pos(), "%s statement must end with %q", start, end)
767 }
768 return pos
769}
770
// quoteErr reports an error at lpos stating that the current token
// was reached without closing the given quote.
func (p *Parser) quoteErr(lpos Pos, quote token) {
	p.posErr(lpos, "reached %s without closing quote %s",
		p.tok.String(), quote)
}
775
// matchingErr reports an error at lpos stating that the current token
// was reached without matching the opening left with the closing right.
func (p *Parser) matchingErr(lpos Pos, left, right any) {
	p.posErr(lpos, "reached %s without matching %s with %s",
		p.tok.String(), left, right)
}
780
781func (p *Parser) matched(lpos Pos, left, right token) Pos {
782 pos := p.pos
783 if !p.got(right) {
784 if p.recoverError() {
785 return recoveredPos
786 }
787 p.matchingErr(lpos, left, right)
788 }
789 return pos
790}
791
792func (p *Parser) errPass(err error) {
793 if p.err == nil {
794 p.err = err
795 p.bsp = uint(len(p.bs)) + 1
796 p.r = utf8.RuneSelf
797 p.w = 1
798 p.tok = _EOF
799 }
800}
801
802// IsIncomplete reports whether a Parser error could have been avoided with
803// extra input bytes. For example, if an [io.EOF] was encountered while there was
804// an unclosed quote or parenthesis.
805func IsIncomplete(err error) bool {
806 perr, ok := err.(ParseError)
807 return ok && perr.Incomplete
808}
809
// IsKeyword returns true if the given word is part of the language keywords.
//
// The match is exact and case-sensitive.
func IsKeyword(word string) bool {
	// This list has been copied from the bash 5.1 source code, file y.tab.c +4460
	switch word {
	case
		"!",
		"[[", // only if COND_COMMAND is defined
		"]]", // only if COND_COMMAND is defined
		"case",
		"coproc", // only if COPROCESS_SUPPORT is defined
		"do",
		"done",
		"elif",
		"else",
		"esac",
		"fi",
		"for",
		"function",
		"if",
		"in",
		"select", // only if SELECT_COMMAND is defined
		"then",
		"time", // only if COMMAND_TIMING is defined
		"until",
		"while",
		"{",
		"}":
		return true
	}
	return false
}
840
// ParseError represents an error found when parsing a source file, from which
// the parser cannot recover.
type ParseError struct {
	// Filename is the name of the file being parsed, which may be empty.
	Filename string
	// Pos is the position which the error refers to.
	Pos Pos
	// Text is the error message, without any position information.
	Text string

	// Incomplete is true if the error was found at the end of the input
	// while the parser still required more bytes; see [IsIncomplete].
	Incomplete bool
}
850
851func (e ParseError) Error() string {
852 if e.Filename == "" {
853 return fmt.Sprintf("%s: %s", e.Pos.String(), e.Text)
854 }
855 return fmt.Sprintf("%s:%s: %s", e.Filename, e.Pos.String(), e.Text)
856}
857
// LangError is returned when the parser encounters code that is only valid in
// other shell language variants. The error includes what feature is not present
// in the current language variant, and what languages support it.
type LangError struct {
	// Filename is the name of the file being parsed, which may be empty.
	Filename string
	// Pos is the position which the error refers to.
	Pos Pos

	// Feature briefly describes which language feature caused the error.
	Feature string
	// Langs lists some of the language variants which support the feature.
	Langs []LangVariant
	// LangUsed is the language variant used which led to the error.
	LangUsed LangVariant
}
872
873func (e LangError) Error() string {
874 var sb strings.Builder
875 if e.Filename != "" {
876 sb.WriteString(e.Filename + ":")
877 }
878 sb.WriteString(e.Pos.String() + ": ")
879 sb.WriteString(e.Feature)
880 if strings.HasSuffix(e.Feature, "s") {
881 sb.WriteString(" are a ")
882 } else {
883 sb.WriteString(" is a ")
884 }
885 for i, lang := range e.Langs {
886 if i > 0 {
887 sb.WriteString("/")
888 }
889 sb.WriteString(lang.String())
890 }
891 sb.WriteString(" feature; tried parsing as ")
892 sb.WriteString(e.LangUsed.String())
893 return sb.String()
894}
895
896func (p *Parser) posErr(pos Pos, format string, a ...any) {
897 p.errPass(ParseError{
898 Filename: p.f.Name,
899 Pos: pos,
900 Text: fmt.Sprintf(format, a...),
901 Incomplete: p.tok == _EOF && p.Incomplete(),
902 })
903}
904
// curErr is like posErr, using the position of the current token.
func (p *Parser) curErr(format string, a ...any) {
	p.posErr(p.pos, format, a...)
}
908
909func (p *Parser) langErr(pos Pos, feature string, langs ...LangVariant) {
910 p.errPass(LangError{
911 Filename: p.f.Name,
912 Pos: pos,
913 Feature: feature,
914 Langs: langs,
915 LangUsed: p.lang,
916 })
917}
918
// stmts parses statements one at a time until the end of the input, calling
// fn with each of them. Parsing also stops when fn returns false, when one of
// the stop words is found as the current literal word, or when the current
// token closes the construct being parsed, such as a subshell, a backquoted
// command substitution, or a case clause item.
func (p *Parser) stmts(fn func(*Stmt) bool, stops ...string) {
	// gotEnd records whether the previous statement ended with a
	// separator; it starts as true since no statement precedes the first.
	gotEnd := true
loop:
	for p.tok != _EOF {
		newLine := p.got(_Newl)
		switch p.tok {
		case _LitWord:
			for _, stop := range stops {
				if p.val == stop {
					break loop
				}
			}
		case rightParen:
			if p.quote == subCmd {
				break loop
			}
		case bckQuote:
			if p.backquoteEnd() {
				break loop
			}
		case dblSemicolon, semiAnd, dblSemiAnd, semiOr:
			if p.quote == switchCase {
				break loop
			}
			p.curErr("%s can only be used in a case clause", p.tok)
		}
		if !newLine && !gotEnd {
			p.curErr("statements must be separated by &, ; or a newline")
		}
		// Reporting an error above, or skipping a trailing newline,
		// may have left us at the end of the input.
		if p.tok == _EOF {
			break
		}
		// Count the statement as open while it is being parsed,
		// which Parser.Incomplete relies on.
		p.openNodes++
		s := p.getStmt(true, false, false)
		p.openNodes--
		if s == nil {
			p.invalidStmtStart()
			break
		}
		gotEnd = s.Semicolon.IsValid()
		if !fn(s) {
			break
		}
	}
}
964
// stmtList parses a list of statements up to any of the given stop words,
// as per Parser.stmts. It returns the statements, along with the accumulated
// comments which follow the last statement, which are removed from p.accComs.
func (p *Parser) stmtList(stops ...string) ([]*Stmt, []Comment) {
	var stmts []*Stmt
	var last []Comment
	fn := func(s *Stmt) bool {
		stmts = append(stmts, s)
		return true
	}
	p.stmts(fn, stops...)
	// By default, all of the accumulated comments belong to this list.
	split := len(p.accComs)
	if p.tok == _LitWord && (p.val == "elif" || p.val == "else" || p.val == "fi") {
		// Split the comments, so that any aligned with an opening token
		// get attached to it. For example:
		//
		//     if foo; then
		//         # inside the body
		//     # document the else
		//     else
		//     fi
		// TODO(mvdan): look into deduplicating this with similar logic
		// in caseItems.
		//
		// Walk the comments backwards, moving the split point for as long
		// as they share a column with the current token.
		for i, c := range slices.Backward(p.accComs) {
			if c.Pos().Col() != p.pos.Col() {
				break
			}
			split = i
		}
	}
	if split > 0 { // keep last nil if empty
		last = p.accComs[:split]
	}
	p.accComs = p.accComs[split:]
	return stmts, last
}
998
999func (p *Parser) invalidStmtStart() {
1000 switch p.tok {
1001 case semicolon, and, or, andAnd, orOr:
1002 p.curErr("%s can only immediately follow a statement", p.tok)
1003 case rightParen:
1004 p.curErr("%s can only be used to close a subshell", p.tok)
1005 default:
1006 p.curErr("%s is not a valid start for a statement", p.tok)
1007 }
1008}
1009
1010func (p *Parser) getWord() *Word {
1011 if w := p.wordAnyNumber(); len(w.Parts) > 0 && p.err == nil {
1012 return w
1013 }
1014 return nil
1015}
1016
1017func (p *Parser) getLit() *Lit {
1018 switch p.tok {
1019 case _Lit, _LitWord, _LitRedir:
1020 l := p.lit(p.pos, p.val)
1021 p.next()
1022 return l
1023 }
1024 return nil
1025}
1026
1027func (p *Parser) wordParts(wps []WordPart) []WordPart {
1028 for {
1029 p.openNodes++
1030 n := p.wordPart()
1031 p.openNodes--
1032 if n == nil {
1033 if len(wps) == 0 {
1034 return nil // normalize empty lists into nil
1035 }
1036 return wps
1037 }
1038 wps = append(wps, n)
1039 if p.spaced {
1040 return wps
1041 }
1042 }
1043}
1044
1045func (p *Parser) ensureNoNested() {
1046 if p.forbidNested {
1047 p.curErr("expansions not allowed in heredoc words")
1048 }
1049}
1050
// wordPart parses a single word part starting at the current token.
// It returns nil if the current token does not begin a word part, as well
// as in a few cases where a word part must end, such as a closing quote.
//
// Depending on the current token, the result is a literal, a parameter
// expansion, a command substitution, an arithmetic expansion, a process
// substitution, a quoted string, or an extended glob.
func (p *Parser) wordPart() WordPart {
	switch p.tok {
	case _Lit, _LitWord, _LitRedir:
		l := p.lit(p.pos, p.val)
		p.next()
		return l
	case dollBrace:
		p.ensureNoNested()
		// The rune following "${" tells apart the mksh command
		// substitution forms from regular parameter expansions.
		switch p.r {
		case '|':
			if p.lang != LangMirBSDKorn {
				p.langErr(p.pos, `"${|stmts;}"`, LangMirBSDKorn)
			}
			fallthrough
		case ' ', '\t', '\n':
			if p.lang != LangMirBSDKorn {
				p.langErr(p.pos, `"${ stmts;}"`, LangMirBSDKorn)
			}
			cs := &CmdSubst{
				Left:     p.pos,
				TempFile: p.r != '|',
				ReplyVar: p.r == '|',
			}
			old := p.preNested(subCmd)
			p.rune() // don't tokenize '|'
			p.next()
			cs.Stmts, cs.Last = p.stmtList("}")
			p.postNested(old)
			pos, ok := p.gotRsrv("}")
			if !ok {
				p.matchingErr(cs.Left, "${", "}")
			}
			cs.Right = pos
			return cs
		default:
			return p.paramExp()
		}
	case dollDblParen, dollBrack:
		p.ensureNoNested()
		left := p.tok
		ar := &ArithmExp{Left: p.pos, Bracket: left == dollBrack}
		var old saveState
		if ar.Bracket {
			old = p.preNested(arithmExprBrack)
		} else {
			old = p.preNested(arithmExpr)
		}
		p.next()
		// A leading hash marks an unsigned expression, which is only valid in mksh.
		if p.got(hash) {
			if p.lang != LangMirBSDKorn {
				p.langErr(ar.Pos(), "unsigned expressions", LangMirBSDKorn)
			}
			ar.Unsigned = true
		}
		ar.X = p.followArithm(left, ar.Left)
		if ar.Bracket {
			if p.tok != rightBrack {
				p.arithmMatchingErr(ar.Left, dollBrack, rightBrack)
			}
			p.postNested(old)
			ar.Right = p.pos
			p.next()
		} else {
			ar.Right = p.arithmEnd(dollDblParen, ar.Left, old)
		}
		return ar
	case dollParen:
		p.ensureNoNested()
		cs := &CmdSubst{Left: p.pos}
		old := p.preNested(subCmd)
		p.next()
		cs.Stmts, cs.Last = p.stmtList()
		p.postNested(old)
		cs.Right = p.matched(cs.Left, leftParen, rightParen)
		return cs
	case dollar:
		// A short parameter expansion like $foo or $1, or else a literal dollar.
		r := p.r
		switch {
		case singleRuneParam(r):
			p.tok, p.val = _LitWord, string(r)
			p.rune()
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z',
			'0' <= r && r <= '9', r == '_', r == '\\':
			p.advanceNameCont(r)
		default:
			l := p.lit(p.pos, "$")
			p.next()
			return l
		}
		p.ensureNoNested()
		pe := &ParamExp{Dollar: p.pos, Short: true}
		// The parameter name starts one column after the dollar.
		p.pos = posAddCol(p.pos, 1)
		pe.Param = p.getLit()
		if pe.Param != nil && pe.Param.Value == "" {
			l := p.lit(pe.Dollar, "$")
			// e.g. "$\\\"" within double quotes, so we must
			// keep the rest of the literal characters.
			l.ValueEnd = posAddCol(l.ValuePos, 1)
			return l
		}
		return pe
	case cmdIn, cmdOut:
		p.ensureNoNested()
		ps := &ProcSubst{Op: ProcOperator(p.tok), OpPos: p.pos}
		old := p.preNested(subCmd)
		p.next()
		ps.Stmts, ps.Last = p.stmtList()
		p.postNested(old)
		ps.Rparen = p.matched(ps.OpPos, token(ps.Op), rightParen)
		return ps
	case sglQuote, dollSglQuote:
		sq := &SglQuoted{Left: p.pos, Dollar: p.tok == dollSglQuote}
		// Read the quoted value rune by rune, rather than via tokens.
		r := p.r
		for p.newLit(r); ; r = p.rune() {
			switch r {
			case '\\':
				// Only $'...' strings support backslash escapes,
				// so skip the escaped rune in that case.
				if sq.Dollar {
					p.rune()
				}
			case '\'':
				sq.Right = p.nextPos()
				sq.Value = p.endLit()

				p.rune()
				p.next()
				return sq
			case escNewl:
				p.litBs = append(p.litBs, '\\', '\n')
			case utf8.RuneSelf:
				// The input ended before the closing quote.
				p.tok = _EOF
				if p.recoverError() {
					sq.Right = recoveredPos
					return sq
				}
				p.quoteErr(sq.Pos(), sglQuote)
				return nil
			}
		}
	case dblQuote, dollDblQuote:
		if p.quote == dblQuotes {
			// p.tok == dblQuote, as "foo$" puts $ in the lit
			return nil
		}
		return p.dblQuoted()
	case bckQuote:
		if p.backquoteEnd() {
			return nil
		}
		p.ensureNoNested()
		cs := &CmdSubst{Left: p.pos, Backquotes: true}
		old := p.preNested(subCmdBckquo)
		p.openBquotes++

		// The lexer didn't call p.rune for us, so that it could have
		// the right p.openBquotes to properly handle backslashes.
		p.rune()

		p.next()
		cs.Stmts, cs.Last = p.stmtList()
		if p.tok == bckQuote && p.lastBquoteEsc < p.openBquotes-1 {
			// e.g. found ` before the nested backquote \` was closed.
			p.tok = _EOF
			p.quoteErr(cs.Pos(), bckQuote)
		}
		p.postNested(old)
		p.openBquotes--
		cs.Right = p.pos

		// Like above, the lexer didn't call p.rune for us.
		p.rune()
		if !p.got(bckQuote) {
			if p.recoverError() {
				cs.Right = recoveredPos
			} else {
				p.quoteErr(cs.Pos(), bckQuote)
			}
		}
		return cs
	case globQuest, globStar, globPlus, globAt, globExcl:
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "extended globs", LangBash, LangMirBSDKorn)
		}
		eg := &ExtGlob{Op: GlobOperator(p.tok), OpPos: p.pos}
		// Read the pattern rune by rune, keeping track of nested
		// parentheses to find the one which closes the extended glob.
		lparens := 1
		r := p.r
	globLoop:
		for p.newLit(r); ; r = p.rune() {
			switch r {
			case utf8.RuneSelf:
				break globLoop
			case '(':
				lparens++
			case ')':
				if lparens--; lparens == 0 {
					break globLoop
				}
			}
		}
		// The pattern starts after the two-byte operator, such as "@(".
		eg.Pattern = p.lit(posAddCol(eg.OpPos, 2), p.endLit())
		p.rune()
		p.next()
		if lparens != 0 {
			p.matchingErr(eg.OpPos, eg.Op, rightParen)
		}
		return eg
	default:
		return nil
	}
}
1260
1261func (p *Parser) dblQuoted() *DblQuoted {
1262 alloc := &struct {
1263 quoted DblQuoted
1264 parts [1]WordPart
1265 }{
1266 quoted: DblQuoted{Left: p.pos, Dollar: p.tok == dollDblQuote},
1267 }
1268 q := &alloc.quoted
1269 old := p.quote
1270 p.quote = dblQuotes
1271 p.next()
1272 q.Parts = p.wordParts(alloc.parts[:0])
1273 p.quote = old
1274 q.Right = p.pos
1275 if !p.got(dblQuote) {
1276 if p.recoverError() {
1277 q.Right = recoveredPos
1278 } else {
1279 p.quoteErr(q.Pos(), dblQuote)
1280 }
1281 }
1282 return q
1283}
1284
// singleRuneParam reports whether r is one of the runes that form a complete
// parameter name on their own: an ASCII digit, or one of @ * # $ ? ! -.
func singleRuneParam(r rune) bool {
	if '0' <= r && r <= '9' {
		return true
	}
	return r == '@' || r == '*' || r == '#' || r == '$' ||
		r == '?' || r == '!' || r == '-'
}
1293
// paramExp parses a braced parameter expansion such as ${foo}, ${#foo},
// ${foo[i]}, ${foo:-bar}, ${foo:1:2} or ${foo/a/b}, and returns its node.
//
// The node's Dollar position is the parser's position on entry; presumably
// the caller invokes this with the parser sitting on the opening "${" token.
// The quote state that was active on entry is restored before returning.
func (p *Parser) paramExp() *ParamExp {
	pe := &ParamExp{Dollar: p.pos}
	old := p.quote
	p.quote = paramExpName
	if p.r == '#' {
		// A leading '#' is turned into a hash token by hand,
		// rather than by asking the lexer for the next token.
		p.tok = hash
		p.pos = p.nextPos()
		p.rune()
	} else {
		p.next()
	}
	// Prefix operators. Each one only counts as an operator if
	// paramNameOp accepts the rune that follows; otherwise the token is
	// left in place to be handled as the parameter name below.
	switch p.tok {
	case hash:
		if paramNameOp(p.r) {
			pe.Length = true
			p.next()
		}
	case perc:
		if p.lang != LangMirBSDKorn {
			p.langErr(pe.Pos(), `"${%foo}"`, LangMirBSDKorn)
		}
		if paramNameOp(p.r) {
			pe.Width = true
			p.next()
		}
	case exclMark:
		if paramNameOp(p.r) {
			pe.Excl = true
			p.next()
		}
	}
	// Remember the parameter token, as it is used in an error message
	// once the parser has moved past it.
	op := p.tok
	// The parameter name itself: a name, a number, or a special parameter.
	switch p.tok {
	case _Lit, _LitWord:
		if !numberLiteral(p.val) && !ValidName(p.val) {
			p.curErr("invalid parameter name")
		}
		pe.Param = p.lit(p.pos, p.val)
		p.next()
	case quest, minus:
		if pe.Length && p.r != '}' {
			// actually ${#-default}, not ${#-}; fix the ambiguity
			pe.Length = false
			pe.Param = p.lit(posAddCol(p.pos, -1), "#")
			pe.Param.ValueEnd = p.pos
			break
		}
		fallthrough
	case at, star, hash, exclMark, dollar:
		pe.Param = p.lit(p.pos, p.tok.String())
		p.next()
	default:
		p.curErr("parameter expansion requires a literal")
	}
	// What follows the name: the closing brace, an index, or an operator.
	switch p.tok {
	case _Lit, _LitWord:
		p.curErr("%s cannot be followed by a word", op)
	case rightBrace:
		if pe.Excl && p.lang == LangPOSIX {
			p.langErr(pe.Pos(), `"${!foo}"`, LangBash, LangMirBSDKorn)
		}
		pe.Rbrace = p.pos
		p.quote = old
		p.next()
		return pe
	case leftBrack:
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "arrays", LangBash, LangMirBSDKorn)
		}
		if !ValidName(pe.Param.Value) {
			p.curErr("cannot index a special parameter name")
		}
		pe.Index = p.eitherIndex()
	}
	// An index may be directly followed by the closing brace.
	if p.tok == rightBrace {
		pe.Rbrace = p.pos
		p.quote = old
		p.next()
		return pe
	}
	// Anything else at this point is an operator, which cannot be mixed
	// with the length or width prefix operators.
	if p.tok != _EOF && (pe.Length || pe.Width) {
		p.curErr("cannot combine multiple parameter expansion operators")
	}
	switch p.tok {
	case slash, dblSlash:
		// pattern search and replace
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "search and replace", LangBash, LangMirBSDKorn)
		}
		pe.Repl = &Replace{All: p.tok == dblSlash}
		p.quote = paramExpRepl
		p.next()
		pe.Repl.Orig = p.getWord()
		p.quote = paramExpExp
		// The replacement word is optional, as in ${foo/pattern}.
		if p.got(slash) {
			pe.Repl.With = p.getWord()
		}
	case colon:
		// slicing
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "slicing", LangBash, LangMirBSDKorn)
		}
		pe.Slice = &Slice{}
		colonPos := p.pos
		p.quote = paramExpSlice
		// The offset is skipped if a second colon follows directly.
		if p.next(); p.tok != colon {
			pe.Slice.Offset = p.followArithm(colon, colonPos)
		}
		colonPos = p.pos
		if p.got(colon) {
			pe.Slice.Length = p.followArithm(colon, colonPos)
		}
		// Need to use a different matched style so arithm errors
		// get reported correctly
		p.quote = old
		pe.Rbrace = p.pos
		p.matchedArithm(pe.Dollar, dollBrace, rightBrace)
		return pe
	case caret, dblCaret, comma, dblComma:
		// upper/lower case
		if !p.lang.isBash() {
			p.langErr(p.pos, "this expansion operator", LangBash)
		}
		pe.Exp = p.paramExpExp()
	case at, star:
		switch {
		case p.tok == at && p.lang == LangPOSIX:
			p.langErr(p.pos, "this expansion operator", LangBash, LangMirBSDKorn)
		case p.tok == star && !pe.Excl:
			p.curErr("not a valid parameter expansion operator: %v", p.tok)
		case pe.Excl && p.r == '}':
			// ${!prefix@} or ${!prefix*}: the operator is recorded in Names.
			if !p.lang.isBash() {
				p.langErr(pe.Pos(), fmt.Sprintf(`"${!foo%s}"`, p.tok), LangBash)
			}
			pe.Names = ParNamesOperator(p.tok)
			p.next()
		default:
			pe.Exp = p.paramExpExp()
		}
	case plus, colPlus, minus, colMinus, quest, colQuest, assgn, colAssgn,
		perc, dblPerc, hash, dblHash:
		pe.Exp = p.paramExpExp()
	case _EOF:
		// Nothing to parse; the missing brace is handled by p.matched below.
	default:
		p.curErr("not a valid parameter expansion operator: %v", p.tok)
	}
	p.quote = old
	pe.Rbrace = p.matched(pe.Dollar, dollBrace, rightBrace)
	return pe
}
1444
1445func (p *Parser) paramExpExp() *Expansion {
1446 op := ParExpOperator(p.tok)
1447 p.quote = paramExpExp
1448 p.next()
1449 if op == OtherParamOps {
1450 switch p.tok {
1451 case _Lit, _LitWord:
1452 default:
1453 p.curErr("@ expansion operator requires a literal")
1454 }
1455 switch p.val {
1456 case "a", "k", "u", "A", "E", "K", "L", "P", "U":
1457 if !p.lang.isBash() {
1458 p.langErr(p.pos, "this expansion operator", LangBash)
1459 }
1460 case "#":
1461 if p.lang != LangMirBSDKorn {
1462 p.langErr(p.pos, "this expansion operator", LangMirBSDKorn)
1463 }
1464 case "Q":
1465 default:
1466 p.curErr("invalid @ expansion operator %q", p.val)
1467 }
1468 }
1469 return &Expansion{Op: op, Word: p.getWord()}
1470}
1471
1472func (p *Parser) eitherIndex() ArithmExpr {
1473 old := p.quote
1474 lpos := p.pos
1475 p.quote = arithmExprBrack
1476 p.next()
1477 if p.tok == star || p.tok == at {
1478 p.tok, p.val = _LitWord, p.tok.String()
1479 }
1480 expr := p.followArithm(leftBrack, lpos)
1481 p.quote = old
1482 p.matchedArithm(lpos, leftBrack, rightBrack)
1483 return expr
1484}
1485
1486func (p *Parser) stopToken() bool {
1487 switch p.tok {
1488 case _EOF, _Newl, semicolon, and, or, andAnd, orOr, orAnd, dblSemicolon,
1489 semiAnd, dblSemiAnd, semiOr, rightParen:
1490 return true
1491 case bckQuote:
1492 return p.backquoteEnd()
1493 }
1494 return false
1495}
1496
1497func (p *Parser) backquoteEnd() bool {
1498 return p.lastBquoteEsc < p.openBquotes
1499}
1500
// ValidName returns whether val is a valid name as per the POSIX spec.
//
// A valid name is non-empty, consists only of ASCII letters, ASCII digits and
// underscores, and does not begin with a digit.
func ValidName(val string) bool {
	if len(val) == 0 {
		return false
	}
	// Every allowed character is a single ASCII byte, so any byte that is
	// part of a multi-byte or invalid sequence is rejected just the same.
	for i := 0; i < len(val); i++ {
		c := val[i]
		isLetter := ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'
		isDigit := '0' <= c && c <= '9'
		if isLetter || (isDigit && i > 0) {
			continue
		}
		return false
	}
	return true
}
1518
// numberLiteral reports whether val consists solely of ASCII digits.
// Note that the empty string is reported as a number literal too.
func numberLiteral(val string) bool {
	// Digits are single ASCII bytes, so any byte outside '0'..'9' means
	// that val contains something other than a digit.
	for i := 0; i < len(val); i++ {
		if c := val[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
1527
1528func (p *Parser) hasValidIdent() bool {
1529 if p.tok != _Lit && p.tok != _LitWord {
1530 return false
1531 }
1532 if end := p.eqlOffs; end > 0 {
1533 if p.val[end-1] == '+' && p.lang != LangPOSIX {
1534 end-- // a+=x
1535 }
1536 if ValidName(p.val[:end]) {
1537 return true
1538 }
1539 } else if !ValidName(p.val) {
1540 return false // *[i]=x
1541 }
1542 return p.r == '[' // a[i]=x
1543}
1544
// getAssign parses an assignment starting at the current literal token.
//
// Two forms are handled. If the literal contains an equals sign
// (p.eqlOffs > 0), it is of the form name=value or name+=value. Otherwise,
// it is the name of an indexed assignment such as name[index]=value; callers
// are expected to have checked this via hasValidIdent.
//
// needEqual only matters for the indexed form: when the index is not followed
// by "=", a naked assignment is returned if needEqual is false, and an error
// is reported if it is true.
//
// The value can be a word or, when an opening parenthesis follows the equals
// sign, an array expression. The result is nil if "=" is missing after an
// index with more input following, or if an array element is invalid.
func (p *Parser) getAssign(needEqual bool) *Assign {
	as := &Assign{}
	if p.eqlOffs > 0 { // foo=bar
		nameEnd := p.eqlOffs
		if p.lang != LangPOSIX && p.val[p.eqlOffs-1] == '+' {
			// a+=b
			as.Append = true
			nameEnd--
		}
		as.Name = p.lit(p.pos, p.val[:nameEnd])
		// since we're not using the entire p.val
		as.Name.ValueEnd = posAddCol(as.Name.ValuePos, nameEnd)
		// Whatever follows the equals sign in the same literal
		// is the start of the value.
		left := p.lit(posAddCol(p.pos, 1), p.val[p.eqlOffs+1:])
		if left.Value != "" {
			left.ValuePos = posAddCol(left.ValuePos, p.eqlOffs)
			as.Value = p.wordOne(left)
		}
		p.next()
	} else { // foo[x]=bar
		as.Name = p.lit(p.pos, p.val)
		// hasValidIdent already checks p.r is '['
		p.rune()
		p.pos = posAddCol(p.pos, 1)
		as.Index = p.eitherIndex()
		if p.spaced || p.stopToken() {
			// Nothing is attached to the closing bracket.
			if needEqual {
				p.followErr(as.Pos(), "a[b]", "=")
			} else {
				as.Naked = true
				return as
			}
		}
		// The token after the index must begin with "=" or "+=";
		// consume those characters from the token's value by hand.
		if len(p.val) > 0 && p.val[0] == '+' {
			as.Append = true
			p.val = p.val[1:]
			p.pos = posAddCol(p.pos, 1)
		}
		if len(p.val) < 1 || p.val[0] != '=' {
			if as.Append {
				p.followErr(as.Pos(), "a[b]+", "=")
			} else {
				p.followErr(as.Pos(), "a[b]", "=")
			}
			return nil
		}
		p.pos = posAddCol(p.pos, 1)
		p.val = p.val[1:]
		// If nothing is left in this token, move on to the next one;
		// otherwise the remainder is the start of the value.
		if p.val == "" {
			p.next()
		}
	}
	// An assignment with no further value, such as "foo=" or "foo=bar".
	if p.spaced || p.stopToken() {
		return as
	}
	if as.Value == nil && p.tok == leftParen {
		// Array expression, such as foo=(a b c) or foo=([x]=a [y]=b).
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "arrays", LangBash, LangMirBSDKorn)
		}
		if as.Index != nil {
			p.curErr("arrays cannot be nested")
		}
		as.Array = &ArrayExpr{Lparen: p.pos}
		// Only Bash variants switch to the arrayElems quote state;
		// other variants keep the current one.
		newQuote := p.quote
		if p.lang.isBash() {
			newQuote = arrayElems
		}
		old := p.preNested(newQuote)
		p.next()
		p.got(_Newl)
		for p.tok != _EOF && p.tok != rightParen {
			ae := &ArrayElem{}
			// Comments accumulated so far belong to this element.
			ae.Comments, p.accComs = p.accComs, nil
			if p.tok == leftBrack {
				left := p.pos
				ae.Index = p.eitherIndex()
				p.follow(left, `"[x]"`, assgn)
			}
			if ae.Value = p.getWord(); ae.Value == nil {
				switch p.tok {
				case leftParen:
					p.curErr("arrays cannot be nested")
					return nil
				case _Newl, rightParen, leftBrack:
					// TODO: support [index]=[
				default:
					p.curErr("array element values must be words")
					return nil
				}
			}
			// A comment on the same line as the end of the element
			// is attached to it as well.
			if len(p.accComs) > 0 {
				c := p.accComs[0]
				if c.Pos().Line() == ae.End().Line() {
					ae.Comments = append(ae.Comments, c)
					p.accComs = p.accComs[1:]
				}
			}
			as.Array.Elems = append(as.Array.Elems, ae)
			p.got(_Newl)
		}
		// Remaining comments sit between the last element and the
		// closing parenthesis.
		as.Array.Last, p.accComs = p.accComs, nil
		p.postNested(old)
		as.Array.Rparen = p.matched(as.Array.Lparen, leftParen, rightParen)
	} else if w := p.getWord(); w != nil {
		if as.Value == nil {
			as.Value = w
		} else {
			// The value began inside the name=... literal;
			// the rest of the word's parts continue it.
			as.Value.Parts = append(as.Value.Parts, w.Parts...)
		}
	}
	return as
}
1656
1657func (p *Parser) peekRedir() bool {
1658 switch p.tok {
1659 case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut,
1660 hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir:
1661 return true
1662 }
1663 return false
1664}
1665
1666func (p *Parser) doRedirect(s *Stmt) {
1667 var r *Redirect
1668 if s.Redirs == nil {
1669 var alloc struct {
1670 redirs [4]*Redirect
1671 redir Redirect
1672 }
1673 s.Redirs = alloc.redirs[:0]
1674 r = &alloc.redir
1675 s.Redirs = append(s.Redirs, r)
1676 } else {
1677 r = &Redirect{}
1678 s.Redirs = append(s.Redirs, r)
1679 }
1680 r.N = p.getLit()
1681 if !p.lang.isBash() && r.N != nil && r.N.Value[0] == '{' {
1682 p.langErr(r.N.Pos(), "{varname} redirects", LangBash)
1683 }
1684 if p.lang == LangPOSIX && (p.tok == rdrAll || p.tok == appAll) {
1685 p.langErr(p.pos, "&> redirects", LangBash, LangMirBSDKorn)
1686 }
1687 r.Op, r.OpPos = RedirOperator(p.tok), p.pos
1688 p.next()
1689 switch r.Op {
1690 case Hdoc, DashHdoc:
1691 old := p.quote
1692 p.quote, p.forbidNested = hdocWord, true
1693 p.heredocs = append(p.heredocs, r)
1694 r.Word = p.followWordTok(token(r.Op), r.OpPos)
1695 p.quote, p.forbidNested = old, false
1696 if p.tok == _Newl {
1697 if len(p.accComs) > 0 {
1698 c := p.accComs[0]
1699 if c.Pos().Line() == s.End().Line() {
1700 s.Comments = append(s.Comments, c)
1701 p.accComs = p.accComs[1:]
1702 }
1703 }
1704 p.doHeredocs()
1705 }
1706 case WordHdoc:
1707 if p.lang == LangPOSIX {
1708 p.langErr(r.OpPos, "herestrings", LangBash, LangMirBSDKorn)
1709 }
1710 fallthrough
1711 default:
1712 r.Word = p.followWordTok(token(r.Op), r.OpPos)
1713 }
1714}
1715
// getStmt parses a full statement at the current token: an optional "!"
// negation, a pipeline, and any number of && and || operators chaining
// further statements.
//
// If readEnd is true, a trailing ";", "&" or "|&" is consumed and recorded
// on the statement. If binCmd is true, the statement is the right-hand side
// of a && or || operator, so parsing stops before any further such operator.
// A comment on the statement's last line is attached to it, unless binCmd or
// fnBody is true.
//
// It returns nil if no statement could be parsed or if an error occurred.
func (p *Parser) getStmt(readEnd, binCmd, fnBody bool) *Stmt {
	pos, ok := p.gotRsrv("!")
	s := &Stmt{Position: pos}
	if ok {
		s.Negated = true
		if p.stopToken() {
			p.posErr(s.Pos(), `"!" cannot form a statement alone`)
		}
		if _, ok := p.gotRsrv("!"); ok {
			p.posErr(s.Pos(), `cannot negate a command multiple times`)
		}
	}
	if s = p.gotStmtPipe(s, false); s == nil || p.err != nil {
		return nil
	}
	// instead of using recursion, iterate manually
	for p.tok == andAnd || p.tok == orOr {
		if binCmd {
			// left associativity: in a list of BinaryCmds, the
			// right recursion should only read a single element
			return s
		}
		b := &BinaryCmd{
			OpPos: p.pos,
			Op:    BinCmdOperator(p.tok),
			X:     s,
		}
		p.next()
		p.got(_Newl)
		b.Y = p.getStmt(false, true, false)
		if b.Y == nil || p.err != nil {
			if p.recoverError() {
				b.Y = &Stmt{Position: recoveredPos}
			} else {
				p.followErr(b.OpPos, b.Op.String(), "a statement")
				return nil
			}
		}
		// Wrap the binary command in a new statement which starts where
		// the left-hand side did, and which takes over its comments.
		s = &Stmt{Position: s.Position}
		s.Cmd = b
		s.Comments, b.X.Comments = b.X.Comments, nil
	}
	if readEnd {
		// The position of the terminator is stored in Semicolon
		// for all three kinds of terminators.
		switch p.tok {
		case semicolon:
			s.Semicolon = p.pos
			p.next()
		case and:
			s.Semicolon = p.pos
			p.next()
			s.Background = true
		case orAnd:
			s.Semicolon = p.pos
			p.next()
			s.Coprocess = true
		}
	}
	if len(p.accComs) > 0 && !binCmd && !fnBody {
		// Attach a comment which is on the same line as the statement's end.
		c := p.accComs[0]
		if c.Pos().Line() == s.End().Line() {
			s.Comments = append(s.Comments, c)
			p.accComs = p.accComs[1:]
		}
	}
	return s
}
1782
// gotStmtPipe parses a command at the current token into s, followed by any
// redirections, and then any pipeline operators ("|" and "|&") with the
// commands on their right-hand sides.
//
// The comments accumulated so far are moved onto s. If binCmd is true, the
// command is the right-hand side of a pipeline operator, so parsing stops
// before any further such operator.
//
// It returns nil if the current token cannot start a command and s has no
// redirections, or if the current token is a backquote that closes a command
// substitution. Otherwise it returns the resulting statement, which is a new
// statement wrapping a BinaryCmd if any pipeline operator was parsed.
func (p *Parser) gotStmtPipe(s *Stmt, binCmd bool) *Stmt {
	s.Comments, p.accComs = p.accComs, nil
	switch p.tok {
	case _LitWord:
		// Reserved words and builtins with their own syntax nodes.
		// Those which are gated on the language variant leave s.Cmd
		// unset when unsupported, so that the word is then parsed
		// like any other command name further below.
		switch p.val {
		case "{":
			p.block(s)
		case "if":
			p.ifClause(s)
		case "while", "until":
			p.whileClause(s, p.val == "until")
		case "for":
			p.forClause(s)
		case "case":
			p.caseClause(s)
		case "}":
			p.curErr(`%q can only be used to close a block`, p.val)
		case "then":
			p.curErr(`%q can only be used in an if`, p.val)
		case "elif":
			p.curErr(`%q can only be used in an if`, p.val)
		case "fi":
			p.curErr(`%q can only be used to end an if`, p.val)
		case "do":
			p.curErr(`%q can only be used in a loop`, p.val)
		case "done":
			p.curErr(`%q can only be used to end a loop`, p.val)
		case "esac":
			p.curErr(`%q can only be used to end a case`, p.val)
		case "!":
			if !s.Negated {
				p.curErr(`"!" can only be used in full statements`)
				break
			}
		case "[[":
			if p.lang != LangPOSIX {
				p.testClause(s)
			}
		case "]]":
			if p.lang != LangPOSIX {
				p.curErr(`%q can only be used to close a test`, p.val)
			}
		case "let":
			if p.lang != LangPOSIX {
				p.letClause(s)
			}
		case "function":
			if p.lang != LangPOSIX {
				p.bashFuncDecl(s)
			}
		case "declare":
			if p.lang.isBash() { // Note that mksh lacks this one.
				p.declClause(s)
			}
		case "local", "export", "readonly", "typeset", "nameref":
			if p.lang != LangPOSIX {
				p.declClause(s)
			}
		case "time":
			if p.lang != LangPOSIX {
				p.timeClause(s)
			}
		case "coproc":
			if p.lang.isBash() { // Note that mksh lacks this one.
				p.coprocClause(s)
			}
		case "select":
			if p.lang != LangPOSIX {
				p.selectClause(s)
			}
		case "@test":
			if p.lang == LangBats {
				p.testDecl(s)
			}
		}
		if s.Cmd != nil {
			break
		}
		// Not a special construct: an assignment, a function
		// declaration like "foo() ...", or a regular call.
		if p.hasValidIdent() {
			p.callExpr(s, nil, true)
			break
		}
		name := p.lit(p.pos, p.val)
		if p.next(); p.got(leftParen) {
			p.follow(name.ValuePos, "foo(", rightParen)
			if p.lang == LangPOSIX && !ValidName(name.Value) {
				p.posErr(name.Pos(), "invalid func name")
			}
			p.funcDecl(s, name, name.ValuePos, true)
		} else {
			p.callExpr(s, p.wordOne(name), false)
		}
	case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut,
		hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir:
		// A statement starting with a redirection.
		p.doRedirect(s)
		p.callExpr(s, nil, false)
	case bckQuote:
		if p.backquoteEnd() {
			return nil
		}
		fallthrough
	case _Lit, dollBrace, dollDblParen, dollParen, dollar, cmdIn, cmdOut,
		sglQuote, dollSglQuote, dblQuote, dollDblQuote, dollBrack,
		globQuest, globStar, globPlus, globAt, globExcl:
		// Any other token which can start a word.
		if p.hasValidIdent() {
			p.callExpr(s, nil, true)
			break
		}
		w := p.wordAnyNumber()
		if p.got(leftParen) {
			p.posErr(w.Pos(), "invalid func name")
		}
		p.callExpr(s, w, false)
	case leftParen:
		p.subshell(s)
	case dblLeftParen:
		p.arithmExpCmd(s)
	default:
		if len(s.Redirs) == 0 {
			return nil
		}
	}
	// Redirections which follow the command.
	for p.peekRedir() {
		p.doRedirect(s)
	}
	// instead of using recursion, iterate manually
	for p.tok == or || p.tok == orAnd {
		if binCmd {
			// left associativity: in a list of BinaryCmds, the
			// right recursion should only read a single element
			return s
		}
		if p.tok == orAnd && p.lang == LangMirBSDKorn {
			// No need to check for LangPOSIX, as on that language
			// we parse |& as two tokens.
			break
		}
		b := &BinaryCmd{OpPos: p.pos, Op: BinCmdOperator(p.tok), X: s}
		p.next()
		p.got(_Newl)
		if b.Y = p.gotStmtPipe(&Stmt{Position: p.pos}, true); b.Y == nil || p.err != nil {
			if p.recoverError() {
				b.Y = &Stmt{Position: recoveredPos}
			} else {
				p.followErr(b.OpPos, b.Op.String(), "a statement")
				break
			}
		}
		// Wrap the binary command in a new statement which starts where
		// the left-hand side did, and which takes over its comments.
		s = &Stmt{Position: s.Position}
		s.Cmd = b
		s.Comments, b.X.Comments = b.X.Comments, nil
		// in "! x | y", the bang applies to the entire pipeline
		s.Negated = b.X.Negated
		b.X.Negated = false
	}
	return s
}
1940
1941func (p *Parser) subshell(s *Stmt) {
1942 sub := &Subshell{Lparen: p.pos}
1943 old := p.preNested(subCmd)
1944 p.next()
1945 sub.Stmts, sub.Last = p.stmtList()
1946 p.postNested(old)
1947 sub.Rparen = p.matched(sub.Lparen, leftParen, rightParen)
1948 s.Cmd = sub
1949}
1950
1951func (p *Parser) arithmExpCmd(s *Stmt) {
1952 ar := &ArithmCmd{Left: p.pos}
1953 old := p.preNested(arithmExprCmd)
1954 p.next()
1955 if p.got(hash) {
1956 if p.lang != LangMirBSDKorn {
1957 p.langErr(ar.Pos(), "unsigned expressions", LangMirBSDKorn)
1958 }
1959 ar.Unsigned = true
1960 }
1961 ar.X = p.followArithm(dblLeftParen, ar.Left)
1962 ar.Right = p.arithmEnd(dblLeftParen, ar.Left, old)
1963 s.Cmd = ar
1964}
1965
1966func (p *Parser) block(s *Stmt) {
1967 b := &Block{Lbrace: p.pos}
1968 p.next()
1969 b.Stmts, b.Last = p.stmtList("}")
1970 if pos, ok := p.gotRsrv("}"); ok {
1971 b.Rbrace = pos
1972 } else if p.recoverError() {
1973 b.Rbrace = recoveredPos
1974 } else {
1975 p.matchingErr(b.Lbrace, "{", "}")
1976 }
1977 s.Cmd = b
1978}
1979
1980func (p *Parser) ifClause(s *Stmt) {
1981 rootIf := &IfClause{Position: p.pos}
1982 p.next()
1983 rootIf.Cond, rootIf.CondLast = p.followStmts("if", rootIf.Position, "then")
1984 rootIf.ThenPos = p.followRsrv(rootIf.Position, "if <cond>", "then")
1985 rootIf.Then, rootIf.ThenLast = p.followStmts("then", rootIf.ThenPos, "fi", "elif", "else")
1986 curIf := rootIf
1987 for p.tok == _LitWord && p.val == "elif" {
1988 elf := &IfClause{Position: p.pos}
1989 curIf.Last = p.accComs
1990 p.accComs = nil
1991 p.next()
1992 elf.Cond, elf.CondLast = p.followStmts("elif", elf.Position, "then")
1993 elf.ThenPos = p.followRsrv(elf.Position, "elif <cond>", "then")
1994 elf.Then, elf.ThenLast = p.followStmts("then", elf.ThenPos, "fi", "elif", "else")
1995 curIf.Else = elf
1996 curIf = elf
1997 }
1998 if elsePos, ok := p.gotRsrv("else"); ok {
1999 curIf.Last = p.accComs
2000 p.accComs = nil
2001 els := &IfClause{Position: elsePos}
2002 els.Then, els.ThenLast = p.followStmts("else", els.Position, "fi")
2003 curIf.Else = els
2004 curIf = els
2005 }
2006 curIf.Last = p.accComs
2007 p.accComs = nil
2008 rootIf.FiPos = p.stmtEnd(rootIf, "if", "fi")
2009 for els := rootIf.Else; els != nil; els = els.Else {
2010 // All the nested IfClauses share the same FiPos.
2011 els.FiPos = rootIf.FiPos
2012 }
2013 s.Cmd = rootIf
2014}
2015
2016func (p *Parser) whileClause(s *Stmt, until bool) {
2017 wc := &WhileClause{WhilePos: p.pos, Until: until}
2018 rsrv := "while"
2019 rsrvCond := "while <cond>"
2020 if wc.Until {
2021 rsrv = "until"
2022 rsrvCond = "until <cond>"
2023 }
2024 p.next()
2025 wc.Cond, wc.CondLast = p.followStmts(rsrv, wc.WhilePos, "do")
2026 wc.DoPos = p.followRsrv(wc.WhilePos, rsrvCond, "do")
2027 wc.Do, wc.DoLast = p.followStmts("do", wc.DoPos, "done")
2028 wc.DonePos = p.stmtEnd(wc, rsrv, "done")
2029 s.Cmd = wc
2030}
2031
2032func (p *Parser) forClause(s *Stmt) {
2033 fc := &ForClause{ForPos: p.pos}
2034 p.next()
2035 fc.Loop = p.loop(fc.ForPos)
2036
2037 start, end := "do", "done"
2038 if pos, ok := p.gotRsrv("{"); ok {
2039 if p.lang == LangPOSIX {
2040 p.langErr(pos, "for loops with braces", LangBash, LangMirBSDKorn)
2041 }
2042 fc.DoPos = pos
2043 fc.Braces = true
2044 start, end = "{", "}"
2045 } else {
2046 fc.DoPos = p.followRsrv(fc.ForPos, "for foo [in words]", start)
2047 }
2048
2049 s.Comments = append(s.Comments, p.accComs...)
2050 p.accComs = nil
2051 fc.Do, fc.DoLast = p.followStmts(start, fc.DoPos, end)
2052 fc.DonePos = p.stmtEnd(fc, "for", end)
2053 s.Cmd = fc
2054}
2055
2056func (p *Parser) loop(fpos Pos) Loop {
2057 if !p.lang.isBash() {
2058 switch p.tok {
2059 case leftParen, dblLeftParen:
2060 p.langErr(p.pos, "c-style fors", LangBash)
2061 }
2062 }
2063 if p.tok == dblLeftParen {
2064 cl := &CStyleLoop{Lparen: p.pos}
2065 old := p.preNested(arithmExprCmd)
2066 p.next()
2067 cl.Init = p.arithmExpr(false)
2068 if !p.got(dblSemicolon) {
2069 p.follow(p.pos, "expr", semicolon)
2070 cl.Cond = p.arithmExpr(false)
2071 p.follow(p.pos, "expr", semicolon)
2072 }
2073 cl.Post = p.arithmExpr(false)
2074 cl.Rparen = p.arithmEnd(dblLeftParen, cl.Lparen, old)
2075 p.got(semicolon)
2076 p.got(_Newl)
2077 return cl
2078 }
2079 return p.wordIter("for", fpos)
2080}
2081
2082func (p *Parser) wordIter(ftok string, fpos Pos) *WordIter {
2083 wi := &WordIter{}
2084 if wi.Name = p.getLit(); wi.Name == nil {
2085 p.followErr(fpos, ftok, "a literal")
2086 }
2087 if p.got(semicolon) {
2088 p.got(_Newl)
2089 return wi
2090 }
2091 p.got(_Newl)
2092 if pos, ok := p.gotRsrv("in"); ok {
2093 wi.InPos = pos
2094 for !p.stopToken() {
2095 if w := p.getWord(); w == nil {
2096 p.curErr("word list can only contain words")
2097 } else {
2098 wi.Items = append(wi.Items, w)
2099 }
2100 }
2101 p.got(semicolon)
2102 p.got(_Newl)
2103 } else if p.tok == _LitWord && p.val == "do" {
2104 } else {
2105 p.followErr(fpos, ftok+" foo", `"in", "do", ;, or a newline`)
2106 }
2107 return wi
2108}
2109
2110func (p *Parser) selectClause(s *Stmt) {
2111 fc := &ForClause{ForPos: p.pos, Select: true}
2112 p.next()
2113 fc.Loop = p.wordIter("select", fc.ForPos)
2114 fc.DoPos = p.followRsrv(fc.ForPos, "select foo [in words]", "do")
2115 fc.Do, fc.DoLast = p.followStmts("do", fc.DoPos, "done")
2116 fc.DonePos = p.stmtEnd(fc, "select", "done")
2117 s.Cmd = fc
2118}
2119
2120func (p *Parser) caseClause(s *Stmt) {
2121 cc := &CaseClause{Case: p.pos}
2122 p.next()
2123 cc.Word = p.getWord()
2124 if cc.Word == nil {
2125 p.followErr(cc.Case, "case", "a word")
2126 }
2127 end := "esac"
2128 p.got(_Newl)
2129 if pos, ok := p.gotRsrv("{"); ok {
2130 cc.In = pos
2131 cc.Braces = true
2132 if p.lang != LangMirBSDKorn {
2133 p.langErr(cc.Pos(), `"case i {"`, LangMirBSDKorn)
2134 }
2135 end = "}"
2136 } else {
2137 cc.In = p.followRsrv(cc.Case, "case x", "in")
2138 }
2139 cc.Items = p.caseItems(end)
2140 cc.Last, p.accComs = p.accComs, nil
2141 cc.Esac = p.stmtEnd(cc, "case", end)
2142 s.Cmd = cc
2143}
2144
// caseItems parses the items of a case clause, stopping at the end of the
// input or at the reserved word stop, such as "esac".
//
// Each item consists of an optional opening parenthesis, one or more
// patterns separated by "|", a closing parenthesis, a list of statements,
// and an operator such as ";;". An item without such an operator is given
// the Break operator and ends the list.
func (p *Parser) caseItems(stop string) (items []*CaseItem) {
	p.got(_Newl)
	for p.tok != _EOF && (p.tok != _LitWord || p.val != stop) {
		ci := &CaseItem{}
		ci.Comments, p.accComs = p.accComs, nil
		// The opening parenthesis before the patterns is optional.
		p.got(leftParen)
		for p.tok != _EOF {
			if w := p.getWord(); w == nil {
				p.curErr("case patterns must consist of words")
			} else {
				ci.Patterns = append(ci.Patterns, w)
			}
			if p.tok == rightParen {
				break
			}
			if !p.got(or) {
				p.curErr("case patterns must be separated with |")
			}
		}
		// Move past the closing parenthesis with the nested quote state
		// in place, and parse the item's statements within it.
		old := p.preNested(switchCase)
		p.next()
		ci.Stmts, ci.Last = p.stmtList(stop)
		p.postNested(old)
		switch p.tok {
		case dblSemicolon, semiAnd, dblSemiAnd, semiOr:
		default:
			// No operator follows, so this must be the last item.
			ci.Op = Break
			items = append(items, ci)
			return
		}
		ci.Last = append(ci.Last, p.accComs...)
		p.accComs = nil
		ci.OpPos = p.pos
		ci.Op = CaseOperator(p.tok)
		p.next()
		p.got(_Newl)

		// Split the comments:
		//
		// case x in
		// a)
		//   foo
		//   ;;
		//   # comment for a
		// # comment for b
		// b)
		//   [...]
		//
		// Walking backwards, the trailing run of comments which start at
		// the same column as the current token is left in p.accComs, to
		// be picked up by whatever is parsed next. The comments before
		// that run are attached to this item.
		split := len(p.accComs)
		for i, c := range slices.Backward(p.accComs) {
			if c.Pos().Col() != p.pos.Col() {
				break
			}
			split = i
		}
		ci.Comments = append(ci.Comments, p.accComs[:split]...)
		p.accComs = p.accComs[split:]

		items = append(items, ci)
	}
	return
}
2206
2207func (p *Parser) testClause(s *Stmt) {
2208 tc := &TestClause{Left: p.pos}
2209 old := p.preNested(testExpr)
2210 p.next()
2211 if _, ok := p.gotRsrv("]]"); ok || p.tok == _EOF {
2212 p.posErr(tc.Left, "test clause requires at least one expression")
2213 }
2214 tc.X = p.testExpr(false)
2215 if tc.X == nil {
2216 p.followErrExp(tc.Left, "[[")
2217 }
2218 tc.Right = p.pos
2219 if _, ok := p.gotRsrv("]]"); !ok {
2220 p.matchingErr(tc.Left, "[[", "]]")
2221 }
2222 p.postNested(old)
2223 s.Cmd = tc
2224}
2225
// testExpr parses an expression inside a test clause, which may be a single
// operand or a binary expression.
//
// The pastAndOr parameter controls how the left-hand operand is obtained:
// when false, the function first calls itself with pastAndOr set to true;
// when true, the operand is parsed via testExprBase.
//
// It returns nil if no left-hand operand was found. If the operand is
// followed by "]]", a closing parenthesis or the end of the input, the
// operand is returned as is. Otherwise, a BinaryTest is built from the
// operator and the expression which follows it.
func (p *Parser) testExpr(pastAndOr bool) TestExpr {
	p.got(_Newl)
	var left TestExpr
	if pastAndOr {
		left = p.testExprBase()
	} else {
		left = p.testExpr(true)
	}
	if left == nil {
		return left
	}
	p.got(_Newl)
	// Figure out which binary operator follows, if any.
	switch p.tok {
	case andAnd, orOr:
	case _LitWord:
		if p.val == "]]" {
			return left
		}
		// Operators such as -eq are lexed as literal words;
		// replace the token with the operator's own token.
		if p.tok = token(testBinaryOp(p.val)); p.tok == illegalTok {
			p.curErr("not a valid test operator: %s", p.val)
		}
	case rdrIn, rdrOut:
		// Inside a test, < and > are binary operators, not redirections.
	case _EOF, rightParen:
		return left
	case _Lit:
		p.curErr("test operator words must consist of a single literal")
	default:
		p.curErr("not a valid test operator: %v", p.tok)
	}
	b := &BinaryTest{
		OpPos: p.pos,
		Op:    BinTestOperator(p.tok),
		X:     left,
	}
	// Save the previous quoteState, since we change it in TsReMatch.
	oldQuote := p.quote

	switch b.Op {
	case AndTest, OrTest:
		p.next()
		if b.Y = p.testExpr(false); b.Y == nil {
			p.followErrExp(b.OpPos, b.Op.String())
		}
	case TsReMatch:
		if !p.lang.isBash() {
			p.langErr(p.pos, "regex tests", LangBash)
		}
		p.rxOpenParens = 0
		p.rxFirstPart = true
		// TODO(mvdan): Using nested states within a regex will break in
		// all sorts of ways. The better fix is likely to use a stop
		// token, like we do with heredocs.
		p.quote = testExprRegexp
		fallthrough
	default:
		// All other binary operators require plain words on both sides.
		if _, ok := b.X.(*Word); !ok {
			p.posErr(b.OpPos, "expected %s, %s or %s after complex expr",
				AndTest, OrTest, "]]")
		}
		p.next()
		b.Y = p.followWordTok(token(b.Op), b.OpPos)
	}
	p.quote = oldQuote
	return b
}
2291
// testExprBase parses a single operand inside a test clause: a negation, a
// unary test such as "-f foo", a parenthesized expression, or a plain word.
//
// It returns nil at the end of the input, at a closing parenthesis, at "]]",
// or if no word could be parsed.
func (p *Parser) testExprBase() TestExpr {
	switch p.tok {
	case _EOF, rightParen:
		return nil
	case _LitWord:
		// Unary operators such as -f are lexed as literal words;
		// replace the token with the operator's own token.
		op := token(testUnaryOp(p.val))
		switch op {
		case illegalTok:
			// Not an operator; leave the token as a word.
		case tsRefVar, tsModif: // not available in mksh
			if p.lang.isBash() {
				p.tok = op
			}
		default:
			p.tok = op
		}
	}
	switch p.tok {
	case exclMark:
		u := &UnaryTest{OpPos: p.pos, Op: TsNot}
		p.next()
		if u.X = p.testExpr(false); u.X == nil {
			p.followErrExp(u.OpPos, u.Op.String())
		}
		return u
	case tsExists, tsRegFile, tsDirect, tsCharSp, tsBlckSp, tsNmPipe,
		tsSocket, tsSmbLink, tsSticky, tsGIDSet, tsUIDSet, tsGrpOwn,
		tsUsrOwn, tsModif, tsRead, tsWrite, tsExec, tsNoEmpty,
		tsFdTerm, tsEmpStr, tsNempStr, tsOptSet, tsVarSet, tsRefVar:
		// Unlike the negation above, these take a word as their operand.
		u := &UnaryTest{OpPos: p.pos, Op: UnTestOperator(p.tok)}
		p.next()
		u.X = p.followWordTok(token(u.Op), u.OpPos)
		return u
	case leftParen:
		pe := &ParenTest{Lparen: p.pos}
		p.next()
		if pe.X = p.testExpr(false); pe.X == nil {
			p.followErrExp(pe.Lparen, "(")
		}
		pe.Rparen = p.matched(pe.Lparen, leftParen, rightParen)
		return pe
	case _LitWord:
		if p.val == "]]" {
			return nil
		}
		fallthrough
	default:
		if w := p.getWord(); w != nil {
			return w
		}
		// otherwise we'd return a typed nil above
		return nil
	}
}
2345
2346func (p *Parser) declClause(s *Stmt) {
2347 ds := &DeclClause{Variant: p.lit(p.pos, p.val)}
2348 p.next()
2349 for !p.stopToken() && !p.peekRedir() {
2350 if p.hasValidIdent() {
2351 ds.Args = append(ds.Args, p.getAssign(false))
2352 } else if p.eqlOffs > 0 {
2353 p.curErr("invalid var name")
2354 } else if p.tok == _LitWord && ValidName(p.val) {
2355 ds.Args = append(ds.Args, &Assign{
2356 Naked: true,
2357 Name: p.getLit(),
2358 })
2359 } else if w := p.getWord(); w != nil {
2360 ds.Args = append(ds.Args, &Assign{
2361 Naked: true,
2362 Value: w,
2363 })
2364 } else {
2365 p.followErr(p.pos, ds.Variant.Value, "names or assignments")
2366 }
2367 }
2368 s.Cmd = ds
2369}
2370
2371func isBashCompoundCommand(tok token, val string) bool {
2372 switch tok {
2373 case leftParen, dblLeftParen:
2374 return true
2375 case _LitWord:
2376 switch val {
2377 case "{", "if", "while", "until", "for", "case", "[[",
2378 "coproc", "let", "function", "declare", "local",
2379 "export", "readonly", "typeset", "nameref":
2380 return true
2381 }
2382 }
2383 return false
2384}
2385
2386func (p *Parser) timeClause(s *Stmt) {
2387 tc := &TimeClause{Time: p.pos}
2388 p.next()
2389 if _, ok := p.gotRsrv("-p"); ok {
2390 tc.PosixFormat = true
2391 }
2392 tc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false)
2393 s.Cmd = tc
2394}
2395
2396func (p *Parser) coprocClause(s *Stmt) {
2397 cc := &CoprocClause{Coproc: p.pos}
2398 if p.next(); isBashCompoundCommand(p.tok, p.val) {
2399 // has no name
2400 cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false)
2401 s.Cmd = cc
2402 return
2403 }
2404 cc.Name = p.getWord()
2405 cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false)
2406 if cc.Stmt == nil {
2407 if cc.Name == nil {
2408 p.posErr(cc.Coproc, "coproc clause requires a command")
2409 return
2410 }
2411 // name was in fact the stmt
2412 cc.Stmt = &Stmt{Position: cc.Name.Pos()}
2413 cc.Stmt.Cmd = p.call(cc.Name)
2414 cc.Name = nil
2415 } else if cc.Name != nil {
2416 if call, ok := cc.Stmt.Cmd.(*CallExpr); ok {
2417 // name was in fact the start of a call
2418 call.Args = append([]*Word{cc.Name}, call.Args...)
2419 cc.Name = nil
2420 }
2421 }
2422 s.Cmd = cc
2423}
2424
2425func (p *Parser) letClause(s *Stmt) {
2426 lc := &LetClause{Let: p.pos}
2427 old := p.preNested(arithmExprLet)
2428 p.next()
2429 for !p.stopToken() && !p.peekRedir() {
2430 x := p.arithmExpr(true)
2431 if x == nil {
2432 break
2433 }
2434 lc.Exprs = append(lc.Exprs, x)
2435 }
2436 if len(lc.Exprs) == 0 {
2437 p.followErrExp(lc.Let, "let")
2438 }
2439 p.postNested(old)
2440 s.Cmd = lc
2441}
2442
2443func (p *Parser) bashFuncDecl(s *Stmt) {
2444 fpos := p.pos
2445 if p.next(); p.tok != _LitWord {
2446 p.followErr(fpos, "function", "a name")
2447 }
2448 name := p.lit(p.pos, p.val)
2449 hasParens := false
2450 if p.next(); p.got(leftParen) {
2451 hasParens = true
2452 p.follow(name.ValuePos, "foo(", rightParen)
2453 }
2454 p.funcDecl(s, name, fpos, hasParens)
2455}
2456
2457func (p *Parser) testDecl(s *Stmt) {
2458 td := &TestDecl{Position: p.pos}
2459 p.next()
2460 if td.Description = p.getWord(); td.Description == nil {
2461 p.followErr(td.Position, "@test", "a description word")
2462 }
2463 if td.Body = p.getStmt(false, false, true); td.Body == nil {
2464 p.followErr(td.Position, `@test "desc"`, "a statement")
2465 }
2466 s.Cmd = td
2467}
2468
// callExpr parses a simple command into a call expression and stores it in
// s.Cmd: any leading assignments, the argument words, and any redirections,
// which are attached to s via doRedirect.
//
// w is a first word that the caller has already parsed, or nil if there is
// none. If assign is true, the current token is parsed as an assignment
// before anything else.
//
// If the command ends up with neither assignments nor arguments, s.Cmd is
// left untouched.
func (p *Parser) callExpr(s *Stmt, w *Word, assign bool) {
	ce := p.call(w)
	if w == nil {
		// Without a first word, start from an empty argument list.
		// NOTE(review): presumably p.call seeds Args with w; confirm.
		ce.Args = ce.Args[:0]
	}
	if assign {
		ce.Assigns = append(ce.Assigns, p.getAssign(true))
	}
loop:
	for {
		switch p.tok {
		case _EOF, _Newl, semicolon, and, or, andAnd, orOr, orAnd,
			dblSemicolon, semiAnd, dblSemiAnd, semiOr:
			// Any of these tokens ends the command.
			break loop
		case _LitWord:
			// Assignments are only recognized before the first argument.
			if len(ce.Args) == 0 && p.hasValidIdent() {
				ce.Assigns = append(ce.Assigns, p.getAssign(true))
				break
			}
			// Avoid failing later with the confusing "} can only be used to close a block".
			if p.lang == LangPOSIX && p.val == "{" && w != nil && w.Lit() == "function" {
				p.langErr(p.pos, `the "function" builtin`, LangBash)
			}
			ce.Args = append(ce.Args, p.wordOne(p.lit(p.pos, p.val)))
			p.next()
		case _Lit:
			// Same assignment rule as above, for a literal that is only
			// the start of a longer word.
			if len(ce.Args) == 0 && p.hasValidIdent() {
				ce.Assigns = append(ce.Assigns, p.getAssign(true))
				break
			}
			ce.Args = append(ce.Args, p.wordAnyNumber())
		case bckQuote:
			// A backquote may end an enclosing command substitution
			// rather than start a new word.
			if p.backquoteEnd() {
				break loop
			}
			fallthrough
		case dollBrace, dollDblParen, dollParen, dollar, cmdIn, cmdOut,
			sglQuote, dollSglQuote, dblQuote, dollDblQuote, dollBrack,
			globQuest, globStar, globPlus, globAt, globExcl:
			// Any other token which can start a word part.
			ce.Args = append(ce.Args, p.wordAnyNumber())
		case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut,
			hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir:
			// Redirections belong to the statement, not the call.
			p.doRedirect(s)
		case dblLeftParen:
			p.curErr("%s can only be used to open an arithmetic cmd", p.tok)
		case rightParen:
			// Inside a command substitution, ")" ends the command;
			// anywhere else it is an error, handled below.
			if p.quote == subCmd {
				break loop
			}
			fallthrough
		default:
			// Note that we'll only keep the first error that happens.
			if len(ce.Args) > 0 {
				// In POSIX mode, if the command name is a Bash compound
				// command word, report the language error first.
				if cmd := ce.Args[0].Lit(); p.lang == LangPOSIX && isBashCompoundCommand(_LitWord, cmd) {
					p.langErr(p.pos, fmt.Sprintf("the %q builtin", cmd), LangBash)
				}
			}
			p.curErr("a command can only contain words and redirects; encountered %s", p.tok)
		}
	}
	if len(ce.Assigns) == 0 && len(ce.Args) == 0 {
		// Nothing was parsed, so there is no call expression to record.
		return
	}
	if len(ce.Args) == 0 {
		// Only assignments; use a nil slice rather than an empty one.
		ce.Args = nil
	} else {
		// Assignments which prefix a command cannot use array syntax.
		for _, asgn := range ce.Assigns {
			if asgn.Index != nil || asgn.Array != nil {
				p.posErr(asgn.Pos(), "inline variables cannot be arrays")
			}
		}
	}
	s.Cmd = ce
}
2543
2544func (p *Parser) funcDecl(s *Stmt, name *Lit, pos Pos, withParens bool) {
2545 fd := &FuncDecl{
2546 Position: pos,
2547 RsrvWord: pos != name.ValuePos,
2548 Parens: withParens,
2549 Name: name,
2550 }
2551 p.got(_Newl)
2552 if fd.Body = p.getStmt(false, false, true); fd.Body == nil {
2553 p.followErr(fd.Pos(), "foo()", "a statement")
2554 }
2555 s.Cmd = fd
2556}