parser.go

   1// Copyright (c) 2016, Daniel Martí <mvdan@mvdan.cc>
   2// See LICENSE for licensing information
   3
   4package syntax
   5
import (
	"errors"
	"fmt"
	"io"
	"iter"
	"slices"
	"strconv"
	"strings"
	"unicode/utf8"
)
  15
// ParserOption is a function which can be passed to NewParser
// to alter its behavior. To apply an option to an existing Parser,
// call it directly, for example KeepComments(true)(parser).
type ParserOption func(*Parser)
  20
  21// KeepComments makes the parser parse comments and attach them to
  22// nodes, as opposed to discarding them.
  23func KeepComments(enabled bool) ParserOption {
  24	return func(p *Parser) { p.keepComments = enabled }
  25}
  26
// LangVariant describes a shell language variant to use when tokenizing and
// parsing shell code. The zero value is [LangBash].
//
// Its String and Set methods convert a variant to and from the textual
// names documented on each constant.
type LangVariant int
  30
const (
	// LangBash corresponds to the GNU Bash language, as described in its
	// manual at https://www.gnu.org/software/bash/manual/bash.html.
	//
	// We currently follow Bash version 5.2.
	//
	// Its string representation is "bash".
	LangBash LangVariant = iota

	// LangPOSIX corresponds to the POSIX Shell language, as described at
	// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html.
	//
	// Its string representation is "posix" or "sh".
	LangPOSIX

	// LangMirBSDKorn corresponds to the MirBSD Korn Shell, also known as
	// mksh, as described at http://www.mirbsd.org/htman/i386/man1/mksh.htm.
	// Note that it shares some features with Bash, due to the shared
	// ancestry that is ksh.
	//
	// We currently follow mksh version 59.
	//
	// Its string representation is "mksh".
	LangMirBSDKorn

	// LangBats corresponds to the Bash Automated Testing System language,
	// as described at https://github.com/bats-core/bats-core. Note that
	// it's just a small extension of the Bash language.
	//
	// Its string representation is "bats".
	LangBats

	// LangAuto corresponds to automatic language detection,
	// commonly used by end-user applications like shfmt,
	// which can guess a file's language variant given its filename or shebang.
	//
	// At this time, [Variant] does not support LangAuto.
	LangAuto
)
  70
  71// Variant changes the shell language variant that the parser will
  72// accept.
  73//
  74// The passed language variant must be one of the constant values defined in
  75// this package.
  76func Variant(l LangVariant) ParserOption {
  77	switch l {
  78	case LangBash, LangPOSIX, LangMirBSDKorn, LangBats:
  79	case LangAuto:
  80		panic("LangAuto is not supported by the parser at this time")
  81	default:
  82		panic(fmt.Sprintf("unknown shell language variant: %d", l))
  83	}
  84	return func(p *Parser) { p.lang = l }
  85}
  86
  87func (l LangVariant) String() string {
  88	switch l {
  89	case LangBash:
  90		return "bash"
  91	case LangPOSIX:
  92		return "posix"
  93	case LangMirBSDKorn:
  94		return "mksh"
  95	case LangBats:
  96		return "bats"
  97	case LangAuto:
  98		return "auto"
  99	}
 100	return "unknown shell language variant"
 101}
 102
 103func (l *LangVariant) Set(s string) error {
 104	switch s {
 105	case "bash":
 106		*l = LangBash
 107	case "posix", "sh":
 108		*l = LangPOSIX
 109	case "mksh":
 110		*l = LangMirBSDKorn
 111	case "bats":
 112		*l = LangBats
 113	case "auto":
 114		*l = LangAuto
 115	default:
 116		return fmt.Errorf("unknown shell language variant: %q", s)
 117	}
 118	return nil
 119}
 120
 121func (l LangVariant) isBash() bool {
 122	return l == LangBash || l == LangBats
 123}
 124
 125// StopAt configures the lexer to stop at an arbitrary word, treating it
 126// as if it were the end of the input. It can contain any characters
 127// except whitespace, and cannot be over four bytes in size.
 128//
 129// This can be useful to embed shell code within another language, as
 130// one can use a special word to mark the delimiters between the two.
 131//
 132// As a word, it will only apply when following whitespace or a
 133// separating token. For example, StopAt("$$") will act on the inputs
 134// "foo $$" and "foo;$$", but not on "foo '$$'".
 135//
 136// The match is done by prefix, so the example above will also act on
 137// "foo $$bar".
 138func StopAt(word string) ParserOption {
 139	if len(word) > 4 {
 140		panic("stop word can't be over four bytes in size")
 141	}
 142	if strings.ContainsAny(word, " \t\n\r") {
 143		panic("stop word can't contain whitespace characters")
 144	}
 145	return func(p *Parser) { p.stopAt = []byte(word) }
 146}
 147
 148// RecoverErrors allows the parser to skip up to a maximum number of
 149// errors in the given input on a best-effort basis.
 150// This can be useful to tab-complete an interactive shell prompt,
 151// or when providing diagnostics on slightly incomplete shell source.
 152//
 153// Currently, this only helps with mandatory tokens from the shell grammar
 154// which are not present in the input. They result in position fields
 155// or nodes whose position report [Pos.IsRecovered] as true.
 156//
 157// For example, given the input
 158//
 159//	(foo |
 160//
 161// the result will contain two recovered positions; first, the pipe requires
 162// a statement to follow, and as [Stmt.Pos] reports, the entire node is recovered.
 163// Second, the subshell needs to be closed, so [Subshell.Rparen] is recovered.
 164func RecoverErrors(maximum int) ParserOption {
 165	return func(p *Parser) { p.recoverErrorsMax = maximum }
 166}
 167
 168// NewParser allocates a new [Parser] and applies any number of options.
 169func NewParser(options ...ParserOption) *Parser {
 170	p := &Parser{}
 171	for _, opt := range options {
 172		opt(p)
 173	}
 174	return p
 175}
 176
 177// Parse reads and parses a shell program with an optional name. It
 178// returns the parsed program if no issues were encountered. Otherwise,
 179// an error is returned. Reads from r are buffered.
 180//
 181// Parse can be called more than once, but not concurrently. That is, a
 182// Parser can be reused once it is done working.
 183func (p *Parser) Parse(r io.Reader, name string) (*File, error) {
 184	p.reset()
 185	p.f = &File{Name: name}
 186	p.src = r
 187	p.rune()
 188	p.next()
 189	p.f.Stmts, p.f.Last = p.stmtList()
 190	if p.err == nil {
 191		// EOF immediately after heredoc word so no newline to
 192		// trigger it
 193		p.doHeredocs()
 194	}
 195	return p.f, p.err
 196}
 197
 198// Stmts reads and parses statements one at a time, calling a function
 199// each time one is parsed. If the function returns false, parsing is
 200// stopped and the function is not called again.
 201func (p *Parser) Stmts(r io.Reader, fn func(*Stmt) bool) error {
 202	p.reset()
 203	p.f = &File{}
 204	p.src = r
 205	p.rune()
 206	p.next()
 207	p.stmts(fn)
 208	if p.err == nil {
 209		// EOF immediately after heredoc word so no newline to
 210		// trigger it
 211		p.doHeredocs()
 212	}
 213	return p.err
 214}
 215
// wrappedReader wraps the reader given to [Parser.Interactive] so that its
// Read method can invoke the callback before reading more input. It embeds
// the Parser to inspect its current rune, line and completeness.
type wrappedReader struct {
	*Parser
	io.Reader

	// lastLine is the line up to which a callback has already been
	// accounted for; Read only considers lines beyond it.
	lastLine    int64
	// accumulated holds the statements parsed since the last time
	// Interactive handed a finished line to fn.
	accumulated []*Stmt
	// fn is the user callback; returning false stops the parsing.
	fn          func([]*Stmt) bool
}
 224
 225func (w *wrappedReader) Read(p []byte) (n int, err error) {
 226	// If we lexed a newline for the first time, we just finished a line, so
 227	// we may need to give a callback for the edge cases below not covered
 228	// by Parser.Stmts.
 229	if (w.r == '\n' || w.r == escNewl) && w.line > w.lastLine {
 230		if w.Incomplete() {
 231			// Incomplete statement; call back to print "> ".
 232			if !w.fn(w.accumulated) {
 233				return 0, io.EOF
 234			}
 235		} else if len(w.accumulated) == 0 {
 236			// Nothing was parsed; call back to print another "$ ".
 237			if !w.fn(nil) {
 238				return 0, io.EOF
 239			}
 240		}
 241		w.lastLine = w.line
 242	}
 243	return w.Reader.Read(p)
 244}
 245
// Interactive implements what is necessary to parse statements in an
// interactive shell. The parser will call the given function under two
// circumstances outlined below.
//
// If a line containing any number of statements is parsed, the function will be
// called with said statements.
//
// If a line ending in an incomplete statement is parsed, the function will be
// called with any fully parsed statements, and [Parser.Incomplete] will return true.
//
// One can imagine a simple interactive shell implementation as follows:
//
//	fmt.Fprintf(os.Stdout, "$ ")
//	parser.Interactive(os.Stdin, func(stmts []*syntax.Stmt) bool {
//		if parser.Incomplete() {
//			fmt.Fprintf(os.Stdout, "> ")
//			return true
//		}
//		run(stmts)
//		fmt.Fprintf(os.Stdout, "$ ")
//		return true
//	})
//
// If the callback function returns false, parsing is stopped and the function
// is not called again.
//
// The slice passed to the callback is reused for later lines, so a callback
// which needs to keep the statements after returning should copy the slice.
func (p *Parser) Interactive(r io.Reader, fn func([]*Stmt) bool) error {
	w := wrappedReader{Parser: p, Reader: r, fn: fn}
	return p.Stmts(&w, func(stmt *Stmt) bool {
		w.accumulated = append(w.accumulated, stmt)
		// We finished parsing a statement and we're at a newline token,
		// so we finished fully parsing a number of statements. Call
		// back to run the statements and print "$ ".
		if p.tok == _Newl {
			if !fn(w.accumulated) {
				return false
			}
			// Truncate in place; the backing array is reused.
			w.accumulated = w.accumulated[:0]
			// The callback above would already print "$ ", so we
			// don't want the subsequent wrappedReader.Read to cause
			// another "$ " print thinking that nothing was parsed.
			w.lastLine = w.line + 1
		}
		return true
	})
}
 291
 292// Words is a pre-iterators API which now wraps [Parser.WordsSeq].
 293func (p *Parser) Words(r io.Reader, fn func(*Word) bool) error {
 294	for w, err := range p.WordsSeq(r) {
 295		if err != nil {
 296			return err
 297		}
 298		if !fn(w) {
 299			break
 300		}
 301	}
 302	return nil
 303}
 304
 305// WordsSeq reads and parses a sequence of words alongside any error encountered.
 306//
 307// Newlines are skipped, meaning that multi-line input will work fine. If the
 308// parser encounters a token that isn't a word, such as a semicolon, an error
 309// will be returned.
 310//
 311// Note that the lexer doesn't currently tokenize spaces, so it may need to read
 312// a non-space byte such as a newline or a letter before finishing the parsing
 313// of a word. This will be fixed in the future.
 314func (p *Parser) WordsSeq(r io.Reader) iter.Seq2[*Word, error] {
 315	p.reset()
 316	p.f = &File{}
 317	p.src = r
 318	return func(yield func(*Word, error) bool) {
 319		p.rune()
 320		p.next()
 321		for {
 322			p.got(_Newl)
 323			w := p.getWord()
 324			if w == nil {
 325				if p.tok != _EOF {
 326					p.curErr("%s is not a valid word", p.tok)
 327				}
 328				if p.err != nil {
 329					yield(nil, p.err)
 330				}
 331				return
 332			}
 333			if !yield(w, nil) {
 334				return
 335			}
 336		}
 337	}
 338}
 339
// Document parses a single here-document word. That is, it parses the input as
// if it were the lines following a <<EOF redirection.
//
// In practice, this is the same as parsing the input as if it were within
// double quotes, but without having to escape all double quote characters.
// Similarly, the here-document word parsed here cannot be ended by any
// delimiter other than reaching the end of the input.
func (p *Parser) Document(r io.Reader) (*Word, error) {
	p.reset()
	p.f = &File{}
	p.src = r
	p.rune()
	// Tokenize the input in the heredoc body state.
	p.quote = hdocBody
	// A fixed placeholder stop word is installed for the duration of the parse.
	p.hdocStops = [][]byte{[]byte("MVDAN_CC_SH_SYNTAX_EOF")}
	p.parsingDoc = true
	p.next()
	w := p.getWord()
	return w, p.err
}
 359
// Arithmetic parses a single arithmetic expression. That is, as if the input
// were within the $(( and )) tokens.
//
// It returns the parsed expression along with any error encountered.
func (p *Parser) Arithmetic(r io.Reader) (ArithmExpr, error) {
	p.reset()
	p.f = &File{}
	p.src = r
	p.rune()
	// Tokenize the input in the arithmetic expression state.
	p.quote = arithmExpr
	p.next()
	expr := p.arithmExpr(false)
	return expr, p.err
}
 372
// Parser holds the internal state of the parsing mechanism of a
// program.
//
// A Parser is configured via [ParserOption] values given to [NewParser];
// the remaining fields are reset at the start of each parsing method.
type Parser struct {
	// src is the reader which input bytes are read from.
	src io.Reader
	bs  []byte // current chunk of read bytes
	bsp uint   // pos within chunk for the rune after r; uint helps eliminate bounds checks
	r   rune   // next rune
	w   int    // width of r

	// f is the file being parsed; its Name is used in error messages.
	f *File

	spaced bool // whether tok has whitespace on its left

	err     error // lexer/parser error
	readErr error // got a read error, but bytes left

	tok token  // current token
	val string // current value (valid if tok is _Lit*)

	// position of r, to be converted to Parser.pos later
	offs, line, col int64

	pos Pos // position of tok

	quote   quoteState // current lexer state
	eqlOffs int        // position of '=' in val (a literal)

	// keepComments and lang are set by the KeepComments and Variant options.
	keepComments bool
	lang         LangVariant

	// stopAt is the stop word set by the StopAt option.
	stopAt []byte

	// recoveredErrors counts the errors recovered from so far, up to
	// recoverErrorsMax, which is set by the RecoverErrors option.
	recoveredErrors  int
	recoverErrorsMax int

	// forbidNested makes ensureNoNested report an error, as expansions
	// are not allowed in heredoc words.
	forbidNested bool

	// list of pending heredoc bodies
	buriedHdocs int
	heredocs    []*Redirect

	hdocStops [][]byte // stack of end words for open heredocs

	parsingDoc bool // true if using Parser.Document

	// openNodes tracks how many entire statements or words we're currently parsing.
	// A non-zero number means that we require certain tokens or words before
	// reaching EOF, used for [Parser.Incomplete].
	openNodes int
	// openBquotes is how many levels of backquotes are open at the moment.
	openBquotes int

	// lastBquoteEsc is how many times the last backquote token was escaped
	lastBquoteEsc int

	// NOTE(review): presumably lexer state for regular expressions in
	// test expressions; not used in this part of the file, confirm in the lexer.
	rxOpenParens int
	rxFirstPart  bool

	// accComs holds the comments accumulated so far which are yet to be
	// attached to a node; curComs points at accComs after a reset.
	accComs []Comment
	curComs *[]Comment

	// litBatch and wordBatch are pre-allocated batches which lit,
	// wordAnyNumber and wordOne take their nodes from.
	litBatch  []Lit
	wordBatch []wordAlloc

	// litBs holds the bytes of the literal being constructed, if any; see
	// Incomplete. readBuf and litBuf are fixed-size buffers of bufSize bytes.
	readBuf [bufSize]byte
	litBuf  [bufSize]byte
	litBs   []byte
}
 441
 442// Incomplete reports whether the parser needs more input bytes
 443// to finish properly parsing a statement or word.
 444//
 445// It is only safe to call while the parser is blocked on a read. For an example
 446// use case, see [Parser.Interactive].
 447func (p *Parser) Incomplete() bool {
 448	// If there are any open nodes, we need to finish them.
 449	// If we're constructing a literal, we need to finish it.
 450	return p.openNodes > 0 || len(p.litBs) > 0
 451}
 452
// bufSize is the length in bytes of the Parser's fixed-size readBuf and
// litBuf arrays.
const bufSize = 1 << 10
 454
 455func (p *Parser) reset() {
 456	p.tok, p.val = illegalTok, ""
 457	p.eqlOffs = 0
 458	p.bs, p.bsp = nil, 0
 459	p.offs, p.line, p.col = 0, 1, 1
 460	p.r, p.w = 0, 0
 461	p.err, p.readErr = nil, nil
 462	p.quote, p.forbidNested = noState, false
 463	p.openNodes = 0
 464	p.recoveredErrors = 0
 465	p.heredocs, p.buriedHdocs = p.heredocs[:0], 0
 466	p.hdocStops = nil
 467	p.parsingDoc = false
 468	p.openBquotes = 0
 469	p.accComs = nil
 470	p.accComs, p.curComs = nil, &p.accComs
 471	p.litBatch = nil
 472	p.wordBatch = nil
 473	p.litBs = nil
 474}
 475
 476func (p *Parser) nextPos() Pos {
 477	// Basic protection against offset overflow;
 478	// note that an offset of 0 is valid, so we leave the maximum.
 479	offset := min(p.offs+int64(p.bsp)-int64(p.w), offsetMax)
 480	var line, col uint
 481	if p.line <= lineMax {
 482		line = uint(p.line)
 483	}
 484	if p.col <= colMax {
 485		col = uint(p.col)
 486	}
 487	return NewPos(uint(offset), line, col)
 488}
 489
 490func (p *Parser) lit(pos Pos, val string) *Lit {
 491	if len(p.litBatch) == 0 {
 492		p.litBatch = make([]Lit, 32)
 493	}
 494	l := &p.litBatch[0]
 495	p.litBatch = p.litBatch[1:]
 496	l.ValuePos = pos
 497	l.ValueEnd = p.nextPos()
 498	l.Value = val
 499	return l
 500}
 501
// wordAlloc groups a Word with room for a single part, so that wordAnyNumber
// and wordOne can use parts as the initial backing storage for word.Parts.
type wordAlloc struct {
	word  Word
	parts [1]WordPart
}
 506
 507func (p *Parser) wordAnyNumber() *Word {
 508	if len(p.wordBatch) == 0 {
 509		p.wordBatch = make([]wordAlloc, 32)
 510	}
 511	alloc := &p.wordBatch[0]
 512	p.wordBatch = p.wordBatch[1:]
 513	w := &alloc.word
 514	w.Parts = p.wordParts(alloc.parts[:0])
 515	return w
 516}
 517
 518func (p *Parser) wordOne(part WordPart) *Word {
 519	if len(p.wordBatch) == 0 {
 520		p.wordBatch = make([]wordAlloc, 32)
 521	}
 522	alloc := &p.wordBatch[0]
 523	p.wordBatch = p.wordBatch[1:]
 524	w := &alloc.word
 525	w.Parts = alloc.parts[:1]
 526	w.Parts[0] = part
 527	return w
 528}
 529
 530func (p *Parser) call(w *Word) *CallExpr {
 531	var alloc struct {
 532		ce CallExpr
 533		ws [4]*Word
 534	}
 535	ce := &alloc.ce
 536	ce.Args = alloc.ws[:1]
 537	ce.Args[0] = w
 538	return ce
 539}
 540
//go:generate stringer -type=quoteState

// quoteState identifies the state that the lexer is in, as stored in
// Parser.quote. Each state is a separate bit, so that sets of states can be
// expressed as bitmasks like the all* constants below.
type quoteState uint32

const (
	noState quoteState = 1 << iota
	subCmd
	subCmdBckquo
	dblQuotes
	hdocWord
	hdocBody
	hdocBodyTabs
	arithmExpr
	arithmExprLet
	arithmExprCmd
	arithmExprBrack
	testExpr
	testExprRegexp
	switchCase
	paramExpName
	paramExpSlice
	paramExpRepl
	paramExpExp
	arrayElems

	// The masks below combine several of the states above.
	// NOTE(review): their names suggest what the lexer uses each for;
	// confirm against the lexer, which is not part of this section.
	allKeepSpaces = paramExpRepl | dblQuotes | hdocBody |
		hdocBodyTabs | paramExpExp
	allRegTokens = noState | subCmd | subCmdBckquo | hdocWord |
		switchCase | arrayElems | testExpr
	allArithmExpr = arithmExpr | arithmExprLet | arithmExprCmd |
		arithmExprBrack | paramExpSlice
	allParamReg = paramExpName | paramExpSlice
	allParamExp = allParamReg | paramExpRepl | paramExpExp | arithmExprBrack
)
 575
// saveState holds the values of Parser.quote and Parser.buriedHdocs as
// captured by preNested, so that postNested can restore them.
type saveState struct {
	quote       quoteState
	buriedHdocs int
}
 580
 581func (p *Parser) preNested(quote quoteState) (s saveState) {
 582	s.quote, s.buriedHdocs = p.quote, p.buriedHdocs
 583	p.buriedHdocs, p.quote = len(p.heredocs), quote
 584	return
 585}
 586
 587func (p *Parser) postNested(s saveState) {
 588	p.quote, p.buriedHdocs = s.quote, s.buriedHdocs
 589}
 590
 591func (p *Parser) unquotedWordBytes(w *Word) ([]byte, bool) {
 592	buf := make([]byte, 0, 4)
 593	didUnquote := false
 594	for _, wp := range w.Parts {
 595		buf, didUnquote = p.unquotedWordPart(buf, wp, false)
 596	}
 597	return buf, didUnquote
 598}
 599
 600func (p *Parser) unquotedWordPart(buf []byte, wp WordPart, quotes bool) (_ []byte, quoted bool) {
 601	switch wp := wp.(type) {
 602	case *Lit:
 603		for i := 0; i < len(wp.Value); i++ {
 604			if b := wp.Value[i]; b == '\\' && !quotes {
 605				if i++; i < len(wp.Value) {
 606					buf = append(buf, wp.Value[i])
 607				}
 608				quoted = true
 609			} else {
 610				buf = append(buf, b)
 611			}
 612		}
 613	case *SglQuoted:
 614		buf = append(buf, []byte(wp.Value)...)
 615		quoted = true
 616	case *DblQuoted:
 617		for _, wp2 := range wp.Parts {
 618			buf, _ = p.unquotedWordPart(buf, wp2, true)
 619		}
 620		quoted = true
 621	}
 622	return buf, quoted
 623}
 624
// doHeredocs parses the bodies of the pending heredocs which are not buried,
// that is p.heredocs[p.buriedHdocs:], storing each body in the Hdoc field of
// its redirect. The lexer state is restored once done.
func (p *Parser) doHeredocs() {
	hdocs := p.heredocs[p.buriedHdocs:]
	if len(hdocs) == 0 {
		// Nothing to do; don't even issue a read.
		return
	}
	p.rune() // consume '\n', since we know p.tok == _Newl
	old := p.quote
	// The pending heredocs are being handled now, so drop them from the list.
	p.heredocs = p.heredocs[:p.buriedHdocs]
	for i, r := range hdocs {
		if p.err != nil {
			break
		}
		// Select the heredoc body state depending on the operator.
		p.quote = hdocBody
		if r.Op == DashHdoc {
			p.quote = hdocBodyTabs
		}
		// The stop word is the heredoc word with its quoting removed;
		// whether it was quoted decides how the body is parsed below.
		stop, quoted := p.unquotedWordBytes(r.Word)
		p.hdocStops = append(p.hdocStops, stop)
		if i > 0 && p.r == '\n' {
			p.rune()
		}
		lastLine := p.line
		if quoted {
			r.Hdoc = p.quotedHdocWord()
		} else {
			p.next()
			r.Hdoc = p.getWord()
		}
		if r.Hdoc != nil {
			lastLine = int64(r.Hdoc.End().Line())
		}
		if lastLine < p.line {
			// TODO: It seems like this triggers more often than it
			// should. Look into it.
			l := p.lit(p.nextPos(), "")
			if r.Hdoc == nil {
				r.Hdoc = p.wordOne(l)
			} else {
				r.Hdoc.Parts = append(r.Hdoc.Parts, l)
			}
		}
		// A stop word which is still non-nil is reported as unclosed.
		// NOTE(review): presumably the lexer sets the entry to nil once
		// it finds the stop word; confirm in the lexer.
		if stop := p.hdocStops[len(p.hdocStops)-1]; stop != nil {
			p.posErr(r.Pos(), "unclosed here-document '%s'", stop)
		}
		p.hdocStops = p.hdocStops[:len(p.hdocStops)-1]
	}
	p.quote = old
}
 674
 675func (p *Parser) got(tok token) bool {
 676	if p.tok == tok {
 677		p.next()
 678		return true
 679	}
 680	return false
 681}
 682
 683func (p *Parser) gotRsrv(val string) (Pos, bool) {
 684	pos := p.pos
 685	if p.tok == _LitWord && p.val == val {
 686		p.next()
 687		return pos, true
 688	}
 689	return pos, false
 690}
 691
 692func (p *Parser) recoverError() bool {
 693	if p.recoveredErrors < p.recoverErrorsMax {
 694		p.recoveredErrors++
 695		return true
 696	}
 697	return false
 698}
 699
 700func readableStr(s string) string {
 701	// don't quote tokens like & or }
 702	if s != "" && s[0] >= 'a' && s[0] <= 'z' {
 703		return strconv.Quote(s)
 704	}
 705	return s
 706}
 707
 708func (p *Parser) followErr(pos Pos, left, right string) {
 709	leftStr := readableStr(left)
 710	p.posErr(pos, "%s must be followed by %s", leftStr, right)
 711}
 712
 713func (p *Parser) followErrExp(pos Pos, left string) {
 714	p.followErr(pos, left, "an expression")
 715}
 716
 717func (p *Parser) follow(lpos Pos, left string, tok token) {
 718	if !p.got(tok) {
 719		p.followErr(lpos, left, tok.String())
 720	}
 721}
 722
 723func (p *Parser) followRsrv(lpos Pos, left, val string) Pos {
 724	pos, ok := p.gotRsrv(val)
 725	if !ok {
 726		if p.recoverError() {
 727			return recoveredPos
 728		}
 729		p.followErr(lpos, left, fmt.Sprintf("%q", val))
 730	}
 731	return pos
 732}
 733
 734func (p *Parser) followStmts(left string, lpos Pos, stops ...string) ([]*Stmt, []Comment) {
 735	if p.got(semicolon) {
 736		return nil, nil
 737	}
 738	newLine := p.got(_Newl)
 739	stmts, last := p.stmtList(stops...)
 740	if len(stmts) < 1 && !newLine {
 741		if p.recoverError() {
 742			return []*Stmt{{Position: recoveredPos}}, nil
 743		}
 744		p.followErr(lpos, left, "a statement list")
 745	}
 746	return stmts, last
 747}
 748
 749func (p *Parser) followWordTok(tok token, pos Pos) *Word {
 750	w := p.getWord()
 751	if w == nil {
 752		if p.recoverError() {
 753			return p.wordOne(&Lit{ValuePos: recoveredPos})
 754		}
 755		p.followErr(pos, tok.String(), "a word")
 756	}
 757	return w
 758}
 759
 760func (p *Parser) stmtEnd(n Node, start, end string) Pos {
 761	pos, ok := p.gotRsrv(end)
 762	if !ok {
 763		if p.recoverError() {
 764			return recoveredPos
 765		}
 766		p.posErr(n.Pos(), "%s statement must end with %q", start, end)
 767	}
 768	return pos
 769}
 770
 771func (p *Parser) quoteErr(lpos Pos, quote token) {
 772	p.posErr(lpos, "reached %s without closing quote %s",
 773		p.tok.String(), quote)
 774}
 775
 776func (p *Parser) matchingErr(lpos Pos, left, right any) {
 777	p.posErr(lpos, "reached %s without matching %s with %s",
 778		p.tok.String(), left, right)
 779}
 780
 781func (p *Parser) matched(lpos Pos, left, right token) Pos {
 782	pos := p.pos
 783	if !p.got(right) {
 784		if p.recoverError() {
 785			return recoveredPos
 786		}
 787		p.matchingErr(lpos, left, right)
 788	}
 789	return pos
 790}
 791
 792func (p *Parser) errPass(err error) {
 793	if p.err == nil {
 794		p.err = err
 795		p.bsp = uint(len(p.bs)) + 1
 796		p.r = utf8.RuneSelf
 797		p.w = 1
 798		p.tok = _EOF
 799	}
 800}
 801
 802// IsIncomplete reports whether a Parser error could have been avoided with
 803// extra input bytes. For example, if an [io.EOF] was encountered while there was
 804// an unclosed quote or parenthesis.
 805func IsIncomplete(err error) bool {
 806	perr, ok := err.(ParseError)
 807	return ok && perr.Incomplete
 808}
 809
 810// IsKeyword returns true if the given word is part of the language keywords.
 811func IsKeyword(word string) bool {
 812	// This list has been copied from the bash 5.1 source code, file y.tab.c +4460
 813	switch word {
 814	case
 815		"!",
 816		"[[", // only if COND_COMMAND is defined
 817		"]]", // only if COND_COMMAND is defined
 818		"case",
 819		"coproc", // only if COPROCESS_SUPPORT is defined
 820		"do",
 821		"done",
 822		"else",
 823		"esac",
 824		"fi",
 825		"for",
 826		"function",
 827		"if",
 828		"in",
 829		"select", // only if SELECT_COMMAND is defined
 830		"then",
 831		"time", // only if COMMAND_TIMING is defined
 832		"until",
 833		"while",
 834		"{",
 835		"}":
 836		return true
 837	}
 838	return false
 839}
 840
// ParseError represents an error found when parsing a source file, from which
// the parser cannot recover.
type ParseError struct {
	// Filename is the name of the file being parsed, which may be empty.
	Filename string
	// Pos is the position in the source which the error refers to.
	Pos      Pos
	// Text is the error message, without the filename or position.
	Text     string

	// Incomplete is set if the error was found at the end of the input
	// while the parser needed more bytes; see [IsIncomplete].
	Incomplete bool
}
 850
 851func (e ParseError) Error() string {
 852	if e.Filename == "" {
 853		return fmt.Sprintf("%s: %s", e.Pos.String(), e.Text)
 854	}
 855	return fmt.Sprintf("%s:%s: %s", e.Filename, e.Pos.String(), e.Text)
 856}
 857
// LangError is returned when the parser encounters code that is only valid in
// other shell language variants. The error includes what feature is not present
// in the current language variant, and what languages support it.
type LangError struct {
	// Filename is the name of the file being parsed, which may be empty.
	Filename string
	// Pos is the position in the source which the error refers to.
	Pos      Pos

	// Feature briefly describes which language feature caused the error.
	Feature string
	// Langs lists some of the language variants which support the feature.
	Langs []LangVariant
	// LangUsed is the language variant used which led to the error.
	LangUsed LangVariant
}
 872
 873func (e LangError) Error() string {
 874	var sb strings.Builder
 875	if e.Filename != "" {
 876		sb.WriteString(e.Filename + ":")
 877	}
 878	sb.WriteString(e.Pos.String() + ": ")
 879	sb.WriteString(e.Feature)
 880	if strings.HasSuffix(e.Feature, "s") {
 881		sb.WriteString(" are a ")
 882	} else {
 883		sb.WriteString(" is a ")
 884	}
 885	for i, lang := range e.Langs {
 886		if i > 0 {
 887			sb.WriteString("/")
 888		}
 889		sb.WriteString(lang.String())
 890	}
 891	sb.WriteString(" feature; tried parsing as ")
 892	sb.WriteString(e.LangUsed.String())
 893	return sb.String()
 894}
 895
 896func (p *Parser) posErr(pos Pos, format string, a ...any) {
 897	p.errPass(ParseError{
 898		Filename:   p.f.Name,
 899		Pos:        pos,
 900		Text:       fmt.Sprintf(format, a...),
 901		Incomplete: p.tok == _EOF && p.Incomplete(),
 902	})
 903}
 904
 905func (p *Parser) curErr(format string, a ...any) {
 906	p.posErr(p.pos, format, a...)
 907}
 908
 909func (p *Parser) langErr(pos Pos, feature string, langs ...LangVariant) {
 910	p.errPass(LangError{
 911		Filename: p.f.Name,
 912		Pos:      pos,
 913		Feature:  feature,
 914		Langs:    langs,
 915		LangUsed: p.lang,
 916	})
 917}
 918
// stmts parses statements one at a time, passing each one to fn. It stops at
// the end of the input, when an error occurs, when a token ends the enclosing
// construct, when no statement can be parsed, or when fn returns false.
// A literal word equal to any of stops also ends the list; the token which
// ends the list is not consumed.
func (p *Parser) stmts(fn func(*Stmt) bool, stops ...string) {
	// gotEnd records whether the previous statement had a valid Semicolon
	// position; it starts as true since there is no previous statement.
	gotEnd := true
loop:
	for p.tok != _EOF {
		newLine := p.got(_Newl)
		// Stop if the current token ends the construct being parsed.
		switch p.tok {
		case _LitWord:
			for _, stop := range stops {
				if p.val == stop {
					break loop
				}
			}
		case rightParen:
			if p.quote == subCmd {
				break loop
			}
		case bckQuote:
			if p.backquoteEnd() {
				break loop
			}
		case dblSemicolon, semiAnd, dblSemiAnd, semiOr:
			if p.quote == switchCase {
				break loop
			}
			p.curErr("%s can only be used in a case clause", p.tok)
		}
		if !newLine && !gotEnd {
			p.curErr("statements must be separated by &, ; or a newline")
		}
		// Either the input ended after the newline, or one of the
		// errors above set the token to _EOF.
		if p.tok == _EOF {
			break
		}
		// Count the statement as open while it is parsed, for Incomplete.
		p.openNodes++
		s := p.getStmt(true, false, false)
		p.openNodes--
		if s == nil {
			p.invalidStmtStart()
			break
		}
		gotEnd = s.Semicolon.IsValid()
		if !fn(s) {
			break
		}
	}
}
 964
// stmtList parses a list of statements via stmts, stopping at any of the
// given stop words. It returns the statements along with the accumulated
// comments which were not attached to any of them; those comments are removed
// from p.accComs.
func (p *Parser) stmtList(stops ...string) ([]*Stmt, []Comment) {
	var stmts []*Stmt
	var last []Comment
	fn := func(s *Stmt) bool {
		stmts = append(stmts, s)
		return true
	}
	p.stmts(fn, stops...)
	// By default, all accumulated comments are returned as last.
	split := len(p.accComs)
	if p.tok == _LitWord && (p.val == "elif" || p.val == "else" || p.val == "fi") {
		// Split the comments, so that any aligned with an opening token
		// get attached to it. For example:
		//
		//     if foo; then
		//         # inside the body
		//     # document the else
		//     else
		//     fi
		// TODO(mvdan): look into deduplicating this with similar logic
		// in caseItems.
		//
		// Walk the comments backwards, moving the split point over the
		// trailing run of comments in the same column as the token.
		for i, c := range slices.Backward(p.accComs) {
			if c.Pos().Col() != p.pos.Col() {
				break
			}
			split = i
		}
	}
	if split > 0 { // keep last nil if empty
		last = p.accComs[:split]
	}
	// The comments from split onwards remain accumulated.
	p.accComs = p.accComs[split:]
	return stmts, last
}
 998
 999func (p *Parser) invalidStmtStart() {
1000	switch p.tok {
1001	case semicolon, and, or, andAnd, orOr:
1002		p.curErr("%s can only immediately follow a statement", p.tok)
1003	case rightParen:
1004		p.curErr("%s can only be used to close a subshell", p.tok)
1005	default:
1006		p.curErr("%s is not a valid start for a statement", p.tok)
1007	}
1008}
1009
1010func (p *Parser) getWord() *Word {
1011	if w := p.wordAnyNumber(); len(w.Parts) > 0 && p.err == nil {
1012		return w
1013	}
1014	return nil
1015}
1016
1017func (p *Parser) getLit() *Lit {
1018	switch p.tok {
1019	case _Lit, _LitWord, _LitRedir:
1020		l := p.lit(p.pos, p.val)
1021		p.next()
1022		return l
1023	}
1024	return nil
1025}
1026
1027func (p *Parser) wordParts(wps []WordPart) []WordPart {
1028	for {
1029		p.openNodes++
1030		n := p.wordPart()
1031		p.openNodes--
1032		if n == nil {
1033			if len(wps) == 0 {
1034				return nil // normalize empty lists into nil
1035			}
1036			return wps
1037		}
1038		wps = append(wps, n)
1039		if p.spaced {
1040			return wps
1041		}
1042	}
1043}
1044
1045func (p *Parser) ensureNoNested() {
1046	if p.forbidNested {
1047		p.curErr("expansions not allowed in heredoc words")
1048	}
1049}
1050
// wordPart parses one word part starting at the current token, choosing the
// kind of node from p.tok. It returns nil when the current token cannot start
// a word part, which includes a quote or backquote that closes an enclosing
// construct, as well as an unclosed single-quoted string that was reported as
// an error.
func (p *Parser) wordPart() WordPart {
	switch p.tok {
	case _Lit, _LitWord, _LitRedir:
		// Plain literal: wrap the current value and move on.
		l := p.lit(p.pos, p.val)
		p.next()
		return l
	case dollBrace:
		p.ensureNoNested()
		// The rune right after the opening token tells mksh's command
		// substitution forms apart from a regular parameter expansion.
		switch p.r {
		case '|':
			if p.lang != LangMirBSDKorn {
				p.langErr(p.pos, `"${|stmts;}"`, LangMirBSDKorn)
			}
			fallthrough
		case ' ', '\t', '\n':
			if p.lang != LangMirBSDKorn {
				p.langErr(p.pos, `"${ stmts;}"`, LangMirBSDKorn)
			}
			// Exactly one of TempFile and ReplyVar is set, depending
			// on whether the '|' form was used.
			cs := &CmdSubst{
				Left:     p.pos,
				TempFile: p.r != '|',
				ReplyVar: p.r == '|',
			}
			old := p.preNested(subCmd)
			p.rune() // don't tokenize '|'
			p.next()
			cs.Stmts, cs.Last = p.stmtList("}")
			p.postNested(old)
			pos, ok := p.gotRsrv("}")
			if !ok {
				p.matchingErr(cs.Left, "${", "}")
			}
			cs.Right = pos
			return cs
		default:
			return p.paramExp()
		}
	case dollDblParen, dollBrack:
		// Arithmetic expansion; Bracket records that it was opened with
		// dollBrack rather than dollDblParen.
		p.ensureNoNested()
		left := p.tok
		ar := &ArithmExp{Left: p.pos, Bracket: left == dollBrack}
		var old saveState
		if ar.Bracket {
			old = p.preNested(arithmExprBrack)
		} else {
			old = p.preNested(arithmExpr)
		}
		p.next()
		// A leading hash marks the expression as unsigned, which is
		// only accepted for mksh.
		if p.got(hash) {
			if p.lang != LangMirBSDKorn {
				p.langErr(ar.Pos(), "unsigned expressions", LangMirBSDKorn)
			}
			ar.Unsigned = true
		}
		ar.X = p.followArithm(left, ar.Left)
		if ar.Bracket {
			// The bracket form is closed here by hand, while the
			// double parenthesis form is closed by arithmEnd.
			if p.tok != rightBrack {
				p.arithmMatchingErr(ar.Left, dollBrack, rightBrack)
			}
			p.postNested(old)
			ar.Right = p.pos
			p.next()
		} else {
			ar.Right = p.arithmEnd(dollDblParen, ar.Left, old)
		}
		return ar
	case dollParen:
		// Command substitution closed by a right parenthesis.
		p.ensureNoNested()
		cs := &CmdSubst{Left: p.pos}
		old := p.preNested(subCmd)
		p.next()
		cs.Stmts, cs.Last = p.stmtList()
		p.postNested(old)
		cs.Right = p.matched(cs.Left, leftParen, rightParen)
		return cs
	case dollar:
		// A lone dollar is a short parameter expansion when a parameter
		// name follows, and a literal "$" otherwise.
		r := p.r
		switch {
		case singleRuneParam(r):
			// Special parameters are a single rune long, so the
			// token is built by hand from that rune.
			p.tok, p.val = _LitWord, string(r)
			p.rune()
		case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z',
			'0' <= r && r <= '9', r == '_', r == '\\':
			p.advanceNameCont(r)
		default:
			l := p.lit(p.pos, "$")
			p.next()
			return l
		}
		p.ensureNoNested()
		pe := &ParamExp{Dollar: p.pos, Short: true}
		// Move the position one column past the dollar before reading
		// the parameter name literal.
		p.pos = posAddCol(p.pos, 1)
		pe.Param = p.getLit()
		if pe.Param != nil && pe.Param.Value == "" {
			l := p.lit(pe.Dollar, "$")
			// e.g. "$\\\"" within double quotes, so we must
			// keep the rest of the literal characters.
			l.ValueEnd = posAddCol(l.ValuePos, 1)
			return l
		}
		return pe
	case cmdIn, cmdOut:
		// Process substitution; the token itself is the operator.
		p.ensureNoNested()
		ps := &ProcSubst{Op: ProcOperator(p.tok), OpPos: p.pos}
		old := p.preNested(subCmd)
		p.next()
		ps.Stmts, ps.Last = p.stmtList()
		p.postNested(old)
		ps.Rparen = p.matched(ps.OpPos, token(ps.Op), rightParen)
		return ps
	case sglQuote, dollSglQuote:
		// Single-quoted strings are read rune by rune here, rather than
		// via tokens, until the closing quote is found.
		sq := &SglQuoted{Left: p.pos, Dollar: p.tok == dollSglQuote}
		r := p.r
		for p.newLit(r); ; r = p.rune() {
			switch r {
			case '\\':
				// In the dollar form, a backslash also consumes
				// the rune after it, so that rune is never
				// examined as a closing quote.
				if sq.Dollar {
					p.rune()
				}
			case '\'':
				sq.Right = p.nextPos()
				sq.Value = p.endLit()

				p.rune()
				p.next()
				return sq
			case escNewl:
				// Keep the backslash and newline as part of the
				// literal value.
				p.litBs = append(p.litBs, '\\', '\n')
			case utf8.RuneSelf:
				// The input ended before the closing quote;
				// presumably this rune is the lexer's end-of-input
				// marker.
				p.tok = _EOF
				if p.recoverError() {
					sq.Right = recoveredPos
					return sq
				}
				p.quoteErr(sq.Pos(), sglQuote)
				return nil
			}
		}
	case dblQuote, dollDblQuote:
		if p.quote == dblQuotes {
			// p.tok == dblQuote, as "foo$" puts $ in the lit
			return nil
		}
		return p.dblQuoted()
	case bckQuote:
		// A backquote either closes an enclosing command substitution
		// or opens a new one.
		if p.backquoteEnd() {
			return nil
		}
		p.ensureNoNested()
		cs := &CmdSubst{Left: p.pos, Backquotes: true}
		old := p.preNested(subCmdBckquo)
		p.openBquotes++

		// The lexer didn't call p.rune for us, so that it could have
		// the right p.openBquotes to properly handle backslashes.
		p.rune()

		p.next()
		cs.Stmts, cs.Last = p.stmtList()
		if p.tok == bckQuote && p.lastBquoteEsc < p.openBquotes-1 {
			// e.g. found ` before the nested backquote \` was closed.
			p.tok = _EOF
			p.quoteErr(cs.Pos(), bckQuote)
		}
		p.postNested(old)
		p.openBquotes--
		cs.Right = p.pos

		// Like above, the lexer didn't call p.rune for us.
		p.rune()
		if !p.got(bckQuote) {
			if p.recoverError() {
				cs.Right = recoveredPos
			} else {
				p.quoteErr(cs.Pos(), bckQuote)
			}
		}
		return cs
	case globQuest, globStar, globPlus, globAt, globExcl:
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "extended globs", LangBash, LangMirBSDKorn)
		}
		eg := &ExtGlob{Op: GlobOperator(p.tok), OpPos: p.pos}
		// Read the pattern rune by rune, counting parentheses so that
		// nested pairs don't end the pattern early. The count starts at
		// one for the parenthesis that opened the glob.
		lparens := 1
		r := p.r
	globLoop:
		for p.newLit(r); ; r = p.rune() {
			switch r {
			case utf8.RuneSelf:
				break globLoop
			case '(':
				lparens++
			case ')':
				if lparens--; lparens == 0 {
					break globLoop
				}
			}
		}
		// The pattern literal is positioned two columns after OpPos.
		eg.Pattern = p.lit(posAddCol(eg.OpPos, 2), p.endLit())
		p.rune()
		p.next()
		// A non-zero count means the loop stopped before the matching
		// right parenthesis was found.
		if lparens != 0 {
			p.matchingErr(eg.OpPos, eg.Op, rightParen)
		}
		return eg
	default:
		return nil
	}
}
1260
1261func (p *Parser) dblQuoted() *DblQuoted {
1262	alloc := &struct {
1263		quoted DblQuoted
1264		parts  [1]WordPart
1265	}{
1266		quoted: DblQuoted{Left: p.pos, Dollar: p.tok == dollDblQuote},
1267	}
1268	q := &alloc.quoted
1269	old := p.quote
1270	p.quote = dblQuotes
1271	p.next()
1272	q.Parts = p.wordParts(alloc.parts[:0])
1273	p.quote = old
1274	q.Right = p.pos
1275	if !p.got(dblQuote) {
1276		if p.recoverError() {
1277			q.Right = recoveredPos
1278		} else {
1279			p.quoteErr(q.Pos(), dblQuote)
1280		}
1281	}
1282	return q
1283}
1284
// singleRuneParam reports whether r is one of the parameter names which are
// exactly one rune long: an ASCII digit, or one of '@', '*', '#', '$', '?',
// '!' and '-'.
func singleRuneParam(r rune) bool {
	if '0' <= r && r <= '9' {
		return true
	}
	return r == '@' || r == '*' || r == '#' || r == '$' ||
		r == '?' || r == '!' || r == '-'
}
1293
// paramExp parses a parameter expansion written with braces. wordPart calls it
// while the current token is dollBrace, and that token's position is recorded
// as Dollar. p.quote is switched between the parameter expansion states while
// parsing, and set back to its previous value before every return.
func (p *Parser) paramExp() *ParamExp {
	pe := &ParamExp{Dollar: p.pos}
	old := p.quote
	p.quote = paramExpName
	// A '#' right after the opening token is turned into a hash token by
	// hand instead of going through p.next.
	// NOTE(review): presumably so that the lexer does not interpret the
	// '#' in some other way - confirm against the lexer.
	if p.r == '#' {
		p.tok = hash
		p.pos = p.nextPos()
		p.rune()
	} else {
		p.next()
	}
	// Optional prefix operator. It only counts as an operator when
	// paramNameOp accepts the rune that follows it; otherwise the token is
	// left in place for the parameter switch below.
	switch p.tok {
	case hash:
		if paramNameOp(p.r) {
			pe.Length = true
			p.next()
		}
	case perc:
		if p.lang != LangMirBSDKorn {
			p.langErr(pe.Pos(), `"${%foo}"`, LangMirBSDKorn)
		}
		if paramNameOp(p.r) {
			pe.Width = true
			p.next()
		}
	case exclMark:
		if paramNameOp(p.r) {
			pe.Excl = true
			p.next()
		}
	}
	// Remember the parameter token for the error message further below.
	op := p.tok
	// The parameter itself: a name, a number, or a special parameter.
	switch p.tok {
	case _Lit, _LitWord:
		if !numberLiteral(p.val) && !ValidName(p.val) {
			p.curErr("invalid parameter name")
		}
		pe.Param = p.lit(p.pos, p.val)
		p.next()
	case quest, minus:
		if pe.Length && p.r != '}' {
			// actually ${#-default}, not ${#-}; fix the ambiguity
			pe.Length = false
			pe.Param = p.lit(posAddCol(p.pos, -1), "#")
			pe.Param.ValueEnd = p.pos
			break
		}
		fallthrough
	case at, star, hash, exclMark, dollar:
		pe.Param = p.lit(p.pos, p.tok.String())
		p.next()
	default:
		p.curErr("parameter expansion requires a literal")
	}
	// What follows the parameter: the closing brace, an index, or one of
	// the operators handled further below.
	switch p.tok {
	case _Lit, _LitWord:
		p.curErr("%s cannot be followed by a word", op)
	case rightBrace:
		if pe.Excl && p.lang == LangPOSIX {
			p.langErr(pe.Pos(), `"${!foo}"`, LangBash, LangMirBSDKorn)
		}
		pe.Rbrace = p.pos
		p.quote = old
		p.next()
		return pe
	case leftBrack:
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "arrays", LangBash, LangMirBSDKorn)
		}
		if !ValidName(pe.Param.Value) {
			p.curErr("cannot index a special parameter name")
		}
		pe.Index = p.eitherIndex()
	}
	// The expansion may end right after an index.
	if p.tok == rightBrace {
		pe.Rbrace = p.pos
		p.quote = old
		p.next()
		return pe
	}
	// The length and width prefixes cannot be followed by another
	// operator. The _EOF check skips this error when the input has already
	// run out; presumably also after an earlier error - TODO confirm.
	if p.tok != _EOF && (pe.Length || pe.Width) {
		p.curErr("cannot combine multiple parameter expansion operators")
	}
	switch p.tok {
	case slash, dblSlash:
		// pattern search and replace
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "search and replace", LangBash, LangMirBSDKorn)
		}
		pe.Repl = &Replace{All: p.tok == dblSlash}
		// The pattern and the replacement are parsed under different
		// quote states; the replacement is optional.
		p.quote = paramExpRepl
		p.next()
		pe.Repl.Orig = p.getWord()
		p.quote = paramExpExp
		if p.got(slash) {
			pe.Repl.With = p.getWord()
		}
	case colon:
		// slicing
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "slicing", LangBash, LangMirBSDKorn)
		}
		pe.Slice = &Slice{}
		colonPos := p.pos
		p.quote = paramExpSlice
		// The offset is left nil when a second colon follows directly.
		if p.next(); p.tok != colon {
			pe.Slice.Offset = p.followArithm(colon, colonPos)
		}
		colonPos = p.pos
		if p.got(colon) {
			pe.Slice.Length = p.followArithm(colon, colonPos)
		}
		// Need to use a different matched style so arithm errors
		// get reported correctly
		p.quote = old
		pe.Rbrace = p.pos
		p.matchedArithm(pe.Dollar, dollBrace, rightBrace)
		return pe
	case caret, dblCaret, comma, dblComma:
		// upper/lower case
		if !p.lang.isBash() {
			p.langErr(p.pos, "this expansion operator", LangBash)
		}
		pe.Exp = p.paramExpExp()
	case at, star:
		switch {
		case p.tok == at && p.lang == LangPOSIX:
			p.langErr(p.pos, "this expansion operator", LangBash, LangMirBSDKorn)
		case p.tok == star && !pe.Excl:
			p.curErr("not a valid parameter expansion operator: %v", p.tok)
		case pe.Excl && p.r == '}':
			// An at or star right before the closing brace, combined
			// with the exclamation prefix, is recorded in Names
			// rather than as an expansion operator.
			if !p.lang.isBash() {
				p.langErr(pe.Pos(), fmt.Sprintf(`"${!foo%s}"`, p.tok), LangBash)
			}
			pe.Names = ParNamesOperator(p.tok)
			p.next()
		default:
			pe.Exp = p.paramExpExp()
		}
	case plus, colPlus, minus, colMinus, quest, colQuest, assgn, colAssgn,
		perc, dblPerc, hash, dblHash:
		pe.Exp = p.paramExpExp()
	case _EOF:
		// Nothing more to parse; p.matched below deals with the
		// missing closing brace.
	default:
		p.curErr("not a valid parameter expansion operator: %v", p.tok)
	}
	p.quote = old
	pe.Rbrace = p.matched(pe.Dollar, dollBrace, rightBrace)
	return pe
}
1444
1445func (p *Parser) paramExpExp() *Expansion {
1446	op := ParExpOperator(p.tok)
1447	p.quote = paramExpExp
1448	p.next()
1449	if op == OtherParamOps {
1450		switch p.tok {
1451		case _Lit, _LitWord:
1452		default:
1453			p.curErr("@ expansion operator requires a literal")
1454		}
1455		switch p.val {
1456		case "a", "k", "u", "A", "E", "K", "L", "P", "U":
1457			if !p.lang.isBash() {
1458				p.langErr(p.pos, "this expansion operator", LangBash)
1459			}
1460		case "#":
1461			if p.lang != LangMirBSDKorn {
1462				p.langErr(p.pos, "this expansion operator", LangMirBSDKorn)
1463			}
1464		case "Q":
1465		default:
1466			p.curErr("invalid @ expansion operator %q", p.val)
1467		}
1468	}
1469	return &Expansion{Op: op, Word: p.getWord()}
1470}
1471
1472func (p *Parser) eitherIndex() ArithmExpr {
1473	old := p.quote
1474	lpos := p.pos
1475	p.quote = arithmExprBrack
1476	p.next()
1477	if p.tok == star || p.tok == at {
1478		p.tok, p.val = _LitWord, p.tok.String()
1479	}
1480	expr := p.followArithm(leftBrack, lpos)
1481	p.quote = old
1482	p.matchedArithm(lpos, leftBrack, rightBrack)
1483	return expr
1484}
1485
1486func (p *Parser) stopToken() bool {
1487	switch p.tok {
1488	case _EOF, _Newl, semicolon, and, or, andAnd, orOr, orAnd, dblSemicolon,
1489		semiAnd, dblSemiAnd, semiOr, rightParen:
1490		return true
1491	case bckQuote:
1492		return p.backquoteEnd()
1493	}
1494	return false
1495}
1496
1497func (p *Parser) backquoteEnd() bool {
1498	return p.lastBquoteEsc < p.openBquotes
1499}
1500
// ValidName reports whether val is a valid name as per the POSIX spec:
// a non-empty sequence of ASCII letters, ASCII digits, and underscores
// which does not begin with a digit.
func ValidName(val string) bool {
	if len(val) == 0 {
		return false
	}
	for i, r := range val {
		isLetter := ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z')
		isDigit := '0' <= r && r <= '9'
		if isLetter || r == '_' || (isDigit && i > 0) {
			continue
		}
		return false
	}
	return true
}
1518
// numberLiteral reports whether val consists solely of ASCII digits.
// Note that the empty string has no non-digit characters, so it is accepted.
func numberLiteral(val string) bool {
	// Checking bytes is enough, since every byte of a multi-byte UTF-8
	// sequence lies outside the ASCII digit range.
	for i := 0; i < len(val); i++ {
		if c := val[i]; c < '0' || c > '9' {
			return false
		}
	}
	return true
}
1527
1528func (p *Parser) hasValidIdent() bool {
1529	if p.tok != _Lit && p.tok != _LitWord {
1530		return false
1531	}
1532	if end := p.eqlOffs; end > 0 {
1533		if p.val[end-1] == '+' && p.lang != LangPOSIX {
1534			end-- // a+=x
1535		}
1536		if ValidName(p.val[:end]) {
1537			return true
1538		}
1539	} else if !ValidName(p.val) {
1540		return false // *[i]=x
1541	}
1542	return p.r == '[' // a[i]=x
1543}
1544
// getAssign parses an assignment starting at the current literal token, in
// either the "name=value" form or the indexed "name[index]=value" form. A '+'
// right before the '=' marks an append; in the first form, that only applies
// outside of LangPOSIX. The value may be a word or an array expression.
//
// When an indexed name is not followed by '=', needEqual decides between
// reporting an error and returning the assignment with Naked set.
//
// It returns nil after some errors: an indexed name missing its '=', a nested
// array, or an array element which is not a word.
func (p *Parser) getAssign(needEqual bool) *Assign {
	as := &Assign{}
	if p.eqlOffs > 0 { // foo=bar
		nameEnd := p.eqlOffs
		if p.lang != LangPOSIX && p.val[p.eqlOffs-1] == '+' {
			// a+=b
			as.Append = true
			nameEnd--
		}
		as.Name = p.lit(p.pos, p.val[:nameEnd])
		// since we're not using the entire p.val
		as.Name.ValueEnd = posAddCol(as.Name.ValuePos, nameEnd)
		// Whatever follows the '=' within the same literal becomes the
		// first part of the value, positioned after the name and '='.
		left := p.lit(posAddCol(p.pos, 1), p.val[p.eqlOffs+1:])
		if left.Value != "" {
			left.ValuePos = posAddCol(left.ValuePos, p.eqlOffs)
			as.Value = p.wordOne(left)
		}
		p.next()
	} else { // foo[x]=bar
		as.Name = p.lit(p.pos, p.val)
		// hasValidIdent already checks p.r is '['
		p.rune()
		p.pos = posAddCol(p.pos, 1)
		as.Index = p.eitherIndex()
		if p.spaced || p.stopToken() {
			if needEqual {
				p.followErr(as.Pos(), "a[b]", "=")
			} else {
				as.Naked = true
				return as
			}
		}
		// The token after the index holds the "=" or "+=" as a prefix of
		// its value; strip those by hand, adjusting the position to match.
		if len(p.val) > 0 && p.val[0] == '+' {
			as.Append = true
			p.val = p.val[1:]
			p.pos = posAddCol(p.pos, 1)
		}
		if len(p.val) < 1 || p.val[0] != '=' {
			if as.Append {
				p.followErr(as.Pos(), "a[b]+", "=")
			} else {
				p.followErr(as.Pos(), "a[b]", "=")
			}
			return nil
		}
		p.pos = posAddCol(p.pos, 1)
		p.val = p.val[1:]
		// If nothing is left in the token, move on to the next one;
		// otherwise the remainder is parsed as the value below.
		if p.val == "" {
			p.next()
		}
	}
	// An assignment followed by a space or a stop token has no more value
	// to parse.
	if p.spaced || p.stopToken() {
		return as
	}
	if as.Value == nil && p.tok == leftParen {
		if p.lang == LangPOSIX {
			p.langErr(p.pos, "arrays", LangBash, LangMirBSDKorn)
		}
		if as.Index != nil {
			p.curErr("arrays cannot be nested")
		}
		as.Array = &ArrayExpr{Lparen: p.pos}
		// Only Bash switches to the arrayElems quote state; other
		// languages keep the current one.
		newQuote := p.quote
		if p.lang.isBash() {
			newQuote = arrayElems
		}
		old := p.preNested(newQuote)
		p.next()
		p.got(_Newl)
		for p.tok != _EOF && p.tok != rightParen {
			ae := &ArrayElem{}
			// Comments accumulated so far belong to this element.
			ae.Comments, p.accComs = p.accComs, nil
			if p.tok == leftBrack {
				left := p.pos
				ae.Index = p.eitherIndex()
				p.follow(left, `"[x]"`, assgn)
			}
			if ae.Value = p.getWord(); ae.Value == nil {
				switch p.tok {
				case leftParen:
					p.curErr("arrays cannot be nested")
					return nil
				case _Newl, rightParen, leftBrack:
					// TODO: support [index]=[
				default:
					p.curErr("array element values must be words")
					return nil
				}
			}
			// A comment on the same line as the end of the element is
			// attached to it as well.
			if len(p.accComs) > 0 {
				c := p.accComs[0]
				if c.Pos().Line() == ae.End().Line() {
					ae.Comments = append(ae.Comments, c)
					p.accComs = p.accComs[1:]
				}
			}
			as.Array.Elems = append(as.Array.Elems, ae)
			p.got(_Newl)
		}
		// Any remaining comments sit before the closing parenthesis.
		as.Array.Last, p.accComs = p.accComs, nil
		p.postNested(old)
		as.Array.Rparen = p.matched(as.Array.Lparen, leftParen, rightParen)
	} else if w := p.getWord(); w != nil {
		// Either the whole value, or the rest of a value which started
		// within the name literal.
		if as.Value == nil {
			as.Value = w
		} else {
			as.Value.Parts = append(as.Value.Parts, w.Parts...)
		}
	}
	return as
}
1656
1657func (p *Parser) peekRedir() bool {
1658	switch p.tok {
1659	case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut,
1660		hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir:
1661		return true
1662	}
1663	return false
1664}
1665
1666func (p *Parser) doRedirect(s *Stmt) {
1667	var r *Redirect
1668	if s.Redirs == nil {
1669		var alloc struct {
1670			redirs [4]*Redirect
1671			redir  Redirect
1672		}
1673		s.Redirs = alloc.redirs[:0]
1674		r = &alloc.redir
1675		s.Redirs = append(s.Redirs, r)
1676	} else {
1677		r = &Redirect{}
1678		s.Redirs = append(s.Redirs, r)
1679	}
1680	r.N = p.getLit()
1681	if !p.lang.isBash() && r.N != nil && r.N.Value[0] == '{' {
1682		p.langErr(r.N.Pos(), "{varname} redirects", LangBash)
1683	}
1684	if p.lang == LangPOSIX && (p.tok == rdrAll || p.tok == appAll) {
1685		p.langErr(p.pos, "&> redirects", LangBash, LangMirBSDKorn)
1686	}
1687	r.Op, r.OpPos = RedirOperator(p.tok), p.pos
1688	p.next()
1689	switch r.Op {
1690	case Hdoc, DashHdoc:
1691		old := p.quote
1692		p.quote, p.forbidNested = hdocWord, true
1693		p.heredocs = append(p.heredocs, r)
1694		r.Word = p.followWordTok(token(r.Op), r.OpPos)
1695		p.quote, p.forbidNested = old, false
1696		if p.tok == _Newl {
1697			if len(p.accComs) > 0 {
1698				c := p.accComs[0]
1699				if c.Pos().Line() == s.End().Line() {
1700					s.Comments = append(s.Comments, c)
1701					p.accComs = p.accComs[1:]
1702				}
1703			}
1704			p.doHeredocs()
1705		}
1706	case WordHdoc:
1707		if p.lang == LangPOSIX {
1708			p.langErr(r.OpPos, "herestrings", LangBash, LangMirBSDKorn)
1709		}
1710		fallthrough
1711	default:
1712		r.Word = p.followWordTok(token(r.Op), r.OpPos)
1713	}
1714}
1715
1716func (p *Parser) getStmt(readEnd, binCmd, fnBody bool) *Stmt {
1717	pos, ok := p.gotRsrv("!")
1718	s := &Stmt{Position: pos}
1719	if ok {
1720		s.Negated = true
1721		if p.stopToken() {
1722			p.posErr(s.Pos(), `"!" cannot form a statement alone`)
1723		}
1724		if _, ok := p.gotRsrv("!"); ok {
1725			p.posErr(s.Pos(), `cannot negate a command multiple times`)
1726		}
1727	}
1728	if s = p.gotStmtPipe(s, false); s == nil || p.err != nil {
1729		return nil
1730	}
1731	// instead of using recursion, iterate manually
1732	for p.tok == andAnd || p.tok == orOr {
1733		if binCmd {
1734			// left associativity: in a list of BinaryCmds, the
1735			// right recursion should only read a single element
1736			return s
1737		}
1738		b := &BinaryCmd{
1739			OpPos: p.pos,
1740			Op:    BinCmdOperator(p.tok),
1741			X:     s,
1742		}
1743		p.next()
1744		p.got(_Newl)
1745		b.Y = p.getStmt(false, true, false)
1746		if b.Y == nil || p.err != nil {
1747			if p.recoverError() {
1748				b.Y = &Stmt{Position: recoveredPos}
1749			} else {
1750				p.followErr(b.OpPos, b.Op.String(), "a statement")
1751				return nil
1752			}
1753		}
1754		s = &Stmt{Position: s.Position}
1755		s.Cmd = b
1756		s.Comments, b.X.Comments = b.X.Comments, nil
1757	}
1758	if readEnd {
1759		switch p.tok {
1760		case semicolon:
1761			s.Semicolon = p.pos
1762			p.next()
1763		case and:
1764			s.Semicolon = p.pos
1765			p.next()
1766			s.Background = true
1767		case orAnd:
1768			s.Semicolon = p.pos
1769			p.next()
1770			s.Coprocess = true
1771		}
1772	}
1773	if len(p.accComs) > 0 && !binCmd && !fnBody {
1774		c := p.accComs[0]
1775		if c.Pos().Line() == s.End().Line() {
1776			s.Comments = append(s.Comments, c)
1777			p.accComs = p.accComs[1:]
1778		}
1779	}
1780	return s
1781}
1782
// gotStmtPipe parses one command into s based on the current token, followed
// by any redirects, and then any pipe operators (or, orAnd) which join further
// commands into BinaryCmd nodes.
//
// If binCmd is set, the call is parsing the right side of a pipe, and returns
// before consuming another pipe operator.
//
// It returns nil if the current token is a backquote closing a command
// substitution, or if the token starts neither a command nor a redirect.
// Otherwise it returns the parsed statement, which is a new Stmt wrapping a
// BinaryCmd if any pipe operator was consumed.
func (p *Parser) gotStmtPipe(s *Stmt, binCmd bool) *Stmt {
	// Comments accumulated so far belong to this statement.
	s.Comments, p.accComs = p.accComs, nil
	switch p.tok {
	case _LitWord:
		// Reserved words and builtins with their own syntax. Words which
		// are not supported by the current language leave s.Cmd unset,
		// so they are handled as regular words further below.
		switch p.val {
		case "{":
			p.block(s)
		case "if":
			p.ifClause(s)
		case "while", "until":
			p.whileClause(s, p.val == "until")
		case "for":
			p.forClause(s)
		case "case":
			p.caseClause(s)
		case "}":
			p.curErr(`%q can only be used to close a block`, p.val)
		case "then":
			p.curErr(`%q can only be used in an if`, p.val)
		case "elif":
			p.curErr(`%q can only be used in an if`, p.val)
		case "fi":
			p.curErr(`%q can only be used to end an if`, p.val)
		case "do":
			p.curErr(`%q can only be used in a loop`, p.val)
		case "done":
			p.curErr(`%q can only be used to end a loop`, p.val)
		case "esac":
			p.curErr(`%q can only be used to end a case`, p.val)
		case "!":
			if !s.Negated {
				p.curErr(`"!" can only be used in full statements`)
				break
			}
		case "[[":
			if p.lang != LangPOSIX {
				p.testClause(s)
			}
		case "]]":
			if p.lang != LangPOSIX {
				p.curErr(`%q can only be used to close a test`, p.val)
			}
		case "let":
			if p.lang != LangPOSIX {
				p.letClause(s)
			}
		case "function":
			if p.lang != LangPOSIX {
				p.bashFuncDecl(s)
			}
		case "declare":
			if p.lang.isBash() { // Note that mksh lacks this one.
				p.declClause(s)
			}
		case "local", "export", "readonly", "typeset", "nameref":
			if p.lang != LangPOSIX {
				p.declClause(s)
			}
		case "time":
			if p.lang != LangPOSIX {
				p.timeClause(s)
			}
		case "coproc":
			if p.lang.isBash() { // Note that mksh lacks this one.
				p.coprocClause(s)
			}
		case "select":
			if p.lang != LangPOSIX {
				p.selectClause(s)
			}
		case "@test":
			if p.lang == LangBats {
				p.testDecl(s)
			}
		}
		// One of the clauses above already filled in the command.
		if s.Cmd != nil {
			break
		}
		// A call starting with an assignment.
		if p.hasValidIdent() {
			p.callExpr(s, nil, true)
			break
		}
		// A literal word directly followed by parentheses declares a
		// function; otherwise the word starts a call.
		name := p.lit(p.pos, p.val)
		if p.next(); p.got(leftParen) {
			p.follow(name.ValuePos, "foo(", rightParen)
			if p.lang == LangPOSIX && !ValidName(name.Value) {
				p.posErr(name.Pos(), "invalid func name")
			}
			p.funcDecl(s, name, name.ValuePos, true)
		} else {
			p.callExpr(s, p.wordOne(name), false)
		}
	case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut,
		hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir:
		// A statement which begins with a redirect.
		p.doRedirect(s)
		p.callExpr(s, nil, false)
	case bckQuote:
		// A closing backquote ends the statements within a command
		// substitution; an opening one starts a word.
		if p.backquoteEnd() {
			return nil
		}
		fallthrough
	case _Lit, dollBrace, dollDblParen, dollParen, dollar, cmdIn, cmdOut,
		sglQuote, dollSglQuote, dblQuote, dollDblQuote, dollBrack,
		globQuest, globStar, globPlus, globAt, globExcl:
		// Any other token which starts a word.
		if p.hasValidIdent() {
			p.callExpr(s, nil, true)
			break
		}
		w := p.wordAnyNumber()
		// Only a single literal word may name a function, which is
		// handled by the _LitWord case above.
		if p.got(leftParen) {
			p.posErr(w.Pos(), "invalid func name")
		}
		p.callExpr(s, w, false)
	case leftParen:
		p.subshell(s)
	case dblLeftParen:
		p.arithmExpCmd(s)
	default:
		// Not the start of a command; without any redirects in s,
		// there is no statement to return.
		if len(s.Redirs) == 0 {
			return nil
		}
	}
	// Redirects following the command.
	for p.peekRedir() {
		p.doRedirect(s)
	}
	// instead of using recursion, iterate manually
	for p.tok == or || p.tok == orAnd {
		if binCmd {
			// left associativity: in a list of BinaryCmds, the
			// right recursion should only read a single element
			return s
		}
		if p.tok == orAnd && p.lang == LangMirBSDKorn {
			// No need to check for LangPOSIX, as on that language
			// we parse |& as two tokens.
			break
		}
		b := &BinaryCmd{OpPos: p.pos, Op: BinCmdOperator(p.tok), X: s}
		p.next()
		p.got(_Newl)
		if b.Y = p.gotStmtPipe(&Stmt{Position: p.pos}, true); b.Y == nil || p.err != nil {
			if p.recoverError() {
				b.Y = &Stmt{Position: recoveredPos}
			} else {
				p.followErr(b.OpPos, b.Op.String(), "a statement")
				break
			}
		}
		// Wrap the pipe in a new statement, which takes over the
		// comments of the left-hand side.
		s = &Stmt{Position: s.Position}
		s.Cmd = b
		s.Comments, b.X.Comments = b.X.Comments, nil
		// in "! x | y", the bang applies to the entire pipeline
		s.Negated = b.X.Negated
		b.X.Negated = false
	}
	return s
}
1940
1941func (p *Parser) subshell(s *Stmt) {
1942	sub := &Subshell{Lparen: p.pos}
1943	old := p.preNested(subCmd)
1944	p.next()
1945	sub.Stmts, sub.Last = p.stmtList()
1946	p.postNested(old)
1947	sub.Rparen = p.matched(sub.Lparen, leftParen, rightParen)
1948	s.Cmd = sub
1949}
1950
1951func (p *Parser) arithmExpCmd(s *Stmt) {
1952	ar := &ArithmCmd{Left: p.pos}
1953	old := p.preNested(arithmExprCmd)
1954	p.next()
1955	if p.got(hash) {
1956		if p.lang != LangMirBSDKorn {
1957			p.langErr(ar.Pos(), "unsigned expressions", LangMirBSDKorn)
1958		}
1959		ar.Unsigned = true
1960	}
1961	ar.X = p.followArithm(dblLeftParen, ar.Left)
1962	ar.Right = p.arithmEnd(dblLeftParen, ar.Left, old)
1963	s.Cmd = ar
1964}
1965
1966func (p *Parser) block(s *Stmt) {
1967	b := &Block{Lbrace: p.pos}
1968	p.next()
1969	b.Stmts, b.Last = p.stmtList("}")
1970	if pos, ok := p.gotRsrv("}"); ok {
1971		b.Rbrace = pos
1972	} else if p.recoverError() {
1973		b.Rbrace = recoveredPos
1974	} else {
1975		p.matchingErr(b.Lbrace, "{", "}")
1976	}
1977	s.Cmd = b
1978}
1979
1980func (p *Parser) ifClause(s *Stmt) {
1981	rootIf := &IfClause{Position: p.pos}
1982	p.next()
1983	rootIf.Cond, rootIf.CondLast = p.followStmts("if", rootIf.Position, "then")
1984	rootIf.ThenPos = p.followRsrv(rootIf.Position, "if <cond>", "then")
1985	rootIf.Then, rootIf.ThenLast = p.followStmts("then", rootIf.ThenPos, "fi", "elif", "else")
1986	curIf := rootIf
1987	for p.tok == _LitWord && p.val == "elif" {
1988		elf := &IfClause{Position: p.pos}
1989		curIf.Last = p.accComs
1990		p.accComs = nil
1991		p.next()
1992		elf.Cond, elf.CondLast = p.followStmts("elif", elf.Position, "then")
1993		elf.ThenPos = p.followRsrv(elf.Position, "elif <cond>", "then")
1994		elf.Then, elf.ThenLast = p.followStmts("then", elf.ThenPos, "fi", "elif", "else")
1995		curIf.Else = elf
1996		curIf = elf
1997	}
1998	if elsePos, ok := p.gotRsrv("else"); ok {
1999		curIf.Last = p.accComs
2000		p.accComs = nil
2001		els := &IfClause{Position: elsePos}
2002		els.Then, els.ThenLast = p.followStmts("else", els.Position, "fi")
2003		curIf.Else = els
2004		curIf = els
2005	}
2006	curIf.Last = p.accComs
2007	p.accComs = nil
2008	rootIf.FiPos = p.stmtEnd(rootIf, "if", "fi")
2009	for els := rootIf.Else; els != nil; els = els.Else {
2010		// All the nested IfClauses share the same FiPos.
2011		els.FiPos = rootIf.FiPos
2012	}
2013	s.Cmd = rootIf
2014}
2015
2016func (p *Parser) whileClause(s *Stmt, until bool) {
2017	wc := &WhileClause{WhilePos: p.pos, Until: until}
2018	rsrv := "while"
2019	rsrvCond := "while <cond>"
2020	if wc.Until {
2021		rsrv = "until"
2022		rsrvCond = "until <cond>"
2023	}
2024	p.next()
2025	wc.Cond, wc.CondLast = p.followStmts(rsrv, wc.WhilePos, "do")
2026	wc.DoPos = p.followRsrv(wc.WhilePos, rsrvCond, "do")
2027	wc.Do, wc.DoLast = p.followStmts("do", wc.DoPos, "done")
2028	wc.DonePos = p.stmtEnd(wc, rsrv, "done")
2029	s.Cmd = wc
2030}
2031
2032func (p *Parser) forClause(s *Stmt) {
2033	fc := &ForClause{ForPos: p.pos}
2034	p.next()
2035	fc.Loop = p.loop(fc.ForPos)
2036
2037	start, end := "do", "done"
2038	if pos, ok := p.gotRsrv("{"); ok {
2039		if p.lang == LangPOSIX {
2040			p.langErr(pos, "for loops with braces", LangBash, LangMirBSDKorn)
2041		}
2042		fc.DoPos = pos
2043		fc.Braces = true
2044		start, end = "{", "}"
2045	} else {
2046		fc.DoPos = p.followRsrv(fc.ForPos, "for foo [in words]", start)
2047	}
2048
2049	s.Comments = append(s.Comments, p.accComs...)
2050	p.accComs = nil
2051	fc.Do, fc.DoLast = p.followStmts(start, fc.DoPos, end)
2052	fc.DonePos = p.stmtEnd(fc, "for", end)
2053	s.Cmd = fc
2054}
2055
2056func (p *Parser) loop(fpos Pos) Loop {
2057	if !p.lang.isBash() {
2058		switch p.tok {
2059		case leftParen, dblLeftParen:
2060			p.langErr(p.pos, "c-style fors", LangBash)
2061		}
2062	}
2063	if p.tok == dblLeftParen {
2064		cl := &CStyleLoop{Lparen: p.pos}
2065		old := p.preNested(arithmExprCmd)
2066		p.next()
2067		cl.Init = p.arithmExpr(false)
2068		if !p.got(dblSemicolon) {
2069			p.follow(p.pos, "expr", semicolon)
2070			cl.Cond = p.arithmExpr(false)
2071			p.follow(p.pos, "expr", semicolon)
2072		}
2073		cl.Post = p.arithmExpr(false)
2074		cl.Rparen = p.arithmEnd(dblLeftParen, cl.Lparen, old)
2075		p.got(semicolon)
2076		p.got(_Newl)
2077		return cl
2078	}
2079	return p.wordIter("for", fpos)
2080}
2081
2082func (p *Parser) wordIter(ftok string, fpos Pos) *WordIter {
2083	wi := &WordIter{}
2084	if wi.Name = p.getLit(); wi.Name == nil {
2085		p.followErr(fpos, ftok, "a literal")
2086	}
2087	if p.got(semicolon) {
2088		p.got(_Newl)
2089		return wi
2090	}
2091	p.got(_Newl)
2092	if pos, ok := p.gotRsrv("in"); ok {
2093		wi.InPos = pos
2094		for !p.stopToken() {
2095			if w := p.getWord(); w == nil {
2096				p.curErr("word list can only contain words")
2097			} else {
2098				wi.Items = append(wi.Items, w)
2099			}
2100		}
2101		p.got(semicolon)
2102		p.got(_Newl)
2103	} else if p.tok == _LitWord && p.val == "do" {
2104	} else {
2105		p.followErr(fpos, ftok+" foo", `"in", "do", ;, or a newline`)
2106	}
2107	return wi
2108}
2109
2110func (p *Parser) selectClause(s *Stmt) {
2111	fc := &ForClause{ForPos: p.pos, Select: true}
2112	p.next()
2113	fc.Loop = p.wordIter("select", fc.ForPos)
2114	fc.DoPos = p.followRsrv(fc.ForPos, "select foo [in words]", "do")
2115	fc.Do, fc.DoLast = p.followStmts("do", fc.DoPos, "done")
2116	fc.DonePos = p.stmtEnd(fc, "select", "done")
2117	s.Cmd = fc
2118}
2119
2120func (p *Parser) caseClause(s *Stmt) {
2121	cc := &CaseClause{Case: p.pos}
2122	p.next()
2123	cc.Word = p.getWord()
2124	if cc.Word == nil {
2125		p.followErr(cc.Case, "case", "a word")
2126	}
2127	end := "esac"
2128	p.got(_Newl)
2129	if pos, ok := p.gotRsrv("{"); ok {
2130		cc.In = pos
2131		cc.Braces = true
2132		if p.lang != LangMirBSDKorn {
2133			p.langErr(cc.Pos(), `"case i {"`, LangMirBSDKorn)
2134		}
2135		end = "}"
2136	} else {
2137		cc.In = p.followRsrv(cc.Case, "case x", "in")
2138	}
2139	cc.Items = p.caseItems(end)
2140	cc.Last, p.accComs = p.accComs, nil
2141	cc.Esac = p.stmtEnd(cc, "case", end)
2142	s.Cmd = cc
2143}
2144
2145func (p *Parser) caseItems(stop string) (items []*CaseItem) {
2146	p.got(_Newl)
2147	for p.tok != _EOF && (p.tok != _LitWord || p.val != stop) {
2148		ci := &CaseItem{}
2149		ci.Comments, p.accComs = p.accComs, nil
2150		p.got(leftParen)
2151		for p.tok != _EOF {
2152			if w := p.getWord(); w == nil {
2153				p.curErr("case patterns must consist of words")
2154			} else {
2155				ci.Patterns = append(ci.Patterns, w)
2156			}
2157			if p.tok == rightParen {
2158				break
2159			}
2160			if !p.got(or) {
2161				p.curErr("case patterns must be separated with |")
2162			}
2163		}
2164		old := p.preNested(switchCase)
2165		p.next()
2166		ci.Stmts, ci.Last = p.stmtList(stop)
2167		p.postNested(old)
2168		switch p.tok {
2169		case dblSemicolon, semiAnd, dblSemiAnd, semiOr:
2170		default:
2171			ci.Op = Break
2172			items = append(items, ci)
2173			return
2174		}
2175		ci.Last = append(ci.Last, p.accComs...)
2176		p.accComs = nil
2177		ci.OpPos = p.pos
2178		ci.Op = CaseOperator(p.tok)
2179		p.next()
2180		p.got(_Newl)
2181
2182		// Split the comments:
2183		//
2184		// case x in
2185		// a)
2186		//   foo
2187		//   ;;
2188		//   # comment for a
2189		// # comment for b
2190		// b)
2191		//   [...]
2192		split := len(p.accComs)
2193		for i, c := range slices.Backward(p.accComs) {
2194			if c.Pos().Col() != p.pos.Col() {
2195				break
2196			}
2197			split = i
2198		}
2199		ci.Comments = append(ci.Comments, p.accComs[:split]...)
2200		p.accComs = p.accComs[split:]
2201
2202		items = append(items, ci)
2203	}
2204	return
2205}
2206
2207func (p *Parser) testClause(s *Stmt) {
2208	tc := &TestClause{Left: p.pos}
2209	old := p.preNested(testExpr)
2210	p.next()
2211	if _, ok := p.gotRsrv("]]"); ok || p.tok == _EOF {
2212		p.posErr(tc.Left, "test clause requires at least one expression")
2213	}
2214	tc.X = p.testExpr(false)
2215	if tc.X == nil {
2216		p.followErrExp(tc.Left, "[[")
2217	}
2218	tc.Right = p.pos
2219	if _, ok := p.gotRsrv("]]"); !ok {
2220		p.matchingErr(tc.Left, "[[", "]]")
2221	}
2222	p.postNested(old)
2223	s.Cmd = tc
2224}
2225
// testExpr parses an expression inside a test clause. The left operand comes
// from testExprBase when pastAndOr is set, and from a recursive call with
// pastAndOr set otherwise. If a binary operator follows the operand, the
// result is a BinaryTest with the operand on its left; otherwise the operand
// is returned as is. It returns nil if no operand could be parsed.
func (p *Parser) testExpr(pastAndOr bool) TestExpr {
	p.got(_Newl)
	var left TestExpr
	if pastAndOr {
		left = p.testExprBase()
	} else {
		left = p.testExpr(true)
	}
	if left == nil {
		return left
	}
	p.got(_Newl)
	// Figure out whether a binary operator follows, leaving its token in
	// p.tok for the BinaryTest below.
	switch p.tok {
	case andAnd, orOr:
	case _LitWord:
		if p.val == "]]" {
			return left
		}
		// Operators spelled as words are turned into their tokens.
		if p.tok = token(testBinaryOp(p.val)); p.tok == illegalTok {
			p.curErr("not a valid test operator: %s", p.val)
		}
	case rdrIn, rdrOut:
		// The redirect tokens are accepted as binary test operators.
	case _EOF, rightParen:
		return left
	case _Lit:
		p.curErr("test operator words must consist of a single literal")
	default:
		p.curErr("not a valid test operator: %v", p.tok)
	}
	b := &BinaryTest{
		OpPos: p.pos,
		Op:    BinTestOperator(p.tok),
		X:     left,
	}
	// Save the previous quoteState, since we change it in TsReMatch.
	oldQuote := p.quote

	switch b.Op {
	case AndTest, OrTest:
		// The right side is a full expression.
		p.next()
		if b.Y = p.testExpr(false); b.Y == nil {
			p.followErrExp(b.OpPos, b.Op.String())
		}
	case TsReMatch:
		if !p.lang.isBash() {
			p.langErr(p.pos, "regex tests", LangBash)
		}
		p.rxOpenParens = 0
		p.rxFirstPart = true
		// TODO(mvdan): Using nested states within a regex will break in
		// all sorts of ways. The better fix is likely to use a stop
		// token, like we do with heredocs.
		p.quote = testExprRegexp
		fallthrough
	default:
		// All other operators take a single word on each side.
		if _, ok := b.X.(*Word); !ok {
			p.posErr(b.OpPos, "expected %s, %s or %s after complex expr",
				AndTest, OrTest, "]]")
		}
		p.next()
		b.Y = p.followWordTok(token(b.Op), b.OpPos)
	}
	p.quote = oldQuote
	return b
}
2291
2292func (p *Parser) testExprBase() TestExpr {
2293	switch p.tok {
2294	case _EOF, rightParen:
2295		return nil
2296	case _LitWord:
2297		op := token(testUnaryOp(p.val))
2298		switch op {
2299		case illegalTok:
2300		case tsRefVar, tsModif: // not available in mksh
2301			if p.lang.isBash() {
2302				p.tok = op
2303			}
2304		default:
2305			p.tok = op
2306		}
2307	}
2308	switch p.tok {
2309	case exclMark:
2310		u := &UnaryTest{OpPos: p.pos, Op: TsNot}
2311		p.next()
2312		if u.X = p.testExpr(false); u.X == nil {
2313			p.followErrExp(u.OpPos, u.Op.String())
2314		}
2315		return u
2316	case tsExists, tsRegFile, tsDirect, tsCharSp, tsBlckSp, tsNmPipe,
2317		tsSocket, tsSmbLink, tsSticky, tsGIDSet, tsUIDSet, tsGrpOwn,
2318		tsUsrOwn, tsModif, tsRead, tsWrite, tsExec, tsNoEmpty,
2319		tsFdTerm, tsEmpStr, tsNempStr, tsOptSet, tsVarSet, tsRefVar:
2320		u := &UnaryTest{OpPos: p.pos, Op: UnTestOperator(p.tok)}
2321		p.next()
2322		u.X = p.followWordTok(token(u.Op), u.OpPos)
2323		return u
2324	case leftParen:
2325		pe := &ParenTest{Lparen: p.pos}
2326		p.next()
2327		if pe.X = p.testExpr(false); pe.X == nil {
2328			p.followErrExp(pe.Lparen, "(")
2329		}
2330		pe.Rparen = p.matched(pe.Lparen, leftParen, rightParen)
2331		return pe
2332	case _LitWord:
2333		if p.val == "]]" {
2334			return nil
2335		}
2336		fallthrough
2337	default:
2338		if w := p.getWord(); w != nil {
2339			return w
2340		}
2341		// otherwise we'd return a typed nil above
2342		return nil
2343	}
2344}
2345
2346func (p *Parser) declClause(s *Stmt) {
2347	ds := &DeclClause{Variant: p.lit(p.pos, p.val)}
2348	p.next()
2349	for !p.stopToken() && !p.peekRedir() {
2350		if p.hasValidIdent() {
2351			ds.Args = append(ds.Args, p.getAssign(false))
2352		} else if p.eqlOffs > 0 {
2353			p.curErr("invalid var name")
2354		} else if p.tok == _LitWord && ValidName(p.val) {
2355			ds.Args = append(ds.Args, &Assign{
2356				Naked: true,
2357				Name:  p.getLit(),
2358			})
2359		} else if w := p.getWord(); w != nil {
2360			ds.Args = append(ds.Args, &Assign{
2361				Naked: true,
2362				Value: w,
2363			})
2364		} else {
2365			p.followErr(p.pos, ds.Variant.Value, "names or assignments")
2366		}
2367	}
2368	s.Cmd = ds
2369}
2370
2371func isBashCompoundCommand(tok token, val string) bool {
2372	switch tok {
2373	case leftParen, dblLeftParen:
2374		return true
2375	case _LitWord:
2376		switch val {
2377		case "{", "if", "while", "until", "for", "case", "[[",
2378			"coproc", "let", "function", "declare", "local",
2379			"export", "readonly", "typeset", "nameref":
2380			return true
2381		}
2382	}
2383	return false
2384}
2385
2386func (p *Parser) timeClause(s *Stmt) {
2387	tc := &TimeClause{Time: p.pos}
2388	p.next()
2389	if _, ok := p.gotRsrv("-p"); ok {
2390		tc.PosixFormat = true
2391	}
2392	tc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false)
2393	s.Cmd = tc
2394}
2395
2396func (p *Parser) coprocClause(s *Stmt) {
2397	cc := &CoprocClause{Coproc: p.pos}
2398	if p.next(); isBashCompoundCommand(p.tok, p.val) {
2399		// has no name
2400		cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false)
2401		s.Cmd = cc
2402		return
2403	}
2404	cc.Name = p.getWord()
2405	cc.Stmt = p.gotStmtPipe(&Stmt{Position: p.pos}, false)
2406	if cc.Stmt == nil {
2407		if cc.Name == nil {
2408			p.posErr(cc.Coproc, "coproc clause requires a command")
2409			return
2410		}
2411		// name was in fact the stmt
2412		cc.Stmt = &Stmt{Position: cc.Name.Pos()}
2413		cc.Stmt.Cmd = p.call(cc.Name)
2414		cc.Name = nil
2415	} else if cc.Name != nil {
2416		if call, ok := cc.Stmt.Cmd.(*CallExpr); ok {
2417			// name was in fact the start of a call
2418			call.Args = append([]*Word{cc.Name}, call.Args...)
2419			cc.Name = nil
2420		}
2421	}
2422	s.Cmd = cc
2423}
2424
2425func (p *Parser) letClause(s *Stmt) {
2426	lc := &LetClause{Let: p.pos}
2427	old := p.preNested(arithmExprLet)
2428	p.next()
2429	for !p.stopToken() && !p.peekRedir() {
2430		x := p.arithmExpr(true)
2431		if x == nil {
2432			break
2433		}
2434		lc.Exprs = append(lc.Exprs, x)
2435	}
2436	if len(lc.Exprs) == 0 {
2437		p.followErrExp(lc.Let, "let")
2438	}
2439	p.postNested(old)
2440	s.Cmd = lc
2441}
2442
2443func (p *Parser) bashFuncDecl(s *Stmt) {
2444	fpos := p.pos
2445	if p.next(); p.tok != _LitWord {
2446		p.followErr(fpos, "function", "a name")
2447	}
2448	name := p.lit(p.pos, p.val)
2449	hasParens := false
2450	if p.next(); p.got(leftParen) {
2451		hasParens = true
2452		p.follow(name.ValuePos, "foo(", rightParen)
2453	}
2454	p.funcDecl(s, name, fpos, hasParens)
2455}
2456
2457func (p *Parser) testDecl(s *Stmt) {
2458	td := &TestDecl{Position: p.pos}
2459	p.next()
2460	if td.Description = p.getWord(); td.Description == nil {
2461		p.followErr(td.Position, "@test", "a description word")
2462	}
2463	if td.Body = p.getStmt(false, false, true); td.Body == nil {
2464		p.followErr(td.Position, `@test "desc"`, "a statement")
2465	}
2466	s.Cmd = td
2467}
2468
2469func (p *Parser) callExpr(s *Stmt, w *Word, assign bool) {
2470	ce := p.call(w)
2471	if w == nil {
2472		ce.Args = ce.Args[:0]
2473	}
2474	if assign {
2475		ce.Assigns = append(ce.Assigns, p.getAssign(true))
2476	}
2477loop:
2478	for {
2479		switch p.tok {
2480		case _EOF, _Newl, semicolon, and, or, andAnd, orOr, orAnd,
2481			dblSemicolon, semiAnd, dblSemiAnd, semiOr:
2482			break loop
2483		case _LitWord:
2484			if len(ce.Args) == 0 && p.hasValidIdent() {
2485				ce.Assigns = append(ce.Assigns, p.getAssign(true))
2486				break
2487			}
2488			// Avoid failing later with the confusing "} can only be used to close a block".
2489			if p.lang == LangPOSIX && p.val == "{" && w != nil && w.Lit() == "function" {
2490				p.langErr(p.pos, `the "function" builtin`, LangBash)
2491			}
2492			ce.Args = append(ce.Args, p.wordOne(p.lit(p.pos, p.val)))
2493			p.next()
2494		case _Lit:
2495			if len(ce.Args) == 0 && p.hasValidIdent() {
2496				ce.Assigns = append(ce.Assigns, p.getAssign(true))
2497				break
2498			}
2499			ce.Args = append(ce.Args, p.wordAnyNumber())
2500		case bckQuote:
2501			if p.backquoteEnd() {
2502				break loop
2503			}
2504			fallthrough
2505		case dollBrace, dollDblParen, dollParen, dollar, cmdIn, cmdOut,
2506			sglQuote, dollSglQuote, dblQuote, dollDblQuote, dollBrack,
2507			globQuest, globStar, globPlus, globAt, globExcl:
2508			ce.Args = append(ce.Args, p.wordAnyNumber())
2509		case rdrOut, appOut, rdrIn, dplIn, dplOut, clbOut, rdrInOut,
2510			hdoc, dashHdoc, wordHdoc, rdrAll, appAll, _LitRedir:
2511			p.doRedirect(s)
2512		case dblLeftParen:
2513			p.curErr("%s can only be used to open an arithmetic cmd", p.tok)
2514		case rightParen:
2515			if p.quote == subCmd {
2516				break loop
2517			}
2518			fallthrough
2519		default:
2520			// Note that we'll only keep the first error that happens.
2521			if len(ce.Args) > 0 {
2522				if cmd := ce.Args[0].Lit(); p.lang == LangPOSIX && isBashCompoundCommand(_LitWord, cmd) {
2523					p.langErr(p.pos, fmt.Sprintf("the %q builtin", cmd), LangBash)
2524				}
2525			}
2526			p.curErr("a command can only contain words and redirects; encountered %s", p.tok)
2527		}
2528	}
2529	if len(ce.Assigns) == 0 && len(ce.Args) == 0 {
2530		return
2531	}
2532	if len(ce.Args) == 0 {
2533		ce.Args = nil
2534	} else {
2535		for _, asgn := range ce.Assigns {
2536			if asgn.Index != nil || asgn.Array != nil {
2537				p.posErr(asgn.Pos(), "inline variables cannot be arrays")
2538			}
2539		}
2540	}
2541	s.Cmd = ce
2542}
2543
2544func (p *Parser) funcDecl(s *Stmt, name *Lit, pos Pos, withParens bool) {
2545	fd := &FuncDecl{
2546		Position: pos,
2547		RsrvWord: pos != name.ValuePos,
2548		Parens:   withParens,
2549		Name:     name,
2550	}
2551	p.got(_Newl)
2552	if fd.Body = p.getStmt(false, false, true); fd.Body == nil {
2553		p.followErr(fd.Pos(), "foo()", "a statement")
2554	}
2555	s.Cmd = fd
2556}