lexer.go

   1// Copyright (c) 2016, Daniel Martí <mvdan@mvdan.cc>
   2// See LICENSE for licensing information
   3
   4package syntax
   5
   6import (
   7	"bytes"
   8	"io"
   9	"unicode/utf8"
  10)
  11
  12// bytes that form or start a token
  13func regOps(r rune) bool {
  14	switch r {
  15	case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`':
  16		return true
  17	}
  18	return false
  19}
  20
  21// tokenize these inside parameter expansions
  22func paramOps(r rune) bool {
  23	switch r {
  24	case '}', '#', '!', ':', '-', '+', '=', '?', '%', '[', ']', '/', '^',
  25		',', '@', '*':
  26		return true
  27	}
  28	return false
  29}
  30
  31// these start a parameter expansion name
  32func paramNameOp(r rune) bool {
  33	switch r {
  34	case '}', ':', '+', '=', '%', '[', ']', '/', '^', ',':
  35		return false
  36	}
  37	return true
  38}
  39
  40// tokenize these inside arithmetic expansions
  41func arithmOps(r rune) bool {
  42	switch r {
  43	case '+', '-', '!', '~', '*', '/', '%', '(', ')', '^', '<', '>', ':', '=',
  44		',', '?', '|', '&', '[', ']', '#':
  45		return true
  46	}
  47	return false
  48}
  49
  50func bquoteEscaped(b byte) bool {
  51	switch b {
  52	case '$', '`', '\\':
  53		return true
  54	}
  55	return false
  56}
  57
  58const escNewl rune = utf8.RuneSelf + 1
  59
  60func (p *Parser) rune() rune {
  61	if p.r == '\n' || p.r == escNewl {
  62		// p.r instead of b so that newline
  63		// character positions don't have col 0.
  64		p.line++
  65		p.col = 0
  66	}
  67	p.col += int64(p.w)
  68	bquotes := 0
  69retry:
  70	if p.bsp < uint(len(p.bs)) {
  71		if b := p.bs[p.bsp]; b < utf8.RuneSelf {
  72			p.bsp++
  73			switch b {
  74			case '\x00':
  75				// Ignore null bytes while parsing, like bash.
  76				p.col++
  77				goto retry
  78			case '\r':
  79				if p.peekByte('\n') { // \r\n turns into \n
  80					p.col++
  81					goto retry
  82				}
  83			case '\\':
  84				if p.r == '\\' {
  85				} else if p.peekByte('\n') {
  86					p.bsp++
  87					p.w, p.r = 1, escNewl
  88					return escNewl
  89				} else if p.peekBytes("\r\n") { // \\\r\n turns into \\\n
  90					p.col++
  91					p.bsp += 2
  92					p.w, p.r = 2, escNewl
  93					return escNewl
  94				}
  95				if p.openBquotes > 0 && bquotes < p.openBquotes &&
  96					p.bsp < uint(len(p.bs)) && bquoteEscaped(p.bs[p.bsp]) {
  97					// We turn backquote command substitutions into $(),
  98					// so we remove the extra backslashes needed by the backquotes.
  99					bquotes++
 100					p.col++
 101					goto retry
 102				}
 103			}
 104			if b == '`' {
 105				p.lastBquoteEsc = bquotes
 106			}
 107			if p.litBs != nil {
 108				p.litBs = append(p.litBs, b)
 109			}
 110			p.w, p.r = 1, rune(b)
 111			return p.r
 112		}
 113		if !utf8.FullRune(p.bs[p.bsp:]) {
 114			// we need more bytes to read a full non-ascii rune
 115			p.fill()
 116		}
 117		var w int
 118		p.r, w = utf8.DecodeRune(p.bs[p.bsp:])
 119		if p.litBs != nil {
 120			p.litBs = append(p.litBs, p.bs[p.bsp:p.bsp+uint(w)]...)
 121		}
 122		p.bsp += uint(w)
 123		if p.r == utf8.RuneError && w == 1 {
 124			p.posErr(p.nextPos(), "invalid UTF-8 encoding")
 125		}
 126		p.w = w
 127	} else {
 128		if p.r == utf8.RuneSelf {
 129		} else if p.fill(); p.bs == nil {
 130			p.bsp++
 131			p.r = utf8.RuneSelf
 132			p.w = 1
 133		} else {
 134			goto retry
 135		}
 136	}
 137	return p.r
 138}
 139
 140// fill reads more bytes from the input src into readBuf. Any bytes that
 141// had not yet been used at the end of the buffer are slid into the
 142// beginning of the buffer.
 143func (p *Parser) fill() {
 144	p.offs += int64(p.bsp)
 145	left := len(p.bs) - int(p.bsp)
 146	copy(p.readBuf[:left], p.readBuf[p.bsp:])
 147readAgain:
 148	n, err := 0, p.readErr
 149	if err == nil {
 150		n, err = p.src.Read(p.readBuf[left:])
 151		p.readErr = err
 152	}
 153	if n == 0 {
 154		if err == nil {
 155			goto readAgain
 156		}
 157		// don't use p.errPass as we don't want to overwrite p.tok
 158		if err != io.EOF {
 159			p.err = err
 160		}
 161		if left > 0 {
 162			p.bs = p.readBuf[:left]
 163		} else {
 164			p.bs = nil
 165		}
 166	} else {
 167		p.bs = p.readBuf[:left+n]
 168	}
 169	p.bsp = 0
 170}
 171
 172func (p *Parser) nextKeepSpaces() {
 173	r := p.r
 174	if p.quote != hdocBody && p.quote != hdocBodyTabs {
 175		// Heredocs handle escaped newlines in a special way, but others
 176		// do not.
 177		for r == escNewl {
 178			r = p.rune()
 179		}
 180	}
 181	p.pos = p.nextPos()
 182	switch p.quote {
 183	case paramExpRepl:
 184		switch r {
 185		case '}', '/':
 186			p.tok = p.paramToken(r)
 187		case '`', '"', '$', '\'':
 188			p.tok = p.regToken(r)
 189		default:
 190			p.advanceLitOther(r)
 191		}
 192	case dblQuotes:
 193		switch r {
 194		case '`', '"', '$':
 195			p.tok = p.dqToken(r)
 196		default:
 197			p.advanceLitDquote(r)
 198		}
 199	case hdocBody, hdocBodyTabs:
 200		switch r {
 201		case '`', '$':
 202			p.tok = p.dqToken(r)
 203		default:
 204			p.advanceLitHdoc(r)
 205		}
 206	default: // paramExpExp:
 207		switch r {
 208		case '}':
 209			p.tok = p.paramToken(r)
 210		case '`', '"', '$', '\'':
 211			p.tok = p.regToken(r)
 212		default:
 213			p.advanceLitOther(r)
 214		}
 215	}
 216	if p.err != nil && p.tok != _EOF {
 217		p.tok = _EOF
 218	}
 219}
 220
 221func (p *Parser) next() {
 222	if p.r == utf8.RuneSelf {
 223		p.tok = _EOF
 224		return
 225	}
 226	p.spaced = false
 227	if p.quote&allKeepSpaces != 0 {
 228		p.nextKeepSpaces()
 229		return
 230	}
 231	r := p.r
 232	for r == escNewl {
 233		r = p.rune()
 234	}
 235skipSpace:
 236	for {
 237		switch r {
 238		case utf8.RuneSelf:
 239			p.tok = _EOF
 240			return
 241		case escNewl:
 242			r = p.rune()
 243		case ' ', '\t', '\r':
 244			p.spaced = true
 245			r = p.rune()
 246		case '\n':
 247			if p.tok == _Newl {
 248				// merge consecutive newline tokens
 249				r = p.rune()
 250				continue
 251			}
 252			p.spaced = true
 253			p.tok = _Newl
 254			if p.quote != hdocWord && len(p.heredocs) > p.buriedHdocs {
 255				p.doHeredocs()
 256			}
 257			return
 258		default:
 259			break skipSpace
 260		}
 261	}
 262	if p.stopAt != nil && (p.spaced || p.tok == illegalTok || p.stopToken()) {
 263		w := utf8.RuneLen(r)
 264		if bytes.HasPrefix(p.bs[p.bsp-uint(w):], p.stopAt) {
 265			p.r = utf8.RuneSelf
 266			p.w = 1
 267			p.tok = _EOF
 268			return
 269		}
 270	}
 271	p.pos = p.nextPos()
 272	switch {
 273	case p.quote&allRegTokens != 0:
 274		switch r {
 275		case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`':
 276			p.tok = p.regToken(r)
 277		case '#':
 278			// If we're parsing $foo#bar, ${foo}#bar, 'foo'#bar, or "foo"#bar,
 279			// #bar is a continuation of the same word, not a comment.
 280			// TODO: support $(foo)#bar and `foo`#bar as well, which is slightly tricky,
 281			// as we can't easily tell them apart from (foo)#bar and `#bar`,
 282			// where #bar should remain a comment.
 283			if !p.spaced {
 284				switch p.tok {
 285				case _LitWord, rightBrace, sglQuote, dblQuote:
 286					p.advanceLitNone(r)
 287					return
 288				}
 289			}
 290			r = p.rune()
 291			p.newLit(r)
 292		runeLoop:
 293			for {
 294				switch r {
 295				case '\n', utf8.RuneSelf:
 296					break runeLoop
 297				case escNewl:
 298					p.litBs = append(p.litBs, '\\', '\n')
 299					break runeLoop
 300				case '`':
 301					if p.backquoteEnd() {
 302						break runeLoop
 303					}
 304				}
 305				r = p.rune()
 306			}
 307			if p.keepComments {
 308				*p.curComs = append(*p.curComs, Comment{
 309					Hash: p.pos,
 310					Text: p.endLit(),
 311				})
 312			} else {
 313				p.litBs = nil
 314			}
 315			p.next()
 316		case '[', '=':
 317			if p.quote == arrayElems {
 318				p.tok = p.paramToken(r)
 319			} else {
 320				p.advanceLitNone(r)
 321			}
 322		case '?', '*', '+', '@', '!':
 323			if p.extendedGlob() {
 324				switch r {
 325				case '?':
 326					p.tok = globQuest
 327				case '*':
 328					p.tok = globStar
 329				case '+':
 330					p.tok = globPlus
 331				case '@':
 332					p.tok = globAt
 333				default: // '!'
 334					p.tok = globExcl
 335				}
 336				p.rune()
 337				p.rune()
 338			} else {
 339				p.advanceLitNone(r)
 340			}
 341		default:
 342			p.advanceLitNone(r)
 343		}
 344	case p.quote&allArithmExpr != 0 && arithmOps(r):
 345		p.tok = p.arithmToken(r)
 346	case p.quote&allParamExp != 0 && paramOps(r):
 347		p.tok = p.paramToken(r)
 348	case p.quote == testExprRegexp:
 349		if !p.rxFirstPart && p.spaced {
 350			p.quote = noState
 351			goto skipSpace
 352		}
 353		p.rxFirstPart = false
 354		switch r {
 355		case ';', '"', '\'', '$', '&', '>', '<', '`':
 356			p.tok = p.regToken(r)
 357		case ')':
 358			if p.rxOpenParens > 0 {
 359				// continuation of open paren
 360				p.advanceLitRe(r)
 361			} else {
 362				p.tok = rightParen
 363				p.quote = noState
 364				p.rune() // we are tokenizing manually
 365			}
 366		default: // including '(', '|'
 367			p.advanceLitRe(r)
 368		}
 369	case regOps(r):
 370		p.tok = p.regToken(r)
 371	default:
 372		p.advanceLitOther(r)
 373	}
 374	if p.err != nil && p.tok != _EOF {
 375		p.tok = _EOF
 376	}
 377}
 378
 379// extendedGlob determines whether we're parsing a Bash extended globbing expression.
 380// For example, whether `*` or `@` are followed by `(` to form `@(foo)`.
 381func (p *Parser) extendedGlob() bool {
 382	if p.val == "function" {
 383		return false
 384	}
 385	if p.peekByte('(') {
 386		// NOTE: empty pattern list is a valid globbing syntax like `@()`,
 387		// but we'll operate on the "likelihood" that it is a function;
 388		// only tokenize if its a non-empty pattern list.
 389		// We do this after peeking for just one byte, so that the input `echo *`
 390		// followed by a newline does not hang an interactive shell parser until
 391		// another byte is input.
 392		return !p.peekBytes("()")
 393	}
 394	return false
 395}
 396
 397func (p *Parser) peekBytes(s string) bool {
 398	peekEnd := int(p.bsp) + len(s)
 399	// TODO: This should loop for slow readers, e.g. those providing one byte at
 400	// a time. Use a loop and test it with [testing/iotest.OneByteReader].
 401	if peekEnd > len(p.bs) {
 402		p.fill()
 403	}
 404	return peekEnd <= len(p.bs) && bytes.HasPrefix(p.bs[p.bsp:peekEnd], []byte(s))
 405}
 406
 407func (p *Parser) peekByte(b byte) bool {
 408	if p.bsp == uint(len(p.bs)) {
 409		p.fill()
 410	}
 411	return p.bsp < uint(len(p.bs)) && p.bs[p.bsp] == b
 412}
 413
 414func (p *Parser) regToken(r rune) token {
 415	switch r {
 416	case '\'':
 417		p.rune()
 418		return sglQuote
 419	case '"':
 420		p.rune()
 421		return dblQuote
 422	case '`':
 423		// Don't call p.rune, as we need to work out p.openBquotes to
 424		// properly handle backslashes in the lexer.
 425		return bckQuote
 426	case '&':
 427		switch p.rune() {
 428		case '&':
 429			p.rune()
 430			return andAnd
 431		case '>':
 432			if p.rune() == '>' {
 433				p.rune()
 434				return appAll
 435			}
 436			return rdrAll
 437		}
 438		return and
 439	case '|':
 440		switch p.rune() {
 441		case '|':
 442			p.rune()
 443			return orOr
 444		case '&':
 445			if p.lang == LangPOSIX {
 446				break
 447			}
 448			p.rune()
 449			return orAnd
 450		}
 451		return or
 452	case '$':
 453		switch p.rune() {
 454		case '\'':
 455			if p.lang == LangPOSIX {
 456				break
 457			}
 458			p.rune()
 459			return dollSglQuote
 460		case '"':
 461			if p.lang == LangPOSIX {
 462				break
 463			}
 464			p.rune()
 465			return dollDblQuote
 466		case '{':
 467			p.rune()
 468			return dollBrace
 469		case '[':
 470			if !p.lang.isBash() || p.quote == paramExpName {
 471				// latter to not tokenise ${$[@]} as $[
 472				break
 473			}
 474			p.rune()
 475			return dollBrack
 476		case '(':
 477			if p.rune() == '(' {
 478				p.rune()
 479				return dollDblParen
 480			}
 481			return dollParen
 482		}
 483		return dollar
 484	case '(':
 485		if p.rune() == '(' && p.lang != LangPOSIX && p.quote != testExpr {
 486			p.rune()
 487			return dblLeftParen
 488		}
 489		return leftParen
 490	case ')':
 491		p.rune()
 492		return rightParen
 493	case ';':
 494		switch p.rune() {
 495		case ';':
 496			if p.rune() == '&' && p.lang.isBash() {
 497				p.rune()
 498				return dblSemiAnd
 499			}
 500			return dblSemicolon
 501		case '&':
 502			if p.lang == LangPOSIX {
 503				break
 504			}
 505			p.rune()
 506			return semiAnd
 507		case '|':
 508			if p.lang != LangMirBSDKorn {
 509				break
 510			}
 511			p.rune()
 512			return semiOr
 513		}
 514		return semicolon
 515	case '<':
 516		switch p.rune() {
 517		case '<':
 518			if r = p.rune(); r == '-' {
 519				p.rune()
 520				return dashHdoc
 521			} else if r == '<' {
 522				p.rune()
 523				return wordHdoc
 524			}
 525			return hdoc
 526		case '>':
 527			p.rune()
 528			return rdrInOut
 529		case '&':
 530			p.rune()
 531			return dplIn
 532		case '(':
 533			if !p.lang.isBash() {
 534				break
 535			}
 536			p.rune()
 537			return cmdIn
 538		}
 539		return rdrIn
 540	default: // '>'
 541		switch p.rune() {
 542		case '>':
 543			p.rune()
 544			return appOut
 545		case '&':
 546			p.rune()
 547			return dplOut
 548		case '|':
 549			p.rune()
 550			return clbOut
 551		case '(':
 552			if !p.lang.isBash() {
 553				break
 554			}
 555			p.rune()
 556			return cmdOut
 557		}
 558		return rdrOut
 559	}
 560}
 561
 562func (p *Parser) dqToken(r rune) token {
 563	switch r {
 564	case '"':
 565		p.rune()
 566		return dblQuote
 567	case '`':
 568		// Don't call p.rune, as we need to work out p.openBquotes to
 569		// properly handle backslashes in the lexer.
 570		return bckQuote
 571	default: // '$'
 572		switch p.rune() {
 573		case '{':
 574			p.rune()
 575			return dollBrace
 576		case '[':
 577			if !p.lang.isBash() {
 578				break
 579			}
 580			p.rune()
 581			return dollBrack
 582		case '(':
 583			if p.rune() == '(' {
 584				p.rune()
 585				return dollDblParen
 586			}
 587			return dollParen
 588		}
 589		return dollar
 590	}
 591}
 592
 593func (p *Parser) paramToken(r rune) token {
 594	switch r {
 595	case '}':
 596		p.rune()
 597		return rightBrace
 598	case ':':
 599		switch p.rune() {
 600		case '+':
 601			p.rune()
 602			return colPlus
 603		case '-':
 604			p.rune()
 605			return colMinus
 606		case '?':
 607			p.rune()
 608			return colQuest
 609		case '=':
 610			p.rune()
 611			return colAssgn
 612		}
 613		return colon
 614	case '+':
 615		p.rune()
 616		return plus
 617	case '-':
 618		p.rune()
 619		return minus
 620	case '?':
 621		p.rune()
 622		return quest
 623	case '=':
 624		p.rune()
 625		return assgn
 626	case '%':
 627		if p.rune() == '%' {
 628			p.rune()
 629			return dblPerc
 630		}
 631		return perc
 632	case '#':
 633		if p.rune() == '#' {
 634			p.rune()
 635			return dblHash
 636		}
 637		return hash
 638	case '!':
 639		p.rune()
 640		return exclMark
 641	case '[':
 642		p.rune()
 643		return leftBrack
 644	case ']':
 645		p.rune()
 646		return rightBrack
 647	case '/':
 648		if p.rune() == '/' && p.quote != paramExpRepl {
 649			p.rune()
 650			return dblSlash
 651		}
 652		return slash
 653	case '^':
 654		if p.rune() == '^' {
 655			p.rune()
 656			return dblCaret
 657		}
 658		return caret
 659	case ',':
 660		if p.rune() == ',' {
 661			p.rune()
 662			return dblComma
 663		}
 664		return comma
 665	case '@':
 666		p.rune()
 667		return at
 668	default: // '*'
 669		p.rune()
 670		return star
 671	}
 672}
 673
 674func (p *Parser) arithmToken(r rune) token {
 675	switch r {
 676	case '!':
 677		if p.rune() == '=' {
 678			p.rune()
 679			return nequal
 680		}
 681		return exclMark
 682	case '=':
 683		if p.rune() == '=' {
 684			p.rune()
 685			return equal
 686		}
 687		return assgn
 688	case '~':
 689		p.rune()
 690		return tilde
 691	case '(':
 692		p.rune()
 693		return leftParen
 694	case ')':
 695		p.rune()
 696		return rightParen
 697	case '&':
 698		switch p.rune() {
 699		case '&':
 700			p.rune()
 701			return andAnd
 702		case '=':
 703			p.rune()
 704			return andAssgn
 705		}
 706		return and
 707	case '|':
 708		switch p.rune() {
 709		case '|':
 710			p.rune()
 711			return orOr
 712		case '=':
 713			p.rune()
 714			return orAssgn
 715		}
 716		return or
 717	case '<':
 718		switch p.rune() {
 719		case '<':
 720			if p.rune() == '=' {
 721				p.rune()
 722				return shlAssgn
 723			}
 724			return hdoc
 725		case '=':
 726			p.rune()
 727			return lequal
 728		}
 729		return rdrIn
 730	case '>':
 731		switch p.rune() {
 732		case '>':
 733			if p.rune() == '=' {
 734				p.rune()
 735				return shrAssgn
 736			}
 737			return appOut
 738		case '=':
 739			p.rune()
 740			return gequal
 741		}
 742		return rdrOut
 743	case '+':
 744		switch p.rune() {
 745		case '+':
 746			p.rune()
 747			return addAdd
 748		case '=':
 749			p.rune()
 750			return addAssgn
 751		}
 752		return plus
 753	case '-':
 754		switch p.rune() {
 755		case '-':
 756			p.rune()
 757			return subSub
 758		case '=':
 759			p.rune()
 760			return subAssgn
 761		}
 762		return minus
 763	case '%':
 764		if p.rune() == '=' {
 765			p.rune()
 766			return remAssgn
 767		}
 768		return perc
 769	case '*':
 770		switch p.rune() {
 771		case '*':
 772			p.rune()
 773			return power
 774		case '=':
 775			p.rune()
 776			return mulAssgn
 777		}
 778		return star
 779	case '/':
 780		if p.rune() == '=' {
 781			p.rune()
 782			return quoAssgn
 783		}
 784		return slash
 785	case '^':
 786		if p.rune() == '=' {
 787			p.rune()
 788			return xorAssgn
 789		}
 790		return caret
 791	case '[':
 792		p.rune()
 793		return leftBrack
 794	case ']':
 795		p.rune()
 796		return rightBrack
 797	case ',':
 798		p.rune()
 799		return comma
 800	case '?':
 801		p.rune()
 802		return quest
 803	case ':':
 804		p.rune()
 805		return colon
 806	default: // '#'
 807		p.rune()
 808		return hash
 809	}
 810}
 811
 812func (p *Parser) newLit(r rune) {
 813	switch {
 814	case r < utf8.RuneSelf:
 815		p.litBs = p.litBuf[:1]
 816		p.litBs[0] = byte(r)
 817	case r > escNewl:
 818		w := utf8.RuneLen(r)
 819		p.litBs = append(p.litBuf[:0], p.bs[p.bsp-uint(w):p.bsp]...)
 820	default:
 821		// don't let r == utf8.RuneSelf go to the second case as [utf8.RuneLen]
 822		// would return -1
 823		p.litBs = p.litBuf[:0]
 824	}
 825}
 826
 827func (p *Parser) endLit() (s string) {
 828	if p.r == utf8.RuneSelf || p.r == escNewl {
 829		s = string(p.litBs)
 830	} else {
 831		s = string(p.litBs[:len(p.litBs)-p.w])
 832	}
 833	p.litBs = nil
 834	return
 835}
 836
 837func (p *Parser) isLitRedir() bool {
 838	lit := p.litBs[:len(p.litBs)-1]
 839	if lit[0] == '{' && lit[len(lit)-1] == '}' {
 840		return ValidName(string(lit[1 : len(lit)-1]))
 841	}
 842	for _, b := range lit {
 843		switch b {
 844		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 845		default:
 846			return false
 847		}
 848	}
 849	return true
 850}
 851
 852func (p *Parser) advanceNameCont(r rune) {
 853	// we know that r is a letter or underscore
 854loop:
 855	for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
 856		switch {
 857		case 'a' <= r && r <= 'z':
 858		case 'A' <= r && r <= 'Z':
 859		case r == '_':
 860		case '0' <= r && r <= '9':
 861		case r == escNewl:
 862		default:
 863			break loop
 864		}
 865	}
 866	p.tok, p.val = _LitWord, p.endLit()
 867}
 868
 869func (p *Parser) advanceLitOther(r rune) {
 870	tok := _LitWord
 871loop:
 872	for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
 873		switch r {
 874		case '\\': // escaped byte follows
 875			p.rune()
 876		case '\'', '"', '`', '$':
 877			tok = _Lit
 878			break loop
 879		case '}':
 880			if p.quote&allParamExp != 0 {
 881				break loop
 882			}
 883		case '/':
 884			if p.quote != paramExpExp {
 885				break loop
 886			}
 887		case ':', '=', '%', '^', ',', '?', '!', '~', '*':
 888			if p.quote&allArithmExpr != 0 || p.quote == paramExpName {
 889				break loop
 890			}
 891		case '[', ']':
 892			if p.lang != LangPOSIX && p.quote&allArithmExpr != 0 {
 893				break loop
 894			}
 895			fallthrough
 896		case '#', '@':
 897			if p.quote&allParamReg != 0 {
 898				break loop
 899			}
 900		case '+', '-', ' ', '\t', ';', '&', '>', '<', '|', '(', ')', '\n', '\r':
 901			if p.quote&allKeepSpaces == 0 {
 902				break loop
 903			}
 904		}
 905	}
 906	p.tok, p.val = tok, p.endLit()
 907}
 908
 909func (p *Parser) advanceLitNone(r rune) {
 910	p.eqlOffs = -1
 911	tok := _LitWord
 912loop:
 913	for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
 914		switch r {
 915		case ' ', '\t', '\n', '\r', '&', '|', ';', '(', ')':
 916			break loop
 917		case '\\': // escaped byte follows
 918			p.rune()
 919		case '>', '<':
 920			if p.peekByte('(') {
 921				tok = _Lit
 922			} else if p.isLitRedir() {
 923				tok = _LitRedir
 924			}
 925			break loop
 926		case '`':
 927			if p.quote != subCmdBckquo {
 928				tok = _Lit
 929			}
 930			break loop
 931		case '"', '\'', '$':
 932			tok = _Lit
 933			break loop
 934		case '?', '*', '+', '@', '!':
 935			if p.extendedGlob() {
 936				tok = _Lit
 937				break loop
 938			}
 939		case '=':
 940			if p.eqlOffs < 0 {
 941				p.eqlOffs = len(p.litBs) - 1
 942			}
 943		case '[':
 944			if p.lang != LangPOSIX && len(p.litBs) > 1 && p.litBs[0] != '[' {
 945				tok = _Lit
 946				break loop
 947			}
 948		}
 949	}
 950	p.tok, p.val = tok, p.endLit()
 951}
 952
 953func (p *Parser) advanceLitDquote(r rune) {
 954	tok := _LitWord
 955loop:
 956	for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
 957		switch r {
 958		case '"':
 959			break loop
 960		case '\\': // escaped byte follows
 961			p.rune()
 962		case escNewl, '`', '$':
 963			tok = _Lit
 964			break loop
 965		}
 966	}
 967	p.tok, p.val = tok, p.endLit()
 968}
 969
 970func (p *Parser) advanceLitHdoc(r rune) {
 971	// Unlike the rest of nextKeepSpaces quote states, we handle escaped
 972	// newlines here. If lastTok==_Lit, then we know we're following an
 973	// escaped newline, so the first line can't end the heredoc.
 974	lastTok := p.tok
 975	for r == escNewl {
 976		r = p.rune()
 977		lastTok = _Lit
 978	}
 979	p.pos = p.nextPos()
 980
 981	p.tok = _Lit
 982	p.newLit(r)
 983	if p.quote == hdocBodyTabs {
 984		for r == '\t' {
 985			r = p.rune()
 986		}
 987	}
 988	lStart := len(p.litBs) - 1
 989	stop := p.hdocStops[len(p.hdocStops)-1]
 990	for ; ; r = p.rune() {
 991		switch r {
 992		case escNewl, '$':
 993			p.val = p.endLit()
 994			return
 995		case '\\': // escaped byte follows
 996			p.rune()
 997		case '`':
 998			if !p.backquoteEnd() {
 999				p.val = p.endLit()
1000				return
1001			}
1002			fallthrough
1003		case '\n', utf8.RuneSelf:
1004			if p.parsingDoc {
1005				if r == utf8.RuneSelf {
1006					p.tok = _LitWord
1007					p.val = p.endLit()
1008					return
1009				}
1010			} else if lStart == 0 && lastTok == _Lit {
1011				// This line starts right after an escaped
1012				// newline, so it should never end the heredoc.
1013			} else if lStart >= 0 {
1014				// Compare the current line with the stop word.
1015				line := p.litBs[lStart:]
1016				if r != utf8.RuneSelf && len(line) > 0 {
1017					line = line[:len(line)-1] // minus trailing character
1018				}
1019				if bytes.Equal(line, stop) {
1020					p.tok = _LitWord
1021					p.val = p.endLit()[:lStart]
1022					if p.val == "" {
1023						p.tok = _Newl
1024					}
1025					p.hdocStops[len(p.hdocStops)-1] = nil
1026					return
1027				}
1028			}
1029			if r != '\n' {
1030				return // hit an unexpected EOF or closing backquote
1031			}
1032			if p.quote == hdocBodyTabs {
1033				for p.peekByte('\t') {
1034					p.rune()
1035				}
1036			}
1037			lStart = len(p.litBs)
1038		}
1039	}
1040}
1041
1042func (p *Parser) quotedHdocWord() *Word {
1043	r := p.r
1044	p.newLit(r)
1045	pos := p.nextPos()
1046	stop := p.hdocStops[len(p.hdocStops)-1]
1047	for ; ; r = p.rune() {
1048		if r == utf8.RuneSelf {
1049			return nil
1050		}
1051		if p.quote == hdocBodyTabs {
1052			for r == '\t' {
1053				r = p.rune()
1054			}
1055		}
1056		lStart := len(p.litBs) - 1
1057	runeLoop:
1058		for {
1059			switch r {
1060			case utf8.RuneSelf, '\n':
1061				break runeLoop
1062			case '`':
1063				if p.backquoteEnd() {
1064					break runeLoop
1065				}
1066			case escNewl:
1067				p.litBs = append(p.litBs, '\\', '\n')
1068				break runeLoop
1069			}
1070			r = p.rune()
1071		}
1072		if lStart < 0 {
1073			continue
1074		}
1075		// Compare the current line with the stop word.
1076		line := p.litBs[lStart:]
1077		if r != utf8.RuneSelf && len(line) > 0 {
1078			line = line[:len(line)-1] // minus \n
1079		}
1080		if bytes.Equal(line, stop) {
1081			p.hdocStops[len(p.hdocStops)-1] = nil
1082			val := p.endLit()[:lStart]
1083			if val == "" {
1084				return nil
1085			}
1086			return p.wordOne(p.lit(pos, val))
1087		}
1088	}
1089}
1090
1091func (p *Parser) advanceLitRe(r rune) {
1092	for p.newLit(r); ; r = p.rune() {
1093		switch r {
1094		case '\\':
1095			p.rune()
1096		case '(':
1097			p.rxOpenParens++
1098		case ')':
1099			if p.rxOpenParens--; p.rxOpenParens < 0 {
1100				p.tok, p.val = _LitWord, p.endLit()
1101				p.quote = noState
1102				return
1103			}
1104		case ' ', '\t', '\r', '\n', ';', '&', '>', '<':
1105			if p.rxOpenParens <= 0 {
1106				p.tok, p.val = _LitWord, p.endLit()
1107				p.quote = noState
1108				return
1109			}
1110		case '"', '\'', '$', '`':
1111			p.tok, p.val = _Lit, p.endLit()
1112			return
1113		case utf8.RuneSelf:
1114			p.tok, p.val = _LitWord, p.endLit()
1115			p.quote = noState
1116			return
1117		}
1118	}
1119}
1120
1121func testUnaryOp(val string) UnTestOperator {
1122	switch val {
1123	case "!":
1124		return TsNot
1125	case "-e", "-a":
1126		return TsExists
1127	case "-f":
1128		return TsRegFile
1129	case "-d":
1130		return TsDirect
1131	case "-c":
1132		return TsCharSp
1133	case "-b":
1134		return TsBlckSp
1135	case "-p":
1136		return TsNmPipe
1137	case "-S":
1138		return TsSocket
1139	case "-L", "-h":
1140		return TsSmbLink
1141	case "-k":
1142		return TsSticky
1143	case "-g":
1144		return TsGIDSet
1145	case "-u":
1146		return TsUIDSet
1147	case "-G":
1148		return TsGrpOwn
1149	case "-O":
1150		return TsUsrOwn
1151	case "-N":
1152		return TsModif
1153	case "-r":
1154		return TsRead
1155	case "-w":
1156		return TsWrite
1157	case "-x":
1158		return TsExec
1159	case "-s":
1160		return TsNoEmpty
1161	case "-t":
1162		return TsFdTerm
1163	case "-z":
1164		return TsEmpStr
1165	case "-n":
1166		return TsNempStr
1167	case "-o":
1168		return TsOptSet
1169	case "-v":
1170		return TsVarSet
1171	case "-R":
1172		return TsRefVar
1173	default:
1174		return 0
1175	}
1176}
1177
1178func testBinaryOp(val string) BinTestOperator {
1179	switch val {
1180	case "=":
1181		return TsMatchShort
1182	case "==":
1183		return TsMatch
1184	case "!=":
1185		return TsNoMatch
1186	case "=~":
1187		return TsReMatch
1188	case "-nt":
1189		return TsNewer
1190	case "-ot":
1191		return TsOlder
1192	case "-ef":
1193		return TsDevIno
1194	case "-eq":
1195		return TsEql
1196	case "-ne":
1197		return TsNeq
1198	case "-le":
1199		return TsLeq
1200	case "-ge":
1201		return TsGeq
1202	case "-lt":
1203		return TsLss
1204	case "-gt":
1205		return TsGtr
1206	default:
1207		return 0
1208	}
1209}