// Copyright (c) 2016, Daniel Martí <mvdan@mvdan.cc>
2// See LICENSE for licensing information
3
4package syntax
5
6import (
7 "bytes"
8 "io"
9 "unicode/utf8"
10)
11
// regOps reports whether r is one of the runes that form, or begin, a
// regular shell operator or quoting token.
func regOps(r rune) bool {
	return r == ';' || r == '"' || r == '\'' ||
		r == '(' || r == ')' || r == '$' ||
		r == '|' || r == '&' || r == '>' || r == '<' ||
		r == '`'
}
20
// paramOps reports whether r is a rune that gets tokenized as an operator
// inside a parameter expansion.
func paramOps(r rune) bool {
	const ops = "}#!:-+=?%[]/^,@*"
	for _, op := range ops {
		if op == r {
			return true
		}
	}
	return false
}
30
// paramNameOp reports whether r is anything other than the operator runes
// listed below. Those listed runes yield false; every other rune, including
// '#', '!', '-', '?', '@' and '*', yields true, meaning it may start a
// parameter expansion name.
func paramNameOp(r rune) bool {
	const notNameStart = "}:+=%[]/^,"
	for _, op := range notNameStart {
		if op == r {
			return false
		}
	}
	return true
}
39
// arithmOps reports whether r is a rune that gets tokenized as an operator
// inside an arithmetic expansion.
func arithmOps(r rune) bool {
	isOp := false
	switch r {
	case '+', '-', '*', '/', '%', '^', '~', '!':
		isOp = true
	case '<', '>', '=', '|', '&':
		isOp = true
	case '(', ')', '[', ']':
		isOp = true
	case ':', ',', '?', '#':
		isOp = true
	}
	return isOp
}
49
// bquoteEscaped reports whether b is one of the bytes ('$', '`' or '\\')
// for which the lexer drops a preceding backslash while inside backquotes.
func bquoteEscaped(b byte) bool {
	return b == '$' || b == '`' || b == '\\'
}
57
// escNewl is the sentinel rune that Parser.rune returns, and stores in p.r,
// for an escaped newline: a backslash immediately followed by "\n" or
// "\r\n". Its value is one above utf8.RuneSelf, which the lexer uses as its
// end-of-input sentinel.
const escNewl rune = utf8.RuneSelf + 1
59
// rune reads the next rune from the input buffer, stores it in p.r together
// with its byte width in p.w, and returns it. Along the way it:
//
//   - updates p.line and p.col based on the previously read rune,
//   - skips NUL bytes and the '\r' of a "\r\n" pair,
//   - collapses a backslash followed by a newline into the escNewl sentinel,
//   - drops backslashes that only exist to escape bytes inside backquotes,
//   - appends the consumed bytes to p.litBs when a literal is being built,
//   - returns utf8.RuneSelf once the input is exhausted.
func (p *Parser) rune() rune {
	if p.r == '\n' || p.r == escNewl {
		// p.r instead of b so that newline
		// character positions don't have col 0.
		p.line++
		p.col = 0
	}
	// Advance the column past the previously returned rune.
	p.col += int64(p.w)
	// Number of backquote-escaping backslashes dropped during this call.
	bquotes := 0
retry:
	if p.bsp < uint(len(p.bs)) {
		// Fast path: a single-byte (ASCII) rune.
		if b := p.bs[p.bsp]; b < utf8.RuneSelf {
			p.bsp++
			switch b {
			case '\x00':
				// Ignore null bytes while parsing, like bash.
				p.col++
				goto retry
			case '\r':
				if p.peekByte('\n') { // \r\n turns into \n
					p.col++
					goto retry
				}
			case '\\':
				if p.r == '\\' {
					// The previous rune was a backslash too, so this one
					// is escaped and cannot start an escaped newline.
				} else if p.peekByte('\n') {
					p.bsp++
					p.w, p.r = 1, escNewl
					return escNewl
				} else if p.peekBytes("\r\n") { // \\\r\n turns into \\\n
					p.col++
					p.bsp += 2
					p.w, p.r = 2, escNewl
					return escNewl
				}
				if p.openBquotes > 0 && bquotes < p.openBquotes &&
					p.bsp < uint(len(p.bs)) && bquoteEscaped(p.bs[p.bsp]) {
					// We turn backquote command substitutions into $(),
					// so we remove the extra backslashes needed by the backquotes.
					bquotes++
					p.col++
					goto retry
				}
			}
			if b == '`' {
				// Record how many escaping backslashes preceded this backquote.
				p.lastBquoteEsc = bquotes
			}
			if p.litBs != nil {
				p.litBs = append(p.litBs, b)
			}
			p.w, p.r = 1, rune(b)
			return p.r
		}
		// Slow path: a multi-byte UTF-8 sequence.
		if !utf8.FullRune(p.bs[p.bsp:]) {
			// we need more bytes to read a full non-ascii rune
			p.fill()
		}
		var w int
		p.r, w = utf8.DecodeRune(p.bs[p.bsp:])
		if p.litBs != nil {
			p.litBs = append(p.litBs, p.bs[p.bsp:p.bsp+uint(w)]...)
		}
		p.bsp += uint(w)
		if p.r == utf8.RuneError && w == 1 {
			// DecodeRune reports an invalid encoding as (RuneError, 1).
			p.posErr(p.nextPos(), "invalid UTF-8 encoding")
		}
		p.w = w
	} else {
		// The buffer is exhausted.
		if p.r == utf8.RuneSelf {
			// Already at the end of the input; keep returning the sentinel.
		} else if p.fill(); p.bs == nil {
			// No more input: switch to the end-of-input sentinel.
			p.bsp++
			p.r = utf8.RuneSelf
			p.w = 1
		} else {
			goto retry
		}
	}
	return p.r
}
139
140// fill reads more bytes from the input src into readBuf. Any bytes that
141// had not yet been used at the end of the buffer are slid into the
142// beginning of the buffer.
143func (p *Parser) fill() {
144 p.offs += int64(p.bsp)
145 left := len(p.bs) - int(p.bsp)
146 copy(p.readBuf[:left], p.readBuf[p.bsp:])
147readAgain:
148 n, err := 0, p.readErr
149 if err == nil {
150 n, err = p.src.Read(p.readBuf[left:])
151 p.readErr = err
152 }
153 if n == 0 {
154 if err == nil {
155 goto readAgain
156 }
157 // don't use p.errPass as we don't want to overwrite p.tok
158 if err != io.EOF {
159 p.err = err
160 }
161 if left > 0 {
162 p.bs = p.readBuf[:left]
163 } else {
164 p.bs = nil
165 }
166 } else {
167 p.bs = p.readBuf[:left+n]
168 }
169 p.bsp = 0
170}
171
172func (p *Parser) nextKeepSpaces() {
173 r := p.r
174 if p.quote != hdocBody && p.quote != hdocBodyTabs {
175 // Heredocs handle escaped newlines in a special way, but others
176 // do not.
177 for r == escNewl {
178 r = p.rune()
179 }
180 }
181 p.pos = p.nextPos()
182 switch p.quote {
183 case paramExpRepl:
184 switch r {
185 case '}', '/':
186 p.tok = p.paramToken(r)
187 case '`', '"', '$', '\'':
188 p.tok = p.regToken(r)
189 default:
190 p.advanceLitOther(r)
191 }
192 case dblQuotes:
193 switch r {
194 case '`', '"', '$':
195 p.tok = p.dqToken(r)
196 default:
197 p.advanceLitDquote(r)
198 }
199 case hdocBody, hdocBodyTabs:
200 switch r {
201 case '`', '$':
202 p.tok = p.dqToken(r)
203 default:
204 p.advanceLitHdoc(r)
205 }
206 default: // paramExpExp:
207 switch r {
208 case '}':
209 p.tok = p.paramToken(r)
210 case '`', '"', '$', '\'':
211 p.tok = p.regToken(r)
212 default:
213 p.advanceLitOther(r)
214 }
215 }
216 if p.err != nil && p.tok != _EOF {
217 p.tok = _EOF
218 }
219}
220
// next advances the lexer to the following token and stores it in p.tok.
// Literal tokens also get their text stored in p.val by the advanceLit*
// helpers, and p.pos is set to the position where the token starts.
//
// Blanks and escaped newlines before the token are skipped, with p.spaced
// recording whether any blank was seen. Comments are consumed here as well,
// and are only kept when p.keepComments is set. Which runes count as
// operators depends on the current quote state in p.quote.
func (p *Parser) next() {
	if p.r == utf8.RuneSelf {
		// The input is exhausted.
		p.tok = _EOF
		return
	}
	p.spaced = false
	if p.quote&allKeepSpaces != 0 {
		// Whitespace is significant in these states.
		p.nextKeepSpaces()
		return
	}
	r := p.r
	for r == escNewl {
		r = p.rune()
	}
skipSpace:
	for {
		switch r {
		case utf8.RuneSelf:
			p.tok = _EOF
			return
		case escNewl:
			r = p.rune()
		case ' ', '\t', '\r':
			p.spaced = true
			r = p.rune()
		case '\n':
			if p.tok == _Newl {
				// merge consecutive newline tokens
				r = p.rune()
				continue
			}
			p.spaced = true
			p.tok = _Newl
			// Pending heredoc bodies start right after this newline.
			if p.quote != hdocWord && len(p.heredocs) > p.buriedHdocs {
				p.doHeredocs()
			}
			return
		default:
			break skipSpace
		}
	}
	if p.stopAt != nil && (p.spaced || p.tok == illegalTok || p.stopToken()) {
		// r has already been consumed, so step back by its width to
		// check whether the input at r begins with the stop word.
		w := utf8.RuneLen(r)
		if bytes.HasPrefix(p.bs[p.bsp-uint(w):], p.stopAt) {
			// Pretend the input ends here.
			p.r = utf8.RuneSelf
			p.w = 1
			p.tok = _EOF
			return
		}
	}
	p.pos = p.nextPos()
	switch {
	case p.quote&allRegTokens != 0:
		switch r {
		case ';', '"', '\'', '(', ')', '$', '|', '&', '>', '<', '`':
			p.tok = p.regToken(r)
		case '#':
			// If we're parsing $foo#bar, ${foo}#bar, 'foo'#bar, or "foo"#bar,
			// #bar is a continuation of the same word, not a comment.
			// TODO: support $(foo)#bar and `foo`#bar as well, which is slightly tricky,
			// as we can't easily tell them apart from (foo)#bar and `#bar`,
			// where #bar should remain a comment.
			if !p.spaced {
				switch p.tok {
				case _LitWord, rightBrace, sglQuote, dblQuote:
					p.advanceLitNone(r)
					return
				}
			}
			// Start the comment text after the '#' itself.
			r = p.rune()
			p.newLit(r)
		runeLoop:
			for {
				switch r {
				case '\n', utf8.RuneSelf:
					break runeLoop
				case escNewl:
					// rune() does not add escaped newlines to litBs, so
					// put the original two bytes back into the comment.
					p.litBs = append(p.litBs, '\\', '\n')
					break runeLoop
				case '`':
					if p.backquoteEnd() {
						break runeLoop
					}
				}
				r = p.rune()
			}
			if p.keepComments {
				*p.curComs = append(*p.curComs, Comment{
					Hash: p.pos,
					Text: p.endLit(),
				})
			} else {
				p.litBs = nil
			}
			// A comment is not a token; continue with what follows it.
			p.next()
		case '[', '=':
			if p.quote == arrayElems {
				p.tok = p.paramToken(r)
			} else {
				p.advanceLitNone(r)
			}
		case '?', '*', '+', '@', '!':
			if p.extendedGlob() {
				switch r {
				case '?':
					p.tok = globQuest
				case '*':
					p.tok = globStar
				case '+':
					p.tok = globPlus
				case '@':
					p.tok = globAt
				default: // '!'
					p.tok = globExcl
				}
				// Consume both the operator rune and the '(' after it.
				p.rune()
				p.rune()
			} else {
				p.advanceLitNone(r)
			}
		default:
			p.advanceLitNone(r)
		}
	case p.quote&allArithmExpr != 0 && arithmOps(r):
		p.tok = p.arithmToken(r)
	case p.quote&allParamExp != 0 && paramOps(r):
		p.tok = p.paramToken(r)
	case p.quote == testExprRegexp:
		if !p.rxFirstPart && p.spaced {
			// A blank after the first part ends the regexp; leave the
			// regexp state and lex this rune again without it.
			p.quote = noState
			goto skipSpace
		}
		p.rxFirstPart = false
		switch r {
		case ';', '"', '\'', '$', '&', '>', '<', '`':
			p.tok = p.regToken(r)
		case ')':
			if p.rxOpenParens > 0 {
				// continuation of open paren
				p.advanceLitRe(r)
			} else {
				p.tok = rightParen
				p.quote = noState
				p.rune() // we are tokenizing manually
			}
		default: // including '(', '|'
			p.advanceLitRe(r)
		}
	case regOps(r):
		p.tok = p.regToken(r)
	default:
		p.advanceLitOther(r)
	}
	// Once an error has been recorded, stop producing tokens.
	if p.err != nil && p.tok != _EOF {
		p.tok = _EOF
	}
}
378
379// extendedGlob determines whether we're parsing a Bash extended globbing expression.
380// For example, whether `*` or `@` are followed by `(` to form `@(foo)`.
381func (p *Parser) extendedGlob() bool {
382 if p.val == "function" {
383 return false
384 }
385 if p.peekByte('(') {
386 // NOTE: empty pattern list is a valid globbing syntax like `@()`,
387 // but we'll operate on the "likelihood" that it is a function;
388 // only tokenize if its a non-empty pattern list.
389 // We do this after peeking for just one byte, so that the input `echo *`
390 // followed by a newline does not hang an interactive shell parser until
391 // another byte is input.
392 return !p.peekBytes("()")
393 }
394 return false
395}
396
397func (p *Parser) peekBytes(s string) bool {
398 peekEnd := int(p.bsp) + len(s)
399 // TODO: This should loop for slow readers, e.g. those providing one byte at
400 // a time. Use a loop and test it with [testing/iotest.OneByteReader].
401 if peekEnd > len(p.bs) {
402 p.fill()
403 }
404 return peekEnd <= len(p.bs) && bytes.HasPrefix(p.bs[p.bsp:peekEnd], []byte(s))
405}
406
407func (p *Parser) peekByte(b byte) bool {
408 if p.bsp == uint(len(p.bs)) {
409 p.fill()
410 }
411 return p.bsp < uint(len(p.bs)) && p.bs[p.bsp] == b
412}
413
// regToken lexes a regular shell operator or quoting token that starts
// with r, which must be one of the runes accepted by regOps. It consumes
// the runes that make up the token and returns it. The one exception is a
// backquote, which is returned without being consumed.
//
// Several multi-rune operators are only recognized for some language
// variants in p.lang. When such an operator is not available, the inner
// switch is left via break and the shorter operator is returned, leaving
// the following rune unconsumed.
func (p *Parser) regToken(r rune) token {
	switch r {
	case '\'':
		p.rune()
		return sglQuote
	case '"':
		p.rune()
		return dblQuote
	case '`':
		// Don't call p.rune, as we need to work out p.openBquotes to
		// properly handle backslashes in the lexer.
		return bckQuote
	case '&':
		switch p.rune() {
		case '&':
			p.rune()
			return andAnd
		case '>':
			if p.rune() == '>' {
				p.rune()
				return appAll
			}
			return rdrAll
		}
		return and
	case '|':
		switch p.rune() {
		case '|':
			p.rune()
			return orOr
		case '&':
			// "|&" is not tokenized in POSIX mode.
			if p.lang == LangPOSIX {
				break
			}
			p.rune()
			return orAnd
		}
		return or
	case '$':
		switch p.rune() {
		case '\'':
			// $'...' is not tokenized in POSIX mode.
			if p.lang == LangPOSIX {
				break
			}
			p.rune()
			return dollSglQuote
		case '"':
			// $"..." is not tokenized in POSIX mode.
			if p.lang == LangPOSIX {
				break
			}
			p.rune()
			return dollDblQuote
		case '{':
			p.rune()
			return dollBrace
		case '[':
			if !p.lang.isBash() || p.quote == paramExpName {
				// latter to not tokenise ${$[@]} as $[
				break
			}
			p.rune()
			return dollBrack
		case '(':
			if p.rune() == '(' {
				p.rune()
				return dollDblParen
			}
			return dollParen
		}
		return dollar
	case '(':
		// "((" is only one token outside POSIX mode and test expressions.
		if p.rune() == '(' && p.lang != LangPOSIX && p.quote != testExpr {
			p.rune()
			return dblLeftParen
		}
		return leftParen
	case ')':
		p.rune()
		return rightParen
	case ';':
		switch p.rune() {
		case ';':
			// ";;&" is only tokenized when p.lang.isBash() holds.
			if p.rune() == '&' && p.lang.isBash() {
				p.rune()
				return dblSemiAnd
			}
			return dblSemicolon
		case '&':
			// ";&" is not tokenized in POSIX mode.
			if p.lang == LangPOSIX {
				break
			}
			p.rune()
			return semiAnd
		case '|':
			// ";|" is only tokenized for LangMirBSDKorn.
			if p.lang != LangMirBSDKorn {
				break
			}
			p.rune()
			return semiOr
		}
		return semicolon
	case '<':
		switch p.rune() {
		case '<':
			if r = p.rune(); r == '-' {
				p.rune()
				return dashHdoc
			} else if r == '<' {
				p.rune()
				return wordHdoc
			}
			return hdoc
		case '>':
			p.rune()
			return rdrInOut
		case '&':
			p.rune()
			return dplIn
		case '(':
			// "<(" is only tokenized when p.lang.isBash() holds.
			if !p.lang.isBash() {
				break
			}
			p.rune()
			return cmdIn
		}
		return rdrIn
	default: // '>'
		switch p.rune() {
		case '>':
			p.rune()
			return appOut
		case '&':
			p.rune()
			return dplOut
		case '|':
			p.rune()
			return clbOut
		case '(':
			// ">(" is only tokenized when p.lang.isBash() holds.
			if !p.lang.isBash() {
				break
			}
			p.rune()
			return cmdOut
		}
		return rdrOut
	}
}
561
562func (p *Parser) dqToken(r rune) token {
563 switch r {
564 case '"':
565 p.rune()
566 return dblQuote
567 case '`':
568 // Don't call p.rune, as we need to work out p.openBquotes to
569 // properly handle backslashes in the lexer.
570 return bckQuote
571 default: // '$'
572 switch p.rune() {
573 case '{':
574 p.rune()
575 return dollBrace
576 case '[':
577 if !p.lang.isBash() {
578 break
579 }
580 p.rune()
581 return dollBrack
582 case '(':
583 if p.rune() == '(' {
584 p.rune()
585 return dollDblParen
586 }
587 return dollParen
588 }
589 return dollar
590 }
591}
592
593func (p *Parser) paramToken(r rune) token {
594 switch r {
595 case '}':
596 p.rune()
597 return rightBrace
598 case ':':
599 switch p.rune() {
600 case '+':
601 p.rune()
602 return colPlus
603 case '-':
604 p.rune()
605 return colMinus
606 case '?':
607 p.rune()
608 return colQuest
609 case '=':
610 p.rune()
611 return colAssgn
612 }
613 return colon
614 case '+':
615 p.rune()
616 return plus
617 case '-':
618 p.rune()
619 return minus
620 case '?':
621 p.rune()
622 return quest
623 case '=':
624 p.rune()
625 return assgn
626 case '%':
627 if p.rune() == '%' {
628 p.rune()
629 return dblPerc
630 }
631 return perc
632 case '#':
633 if p.rune() == '#' {
634 p.rune()
635 return dblHash
636 }
637 return hash
638 case '!':
639 p.rune()
640 return exclMark
641 case '[':
642 p.rune()
643 return leftBrack
644 case ']':
645 p.rune()
646 return rightBrack
647 case '/':
648 if p.rune() == '/' && p.quote != paramExpRepl {
649 p.rune()
650 return dblSlash
651 }
652 return slash
653 case '^':
654 if p.rune() == '^' {
655 p.rune()
656 return dblCaret
657 }
658 return caret
659 case ',':
660 if p.rune() == ',' {
661 p.rune()
662 return dblComma
663 }
664 return comma
665 case '@':
666 p.rune()
667 return at
668 default: // '*'
669 p.rune()
670 return star
671 }
672}
673
674func (p *Parser) arithmToken(r rune) token {
675 switch r {
676 case '!':
677 if p.rune() == '=' {
678 p.rune()
679 return nequal
680 }
681 return exclMark
682 case '=':
683 if p.rune() == '=' {
684 p.rune()
685 return equal
686 }
687 return assgn
688 case '~':
689 p.rune()
690 return tilde
691 case '(':
692 p.rune()
693 return leftParen
694 case ')':
695 p.rune()
696 return rightParen
697 case '&':
698 switch p.rune() {
699 case '&':
700 p.rune()
701 return andAnd
702 case '=':
703 p.rune()
704 return andAssgn
705 }
706 return and
707 case '|':
708 switch p.rune() {
709 case '|':
710 p.rune()
711 return orOr
712 case '=':
713 p.rune()
714 return orAssgn
715 }
716 return or
717 case '<':
718 switch p.rune() {
719 case '<':
720 if p.rune() == '=' {
721 p.rune()
722 return shlAssgn
723 }
724 return hdoc
725 case '=':
726 p.rune()
727 return lequal
728 }
729 return rdrIn
730 case '>':
731 switch p.rune() {
732 case '>':
733 if p.rune() == '=' {
734 p.rune()
735 return shrAssgn
736 }
737 return appOut
738 case '=':
739 p.rune()
740 return gequal
741 }
742 return rdrOut
743 case '+':
744 switch p.rune() {
745 case '+':
746 p.rune()
747 return addAdd
748 case '=':
749 p.rune()
750 return addAssgn
751 }
752 return plus
753 case '-':
754 switch p.rune() {
755 case '-':
756 p.rune()
757 return subSub
758 case '=':
759 p.rune()
760 return subAssgn
761 }
762 return minus
763 case '%':
764 if p.rune() == '=' {
765 p.rune()
766 return remAssgn
767 }
768 return perc
769 case '*':
770 switch p.rune() {
771 case '*':
772 p.rune()
773 return power
774 case '=':
775 p.rune()
776 return mulAssgn
777 }
778 return star
779 case '/':
780 if p.rune() == '=' {
781 p.rune()
782 return quoAssgn
783 }
784 return slash
785 case '^':
786 if p.rune() == '=' {
787 p.rune()
788 return xorAssgn
789 }
790 return caret
791 case '[':
792 p.rune()
793 return leftBrack
794 case ']':
795 p.rune()
796 return rightBrack
797 case ',':
798 p.rune()
799 return comma
800 case '?':
801 p.rune()
802 return quest
803 case ':':
804 p.rune()
805 return colon
806 default: // '#'
807 p.rune()
808 return hash
809 }
810}
811
812func (p *Parser) newLit(r rune) {
813 switch {
814 case r < utf8.RuneSelf:
815 p.litBs = p.litBuf[:1]
816 p.litBs[0] = byte(r)
817 case r > escNewl:
818 w := utf8.RuneLen(r)
819 p.litBs = append(p.litBuf[:0], p.bs[p.bsp-uint(w):p.bsp]...)
820 default:
821 // don't let r == utf8.RuneSelf go to the second case as [utf8.RuneLen]
822 // would return -1
823 p.litBs = p.litBuf[:0]
824 }
825}
826
827func (p *Parser) endLit() (s string) {
828 if p.r == utf8.RuneSelf || p.r == escNewl {
829 s = string(p.litBs)
830 } else {
831 s = string(p.litBs[:len(p.litBs)-p.w])
832 }
833 p.litBs = nil
834 return
835}
836
837func (p *Parser) isLitRedir() bool {
838 lit := p.litBs[:len(p.litBs)-1]
839 if lit[0] == '{' && lit[len(lit)-1] == '}' {
840 return ValidName(string(lit[1 : len(lit)-1]))
841 }
842 for _, b := range lit {
843 switch b {
844 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
845 default:
846 return false
847 }
848 }
849 return true
850}
851
852func (p *Parser) advanceNameCont(r rune) {
853 // we know that r is a letter or underscore
854loop:
855 for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
856 switch {
857 case 'a' <= r && r <= 'z':
858 case 'A' <= r && r <= 'Z':
859 case r == '_':
860 case '0' <= r && r <= '9':
861 case r == escNewl:
862 default:
863 break loop
864 }
865 }
866 p.tok, p.val = _LitWord, p.endLit()
867}
868
// advanceLitOther lexes a literal starting at r in the quote states not
// covered by the other advanceLit* methods. Which runes end the literal
// depends on p.quote. The token is _Lit when the literal is cut short by a
// quote or expansion rune, and _LitWord otherwise; the text goes to p.val.
func (p *Parser) advanceLitOther(r rune) {
	tok := _LitWord
loop:
	for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
		switch r {
		case '\\': // escaped byte follows
			p.rune()
		case '\'', '"', '`', '$':
			// A quote or expansion follows, so the word continues.
			tok = _Lit
			break loop
		case '}':
			if p.quote&allParamExp != 0 {
				break loop
			}
		case '/':
			if p.quote != paramExpExp {
				break loop
			}
		case ':', '=', '%', '^', ',', '?', '!', '~', '*':
			if p.quote&allArithmExpr != 0 || p.quote == paramExpName {
				break loop
			}
		case '[', ']':
			if p.lang != LangPOSIX && p.quote&allArithmExpr != 0 {
				break loop
			}
			// Otherwise brackets are treated like '#' and '@' below.
			fallthrough
		case '#', '@':
			if p.quote&allParamReg != 0 {
				break loop
			}
		case '+', '-', ' ', '\t', ';', '&', '>', '<', '|', '(', ')', '\n', '\r':
			// These only end the literal where spaces are not kept.
			if p.quote&allKeepSpaces == 0 {
				break loop
			}
		}
	}
	p.tok, p.val = tok, p.endLit()
}
908
// advanceLitNone lexes a literal starting at r, as called from next for
// the regular-token quote states. The text goes to p.val, and the token is
// one of:
//
//   - _LitWord when the literal ends at a blank, '&', '|', ';', '(' or ')',
//     at the end of the input, or at a backquote while in subCmdBckquo,
//   - _Lit when more word parts follow, such as a quote or an expansion,
//   - _LitRedir when it is directly followed by '<' or '>' and
//     isLitRedir accepts it.
//
// p.eqlOffs is set to the offset of the first '=' within the literal, or
// left at -1 if there is none.
func (p *Parser) advanceLitNone(r rune) {
	p.eqlOffs = -1
	tok := _LitWord
loop:
	for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
		switch r {
		case ' ', '\t', '\n', '\r', '&', '|', ';', '(', ')':
			break loop
		case '\\': // escaped byte follows
			p.rune()
		case '>', '<':
			if p.peekByte('(') {
				// Followed by '(', so more word parts follow.
				tok = _Lit
			} else if p.isLitRedir() {
				tok = _LitRedir
			}
			break loop
		case '`':
			if p.quote != subCmdBckquo {
				tok = _Lit
			}
			break loop
		case '"', '\'', '$':
			tok = _Lit
			break loop
		case '?', '*', '+', '@', '!':
			if p.extendedGlob() {
				tok = _Lit
				break loop
			}
		case '=':
			if p.eqlOffs < 0 {
				// litBs already contains the '=', hence the -1.
				p.eqlOffs = len(p.litBs) - 1
			}
		case '[':
			// Outside POSIX mode, stop at a '[' that follows other bytes,
			// unless the literal itself began with '['.
			if p.lang != LangPOSIX && len(p.litBs) > 1 && p.litBs[0] != '[' {
				tok = _Lit
				break loop
			}
		}
	}
	p.tok, p.val = tok, p.endLit()
}
952
953func (p *Parser) advanceLitDquote(r rune) {
954 tok := _LitWord
955loop:
956 for p.newLit(r); r != utf8.RuneSelf; r = p.rune() {
957 switch r {
958 case '"':
959 break loop
960 case '\\': // escaped byte follows
961 p.rune()
962 case escNewl, '`', '$':
963 tok = _Lit
964 break loop
965 }
966 }
967 p.tok, p.val = tok, p.endLit()
968}
969
// advanceLitHdoc lexes literal text starting at r inside an unquoted
// heredoc body. It stops at an expansion ('$' or an opening backquote), at
// an escaped newline, at the end of the input, at a closing backquote, or
// at a line equal to the heredoc's stop word. When the stop word is found,
// the text before that line is stored in p.val and the stop word's entry
// in p.hdocStops is set to nil.
func (p *Parser) advanceLitHdoc(r rune) {
	// Unlike the rest of nextKeepSpaces quote states, we handle escaped
	// newlines here. If lastTok==_Lit, then we know we're following an
	// escaped newline, so the first line can't end the heredoc.
	lastTok := p.tok
	for r == escNewl {
		r = p.rune()
		lastTok = _Lit
	}
	p.pos = p.nextPos()

	p.tok = _Lit
	p.newLit(r)
	if p.quote == hdocBodyTabs {
		// Skip the leading tabs of the first line.
		for r == '\t' {
			r = p.rune()
		}
	}
	// lStart is the offset within p.litBs where the current line starts.
	// It is -1 when the literal starts out empty.
	lStart := len(p.litBs) - 1
	stop := p.hdocStops[len(p.hdocStops)-1]
	for ; ; r = p.rune() {
		switch r {
		case escNewl, '$':
			p.val = p.endLit()
			return
		case '\\': // escaped byte follows
			p.rune()
		case '`':
			if !p.backquoteEnd() {
				// An opening backquote: stop so it can be lexed as a token.
				p.val = p.endLit()
				return
			}
			// A closing backquote ends the line like a newline or EOF.
			fallthrough
		case '\n', utf8.RuneSelf:
			if p.parsingDoc {
				if r == utf8.RuneSelf {
					p.tok = _LitWord
					p.val = p.endLit()
					return
				}
			} else if lStart == 0 && lastTok == _Lit {
				// This line starts right after an escaped
				// newline, so it should never end the heredoc.
			} else if lStart >= 0 {
				// Compare the current line with the stop word.
				line := p.litBs[lStart:]
				if r != utf8.RuneSelf && len(line) > 0 {
					line = line[:len(line)-1] // minus trailing character
				}
				if bytes.Equal(line, stop) {
					p.tok = _LitWord
					p.val = p.endLit()[:lStart]
					if p.val == "" {
						p.tok = _Newl
					}
					p.hdocStops[len(p.hdocStops)-1] = nil
					return
				}
			}
			if r != '\n' {
				return // hit an unexpected EOF or closing backquote
			}
			if p.quote == hdocBodyTabs {
				// Skip the leading tabs of the next line.
				for p.peekByte('\t') {
					p.rune()
				}
			}
			// The next line starts right after this newline.
			lStart = len(p.litBs)
		}
	}
}
1041
// quotedHdocWord reads the body of a heredoc as raw text, without lexing
// any expansions, one line at a time until a line equals the heredoc's
// stop word. It returns the body as a single-literal word, or nil if the
// body is empty or the input ends before the stop word is found. When the
// stop word is found, its entry in p.hdocStops is set to nil.
func (p *Parser) quotedHdocWord() *Word {
	r := p.r
	p.newLit(r)
	pos := p.nextPos()
	stop := p.hdocStops[len(p.hdocStops)-1]
	for ; ; r = p.rune() {
		if r == utf8.RuneSelf {
			return nil
		}
		if p.quote == hdocBodyTabs {
			// Skip the leading tabs of this line.
			for r == '\t' {
				r = p.rune()
			}
		}
		// lStart is the offset within p.litBs where this line starts.
		lStart := len(p.litBs) - 1
	runeLoop:
		for {
			switch r {
			case utf8.RuneSelf, '\n':
				break runeLoop
			case '`':
				if p.backquoteEnd() {
					break runeLoop
				}
			case escNewl:
				// rune() does not add escaped newlines to litBs, so put
				// the original two bytes back into the body.
				p.litBs = append(p.litBs, '\\', '\n')
				break runeLoop
			}
			r = p.rune()
		}
		if lStart < 0 {
			continue
		}
		// Compare the current line with the stop word.
		line := p.litBs[lStart:]
		if r != utf8.RuneSelf && len(line) > 0 {
			line = line[:len(line)-1] // minus \n
		}
		if bytes.Equal(line, stop) {
			p.hdocStops[len(p.hdocStops)-1] = nil
			// Drop the stop word line from the body.
			val := p.endLit()[:lStart]
			if val == "" {
				return nil
			}
			return p.wordOne(p.lit(pos, val))
		}
	}
}
1090
1091func (p *Parser) advanceLitRe(r rune) {
1092 for p.newLit(r); ; r = p.rune() {
1093 switch r {
1094 case '\\':
1095 p.rune()
1096 case '(':
1097 p.rxOpenParens++
1098 case ')':
1099 if p.rxOpenParens--; p.rxOpenParens < 0 {
1100 p.tok, p.val = _LitWord, p.endLit()
1101 p.quote = noState
1102 return
1103 }
1104 case ' ', '\t', '\r', '\n', ';', '&', '>', '<':
1105 if p.rxOpenParens <= 0 {
1106 p.tok, p.val = _LitWord, p.endLit()
1107 p.quote = noState
1108 return
1109 }
1110 case '"', '\'', '$', '`':
1111 p.tok, p.val = _Lit, p.endLit()
1112 return
1113 case utf8.RuneSelf:
1114 p.tok, p.val = _LitWord, p.endLit()
1115 p.quote = noState
1116 return
1117 }
1118 }
1119}
1120
1121func testUnaryOp(val string) UnTestOperator {
1122 switch val {
1123 case "!":
1124 return TsNot
1125 case "-e", "-a":
1126 return TsExists
1127 case "-f":
1128 return TsRegFile
1129 case "-d":
1130 return TsDirect
1131 case "-c":
1132 return TsCharSp
1133 case "-b":
1134 return TsBlckSp
1135 case "-p":
1136 return TsNmPipe
1137 case "-S":
1138 return TsSocket
1139 case "-L", "-h":
1140 return TsSmbLink
1141 case "-k":
1142 return TsSticky
1143 case "-g":
1144 return TsGIDSet
1145 case "-u":
1146 return TsUIDSet
1147 case "-G":
1148 return TsGrpOwn
1149 case "-O":
1150 return TsUsrOwn
1151 case "-N":
1152 return TsModif
1153 case "-r":
1154 return TsRead
1155 case "-w":
1156 return TsWrite
1157 case "-x":
1158 return TsExec
1159 case "-s":
1160 return TsNoEmpty
1161 case "-t":
1162 return TsFdTerm
1163 case "-z":
1164 return TsEmpStr
1165 case "-n":
1166 return TsNempStr
1167 case "-o":
1168 return TsOptSet
1169 case "-v":
1170 return TsVarSet
1171 case "-R":
1172 return TsRefVar
1173 default:
1174 return 0
1175 }
1176}
1177
1178func testBinaryOp(val string) BinTestOperator {
1179 switch val {
1180 case "=":
1181 return TsMatchShort
1182 case "==":
1183 return TsMatch
1184 case "!=":
1185 return TsNoMatch
1186 case "=~":
1187 return TsReMatch
1188 case "-nt":
1189 return TsNewer
1190 case "-ot":
1191 return TsOlder
1192 case "-ef":
1193 return TsDevIno
1194 case "-eq":
1195 return TsEql
1196 case "-ne":
1197 return TsNeq
1198 case "-le":
1199 return TsLeq
1200 case "-ge":
1201 return TsGeq
1202 case "-lt":
1203 return TsLss
1204 case "-gt":
1205 return TsGtr
1206 default:
1207 return 0
1208 }
1209}