1package regexp2
2
3import (
4 "bytes"
5 "errors"
6 "fmt"
7 "math"
8 "strconv"
9 "strings"
10 "time"
11 "unicode"
12
13 "github.com/dlclark/regexp2/syntax"
14)
15
16type runner struct {
17 re *Regexp
18 code *syntax.Code
19
20 runtextstart int // starting point for search
21
22 runtext []rune // text to search
23 runtextpos int // current position in text
24 runtextend int
25
26 // The backtracking stack. Opcodes use this to store data regarding
27 // what they have matched and where to backtrack to. Each "frame" on
28 // the stack takes the form of [CodePosition Data1 Data2...], where
29 // CodePosition is the position of the current opcode and
30 // the data values are all optional. The CodePosition can be negative, and
31 // these values (also called "back2") are used by the BranchMark family of opcodes
32 // to indicate whether they are backtracking after a successful or failed
33 // match.
34 // When we backtrack, we pop the CodePosition off the stack, set the current
35 // instruction pointer to that code position, and mark the opcode
36 // with a backtracking flag ("Back"). Each opcode then knows how to
37 // handle its own data.
38 runtrack []int
39 runtrackpos int
40
41 // This stack is used to track text positions across different opcodes.
42 // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
43 // pair. SetMark records the text position before we match a*b. Then
44 // CaptureMark uses that position to figure out where the capture starts.
45 // Opcodes which push onto this stack are always paired with other opcodes
46 // which will pop the value from it later. A successful match should mean
47 // that this stack is empty.
48 runstack []int
49 runstackpos int
50
51 // The crawl stack is used to keep track of captures. Every time a group
52 // has a capture, we push its group number onto the runcrawl stack. In
53 // the case of a balanced match, we push BOTH groups onto the stack.
54 runcrawl []int
55 runcrawlpos int
56
57 runtrackcount int // count of states that may do backtracking
58
59 runmatch *Match // result object
60
61 ignoreTimeout bool
62 timeout time.Duration // timeout in milliseconds (needed for actual)
63 timeoutChecksToSkip int
64 timeoutAt time.Time
65
66 operator syntax.InstOp
67 codepos int
68 rightToLeft bool
69 caseInsensitive bool
70}
71
72// run searches for matches and can continue from the previous match
73//
74// quick is usually false, but can be true to not return matches, just put it in caches
75// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
76// input is the string to search for our regex pattern
77func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
78
79 // get a cached runner
80 runner := re.getRunner()
81 defer re.putRunner(runner)
82
83 if textstart < 0 {
84 if re.RightToLeft() {
85 textstart = len(input)
86 } else {
87 textstart = 0
88 }
89 }
90
91 return runner.scan(input, textstart, quick, re.MatchTimeout)
92}
93
94// Scans the string to find the first match. Uses the Match object
95// both to feed text in and as a place to store matches that come out.
96//
97// All the action is in the Go() method. Our
98// responsibility is to load up the class members before
99// calling Go.
100//
101// The optimizer can compute a set of candidate starting characters,
102// and we could use a separate method Skip() that will quickly scan past
103// any characters that we know can't match.
104func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
105 r.timeout = timeout
106 r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
107 r.runtextstart = textstart
108 r.runtext = rt
109 r.runtextend = len(rt)
110
111 stoppos := r.runtextend
112 bump := 1
113
114 if r.re.RightToLeft() {
115 bump = -1
116 stoppos = 0
117 }
118
119 r.runtextpos = textstart
120 initted := false
121
122 r.startTimeoutWatch()
123 for {
124 if r.re.Debug() {
125 //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
126 fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
127 fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
128 }
129
130 if r.findFirstChar() {
131 if err := r.checkTimeout(); err != nil {
132 return nil, err
133 }
134
135 if !initted {
136 r.initMatch()
137 initted = true
138 }
139
140 if r.re.Debug() {
141 fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
142 }
143
144 if err := r.execute(); err != nil {
145 return nil, err
146 }
147
148 if r.runmatch.matchcount[0] > 0 {
149 // We'll return a match even if it touches a previous empty match
150 return r.tidyMatch(quick), nil
151 }
152
153 // reset state for another go
154 r.runtrackpos = len(r.runtrack)
155 r.runstackpos = len(r.runstack)
156 r.runcrawlpos = len(r.runcrawl)
157 }
158
159 // failure!
160
161 if r.runtextpos == stoppos {
162 r.tidyMatch(true)
163 return nil, nil
164 }
165
166 // Recognize leading []* and various anchors, and bump on failure accordingly
167
168 // r.bump by one and start again
169
170 r.runtextpos += bump
171 }
172 // We never get here
173}
174
175func (r *runner) execute() error {
176
177 r.goTo(0)
178
179 for {
180
181 if r.re.Debug() {
182 r.dumpState()
183 }
184
185 if err := r.checkTimeout(); err != nil {
186 return err
187 }
188
189 switch r.operator {
190 case syntax.Stop:
191 return nil
192
193 case syntax.Nothing:
194 break
195
196 case syntax.Goto:
197 r.goTo(r.operand(0))
198 continue
199
200 case syntax.Testref:
201 if !r.runmatch.isMatched(r.operand(0)) {
202 break
203 }
204 r.advance(1)
205 continue
206
207 case syntax.Lazybranch:
208 r.trackPush1(r.textPos())
209 r.advance(1)
210 continue
211
212 case syntax.Lazybranch | syntax.Back:
213 r.trackPop()
214 r.textto(r.trackPeek())
215 r.goTo(r.operand(0))
216 continue
217
218 case syntax.Setmark:
219 r.stackPush(r.textPos())
220 r.trackPush()
221 r.advance(0)
222 continue
223
224 case syntax.Nullmark:
225 r.stackPush(-1)
226 r.trackPush()
227 r.advance(0)
228 continue
229
230 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
231 r.stackPop()
232 break
233
234 case syntax.Getmark:
235 r.stackPop()
236 r.trackPush1(r.stackPeek())
237 r.textto(r.stackPeek())
238 r.advance(0)
239 continue
240
241 case syntax.Getmark | syntax.Back:
242 r.trackPop()
243 r.stackPush(r.trackPeek())
244 break
245
246 case syntax.Capturemark:
247 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
248 break
249 }
250 r.stackPop()
251 if r.operand(1) != -1 {
252 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
253 } else {
254 r.capture(r.operand(0), r.stackPeek(), r.textPos())
255 }
256 r.trackPush1(r.stackPeek())
257
258 r.advance(2)
259
260 continue
261
262 case syntax.Capturemark | syntax.Back:
263 r.trackPop()
264 r.stackPush(r.trackPeek())
265 r.uncapture()
266 if r.operand(0) != -1 && r.operand(1) != -1 {
267 r.uncapture()
268 }
269
270 break
271
272 case syntax.Branchmark:
273 r.stackPop()
274
275 matched := r.textPos() - r.stackPeek()
276
277 if matched != 0 { // Nonempty match -> loop now
278 r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
279 r.stackPush(r.textPos()) // Make new mark
280 r.goTo(r.operand(0)) // Loop
281 } else { // Empty match -> straight now
282 r.trackPushNeg1(r.stackPeek()) // Save old mark
283 r.advance(1) // Straight
284 }
285 continue
286
287 case syntax.Branchmark | syntax.Back:
288 r.trackPopN(2)
289 r.stackPop()
290 r.textto(r.trackPeekN(1)) // Recall position
291 r.trackPushNeg1(r.trackPeek()) // Save old mark
292 r.advance(1) // Straight
293 continue
294
295 case syntax.Branchmark | syntax.Back2:
296 r.trackPop()
297 r.stackPush(r.trackPeek()) // Recall old mark
298 break // Backtrack
299
300 case syntax.Lazybranchmark:
301 {
302 // We hit this the first time through a lazy loop and after each
303 // successful match of the inner expression. It simply continues
304 // on and doesn't loop.
305 r.stackPop()
306
307 oldMarkPos := r.stackPeek()
308
309 if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
310 if oldMarkPos != -1 {
311 r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
312 } else {
313 r.trackPush2(r.textPos(), r.textPos())
314 }
315 } else {
316 // The inner expression found an empty match, so we'll go directly to 'back2' if we
317 // backtrack. In this case, we need to push something on the stack, since back2 pops.
318 // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
319 // position associated with that empty match.
320 r.stackPush(oldMarkPos)
321
322 r.trackPushNeg1(r.stackPeek()) // Save old mark
323 }
324 r.advance(1)
325 continue
326 }
327
328 case syntax.Lazybranchmark | syntax.Back:
329
330 // After the first time, Lazybranchmark | syntax.Back occurs
331 // with each iteration of the loop, and therefore with every attempted
332 // match of the inner expression. We'll try to match the inner expression,
333 // then go back to Lazybranchmark if successful. If the inner expression
334 // fails, we go to Lazybranchmark | syntax.Back2
335
336 r.trackPopN(2)
337 pos := r.trackPeekN(1)
338 r.trackPushNeg1(r.trackPeek()) // Save old mark
339 r.stackPush(pos) // Make new mark
340 r.textto(pos) // Recall position
341 r.goTo(r.operand(0)) // Loop
342 continue
343
344 case syntax.Lazybranchmark | syntax.Back2:
345 // The lazy loop has failed. We'll do a true backtrack and
346 // start over before the lazy loop.
347 r.stackPop()
348 r.trackPop()
349 r.stackPush(r.trackPeek()) // Recall old mark
350 break
351
352 case syntax.Setcount:
353 r.stackPush2(r.textPos(), r.operand(0))
354 r.trackPush()
355 r.advance(1)
356 continue
357
358 case syntax.Nullcount:
359 r.stackPush2(-1, r.operand(0))
360 r.trackPush()
361 r.advance(1)
362 continue
363
364 case syntax.Setcount | syntax.Back:
365 r.stackPopN(2)
366 break
367
368 case syntax.Nullcount | syntax.Back:
369 r.stackPopN(2)
370 break
371
372 case syntax.Branchcount:
373 // r.stackPush:
374 // 0: Mark
375 // 1: Count
376
377 r.stackPopN(2)
378 mark := r.stackPeek()
379 count := r.stackPeekN(1)
380 matched := r.textPos() - mark
381
382 if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
383 r.trackPushNeg2(mark, count) // Save old mark, count
384 r.advance(2) // Straight
385 } else { // Nonempty match -> count+loop now
386 r.trackPush1(mark) // remember mark
387 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
388 r.goTo(r.operand(0)) // Loop
389 }
390 continue
391
392 case syntax.Branchcount | syntax.Back:
393 // r.trackPush:
394 // 0: Previous mark
395 // r.stackPush:
396 // 0: Mark (= current pos, discarded)
397 // 1: Count
398 r.trackPop()
399 r.stackPopN(2)
400 if r.stackPeekN(1) > 0 { // Positive -> can go straight
401 r.textto(r.stackPeek()) // Zap to mark
402 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
403 r.advance(2) // Straight
404 continue
405 }
406 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
407 break
408
409 case syntax.Branchcount | syntax.Back2:
410 // r.trackPush:
411 // 0: Previous mark
412 // 1: Previous count
413 r.trackPopN(2)
414 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
415 break // Backtrack
416
417 case syntax.Lazybranchcount:
418 // r.stackPush:
419 // 0: Mark
420 // 1: Count
421
422 r.stackPopN(2)
423 mark := r.stackPeek()
424 count := r.stackPeekN(1)
425
426 if count < 0 { // Negative count -> loop now
427 r.trackPushNeg1(mark) // Save old mark
428 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
429 r.goTo(r.operand(0)) // Loop
430 } else { // Nonneg count -> straight now
431 r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
432 r.advance(2) // Straight
433 }
434 continue
435
436 case syntax.Lazybranchcount | syntax.Back:
437 // r.trackPush:
438 // 0: Mark
439 // 1: Count
440 // 2: r.textPos
441
442 r.trackPopN(3)
443 mark := r.trackPeek()
444 textpos := r.trackPeekN(2)
445
446 if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
447 r.textto(textpos) // Recall position
448 r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
449 r.trackPushNeg1(mark) // Save old mark
450 r.goTo(r.operand(0)) // Loop
451 continue
452 } else { // Max loops or empty match -> backtrack
453 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
454 break // backtrack
455 }
456
457 case syntax.Lazybranchcount | syntax.Back2:
458 // r.trackPush:
459 // 0: Previous mark
460 // r.stackPush:
461 // 0: Mark (== current pos, discarded)
462 // 1: Count
463 r.trackPop()
464 r.stackPopN(2)
465 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
466 break // Backtrack
467
468 case syntax.Setjump:
469 r.stackPush2(r.trackpos(), r.crawlpos())
470 r.trackPush()
471 r.advance(0)
472 continue
473
474 case syntax.Setjump | syntax.Back:
475 r.stackPopN(2)
476 break
477
478 case syntax.Backjump:
479 // r.stackPush:
480 // 0: Saved trackpos
481 // 1: r.crawlpos
482 r.stackPopN(2)
483 r.trackto(r.stackPeek())
484
485 for r.crawlpos() != r.stackPeekN(1) {
486 r.uncapture()
487 }
488
489 break
490
491 case syntax.Forejump:
492 // r.stackPush:
493 // 0: Saved trackpos
494 // 1: r.crawlpos
495 r.stackPopN(2)
496 r.trackto(r.stackPeek())
497 r.trackPush1(r.stackPeekN(1))
498 r.advance(0)
499 continue
500
501 case syntax.Forejump | syntax.Back:
502 // r.trackPush:
503 // 0: r.crawlpos
504 r.trackPop()
505
506 for r.crawlpos() != r.trackPeek() {
507 r.uncapture()
508 }
509
510 break
511
512 case syntax.Bol:
513 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
514 break
515 }
516 r.advance(0)
517 continue
518
519 case syntax.Eol:
520 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
521 break
522 }
523 r.advance(0)
524 continue
525
526 case syntax.Boundary:
527 if !r.isBoundary(r.textPos(), 0, r.runtextend) {
528 break
529 }
530 r.advance(0)
531 continue
532
533 case syntax.Nonboundary:
534 if r.isBoundary(r.textPos(), 0, r.runtextend) {
535 break
536 }
537 r.advance(0)
538 continue
539
540 case syntax.ECMABoundary:
541 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
542 break
543 }
544 r.advance(0)
545 continue
546
547 case syntax.NonECMABoundary:
548 if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
549 break
550 }
551 r.advance(0)
552 continue
553
554 case syntax.Beginning:
555 if r.leftchars() > 0 {
556 break
557 }
558 r.advance(0)
559 continue
560
561 case syntax.Start:
562 if r.textPos() != r.textstart() {
563 break
564 }
565 r.advance(0)
566 continue
567
568 case syntax.EndZ:
569 if r.rightchars() > 1 || r.rightchars() == 1 && r.charAt(r.textPos()) != '\n' {
570 break
571 }
572 r.advance(0)
573 continue
574
575 case syntax.End:
576 if r.rightchars() > 0 {
577 break
578 }
579 r.advance(0)
580 continue
581
582 case syntax.One:
583 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
584 break
585 }
586
587 r.advance(1)
588 continue
589
590 case syntax.Notone:
591 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
592 break
593 }
594
595 r.advance(1)
596 continue
597
598 case syntax.Set:
599
600 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
601 break
602 }
603
604 r.advance(1)
605 continue
606
607 case syntax.Multi:
608 if !r.runematch(r.code.Strings[r.operand(0)]) {
609 break
610 }
611
612 r.advance(1)
613 continue
614
615 case syntax.Ref:
616
617 capnum := r.operand(0)
618
619 if r.runmatch.isMatched(capnum) {
620 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
621 break
622 }
623 } else {
624 if (r.re.options & ECMAScript) == 0 {
625 break
626 }
627 }
628
629 r.advance(1)
630 continue
631
632 case syntax.Onerep:
633
634 c := r.operand(1)
635
636 if r.forwardchars() < c {
637 break
638 }
639
640 ch := rune(r.operand(0))
641
642 for c > 0 {
643 if r.forwardcharnext() != ch {
644 goto BreakBackward
645 }
646 c--
647 }
648
649 r.advance(2)
650 continue
651
652 case syntax.Notonerep:
653
654 c := r.operand(1)
655
656 if r.forwardchars() < c {
657 break
658 }
659 ch := rune(r.operand(0))
660
661 for c > 0 {
662 if r.forwardcharnext() == ch {
663 goto BreakBackward
664 }
665 c--
666 }
667
668 r.advance(2)
669 continue
670
671 case syntax.Setrep:
672
673 c := r.operand(1)
674
675 if r.forwardchars() < c {
676 break
677 }
678
679 set := r.code.Sets[r.operand(0)]
680
681 for c > 0 {
682 if !set.CharIn(r.forwardcharnext()) {
683 goto BreakBackward
684 }
685 c--
686 }
687
688 r.advance(2)
689 continue
690
691 case syntax.Oneloop:
692
693 c := r.operand(1)
694
695 if c > r.forwardchars() {
696 c = r.forwardchars()
697 }
698
699 ch := rune(r.operand(0))
700 i := c
701
702 for ; i > 0; i-- {
703 if r.forwardcharnext() != ch {
704 r.backwardnext()
705 break
706 }
707 }
708
709 if c > i {
710 r.trackPush2(c-i-1, r.textPos()-r.bump())
711 }
712
713 r.advance(2)
714 continue
715
716 case syntax.Notoneloop:
717
718 c := r.operand(1)
719
720 if c > r.forwardchars() {
721 c = r.forwardchars()
722 }
723
724 ch := rune(r.operand(0))
725 i := c
726
727 for ; i > 0; i-- {
728 if r.forwardcharnext() == ch {
729 r.backwardnext()
730 break
731 }
732 }
733
734 if c > i {
735 r.trackPush2(c-i-1, r.textPos()-r.bump())
736 }
737
738 r.advance(2)
739 continue
740
741 case syntax.Setloop:
742
743 c := r.operand(1)
744
745 if c > r.forwardchars() {
746 c = r.forwardchars()
747 }
748
749 set := r.code.Sets[r.operand(0)]
750 i := c
751
752 for ; i > 0; i-- {
753 if !set.CharIn(r.forwardcharnext()) {
754 r.backwardnext()
755 break
756 }
757 }
758
759 if c > i {
760 r.trackPush2(c-i-1, r.textPos()-r.bump())
761 }
762
763 r.advance(2)
764 continue
765
766 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
767
768 r.trackPopN(2)
769 i := r.trackPeek()
770 pos := r.trackPeekN(1)
771
772 r.textto(pos)
773
774 if i > 0 {
775 r.trackPush2(i-1, pos-r.bump())
776 }
777
778 r.advance(2)
779 continue
780
781 case syntax.Setloop | syntax.Back:
782
783 r.trackPopN(2)
784 i := r.trackPeek()
785 pos := r.trackPeekN(1)
786
787 r.textto(pos)
788
789 if i > 0 {
790 r.trackPush2(i-1, pos-r.bump())
791 }
792
793 r.advance(2)
794 continue
795
796 case syntax.Onelazy, syntax.Notonelazy:
797
798 c := r.operand(1)
799
800 if c > r.forwardchars() {
801 c = r.forwardchars()
802 }
803
804 if c > 0 {
805 r.trackPush2(c-1, r.textPos())
806 }
807
808 r.advance(2)
809 continue
810
811 case syntax.Setlazy:
812
813 c := r.operand(1)
814
815 if c > r.forwardchars() {
816 c = r.forwardchars()
817 }
818
819 if c > 0 {
820 r.trackPush2(c-1, r.textPos())
821 }
822
823 r.advance(2)
824 continue
825
826 case syntax.Onelazy | syntax.Back:
827
828 r.trackPopN(2)
829 pos := r.trackPeekN(1)
830 r.textto(pos)
831
832 if r.forwardcharnext() != rune(r.operand(0)) {
833 break
834 }
835
836 i := r.trackPeek()
837
838 if i > 0 {
839 r.trackPush2(i-1, pos+r.bump())
840 }
841
842 r.advance(2)
843 continue
844
845 case syntax.Notonelazy | syntax.Back:
846
847 r.trackPopN(2)
848 pos := r.trackPeekN(1)
849 r.textto(pos)
850
851 if r.forwardcharnext() == rune(r.operand(0)) {
852 break
853 }
854
855 i := r.trackPeek()
856
857 if i > 0 {
858 r.trackPush2(i-1, pos+r.bump())
859 }
860
861 r.advance(2)
862 continue
863
864 case syntax.Setlazy | syntax.Back:
865
866 r.trackPopN(2)
867 pos := r.trackPeekN(1)
868 r.textto(pos)
869
870 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
871 break
872 }
873
874 i := r.trackPeek()
875
876 if i > 0 {
877 r.trackPush2(i-1, pos+r.bump())
878 }
879
880 r.advance(2)
881 continue
882
883 default:
884 return errors.New("unknown state in regex runner")
885 }
886
887 BreakBackward:
888 ;
889
890 // "break Backward" comes here:
891 r.backtrack()
892 }
893}
894
895// increase the size of stack and track storage
896func (r *runner) ensureStorage() {
897 if r.runstackpos < r.runtrackcount*4 {
898 doubleIntSlice(&r.runstack, &r.runstackpos)
899 }
900 if r.runtrackpos < r.runtrackcount*4 {
901 doubleIntSlice(&r.runtrack, &r.runtrackpos)
902 }
903}
904
905func doubleIntSlice(s *[]int, pos *int) {
906 oldLen := len(*s)
907 newS := make([]int, oldLen*2)
908
909 copy(newS[oldLen:], *s)
910 *pos += oldLen
911 *s = newS
912}
913
914// Save a number on the longjump unrolling stack
915func (r *runner) crawl(i int) {
916 if r.runcrawlpos == 0 {
917 doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
918 }
919 r.runcrawlpos--
920 r.runcrawl[r.runcrawlpos] = i
921}
922
923// Remove a number from the longjump unrolling stack
924func (r *runner) popcrawl() int {
925 val := r.runcrawl[r.runcrawlpos]
926 r.runcrawlpos++
927 return val
928}
929
930// Get the height of the stack
931func (r *runner) crawlpos() int {
932 return len(r.runcrawl) - r.runcrawlpos
933}
934
935func (r *runner) advance(i int) {
936 r.codepos += (i + 1)
937 r.setOperator(r.code.Codes[r.codepos])
938}
939
940func (r *runner) goTo(newpos int) {
941 // when branching backward, ensure storage
942 if newpos < r.codepos {
943 r.ensureStorage()
944 }
945
946 r.setOperator(r.code.Codes[newpos])
947 r.codepos = newpos
948}
949
950func (r *runner) textto(newpos int) {
951 r.runtextpos = newpos
952}
953
954func (r *runner) trackto(newpos int) {
955 r.runtrackpos = len(r.runtrack) - newpos
956}
957
958func (r *runner) textstart() int {
959 return r.runtextstart
960}
961
962func (r *runner) textPos() int {
963 return r.runtextpos
964}
965
966// push onto the backtracking stack
967func (r *runner) trackpos() int {
968 return len(r.runtrack) - r.runtrackpos
969}
970
971func (r *runner) trackPush() {
972 r.runtrackpos--
973 r.runtrack[r.runtrackpos] = r.codepos
974}
975
976func (r *runner) trackPush1(I1 int) {
977 r.runtrackpos--
978 r.runtrack[r.runtrackpos] = I1
979 r.runtrackpos--
980 r.runtrack[r.runtrackpos] = r.codepos
981}
982
983func (r *runner) trackPush2(I1, I2 int) {
984 r.runtrackpos--
985 r.runtrack[r.runtrackpos] = I1
986 r.runtrackpos--
987 r.runtrack[r.runtrackpos] = I2
988 r.runtrackpos--
989 r.runtrack[r.runtrackpos] = r.codepos
990}
991
992func (r *runner) trackPush3(I1, I2, I3 int) {
993 r.runtrackpos--
994 r.runtrack[r.runtrackpos] = I1
995 r.runtrackpos--
996 r.runtrack[r.runtrackpos] = I2
997 r.runtrackpos--
998 r.runtrack[r.runtrackpos] = I3
999 r.runtrackpos--
1000 r.runtrack[r.runtrackpos] = r.codepos
1001}
1002
1003func (r *runner) trackPushNeg1(I1 int) {
1004 r.runtrackpos--
1005 r.runtrack[r.runtrackpos] = I1
1006 r.runtrackpos--
1007 r.runtrack[r.runtrackpos] = -r.codepos
1008}
1009
1010func (r *runner) trackPushNeg2(I1, I2 int) {
1011 r.runtrackpos--
1012 r.runtrack[r.runtrackpos] = I1
1013 r.runtrackpos--
1014 r.runtrack[r.runtrackpos] = I2
1015 r.runtrackpos--
1016 r.runtrack[r.runtrackpos] = -r.codepos
1017}
1018
1019func (r *runner) backtrack() {
1020 newpos := r.runtrack[r.runtrackpos]
1021 r.runtrackpos++
1022
1023 if r.re.Debug() {
1024 if newpos < 0 {
1025 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
1026 } else {
1027 fmt.Printf(" Backtracking to code position %v\n", newpos)
1028 }
1029 }
1030
1031 if newpos < 0 {
1032 newpos = -newpos
1033 r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1034 } else {
1035 r.setOperator(r.code.Codes[newpos] | syntax.Back)
1036 }
1037
1038 // When branching backward, ensure storage
1039 if newpos < r.codepos {
1040 r.ensureStorage()
1041 }
1042
1043 r.codepos = newpos
1044}
1045
1046func (r *runner) setOperator(op int) {
1047 r.caseInsensitive = (0 != (op & syntax.Ci))
1048 r.rightToLeft = (0 != (op & syntax.Rtl))
1049 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1050}
1051
1052func (r *runner) trackPop() {
1053 r.runtrackpos++
1054}
1055
1056// pop framesize items from the backtracking stack
1057func (r *runner) trackPopN(framesize int) {
1058 r.runtrackpos += framesize
1059}
1060
1061// Technically we are actually peeking at items already popped. So if you want to
1062// get and pop the top item from the stack, you do
1063// r.trackPop();
1064// r.trackPeek();
1065func (r *runner) trackPeek() int {
1066 return r.runtrack[r.runtrackpos-1]
1067}
1068
1069// get the ith element down on the backtracking stack
1070func (r *runner) trackPeekN(i int) int {
1071 return r.runtrack[r.runtrackpos-i-1]
1072}
1073
1074// Push onto the grouping stack
1075func (r *runner) stackPush(I1 int) {
1076 r.runstackpos--
1077 r.runstack[r.runstackpos] = I1
1078}
1079
1080func (r *runner) stackPush2(I1, I2 int) {
1081 r.runstackpos--
1082 r.runstack[r.runstackpos] = I1
1083 r.runstackpos--
1084 r.runstack[r.runstackpos] = I2
1085}
1086
1087func (r *runner) stackPop() {
1088 r.runstackpos++
1089}
1090
1091// pop framesize items from the grouping stack
1092func (r *runner) stackPopN(framesize int) {
1093 r.runstackpos += framesize
1094}
1095
1096// Technically we are actually peeking at items already popped. So if you want to
1097// get and pop the top item from the stack, you do
1098// r.stackPop();
1099// r.stackPeek();
1100func (r *runner) stackPeek() int {
1101 return r.runstack[r.runstackpos-1]
1102}
1103
1104// get the ith element down on the grouping stack
1105func (r *runner) stackPeekN(i int) int {
1106 return r.runstack[r.runstackpos-i-1]
1107}
1108
1109func (r *runner) operand(i int) int {
1110 return r.code.Codes[r.codepos+i+1]
1111}
1112
1113func (r *runner) leftchars() int {
1114 return r.runtextpos
1115}
1116
1117func (r *runner) rightchars() int {
1118 return r.runtextend - r.runtextpos
1119}
1120
1121func (r *runner) bump() int {
1122 if r.rightToLeft {
1123 return -1
1124 }
1125 return 1
1126}
1127
1128func (r *runner) forwardchars() int {
1129 if r.rightToLeft {
1130 return r.runtextpos
1131 }
1132 return r.runtextend - r.runtextpos
1133}
1134
1135func (r *runner) forwardcharnext() rune {
1136 var ch rune
1137 if r.rightToLeft {
1138 r.runtextpos--
1139 ch = r.runtext[r.runtextpos]
1140 } else {
1141 ch = r.runtext[r.runtextpos]
1142 r.runtextpos++
1143 }
1144
1145 if r.caseInsensitive {
1146 return unicode.ToLower(ch)
1147 }
1148 return ch
1149}
1150
1151func (r *runner) runematch(str []rune) bool {
1152 var pos int
1153
1154 c := len(str)
1155 if !r.rightToLeft {
1156 if r.runtextend-r.runtextpos < c {
1157 return false
1158 }
1159
1160 pos = r.runtextpos + c
1161 } else {
1162 if r.runtextpos-0 < c {
1163 return false
1164 }
1165
1166 pos = r.runtextpos
1167 }
1168
1169 if !r.caseInsensitive {
1170 for c != 0 {
1171 c--
1172 pos--
1173 if str[c] != r.runtext[pos] {
1174 return false
1175 }
1176 }
1177 } else {
1178 for c != 0 {
1179 c--
1180 pos--
1181 if str[c] != unicode.ToLower(r.runtext[pos]) {
1182 return false
1183 }
1184 }
1185 }
1186
1187 if !r.rightToLeft {
1188 pos += len(str)
1189 }
1190
1191 r.runtextpos = pos
1192
1193 return true
1194}
1195
1196func (r *runner) refmatch(index, len int) bool {
1197 var c, pos, cmpos int
1198
1199 if !r.rightToLeft {
1200 if r.runtextend-r.runtextpos < len {
1201 return false
1202 }
1203
1204 pos = r.runtextpos + len
1205 } else {
1206 if r.runtextpos-0 < len {
1207 return false
1208 }
1209
1210 pos = r.runtextpos
1211 }
1212 cmpos = index + len
1213
1214 c = len
1215
1216 if !r.caseInsensitive {
1217 for c != 0 {
1218 c--
1219 cmpos--
1220 pos--
1221 if r.runtext[cmpos] != r.runtext[pos] {
1222 return false
1223 }
1224
1225 }
1226 } else {
1227 for c != 0 {
1228 c--
1229 cmpos--
1230 pos--
1231
1232 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1233 return false
1234 }
1235 }
1236 }
1237
1238 if !r.rightToLeft {
1239 pos += len
1240 }
1241
1242 r.runtextpos = pos
1243
1244 return true
1245}
1246
1247func (r *runner) backwardnext() {
1248 if r.rightToLeft {
1249 r.runtextpos++
1250 } else {
1251 r.runtextpos--
1252 }
1253}
1254
1255func (r *runner) charAt(j int) rune {
1256 return r.runtext[j]
1257}
1258
1259func (r *runner) findFirstChar() bool {
1260
1261 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1262 if !r.code.RightToLeft {
1263 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1264 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1265 r.runtextpos = r.runtextend
1266 return false
1267 }
1268 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1269 r.runtextpos = r.runtextend - 1
1270 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1271 r.runtextpos = r.runtextend
1272 }
1273 } else {
1274 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1275 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1276 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1277 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1278 r.runtextpos = 0
1279 return false
1280 }
1281 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1282 r.runtextpos = 0
1283 }
1284 }
1285
1286 if r.code.BmPrefix != nil {
1287 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1288 }
1289
1290 return true // found a valid start or end anchor
1291 } else if r.code.BmPrefix != nil {
1292 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1293
1294 if r.runtextpos == -1 {
1295 if r.code.RightToLeft {
1296 r.runtextpos = 0
1297 } else {
1298 r.runtextpos = r.runtextend
1299 }
1300 return false
1301 }
1302
1303 return true
1304 } else if r.code.FcPrefix == nil {
1305 return true
1306 }
1307
1308 r.rightToLeft = r.code.RightToLeft
1309 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1310
1311 set := r.code.FcPrefix.PrefixSet
1312 if set.IsSingleton() {
1313 ch := set.SingletonChar()
1314 for i := r.forwardchars(); i > 0; i-- {
1315 if ch == r.forwardcharnext() {
1316 r.backwardnext()
1317 return true
1318 }
1319 }
1320 } else {
1321 for i := r.forwardchars(); i > 0; i-- {
1322 n := r.forwardcharnext()
1323 //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
1324 if set.CharIn(n) {
1325 r.backwardnext()
1326 return true
1327 }
1328 }
1329 }
1330
1331 return false
1332}
1333
1334func (r *runner) initMatch() {
1335 // Use a hashtable'ed Match object if the capture numbers are sparse
1336
1337 if r.runmatch == nil {
1338 if r.re.caps != nil {
1339 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1340 } else {
1341 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1342 }
1343 } else {
1344 r.runmatch.reset(r.runtext, r.runtextstart)
1345 }
1346
1347 // note we test runcrawl, because it is the last one to be allocated
1348 // If there is an alloc failure in the middle of the three allocations,
1349 // we may still return to reuse this instance, and we want to behave
1350 // as if the allocations didn't occur. (we used to test _trackcount != 0)
1351
1352 if r.runcrawl != nil {
1353 r.runtrackpos = len(r.runtrack)
1354 r.runstackpos = len(r.runstack)
1355 r.runcrawlpos = len(r.runcrawl)
1356 return
1357 }
1358
1359 r.initTrackCount()
1360
1361 tracksize := r.runtrackcount * 8
1362 stacksize := r.runtrackcount * 8
1363
1364 if tracksize < 32 {
1365 tracksize = 32
1366 }
1367 if stacksize < 16 {
1368 stacksize = 16
1369 }
1370
1371 r.runtrack = make([]int, tracksize)
1372 r.runtrackpos = tracksize
1373
1374 r.runstack = make([]int, stacksize)
1375 r.runstackpos = stacksize
1376
1377 r.runcrawl = make([]int, 32)
1378 r.runcrawlpos = 32
1379}
1380
1381func (r *runner) tidyMatch(quick bool) *Match {
1382 if !quick {
1383 match := r.runmatch
1384
1385 r.runmatch = nil
1386
1387 match.tidy(r.runtextpos)
1388 return match
1389 } else {
1390 // send back our match -- it's not leaving the package, so it's safe to not clean it up
1391 // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
1392 return r.runmatch
1393 }
1394}
1395
1396// capture captures a subexpression. Note that the
1397// capnum used here has already been mapped to a non-sparse
1398// index (by the code generator RegexWriter).
1399func (r *runner) capture(capnum, start, end int) {
1400 if end < start {
1401 T := end
1402 end = start
1403 start = T
1404 }
1405
1406 r.crawl(capnum)
1407 r.runmatch.addMatch(capnum, start, end-start)
1408}
1409
1410// transferCapture captures a subexpression. Note that the
1411// capnum used here has already been mapped to a non-sparse
1412// index (by the code generator RegexWriter).
1413func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1414 var start2, end2 int
1415
1416 // these are the two intervals that are cancelling each other
1417
1418 if end < start {
1419 T := end
1420 end = start
1421 start = T
1422 }
1423
1424 start2 = r.runmatch.matchIndex(uncapnum)
1425 end2 = start2 + r.runmatch.matchLength(uncapnum)
1426
1427 // The new capture gets the innermost defined interval
1428
1429 if start >= end2 {
1430 end = start
1431 start = end2
1432 } else if end <= start2 {
1433 start = start2
1434 } else {
1435 if end > end2 {
1436 end = end2
1437 }
1438 if start2 > start {
1439 start = start2
1440 }
1441 }
1442
1443 r.crawl(uncapnum)
1444 r.runmatch.balanceMatch(uncapnum)
1445
1446 if capnum != -1 {
1447 r.crawl(capnum)
1448 r.runmatch.addMatch(capnum, start, end-start)
1449 }
1450}
1451
1452// revert the last capture
1453func (r *runner) uncapture() {
1454 capnum := r.popcrawl()
1455 r.runmatch.removeMatch(capnum)
1456}
1457
1458//debug
1459
1460func (r *runner) dumpState() {
1461 back := ""
1462 if r.operator&syntax.Back != 0 {
1463 back = " Back"
1464 }
1465 if r.operator&syntax.Back2 != 0 {
1466 back += " Back2"
1467 }
1468 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
1469 r.textposDescription(),
1470 r.stackDescription(r.runtrack, r.runtrackpos),
1471 r.stackDescription(r.runstack, r.runstackpos),
1472 r.code.OpcodeDescription(r.codepos),
1473 back)
1474}
1475
1476func (r *runner) stackDescription(a []int, index int) string {
1477 buf := &bytes.Buffer{}
1478
1479 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1480 if buf.Len() < 8 {
1481 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1482 }
1483
1484 buf.WriteRune('(')
1485 for i := index; i < len(a); i++ {
1486 if i > index {
1487 buf.WriteRune(' ')
1488 }
1489
1490 buf.WriteString(strconv.Itoa(a[i]))
1491 }
1492
1493 buf.WriteRune(')')
1494
1495 return buf.String()
1496}
1497
1498func (r *runner) textposDescription() string {
1499 buf := &bytes.Buffer{}
1500
1501 buf.WriteString(strconv.Itoa(r.runtextpos))
1502
1503 if buf.Len() < 8 {
1504 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1505 }
1506
1507 if r.runtextpos > 0 {
1508 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1509 } else {
1510 buf.WriteRune('^')
1511 }
1512
1513 buf.WriteRune('>')
1514
1515 for i := r.runtextpos; i < r.runtextend; i++ {
1516 buf.WriteString(syntax.CharDescription(r.runtext[i]))
1517 }
1518 if buf.Len() >= 64 {
1519 buf.Truncate(61)
1520 buf.WriteString("...")
1521 } else {
1522 buf.WriteRune('$')
1523 }
1524
1525 return buf.String()
1526}
1527
1528// decide whether the pos
1529// at the specified index is a boundary or not. It's just not worth
1530// emitting inline code for this logic.
1531func (r *runner) isBoundary(index, startpos, endpos int) bool {
1532 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1533 (index < endpos && syntax.IsWordChar(r.runtext[index]))
1534}
1535
1536func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1537 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1538 (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1539}
1540
1541// this seems like a comment to justify randomly picking 1000 :-P
1542// We have determined this value in a series of experiments where x86 retail
1543// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
1544// of TimeoutCheckFrequency did not tend to increase performance; smaller values
1545// of TimeoutCheckFrequency tended to slow down the execution.
1546const timeoutCheckFrequency int = 1000
1547
1548func (r *runner) startTimeoutWatch() {
1549 if r.ignoreTimeout {
1550 return
1551 }
1552
1553 r.timeoutChecksToSkip = timeoutCheckFrequency
1554 r.timeoutAt = time.Now().Add(r.timeout)
1555}
1556
1557func (r *runner) checkTimeout() error {
1558 if r.ignoreTimeout {
1559 return nil
1560 }
1561 r.timeoutChecksToSkip--
1562 if r.timeoutChecksToSkip != 0 {
1563 return nil
1564 }
1565
1566 r.timeoutChecksToSkip = timeoutCheckFrequency
1567 return r.doCheckTimeout()
1568}
1569
1570func (r *runner) doCheckTimeout() error {
1571 current := time.Now()
1572
1573 if current.Before(r.timeoutAt) {
1574 return nil
1575 }
1576
1577 if r.re.Debug() {
1578 //Debug.WriteLine("")
1579 //Debug.WriteLine("RegEx match timeout occurred!")
1580 //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
1581 //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
1582 //Debug.WriteLine("Search pattern: " + _runregex._pattern)
1583 //Debug.WriteLine("Input: " + r.runtext)
1584 //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
1585 }
1586
1587 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1588}
1589
1590func (r *runner) initTrackCount() {
1591 r.runtrackcount = r.code.TrackCount
1592}
1593
1594// getRunner returns a run to use for matching re.
1595// It uses the re's runner cache if possible, to avoid
1596// unnecessary allocation.
1597func (re *Regexp) getRunner() *runner {
1598 re.muRun.Lock()
1599 if n := len(re.runner); n > 0 {
1600 z := re.runner[n-1]
1601 re.runner = re.runner[:n-1]
1602 re.muRun.Unlock()
1603 return z
1604 }
1605 re.muRun.Unlock()
1606 z := &runner{
1607 re: re,
1608 code: re.code,
1609 }
1610 return z
1611}
1612
1613// putRunner returns a runner to the re's cache.
1614// There is no attempt to limit the size of the cache, so it will
1615// grow to the maximum number of simultaneous matches
1616// run using re. (The cache empties when re gets garbage collected.)
1617func (re *Regexp) putRunner(r *runner) {
1618 re.muRun.Lock()
1619 re.runner = append(re.runner, r)
1620 re.muRun.Unlock()
1621}