1package regexp2
2
3import (
4 "bytes"
5 "errors"
6 "fmt"
7 "math"
8 "strconv"
9 "strings"
10 "time"
11 "unicode"
12
13 "github.com/dlclark/regexp2/syntax"
14)
15
16type runner struct {
17 re *Regexp
18 code *syntax.Code
19
20 runtextstart int // starting point for search
21
22 runtext []rune // text to search
23 runtextpos int // current position in text
24 runtextend int
25
26 // The backtracking stack. Opcodes use this to store data regarding
27 // what they have matched and where to backtrack to. Each "frame" on
28 // the stack takes the form of [CodePosition Data1 Data2...], where
29 // CodePosition is the position of the current opcode and
30 // the data values are all optional. The CodePosition can be negative, and
31 // these values (also called "back2") are used by the BranchMark family of opcodes
32 // to indicate whether they are backtracking after a successful or failed
33 // match.
34 // When we backtrack, we pop the CodePosition off the stack, set the current
35 // instruction pointer to that code position, and mark the opcode
36 // with a backtracking flag ("Back"). Each opcode then knows how to
37 // handle its own data.
38 runtrack []int
39 runtrackpos int
40
41 // This stack is used to track text positions across different opcodes.
42 // For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
43 // pair. SetMark records the text position before we match a*b. Then
44 // CaptureMark uses that position to figure out where the capture starts.
45 // Opcodes which push onto this stack are always paired with other opcodes
46 // which will pop the value from it later. A successful match should mean
47 // that this stack is empty.
48 runstack []int
49 runstackpos int
50
51 // The crawl stack is used to keep track of captures. Every time a group
52 // has a capture, we push its group number onto the runcrawl stack. In
53 // the case of a balanced match, we push BOTH groups onto the stack.
54 runcrawl []int
55 runcrawlpos int
56
57 runtrackcount int // count of states that may do backtracking
58
59 runmatch *Match // result object
60
61 ignoreTimeout bool
62 timeout time.Duration // timeout in milliseconds (needed for actual)
63 deadline fasttime
64
65 operator syntax.InstOp
66 codepos int
67 rightToLeft bool
68 caseInsensitive bool
69}
70
71// run searches for matches and can continue from the previous match
72//
73// quick is usually false, but can be true to not return matches, just put it in caches
74// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
75// input is the string to search for our regex pattern
76func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
77
78 // get a cached runner
79 runner := re.getRunner()
80 defer re.putRunner(runner)
81
82 if textstart < 0 {
83 if re.RightToLeft() {
84 textstart = len(input)
85 } else {
86 textstart = 0
87 }
88 }
89
90 return runner.scan(input, textstart, quick, re.MatchTimeout)
91}
92
93// Scans the string to find the first match. Uses the Match object
94// both to feed text in and as a place to store matches that come out.
95//
96// All the action is in the Go() method. Our
97// responsibility is to load up the class members before
98// calling Go.
99//
100// The optimizer can compute a set of candidate starting characters,
101// and we could use a separate method Skip() that will quickly scan past
102// any characters that we know can't match.
103func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
104 r.timeout = timeout
105 r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
106 r.runtextstart = textstart
107 r.runtext = rt
108 r.runtextend = len(rt)
109
110 stoppos := r.runtextend
111 bump := 1
112
113 if r.re.RightToLeft() {
114 bump = -1
115 stoppos = 0
116 }
117
118 r.runtextpos = textstart
119 initted := false
120
121 r.startTimeoutWatch()
122 for {
123 if r.re.Debug() {
124 //fmt.Printf("\nSearch content: %v\n", string(r.runtext))
125 fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
126 fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
127 }
128
129 if r.findFirstChar() {
130 if err := r.checkTimeout(); err != nil {
131 return nil, err
132 }
133
134 if !initted {
135 r.initMatch()
136 initted = true
137 }
138
139 if r.re.Debug() {
140 fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
141 }
142
143 if err := r.execute(); err != nil {
144 return nil, err
145 }
146
147 if r.runmatch.matchcount[0] > 0 {
148 // We'll return a match even if it touches a previous empty match
149 return r.tidyMatch(quick), nil
150 }
151
152 // reset state for another go
153 r.runtrackpos = len(r.runtrack)
154 r.runstackpos = len(r.runstack)
155 r.runcrawlpos = len(r.runcrawl)
156 }
157
158 // failure!
159
160 if r.runtextpos == stoppos {
161 r.tidyMatch(true)
162 return nil, nil
163 }
164
165 // Recognize leading []* and various anchors, and bump on failure accordingly
166
167 // r.bump by one and start again
168
169 r.runtextpos += bump
170 }
171 // We never get here
172}
173
174func (r *runner) execute() error {
175
176 r.goTo(0)
177
178 for {
179
180 if r.re.Debug() {
181 r.dumpState()
182 }
183
184 if err := r.checkTimeout(); err != nil {
185 return err
186 }
187
188 switch r.operator {
189 case syntax.Stop:
190 return nil
191
192 case syntax.Nothing:
193 break
194
195 case syntax.Goto:
196 r.goTo(r.operand(0))
197 continue
198
199 case syntax.Testref:
200 if !r.runmatch.isMatched(r.operand(0)) {
201 break
202 }
203 r.advance(1)
204 continue
205
206 case syntax.Lazybranch:
207 r.trackPush1(r.textPos())
208 r.advance(1)
209 continue
210
211 case syntax.Lazybranch | syntax.Back:
212 r.trackPop()
213 r.textto(r.trackPeek())
214 r.goTo(r.operand(0))
215 continue
216
217 case syntax.Setmark:
218 r.stackPush(r.textPos())
219 r.trackPush()
220 r.advance(0)
221 continue
222
223 case syntax.Nullmark:
224 r.stackPush(-1)
225 r.trackPush()
226 r.advance(0)
227 continue
228
229 case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
230 r.stackPop()
231 break
232
233 case syntax.Getmark:
234 r.stackPop()
235 r.trackPush1(r.stackPeek())
236 r.textto(r.stackPeek())
237 r.advance(0)
238 continue
239
240 case syntax.Getmark | syntax.Back:
241 r.trackPop()
242 r.stackPush(r.trackPeek())
243 break
244
245 case syntax.Capturemark:
246 if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
247 break
248 }
249 r.stackPop()
250 if r.operand(1) != -1 {
251 r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
252 } else {
253 r.capture(r.operand(0), r.stackPeek(), r.textPos())
254 }
255 r.trackPush1(r.stackPeek())
256
257 r.advance(2)
258
259 continue
260
261 case syntax.Capturemark | syntax.Back:
262 r.trackPop()
263 r.stackPush(r.trackPeek())
264 r.uncapture()
265 if r.operand(0) != -1 && r.operand(1) != -1 {
266 r.uncapture()
267 }
268
269 break
270
271 case syntax.Branchmark:
272 r.stackPop()
273
274 matched := r.textPos() - r.stackPeek()
275
276 if matched != 0 { // Nonempty match -> loop now
277 r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
278 r.stackPush(r.textPos()) // Make new mark
279 r.goTo(r.operand(0)) // Loop
280 } else { // Empty match -> straight now
281 r.trackPushNeg1(r.stackPeek()) // Save old mark
282 r.advance(1) // Straight
283 }
284 continue
285
286 case syntax.Branchmark | syntax.Back:
287 r.trackPopN(2)
288 r.stackPop()
289 r.textto(r.trackPeekN(1)) // Recall position
290 r.trackPushNeg1(r.trackPeek()) // Save old mark
291 r.advance(1) // Straight
292 continue
293
294 case syntax.Branchmark | syntax.Back2:
295 r.trackPop()
296 r.stackPush(r.trackPeek()) // Recall old mark
297 break // Backtrack
298
299 case syntax.Lazybranchmark:
300 {
301 // We hit this the first time through a lazy loop and after each
302 // successful match of the inner expression. It simply continues
303 // on and doesn't loop.
304 r.stackPop()
305
306 oldMarkPos := r.stackPeek()
307
308 if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
309 if oldMarkPos != -1 {
310 r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
311 } else {
312 r.trackPush2(r.textPos(), r.textPos())
313 }
314 } else {
315 // The inner expression found an empty match, so we'll go directly to 'back2' if we
316 // backtrack. In this case, we need to push something on the stack, since back2 pops.
317 // However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
318 // position associated with that empty match.
319 r.stackPush(oldMarkPos)
320
321 r.trackPushNeg1(r.stackPeek()) // Save old mark
322 }
323 r.advance(1)
324 continue
325 }
326
327 case syntax.Lazybranchmark | syntax.Back:
328
329 // After the first time, Lazybranchmark | syntax.Back occurs
330 // with each iteration of the loop, and therefore with every attempted
331 // match of the inner expression. We'll try to match the inner expression,
332 // then go back to Lazybranchmark if successful. If the inner expression
333 // fails, we go to Lazybranchmark | syntax.Back2
334
335 r.trackPopN(2)
336 pos := r.trackPeekN(1)
337 r.trackPushNeg1(r.trackPeek()) // Save old mark
338 r.stackPush(pos) // Make new mark
339 r.textto(pos) // Recall position
340 r.goTo(r.operand(0)) // Loop
341 continue
342
343 case syntax.Lazybranchmark | syntax.Back2:
344 // The lazy loop has failed. We'll do a true backtrack and
345 // start over before the lazy loop.
346 r.stackPop()
347 r.trackPop()
348 r.stackPush(r.trackPeek()) // Recall old mark
349 break
350
351 case syntax.Setcount:
352 r.stackPush2(r.textPos(), r.operand(0))
353 r.trackPush()
354 r.advance(1)
355 continue
356
357 case syntax.Nullcount:
358 r.stackPush2(-1, r.operand(0))
359 r.trackPush()
360 r.advance(1)
361 continue
362
363 case syntax.Setcount | syntax.Back:
364 r.stackPopN(2)
365 break
366
367 case syntax.Nullcount | syntax.Back:
368 r.stackPopN(2)
369 break
370
371 case syntax.Branchcount:
372 // r.stackPush:
373 // 0: Mark
374 // 1: Count
375
376 r.stackPopN(2)
377 mark := r.stackPeek()
378 count := r.stackPeekN(1)
379 matched := r.textPos() - mark
380
381 if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
382 r.trackPushNeg2(mark, count) // Save old mark, count
383 r.advance(2) // Straight
384 } else { // Nonempty match -> count+loop now
385 r.trackPush1(mark) // remember mark
386 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
387 r.goTo(r.operand(0)) // Loop
388 }
389 continue
390
391 case syntax.Branchcount | syntax.Back:
392 // r.trackPush:
393 // 0: Previous mark
394 // r.stackPush:
395 // 0: Mark (= current pos, discarded)
396 // 1: Count
397 r.trackPop()
398 r.stackPopN(2)
399 if r.stackPeekN(1) > 0 { // Positive -> can go straight
400 r.textto(r.stackPeek()) // Zap to mark
401 r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
402 r.advance(2) // Straight
403 continue
404 }
405 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
406 break
407
408 case syntax.Branchcount | syntax.Back2:
409 // r.trackPush:
410 // 0: Previous mark
411 // 1: Previous count
412 r.trackPopN(2)
413 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
414 break // Backtrack
415
416 case syntax.Lazybranchcount:
417 // r.stackPush:
418 // 0: Mark
419 // 1: Count
420
421 r.stackPopN(2)
422 mark := r.stackPeek()
423 count := r.stackPeekN(1)
424
425 if count < 0 { // Negative count -> loop now
426 r.trackPushNeg1(mark) // Save old mark
427 r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
428 r.goTo(r.operand(0)) // Loop
429 } else { // Nonneg count -> straight now
430 r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
431 r.advance(2) // Straight
432 }
433 continue
434
435 case syntax.Lazybranchcount | syntax.Back:
436 // r.trackPush:
437 // 0: Mark
438 // 1: Count
439 // 2: r.textPos
440
441 r.trackPopN(3)
442 mark := r.trackPeek()
443 textpos := r.trackPeekN(2)
444
445 if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
446 r.textto(textpos) // Recall position
447 r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
448 r.trackPushNeg1(mark) // Save old mark
449 r.goTo(r.operand(0)) // Loop
450 continue
451 } else { // Max loops or empty match -> backtrack
452 r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
453 break // backtrack
454 }
455
456 case syntax.Lazybranchcount | syntax.Back2:
457 // r.trackPush:
458 // 0: Previous mark
459 // r.stackPush:
460 // 0: Mark (== current pos, discarded)
461 // 1: Count
462 r.trackPop()
463 r.stackPopN(2)
464 r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
465 break // Backtrack
466
467 case syntax.Setjump:
468 r.stackPush2(r.trackpos(), r.crawlpos())
469 r.trackPush()
470 r.advance(0)
471 continue
472
473 case syntax.Setjump | syntax.Back:
474 r.stackPopN(2)
475 break
476
477 case syntax.Backjump:
478 // r.stackPush:
479 // 0: Saved trackpos
480 // 1: r.crawlpos
481 r.stackPopN(2)
482 r.trackto(r.stackPeek())
483
484 for r.crawlpos() != r.stackPeekN(1) {
485 r.uncapture()
486 }
487
488 break
489
490 case syntax.Forejump:
491 // r.stackPush:
492 // 0: Saved trackpos
493 // 1: r.crawlpos
494 r.stackPopN(2)
495 r.trackto(r.stackPeek())
496 r.trackPush1(r.stackPeekN(1))
497 r.advance(0)
498 continue
499
500 case syntax.Forejump | syntax.Back:
501 // r.trackPush:
502 // 0: r.crawlpos
503 r.trackPop()
504
505 for r.crawlpos() != r.trackPeek() {
506 r.uncapture()
507 }
508
509 break
510
511 case syntax.Bol:
512 if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
513 break
514 }
515 r.advance(0)
516 continue
517
518 case syntax.Eol:
519 if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
520 break
521 }
522 r.advance(0)
523 continue
524
525 case syntax.Boundary:
526 if !r.isBoundary(r.textPos(), 0, r.runtextend) {
527 break
528 }
529 r.advance(0)
530 continue
531
532 case syntax.Nonboundary:
533 if r.isBoundary(r.textPos(), 0, r.runtextend) {
534 break
535 }
536 r.advance(0)
537 continue
538
539 case syntax.ECMABoundary:
540 if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
541 break
542 }
543 r.advance(0)
544 continue
545
546 case syntax.NonECMABoundary:
547 if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
548 break
549 }
550 r.advance(0)
551 continue
552
553 case syntax.Beginning:
554 if r.leftchars() > 0 {
555 break
556 }
557 r.advance(0)
558 continue
559
560 case syntax.Start:
561 if r.textPos() != r.textstart() {
562 break
563 }
564 r.advance(0)
565 continue
566
567 case syntax.EndZ:
568 rchars := r.rightchars()
569 if rchars > 1 {
570 break
571 }
572 // RE2 and EcmaScript define $ as "asserts position at the end of the string"
573 // PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
574 if (r.re.options & (RE2 | ECMAScript)) != 0 {
575 // RE2/Ecmascript mode
576 if rchars > 0 {
577 break
578 }
579 } else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
580 // "regular" mode
581 break
582 }
583
584 r.advance(0)
585 continue
586
587 case syntax.End:
588 if r.rightchars() > 0 {
589 break
590 }
591 r.advance(0)
592 continue
593
594 case syntax.One:
595 if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
596 break
597 }
598
599 r.advance(1)
600 continue
601
602 case syntax.Notone:
603 if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
604 break
605 }
606
607 r.advance(1)
608 continue
609
610 case syntax.Set:
611
612 if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
613 break
614 }
615
616 r.advance(1)
617 continue
618
619 case syntax.Multi:
620 if !r.runematch(r.code.Strings[r.operand(0)]) {
621 break
622 }
623
624 r.advance(1)
625 continue
626
627 case syntax.Ref:
628
629 capnum := r.operand(0)
630
631 if r.runmatch.isMatched(capnum) {
632 if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
633 break
634 }
635 } else {
636 if (r.re.options & ECMAScript) == 0 {
637 break
638 }
639 }
640
641 r.advance(1)
642 continue
643
644 case syntax.Onerep:
645
646 c := r.operand(1)
647
648 if r.forwardchars() < c {
649 break
650 }
651
652 ch := rune(r.operand(0))
653
654 for c > 0 {
655 if r.forwardcharnext() != ch {
656 goto BreakBackward
657 }
658 c--
659 }
660
661 r.advance(2)
662 continue
663
664 case syntax.Notonerep:
665
666 c := r.operand(1)
667
668 if r.forwardchars() < c {
669 break
670 }
671 ch := rune(r.operand(0))
672
673 for c > 0 {
674 if r.forwardcharnext() == ch {
675 goto BreakBackward
676 }
677 c--
678 }
679
680 r.advance(2)
681 continue
682
683 case syntax.Setrep:
684
685 c := r.operand(1)
686
687 if r.forwardchars() < c {
688 break
689 }
690
691 set := r.code.Sets[r.operand(0)]
692
693 for c > 0 {
694 if !set.CharIn(r.forwardcharnext()) {
695 goto BreakBackward
696 }
697 c--
698 }
699
700 r.advance(2)
701 continue
702
703 case syntax.Oneloop:
704
705 c := r.operand(1)
706
707 if c > r.forwardchars() {
708 c = r.forwardchars()
709 }
710
711 ch := rune(r.operand(0))
712 i := c
713
714 for ; i > 0; i-- {
715 if r.forwardcharnext() != ch {
716 r.backwardnext()
717 break
718 }
719 }
720
721 if c > i {
722 r.trackPush2(c-i-1, r.textPos()-r.bump())
723 }
724
725 r.advance(2)
726 continue
727
728 case syntax.Notoneloop:
729
730 c := r.operand(1)
731
732 if c > r.forwardchars() {
733 c = r.forwardchars()
734 }
735
736 ch := rune(r.operand(0))
737 i := c
738
739 for ; i > 0; i-- {
740 if r.forwardcharnext() == ch {
741 r.backwardnext()
742 break
743 }
744 }
745
746 if c > i {
747 r.trackPush2(c-i-1, r.textPos()-r.bump())
748 }
749
750 r.advance(2)
751 continue
752
753 case syntax.Setloop:
754
755 c := r.operand(1)
756
757 if c > r.forwardchars() {
758 c = r.forwardchars()
759 }
760
761 set := r.code.Sets[r.operand(0)]
762 i := c
763
764 for ; i > 0; i-- {
765 if !set.CharIn(r.forwardcharnext()) {
766 r.backwardnext()
767 break
768 }
769 }
770
771 if c > i {
772 r.trackPush2(c-i-1, r.textPos()-r.bump())
773 }
774
775 r.advance(2)
776 continue
777
778 case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
779
780 r.trackPopN(2)
781 i := r.trackPeek()
782 pos := r.trackPeekN(1)
783
784 r.textto(pos)
785
786 if i > 0 {
787 r.trackPush2(i-1, pos-r.bump())
788 }
789
790 r.advance(2)
791 continue
792
793 case syntax.Setloop | syntax.Back:
794
795 r.trackPopN(2)
796 i := r.trackPeek()
797 pos := r.trackPeekN(1)
798
799 r.textto(pos)
800
801 if i > 0 {
802 r.trackPush2(i-1, pos-r.bump())
803 }
804
805 r.advance(2)
806 continue
807
808 case syntax.Onelazy, syntax.Notonelazy:
809
810 c := r.operand(1)
811
812 if c > r.forwardchars() {
813 c = r.forwardchars()
814 }
815
816 if c > 0 {
817 r.trackPush2(c-1, r.textPos())
818 }
819
820 r.advance(2)
821 continue
822
823 case syntax.Setlazy:
824
825 c := r.operand(1)
826
827 if c > r.forwardchars() {
828 c = r.forwardchars()
829 }
830
831 if c > 0 {
832 r.trackPush2(c-1, r.textPos())
833 }
834
835 r.advance(2)
836 continue
837
838 case syntax.Onelazy | syntax.Back:
839
840 r.trackPopN(2)
841 pos := r.trackPeekN(1)
842 r.textto(pos)
843
844 if r.forwardcharnext() != rune(r.operand(0)) {
845 break
846 }
847
848 i := r.trackPeek()
849
850 if i > 0 {
851 r.trackPush2(i-1, pos+r.bump())
852 }
853
854 r.advance(2)
855 continue
856
857 case syntax.Notonelazy | syntax.Back:
858
859 r.trackPopN(2)
860 pos := r.trackPeekN(1)
861 r.textto(pos)
862
863 if r.forwardcharnext() == rune(r.operand(0)) {
864 break
865 }
866
867 i := r.trackPeek()
868
869 if i > 0 {
870 r.trackPush2(i-1, pos+r.bump())
871 }
872
873 r.advance(2)
874 continue
875
876 case syntax.Setlazy | syntax.Back:
877
878 r.trackPopN(2)
879 pos := r.trackPeekN(1)
880 r.textto(pos)
881
882 if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
883 break
884 }
885
886 i := r.trackPeek()
887
888 if i > 0 {
889 r.trackPush2(i-1, pos+r.bump())
890 }
891
892 r.advance(2)
893 continue
894
895 default:
896 return errors.New("unknown state in regex runner")
897 }
898
899 BreakBackward:
900 ;
901
902 // "break Backward" comes here:
903 r.backtrack()
904 }
905}
906
907// increase the size of stack and track storage
908func (r *runner) ensureStorage() {
909 if r.runstackpos < r.runtrackcount*4 {
910 doubleIntSlice(&r.runstack, &r.runstackpos)
911 }
912 if r.runtrackpos < r.runtrackcount*4 {
913 doubleIntSlice(&r.runtrack, &r.runtrackpos)
914 }
915}
916
917func doubleIntSlice(s *[]int, pos *int) {
918 oldLen := len(*s)
919 newS := make([]int, oldLen*2)
920
921 copy(newS[oldLen:], *s)
922 *pos += oldLen
923 *s = newS
924}
925
926// Save a number on the longjump unrolling stack
927func (r *runner) crawl(i int) {
928 if r.runcrawlpos == 0 {
929 doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
930 }
931 r.runcrawlpos--
932 r.runcrawl[r.runcrawlpos] = i
933}
934
935// Remove a number from the longjump unrolling stack
936func (r *runner) popcrawl() int {
937 val := r.runcrawl[r.runcrawlpos]
938 r.runcrawlpos++
939 return val
940}
941
942// Get the height of the stack
943func (r *runner) crawlpos() int {
944 return len(r.runcrawl) - r.runcrawlpos
945}
946
947func (r *runner) advance(i int) {
948 r.codepos += (i + 1)
949 r.setOperator(r.code.Codes[r.codepos])
950}
951
952func (r *runner) goTo(newpos int) {
953 // when branching backward or in place, ensure storage
954 if newpos <= r.codepos {
955 r.ensureStorage()
956 }
957
958 r.setOperator(r.code.Codes[newpos])
959 r.codepos = newpos
960}
961
962func (r *runner) textto(newpos int) {
963 r.runtextpos = newpos
964}
965
966func (r *runner) trackto(newpos int) {
967 r.runtrackpos = len(r.runtrack) - newpos
968}
969
970func (r *runner) textstart() int {
971 return r.runtextstart
972}
973
974func (r *runner) textPos() int {
975 return r.runtextpos
976}
977
978// push onto the backtracking stack
979func (r *runner) trackpos() int {
980 return len(r.runtrack) - r.runtrackpos
981}
982
983func (r *runner) trackPush() {
984 r.runtrackpos--
985 r.runtrack[r.runtrackpos] = r.codepos
986}
987
988func (r *runner) trackPush1(I1 int) {
989 r.runtrackpos--
990 r.runtrack[r.runtrackpos] = I1
991 r.runtrackpos--
992 r.runtrack[r.runtrackpos] = r.codepos
993}
994
995func (r *runner) trackPush2(I1, I2 int) {
996 r.runtrackpos--
997 r.runtrack[r.runtrackpos] = I1
998 r.runtrackpos--
999 r.runtrack[r.runtrackpos] = I2
1000 r.runtrackpos--
1001 r.runtrack[r.runtrackpos] = r.codepos
1002}
1003
1004func (r *runner) trackPush3(I1, I2, I3 int) {
1005 r.runtrackpos--
1006 r.runtrack[r.runtrackpos] = I1
1007 r.runtrackpos--
1008 r.runtrack[r.runtrackpos] = I2
1009 r.runtrackpos--
1010 r.runtrack[r.runtrackpos] = I3
1011 r.runtrackpos--
1012 r.runtrack[r.runtrackpos] = r.codepos
1013}
1014
1015func (r *runner) trackPushNeg1(I1 int) {
1016 r.runtrackpos--
1017 r.runtrack[r.runtrackpos] = I1
1018 r.runtrackpos--
1019 r.runtrack[r.runtrackpos] = -r.codepos
1020}
1021
1022func (r *runner) trackPushNeg2(I1, I2 int) {
1023 r.runtrackpos--
1024 r.runtrack[r.runtrackpos] = I1
1025 r.runtrackpos--
1026 r.runtrack[r.runtrackpos] = I2
1027 r.runtrackpos--
1028 r.runtrack[r.runtrackpos] = -r.codepos
1029}
1030
1031func (r *runner) backtrack() {
1032 newpos := r.runtrack[r.runtrackpos]
1033 r.runtrackpos++
1034
1035 if r.re.Debug() {
1036 if newpos < 0 {
1037 fmt.Printf(" Backtracking (back2) to code position %v\n", -newpos)
1038 } else {
1039 fmt.Printf(" Backtracking to code position %v\n", newpos)
1040 }
1041 }
1042
1043 if newpos < 0 {
1044 newpos = -newpos
1045 r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1046 } else {
1047 r.setOperator(r.code.Codes[newpos] | syntax.Back)
1048 }
1049
1050 // When branching backward, ensure storage
1051 if newpos < r.codepos {
1052 r.ensureStorage()
1053 }
1054
1055 r.codepos = newpos
1056}
1057
1058func (r *runner) setOperator(op int) {
1059 r.caseInsensitive = (0 != (op & syntax.Ci))
1060 r.rightToLeft = (0 != (op & syntax.Rtl))
1061 r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1062}
1063
1064func (r *runner) trackPop() {
1065 r.runtrackpos++
1066}
1067
1068// pop framesize items from the backtracking stack
1069func (r *runner) trackPopN(framesize int) {
1070 r.runtrackpos += framesize
1071}
1072
1073// Technically we are actually peeking at items already popped. So if you want to
1074// get and pop the top item from the stack, you do
1075// r.trackPop();
1076// r.trackPeek();
1077func (r *runner) trackPeek() int {
1078 return r.runtrack[r.runtrackpos-1]
1079}
1080
1081// get the ith element down on the backtracking stack
1082func (r *runner) trackPeekN(i int) int {
1083 return r.runtrack[r.runtrackpos-i-1]
1084}
1085
1086// Push onto the grouping stack
1087func (r *runner) stackPush(I1 int) {
1088 r.runstackpos--
1089 r.runstack[r.runstackpos] = I1
1090}
1091
1092func (r *runner) stackPush2(I1, I2 int) {
1093 r.runstackpos--
1094 r.runstack[r.runstackpos] = I1
1095 r.runstackpos--
1096 r.runstack[r.runstackpos] = I2
1097}
1098
1099func (r *runner) stackPop() {
1100 r.runstackpos++
1101}
1102
1103// pop framesize items from the grouping stack
1104func (r *runner) stackPopN(framesize int) {
1105 r.runstackpos += framesize
1106}
1107
1108// Technically we are actually peeking at items already popped. So if you want to
1109// get and pop the top item from the stack, you do
1110// r.stackPop();
1111// r.stackPeek();
1112func (r *runner) stackPeek() int {
1113 return r.runstack[r.runstackpos-1]
1114}
1115
1116// get the ith element down on the grouping stack
1117func (r *runner) stackPeekN(i int) int {
1118 return r.runstack[r.runstackpos-i-1]
1119}
1120
1121func (r *runner) operand(i int) int {
1122 return r.code.Codes[r.codepos+i+1]
1123}
1124
1125func (r *runner) leftchars() int {
1126 return r.runtextpos
1127}
1128
1129func (r *runner) rightchars() int {
1130 return r.runtextend - r.runtextpos
1131}
1132
1133func (r *runner) bump() int {
1134 if r.rightToLeft {
1135 return -1
1136 }
1137 return 1
1138}
1139
1140func (r *runner) forwardchars() int {
1141 if r.rightToLeft {
1142 return r.runtextpos
1143 }
1144 return r.runtextend - r.runtextpos
1145}
1146
1147func (r *runner) forwardcharnext() rune {
1148 var ch rune
1149 if r.rightToLeft {
1150 r.runtextpos--
1151 ch = r.runtext[r.runtextpos]
1152 } else {
1153 ch = r.runtext[r.runtextpos]
1154 r.runtextpos++
1155 }
1156
1157 if r.caseInsensitive {
1158 return unicode.ToLower(ch)
1159 }
1160 return ch
1161}
1162
1163func (r *runner) runematch(str []rune) bool {
1164 var pos int
1165
1166 c := len(str)
1167 if !r.rightToLeft {
1168 if r.runtextend-r.runtextpos < c {
1169 return false
1170 }
1171
1172 pos = r.runtextpos + c
1173 } else {
1174 if r.runtextpos-0 < c {
1175 return false
1176 }
1177
1178 pos = r.runtextpos
1179 }
1180
1181 if !r.caseInsensitive {
1182 for c != 0 {
1183 c--
1184 pos--
1185 if str[c] != r.runtext[pos] {
1186 return false
1187 }
1188 }
1189 } else {
1190 for c != 0 {
1191 c--
1192 pos--
1193 if str[c] != unicode.ToLower(r.runtext[pos]) {
1194 return false
1195 }
1196 }
1197 }
1198
1199 if !r.rightToLeft {
1200 pos += len(str)
1201 }
1202
1203 r.runtextpos = pos
1204
1205 return true
1206}
1207
1208func (r *runner) refmatch(index, len int) bool {
1209 var c, pos, cmpos int
1210
1211 if !r.rightToLeft {
1212 if r.runtextend-r.runtextpos < len {
1213 return false
1214 }
1215
1216 pos = r.runtextpos + len
1217 } else {
1218 if r.runtextpos-0 < len {
1219 return false
1220 }
1221
1222 pos = r.runtextpos
1223 }
1224 cmpos = index + len
1225
1226 c = len
1227
1228 if !r.caseInsensitive {
1229 for c != 0 {
1230 c--
1231 cmpos--
1232 pos--
1233 if r.runtext[cmpos] != r.runtext[pos] {
1234 return false
1235 }
1236
1237 }
1238 } else {
1239 for c != 0 {
1240 c--
1241 cmpos--
1242 pos--
1243
1244 if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1245 return false
1246 }
1247 }
1248 }
1249
1250 if !r.rightToLeft {
1251 pos += len
1252 }
1253
1254 r.runtextpos = pos
1255
1256 return true
1257}
1258
1259func (r *runner) backwardnext() {
1260 if r.rightToLeft {
1261 r.runtextpos++
1262 } else {
1263 r.runtextpos--
1264 }
1265}
1266
1267func (r *runner) charAt(j int) rune {
1268 return r.runtext[j]
1269}
1270
1271func (r *runner) findFirstChar() bool {
1272
1273 if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1274 if !r.code.RightToLeft {
1275 if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1276 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1277 r.runtextpos = r.runtextend
1278 return false
1279 }
1280 if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1281 r.runtextpos = r.runtextend - 1
1282 } else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1283 r.runtextpos = r.runtextend
1284 }
1285 } else {
1286 if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1287 (0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1288 (r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1289 (0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1290 r.runtextpos = 0
1291 return false
1292 }
1293 if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1294 r.runtextpos = 0
1295 }
1296 }
1297
1298 if r.code.BmPrefix != nil {
1299 return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1300 }
1301
1302 return true // found a valid start or end anchor
1303 } else if r.code.BmPrefix != nil {
1304 r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1305
1306 if r.runtextpos == -1 {
1307 if r.code.RightToLeft {
1308 r.runtextpos = 0
1309 } else {
1310 r.runtextpos = r.runtextend
1311 }
1312 return false
1313 }
1314
1315 return true
1316 } else if r.code.FcPrefix == nil {
1317 return true
1318 }
1319
1320 r.rightToLeft = r.code.RightToLeft
1321 r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1322
1323 set := r.code.FcPrefix.PrefixSet
1324 if set.IsSingleton() {
1325 ch := set.SingletonChar()
1326 for i := r.forwardchars(); i > 0; i-- {
1327 if ch == r.forwardcharnext() {
1328 r.backwardnext()
1329 return true
1330 }
1331 }
1332 } else {
1333 for i := r.forwardchars(); i > 0; i-- {
1334 n := r.forwardcharnext()
1335 //fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
1336 if set.CharIn(n) {
1337 r.backwardnext()
1338 return true
1339 }
1340 }
1341 }
1342
1343 return false
1344}
1345
1346func (r *runner) initMatch() {
1347 // Use a hashtable'ed Match object if the capture numbers are sparse
1348
1349 if r.runmatch == nil {
1350 if r.re.caps != nil {
1351 r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1352 } else {
1353 r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1354 }
1355 } else {
1356 r.runmatch.reset(r.runtext, r.runtextstart)
1357 }
1358
1359 // note we test runcrawl, because it is the last one to be allocated
1360 // If there is an alloc failure in the middle of the three allocations,
1361 // we may still return to reuse this instance, and we want to behave
1362 // as if the allocations didn't occur. (we used to test _trackcount != 0)
1363
1364 if r.runcrawl != nil {
1365 r.runtrackpos = len(r.runtrack)
1366 r.runstackpos = len(r.runstack)
1367 r.runcrawlpos = len(r.runcrawl)
1368 return
1369 }
1370
1371 r.initTrackCount()
1372
1373 tracksize := r.runtrackcount * 8
1374 stacksize := r.runtrackcount * 8
1375
1376 if tracksize < 32 {
1377 tracksize = 32
1378 }
1379 if stacksize < 16 {
1380 stacksize = 16
1381 }
1382
1383 r.runtrack = make([]int, tracksize)
1384 r.runtrackpos = tracksize
1385
1386 r.runstack = make([]int, stacksize)
1387 r.runstackpos = stacksize
1388
1389 r.runcrawl = make([]int, 32)
1390 r.runcrawlpos = 32
1391}
1392
1393func (r *runner) tidyMatch(quick bool) *Match {
1394 if !quick {
1395 match := r.runmatch
1396
1397 r.runmatch = nil
1398
1399 match.tidy(r.runtextpos)
1400 return match
1401 } else {
1402 // send back our match -- it's not leaving the package, so it's safe to not clean it up
1403 // this reduces allocs for frequent calls to the "IsMatch" bool-only functions
1404 return r.runmatch
1405 }
1406}
1407
1408// capture captures a subexpression. Note that the
1409// capnum used here has already been mapped to a non-sparse
1410// index (by the code generator RegexWriter).
1411func (r *runner) capture(capnum, start, end int) {
1412 if end < start {
1413 T := end
1414 end = start
1415 start = T
1416 }
1417
1418 r.crawl(capnum)
1419 r.runmatch.addMatch(capnum, start, end-start)
1420}
1421
1422// transferCapture captures a subexpression. Note that the
1423// capnum used here has already been mapped to a non-sparse
1424// index (by the code generator RegexWriter).
1425func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1426 var start2, end2 int
1427
1428 // these are the two intervals that are cancelling each other
1429
1430 if end < start {
1431 T := end
1432 end = start
1433 start = T
1434 }
1435
1436 start2 = r.runmatch.matchIndex(uncapnum)
1437 end2 = start2 + r.runmatch.matchLength(uncapnum)
1438
1439 // The new capture gets the innermost defined interval
1440
1441 if start >= end2 {
1442 end = start
1443 start = end2
1444 } else if end <= start2 {
1445 start = start2
1446 } else {
1447 if end > end2 {
1448 end = end2
1449 }
1450 if start2 > start {
1451 start = start2
1452 }
1453 }
1454
1455 r.crawl(uncapnum)
1456 r.runmatch.balanceMatch(uncapnum)
1457
1458 if capnum != -1 {
1459 r.crawl(capnum)
1460 r.runmatch.addMatch(capnum, start, end-start)
1461 }
1462}
1463
1464// revert the last capture
1465func (r *runner) uncapture() {
1466 capnum := r.popcrawl()
1467 r.runmatch.removeMatch(capnum)
1468}
1469
1470//debug
1471
1472func (r *runner) dumpState() {
1473 back := ""
1474 if r.operator&syntax.Back != 0 {
1475 back = " Back"
1476 }
1477 if r.operator&syntax.Back2 != 0 {
1478 back += " Back2"
1479 }
1480 fmt.Printf("Text: %v\nTrack: %v\nStack: %v\n %s%s\n\n",
1481 r.textposDescription(),
1482 r.stackDescription(r.runtrack, r.runtrackpos),
1483 r.stackDescription(r.runstack, r.runstackpos),
1484 r.code.OpcodeDescription(r.codepos),
1485 back)
1486}
1487
1488func (r *runner) stackDescription(a []int, index int) string {
1489 buf := &bytes.Buffer{}
1490
1491 fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1492 if buf.Len() < 8 {
1493 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1494 }
1495
1496 buf.WriteRune('(')
1497 for i := index; i < len(a); i++ {
1498 if i > index {
1499 buf.WriteRune(' ')
1500 }
1501
1502 buf.WriteString(strconv.Itoa(a[i]))
1503 }
1504
1505 buf.WriteRune(')')
1506
1507 return buf.String()
1508}
1509
1510func (r *runner) textposDescription() string {
1511 buf := &bytes.Buffer{}
1512
1513 buf.WriteString(strconv.Itoa(r.runtextpos))
1514
1515 if buf.Len() < 8 {
1516 buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1517 }
1518
1519 if r.runtextpos > 0 {
1520 buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1521 } else {
1522 buf.WriteRune('^')
1523 }
1524
1525 buf.WriteRune('>')
1526
1527 for i := r.runtextpos; i < r.runtextend; i++ {
1528 buf.WriteString(syntax.CharDescription(r.runtext[i]))
1529 }
1530 if buf.Len() >= 64 {
1531 buf.Truncate(61)
1532 buf.WriteString("...")
1533 } else {
1534 buf.WriteRune('$')
1535 }
1536
1537 return buf.String()
1538}
1539
1540// decide whether the pos
1541// at the specified index is a boundary or not. It's just not worth
1542// emitting inline code for this logic.
1543func (r *runner) isBoundary(index, startpos, endpos int) bool {
1544 return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1545 (index < endpos && syntax.IsWordChar(r.runtext[index]))
1546}
1547
1548func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1549 return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1550 (index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1551}
1552
1553func (r *runner) startTimeoutWatch() {
1554 if r.ignoreTimeout {
1555 return
1556 }
1557 r.deadline = makeDeadline(r.timeout)
1558}
1559
1560func (r *runner) checkTimeout() error {
1561 if r.ignoreTimeout || !r.deadline.reached() {
1562 return nil
1563 }
1564
1565 if r.re.Debug() {
1566 //Debug.WriteLine("")
1567 //Debug.WriteLine("RegEx match timeout occurred!")
1568 //Debug.WriteLine("Specified timeout: " + TimeSpan.FromMilliseconds(_timeout).ToString())
1569 //Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
1570 //Debug.WriteLine("Search pattern: " + _runregex._pattern)
1571 //Debug.WriteLine("Input: " + r.runtext)
1572 //Debug.WriteLine("About to throw RegexMatchTimeoutException.")
1573 }
1574
1575 return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1576}
1577
1578func (r *runner) initTrackCount() {
1579 r.runtrackcount = r.code.TrackCount
1580}
1581
1582// getRunner returns a run to use for matching re.
1583// It uses the re's runner cache if possible, to avoid
1584// unnecessary allocation.
1585func (re *Regexp) getRunner() *runner {
1586 re.muRun.Lock()
1587 if n := len(re.runner); n > 0 {
1588 z := re.runner[n-1]
1589 re.runner = re.runner[:n-1]
1590 re.muRun.Unlock()
1591 return z
1592 }
1593 re.muRun.Unlock()
1594 z := &runner{
1595 re: re,
1596 code: re.code,
1597 }
1598 return z
1599}
1600
1601// putRunner returns a runner to the re's cache.
1602// There is no attempt to limit the size of the cache, so it will
1603// grow to the maximum number of simultaneous matches
1604// run using re. (The cache empties when re gets garbage collected.)
1605func (re *Regexp) putRunner(r *runner) {
1606 re.muRun.Lock()
1607 r.runtext = nil
1608 if r.runmatch != nil {
1609 r.runmatch.text = nil
1610 }
1611 re.runner = append(re.runner, r)
1612 re.muRun.Unlock()
1613}