runner.go

   1package regexp2
   2
   3import (
   4	"bytes"
   5	"errors"
   6	"fmt"
   7	"math"
   8	"strconv"
   9	"strings"
  10	"time"
  11	"unicode"
  12
  13	"github.com/dlclark/regexp2/syntax"
  14)
  15
  16type runner struct {
  17	re   *Regexp
  18	code *syntax.Code
  19
  20	runtextstart int // starting point for search
  21
  22	runtext    []rune // text to search
  23	runtextpos int    // current position in text
  24	runtextend int
  25
  26	// The backtracking stack.  Opcodes use this to store data regarding
  27	// what they have matched and where to backtrack to.  Each "frame" on
  28	// the stack takes the form of [CodePosition Data1 Data2...], where
  29	// CodePosition is the position of the current opcode and
  30	// the data values are all optional.  The CodePosition can be negative, and
  31	// these values (also called "back2") are used by the BranchMark family of opcodes
  32	// to indicate whether they are backtracking after a successful or failed
  33	// match.
  34	// When we backtrack, we pop the CodePosition off the stack, set the current
  35	// instruction pointer to that code position, and mark the opcode
  36	// with a backtracking flag ("Back").  Each opcode then knows how to
  37	// handle its own data.
  38	runtrack    []int
  39	runtrackpos int
  40
  41	// This stack is used to track text positions across different opcodes.
  42	// For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
  43	// pair. SetMark records the text position before we match a*b.  Then
  44	// CaptureMark uses that position to figure out where the capture starts.
  45	// Opcodes which push onto this stack are always paired with other opcodes
  46	// which will pop the value from it later.  A successful match should mean
  47	// that this stack is empty.
  48	runstack    []int
  49	runstackpos int
  50
  51	// The crawl stack is used to keep track of captures.  Every time a group
  52	// has a capture, we push its group number onto the runcrawl stack.  In
  53	// the case of a balanced match, we push BOTH groups onto the stack.
  54	runcrawl    []int
  55	runcrawlpos int
  56
  57	runtrackcount int // count of states that may do backtracking
  58
  59	runmatch *Match // result object
  60
  61	ignoreTimeout       bool
  62	timeout             time.Duration // timeout in milliseconds (needed for actual)
  63	timeoutChecksToSkip int
  64	timeoutAt           time.Time
  65
  66	operator        syntax.InstOp
  67	codepos         int
  68	rightToLeft     bool
  69	caseInsensitive bool
  70}
  71
  72// run searches for matches and can continue from the previous match
  73//
  74// quick is usually false, but can be true to not return matches, just put it in caches
  75// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
  76// input is the string to search for our regex pattern
  77func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
  78
  79	// get a cached runner
  80	runner := re.getRunner()
  81	defer re.putRunner(runner)
  82
  83	if textstart < 0 {
  84		if re.RightToLeft() {
  85			textstart = len(input)
  86		} else {
  87			textstart = 0
  88		}
  89	}
  90
  91	return runner.scan(input, textstart, quick, re.MatchTimeout)
  92}
  93
  94// Scans the string to find the first match. Uses the Match object
  95// both to feed text in and as a place to store matches that come out.
  96//
  97// All the action is in the Go() method. Our
  98// responsibility is to load up the class members before
  99// calling Go.
 100//
 101// The optimizer can compute a set of candidate starting characters,
 102// and we could use a separate method Skip() that will quickly scan past
 103// any characters that we know can't match.
 104func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
 105	r.timeout = timeout
 106	r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
 107	r.runtextstart = textstart
 108	r.runtext = rt
 109	r.runtextend = len(rt)
 110
 111	stoppos := r.runtextend
 112	bump := 1
 113
 114	if r.re.RightToLeft() {
 115		bump = -1
 116		stoppos = 0
 117	}
 118
 119	r.runtextpos = textstart
 120	initted := false
 121
 122	r.startTimeoutWatch()
 123	for {
 124		if r.re.Debug() {
 125			//fmt.Printf("\nSearch content: %v\n", string(r.runtext))
 126			fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
 127			fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
 128		}
 129
 130		if r.findFirstChar() {
 131			if err := r.checkTimeout(); err != nil {
 132				return nil, err
 133			}
 134
 135			if !initted {
 136				r.initMatch()
 137				initted = true
 138			}
 139
 140			if r.re.Debug() {
 141				fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
 142			}
 143
 144			if err := r.execute(); err != nil {
 145				return nil, err
 146			}
 147
 148			if r.runmatch.matchcount[0] > 0 {
 149				// We'll return a match even if it touches a previous empty match
 150				return r.tidyMatch(quick), nil
 151			}
 152
 153			// reset state for another go
 154			r.runtrackpos = len(r.runtrack)
 155			r.runstackpos = len(r.runstack)
 156			r.runcrawlpos = len(r.runcrawl)
 157		}
 158
 159		// failure!
 160
 161		if r.runtextpos == stoppos {
 162			r.tidyMatch(true)
 163			return nil, nil
 164		}
 165
 166		// Recognize leading []* and various anchors, and bump on failure accordingly
 167
 168		// r.bump by one and start again
 169
 170		r.runtextpos += bump
 171	}
 172	// We never get here
 173}
 174
 175func (r *runner) execute() error {
 176
 177	r.goTo(0)
 178
 179	for {
 180
 181		if r.re.Debug() {
 182			r.dumpState()
 183		}
 184
 185		if err := r.checkTimeout(); err != nil {
 186			return err
 187		}
 188
 189		switch r.operator {
 190		case syntax.Stop:
 191			return nil
 192
 193		case syntax.Nothing:
 194			break
 195
 196		case syntax.Goto:
 197			r.goTo(r.operand(0))
 198			continue
 199
 200		case syntax.Testref:
 201			if !r.runmatch.isMatched(r.operand(0)) {
 202				break
 203			}
 204			r.advance(1)
 205			continue
 206
 207		case syntax.Lazybranch:
 208			r.trackPush1(r.textPos())
 209			r.advance(1)
 210			continue
 211
 212		case syntax.Lazybranch | syntax.Back:
 213			r.trackPop()
 214			r.textto(r.trackPeek())
 215			r.goTo(r.operand(0))
 216			continue
 217
 218		case syntax.Setmark:
 219			r.stackPush(r.textPos())
 220			r.trackPush()
 221			r.advance(0)
 222			continue
 223
 224		case syntax.Nullmark:
 225			r.stackPush(-1)
 226			r.trackPush()
 227			r.advance(0)
 228			continue
 229
 230		case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
 231			r.stackPop()
 232			break
 233
 234		case syntax.Getmark:
 235			r.stackPop()
 236			r.trackPush1(r.stackPeek())
 237			r.textto(r.stackPeek())
 238			r.advance(0)
 239			continue
 240
 241		case syntax.Getmark | syntax.Back:
 242			r.trackPop()
 243			r.stackPush(r.trackPeek())
 244			break
 245
 246		case syntax.Capturemark:
 247			if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
 248				break
 249			}
 250			r.stackPop()
 251			if r.operand(1) != -1 {
 252				r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
 253			} else {
 254				r.capture(r.operand(0), r.stackPeek(), r.textPos())
 255			}
 256			r.trackPush1(r.stackPeek())
 257
 258			r.advance(2)
 259
 260			continue
 261
 262		case syntax.Capturemark | syntax.Back:
 263			r.trackPop()
 264			r.stackPush(r.trackPeek())
 265			r.uncapture()
 266			if r.operand(0) != -1 && r.operand(1) != -1 {
 267				r.uncapture()
 268			}
 269
 270			break
 271
 272		case syntax.Branchmark:
 273			r.stackPop()
 274
 275			matched := r.textPos() - r.stackPeek()
 276
 277			if matched != 0 { // Nonempty match -> loop now
 278				r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
 279				r.stackPush(r.textPos())                 // Make new mark
 280				r.goTo(r.operand(0))                     // Loop
 281			} else { // Empty match -> straight now
 282				r.trackPushNeg1(r.stackPeek()) // Save old mark
 283				r.advance(1)                   // Straight
 284			}
 285			continue
 286
 287		case syntax.Branchmark | syntax.Back:
 288			r.trackPopN(2)
 289			r.stackPop()
 290			r.textto(r.trackPeekN(1))      // Recall position
 291			r.trackPushNeg1(r.trackPeek()) // Save old mark
 292			r.advance(1)                   // Straight
 293			continue
 294
 295		case syntax.Branchmark | syntax.Back2:
 296			r.trackPop()
 297			r.stackPush(r.trackPeek()) // Recall old mark
 298			break                      // Backtrack
 299
 300		case syntax.Lazybranchmark:
 301			{
 302				// We hit this the first time through a lazy loop and after each
 303				// successful match of the inner expression.  It simply continues
 304				// on and doesn't loop.
 305				r.stackPop()
 306
 307				oldMarkPos := r.stackPeek()
 308
 309				if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
 310					if oldMarkPos != -1 {
 311						r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
 312					} else {
 313						r.trackPush2(r.textPos(), r.textPos())
 314					}
 315				} else {
 316					// The inner expression found an empty match, so we'll go directly to 'back2' if we
 317					// backtrack.  In this case, we need to push something on the stack, since back2 pops.
 318					// However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
 319					// position associated with that empty match.
 320					r.stackPush(oldMarkPos)
 321
 322					r.trackPushNeg1(r.stackPeek()) // Save old mark
 323				}
 324				r.advance(1)
 325				continue
 326			}
 327
 328		case syntax.Lazybranchmark | syntax.Back:
 329
 330			// After the first time, Lazybranchmark | syntax.Back occurs
 331			// with each iteration of the loop, and therefore with every attempted
 332			// match of the inner expression.  We'll try to match the inner expression,
 333			// then go back to Lazybranchmark if successful.  If the inner expression
 334			// fails, we go to Lazybranchmark | syntax.Back2
 335
 336			r.trackPopN(2)
 337			pos := r.trackPeekN(1)
 338			r.trackPushNeg1(r.trackPeek()) // Save old mark
 339			r.stackPush(pos)               // Make new mark
 340			r.textto(pos)                  // Recall position
 341			r.goTo(r.operand(0))           // Loop
 342			continue
 343
 344		case syntax.Lazybranchmark | syntax.Back2:
 345			// The lazy loop has failed.  We'll do a true backtrack and
 346			// start over before the lazy loop.
 347			r.stackPop()
 348			r.trackPop()
 349			r.stackPush(r.trackPeek()) // Recall old mark
 350			break
 351
 352		case syntax.Setcount:
 353			r.stackPush2(r.textPos(), r.operand(0))
 354			r.trackPush()
 355			r.advance(1)
 356			continue
 357
 358		case syntax.Nullcount:
 359			r.stackPush2(-1, r.operand(0))
 360			r.trackPush()
 361			r.advance(1)
 362			continue
 363
 364		case syntax.Setcount | syntax.Back:
 365			r.stackPopN(2)
 366			break
 367
 368		case syntax.Nullcount | syntax.Back:
 369			r.stackPopN(2)
 370			break
 371
 372		case syntax.Branchcount:
 373			// r.stackPush:
 374			//  0: Mark
 375			//  1: Count
 376
 377			r.stackPopN(2)
 378			mark := r.stackPeek()
 379			count := r.stackPeekN(1)
 380			matched := r.textPos() - mark
 381
 382			if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
 383				r.trackPushNeg2(mark, count) // Save old mark, count
 384				r.advance(2)                 // Straight
 385			} else { // Nonempty match -> count+loop now
 386				r.trackPush1(mark)                 // remember mark
 387				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
 388				r.goTo(r.operand(0))               // Loop
 389			}
 390			continue
 391
 392		case syntax.Branchcount | syntax.Back:
 393			// r.trackPush:
 394			//  0: Previous mark
 395			// r.stackPush:
 396			//  0: Mark (= current pos, discarded)
 397			//  1: Count
 398			r.trackPop()
 399			r.stackPopN(2)
 400			if r.stackPeekN(1) > 0 { // Positive -> can go straight
 401				r.textto(r.stackPeek())                           // Zap to mark
 402				r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
 403				r.advance(2)                                      // Straight
 404				continue
 405			}
 406			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
 407			break
 408
 409		case syntax.Branchcount | syntax.Back2:
 410			// r.trackPush:
 411			//  0: Previous mark
 412			//  1: Previous count
 413			r.trackPopN(2)
 414			r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
 415			break                                        // Backtrack
 416
 417		case syntax.Lazybranchcount:
 418			// r.stackPush:
 419			//  0: Mark
 420			//  1: Count
 421
 422			r.stackPopN(2)
 423			mark := r.stackPeek()
 424			count := r.stackPeekN(1)
 425
 426			if count < 0 { // Negative count -> loop now
 427				r.trackPushNeg1(mark)              // Save old mark
 428				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
 429				r.goTo(r.operand(0))               // Loop
 430			} else { // Nonneg count -> straight now
 431				r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
 432				r.advance(2)                           // Straight
 433			}
 434			continue
 435
 436		case syntax.Lazybranchcount | syntax.Back:
 437			// r.trackPush:
 438			//  0: Mark
 439			//  1: Count
 440			//  2: r.textPos
 441
 442			r.trackPopN(3)
 443			mark := r.trackPeek()
 444			textpos := r.trackPeekN(2)
 445
 446			if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
 447				r.textto(textpos)                        // Recall position
 448				r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
 449				r.trackPushNeg1(mark)                    // Save old mark
 450				r.goTo(r.operand(0))                     // Loop
 451				continue
 452			} else { // Max loops or empty match -> backtrack
 453				r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
 454				break                                        // backtrack
 455			}
 456
 457		case syntax.Lazybranchcount | syntax.Back2:
 458			// r.trackPush:
 459			//  0: Previous mark
 460			// r.stackPush:
 461			//  0: Mark (== current pos, discarded)
 462			//  1: Count
 463			r.trackPop()
 464			r.stackPopN(2)
 465			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
 466			break                                          // Backtrack
 467
 468		case syntax.Setjump:
 469			r.stackPush2(r.trackpos(), r.crawlpos())
 470			r.trackPush()
 471			r.advance(0)
 472			continue
 473
 474		case syntax.Setjump | syntax.Back:
 475			r.stackPopN(2)
 476			break
 477
 478		case syntax.Backjump:
 479			// r.stackPush:
 480			//  0: Saved trackpos
 481			//  1: r.crawlpos
 482			r.stackPopN(2)
 483			r.trackto(r.stackPeek())
 484
 485			for r.crawlpos() != r.stackPeekN(1) {
 486				r.uncapture()
 487			}
 488
 489			break
 490
 491		case syntax.Forejump:
 492			// r.stackPush:
 493			//  0: Saved trackpos
 494			//  1: r.crawlpos
 495			r.stackPopN(2)
 496			r.trackto(r.stackPeek())
 497			r.trackPush1(r.stackPeekN(1))
 498			r.advance(0)
 499			continue
 500
 501		case syntax.Forejump | syntax.Back:
 502			// r.trackPush:
 503			//  0: r.crawlpos
 504			r.trackPop()
 505
 506			for r.crawlpos() != r.trackPeek() {
 507				r.uncapture()
 508			}
 509
 510			break
 511
 512		case syntax.Bol:
 513			if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
 514				break
 515			}
 516			r.advance(0)
 517			continue
 518
 519		case syntax.Eol:
 520			if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
 521				break
 522			}
 523			r.advance(0)
 524			continue
 525
 526		case syntax.Boundary:
 527			if !r.isBoundary(r.textPos(), 0, r.runtextend) {
 528				break
 529			}
 530			r.advance(0)
 531			continue
 532
 533		case syntax.Nonboundary:
 534			if r.isBoundary(r.textPos(), 0, r.runtextend) {
 535				break
 536			}
 537			r.advance(0)
 538			continue
 539
 540		case syntax.ECMABoundary:
 541			if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
 542				break
 543			}
 544			r.advance(0)
 545			continue
 546
 547		case syntax.NonECMABoundary:
 548			if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
 549				break
 550			}
 551			r.advance(0)
 552			continue
 553
 554		case syntax.Beginning:
 555			if r.leftchars() > 0 {
 556				break
 557			}
 558			r.advance(0)
 559			continue
 560
 561		case syntax.Start:
 562			if r.textPos() != r.textstart() {
 563				break
 564			}
 565			r.advance(0)
 566			continue
 567
 568		case syntax.EndZ:
 569			if r.rightchars() > 1 || r.rightchars() == 1 && r.charAt(r.textPos()) != '\n' {
 570				break
 571			}
 572			r.advance(0)
 573			continue
 574
 575		case syntax.End:
 576			if r.rightchars() > 0 {
 577				break
 578			}
 579			r.advance(0)
 580			continue
 581
 582		case syntax.One:
 583			if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
 584				break
 585			}
 586
 587			r.advance(1)
 588			continue
 589
 590		case syntax.Notone:
 591			if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
 592				break
 593			}
 594
 595			r.advance(1)
 596			continue
 597
 598		case syntax.Set:
 599
 600			if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
 601				break
 602			}
 603
 604			r.advance(1)
 605			continue
 606
 607		case syntax.Multi:
 608			if !r.runematch(r.code.Strings[r.operand(0)]) {
 609				break
 610			}
 611
 612			r.advance(1)
 613			continue
 614
 615		case syntax.Ref:
 616
 617			capnum := r.operand(0)
 618
 619			if r.runmatch.isMatched(capnum) {
 620				if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
 621					break
 622				}
 623			} else {
 624				if (r.re.options & ECMAScript) == 0 {
 625					break
 626				}
 627			}
 628
 629			r.advance(1)
 630			continue
 631
 632		case syntax.Onerep:
 633
 634			c := r.operand(1)
 635
 636			if r.forwardchars() < c {
 637				break
 638			}
 639
 640			ch := rune(r.operand(0))
 641
 642			for c > 0 {
 643				if r.forwardcharnext() != ch {
 644					goto BreakBackward
 645				}
 646				c--
 647			}
 648
 649			r.advance(2)
 650			continue
 651
 652		case syntax.Notonerep:
 653
 654			c := r.operand(1)
 655
 656			if r.forwardchars() < c {
 657				break
 658			}
 659			ch := rune(r.operand(0))
 660
 661			for c > 0 {
 662				if r.forwardcharnext() == ch {
 663					goto BreakBackward
 664				}
 665				c--
 666			}
 667
 668			r.advance(2)
 669			continue
 670
 671		case syntax.Setrep:
 672
 673			c := r.operand(1)
 674
 675			if r.forwardchars() < c {
 676				break
 677			}
 678
 679			set := r.code.Sets[r.operand(0)]
 680
 681			for c > 0 {
 682				if !set.CharIn(r.forwardcharnext()) {
 683					goto BreakBackward
 684				}
 685				c--
 686			}
 687
 688			r.advance(2)
 689			continue
 690
 691		case syntax.Oneloop:
 692
 693			c := r.operand(1)
 694
 695			if c > r.forwardchars() {
 696				c = r.forwardchars()
 697			}
 698
 699			ch := rune(r.operand(0))
 700			i := c
 701
 702			for ; i > 0; i-- {
 703				if r.forwardcharnext() != ch {
 704					r.backwardnext()
 705					break
 706				}
 707			}
 708
 709			if c > i {
 710				r.trackPush2(c-i-1, r.textPos()-r.bump())
 711			}
 712
 713			r.advance(2)
 714			continue
 715
 716		case syntax.Notoneloop:
 717
 718			c := r.operand(1)
 719
 720			if c > r.forwardchars() {
 721				c = r.forwardchars()
 722			}
 723
 724			ch := rune(r.operand(0))
 725			i := c
 726
 727			for ; i > 0; i-- {
 728				if r.forwardcharnext() == ch {
 729					r.backwardnext()
 730					break
 731				}
 732			}
 733
 734			if c > i {
 735				r.trackPush2(c-i-1, r.textPos()-r.bump())
 736			}
 737
 738			r.advance(2)
 739			continue
 740
 741		case syntax.Setloop:
 742
 743			c := r.operand(1)
 744
 745			if c > r.forwardchars() {
 746				c = r.forwardchars()
 747			}
 748
 749			set := r.code.Sets[r.operand(0)]
 750			i := c
 751
 752			for ; i > 0; i-- {
 753				if !set.CharIn(r.forwardcharnext()) {
 754					r.backwardnext()
 755					break
 756				}
 757			}
 758
 759			if c > i {
 760				r.trackPush2(c-i-1, r.textPos()-r.bump())
 761			}
 762
 763			r.advance(2)
 764			continue
 765
 766		case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
 767
 768			r.trackPopN(2)
 769			i := r.trackPeek()
 770			pos := r.trackPeekN(1)
 771
 772			r.textto(pos)
 773
 774			if i > 0 {
 775				r.trackPush2(i-1, pos-r.bump())
 776			}
 777
 778			r.advance(2)
 779			continue
 780
 781		case syntax.Setloop | syntax.Back:
 782
 783			r.trackPopN(2)
 784			i := r.trackPeek()
 785			pos := r.trackPeekN(1)
 786
 787			r.textto(pos)
 788
 789			if i > 0 {
 790				r.trackPush2(i-1, pos-r.bump())
 791			}
 792
 793			r.advance(2)
 794			continue
 795
 796		case syntax.Onelazy, syntax.Notonelazy:
 797
 798			c := r.operand(1)
 799
 800			if c > r.forwardchars() {
 801				c = r.forwardchars()
 802			}
 803
 804			if c > 0 {
 805				r.trackPush2(c-1, r.textPos())
 806			}
 807
 808			r.advance(2)
 809			continue
 810
 811		case syntax.Setlazy:
 812
 813			c := r.operand(1)
 814
 815			if c > r.forwardchars() {
 816				c = r.forwardchars()
 817			}
 818
 819			if c > 0 {
 820				r.trackPush2(c-1, r.textPos())
 821			}
 822
 823			r.advance(2)
 824			continue
 825
 826		case syntax.Onelazy | syntax.Back:
 827
 828			r.trackPopN(2)
 829			pos := r.trackPeekN(1)
 830			r.textto(pos)
 831
 832			if r.forwardcharnext() != rune(r.operand(0)) {
 833				break
 834			}
 835
 836			i := r.trackPeek()
 837
 838			if i > 0 {
 839				r.trackPush2(i-1, pos+r.bump())
 840			}
 841
 842			r.advance(2)
 843			continue
 844
 845		case syntax.Notonelazy | syntax.Back:
 846
 847			r.trackPopN(2)
 848			pos := r.trackPeekN(1)
 849			r.textto(pos)
 850
 851			if r.forwardcharnext() == rune(r.operand(0)) {
 852				break
 853			}
 854
 855			i := r.trackPeek()
 856
 857			if i > 0 {
 858				r.trackPush2(i-1, pos+r.bump())
 859			}
 860
 861			r.advance(2)
 862			continue
 863
 864		case syntax.Setlazy | syntax.Back:
 865
 866			r.trackPopN(2)
 867			pos := r.trackPeekN(1)
 868			r.textto(pos)
 869
 870			if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
 871				break
 872			}
 873
 874			i := r.trackPeek()
 875
 876			if i > 0 {
 877				r.trackPush2(i-1, pos+r.bump())
 878			}
 879
 880			r.advance(2)
 881			continue
 882
 883		default:
 884			return errors.New("unknown state in regex runner")
 885		}
 886
 887	BreakBackward:
 888		;
 889
 890		// "break Backward" comes here:
 891		r.backtrack()
 892	}
 893}
 894
 895// increase the size of stack and track storage
 896func (r *runner) ensureStorage() {
 897	if r.runstackpos < r.runtrackcount*4 {
 898		doubleIntSlice(&r.runstack, &r.runstackpos)
 899	}
 900	if r.runtrackpos < r.runtrackcount*4 {
 901		doubleIntSlice(&r.runtrack, &r.runtrackpos)
 902	}
 903}
 904
 905func doubleIntSlice(s *[]int, pos *int) {
 906	oldLen := len(*s)
 907	newS := make([]int, oldLen*2)
 908
 909	copy(newS[oldLen:], *s)
 910	*pos += oldLen
 911	*s = newS
 912}
 913
 914// Save a number on the longjump unrolling stack
 915func (r *runner) crawl(i int) {
 916	if r.runcrawlpos == 0 {
 917		doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
 918	}
 919	r.runcrawlpos--
 920	r.runcrawl[r.runcrawlpos] = i
 921}
 922
 923// Remove a number from the longjump unrolling stack
 924func (r *runner) popcrawl() int {
 925	val := r.runcrawl[r.runcrawlpos]
 926	r.runcrawlpos++
 927	return val
 928}
 929
 930// Get the height of the stack
 931func (r *runner) crawlpos() int {
 932	return len(r.runcrawl) - r.runcrawlpos
 933}
 934
 935func (r *runner) advance(i int) {
 936	r.codepos += (i + 1)
 937	r.setOperator(r.code.Codes[r.codepos])
 938}
 939
 940func (r *runner) goTo(newpos int) {
 941	// when branching backward, ensure storage
 942	if newpos < r.codepos {
 943		r.ensureStorage()
 944	}
 945
 946	r.setOperator(r.code.Codes[newpos])
 947	r.codepos = newpos
 948}
 949
 950func (r *runner) textto(newpos int) {
 951	r.runtextpos = newpos
 952}
 953
 954func (r *runner) trackto(newpos int) {
 955	r.runtrackpos = len(r.runtrack) - newpos
 956}
 957
 958func (r *runner) textstart() int {
 959	return r.runtextstart
 960}
 961
 962func (r *runner) textPos() int {
 963	return r.runtextpos
 964}
 965
 966// push onto the backtracking stack
 967func (r *runner) trackpos() int {
 968	return len(r.runtrack) - r.runtrackpos
 969}
 970
 971func (r *runner) trackPush() {
 972	r.runtrackpos--
 973	r.runtrack[r.runtrackpos] = r.codepos
 974}
 975
 976func (r *runner) trackPush1(I1 int) {
 977	r.runtrackpos--
 978	r.runtrack[r.runtrackpos] = I1
 979	r.runtrackpos--
 980	r.runtrack[r.runtrackpos] = r.codepos
 981}
 982
 983func (r *runner) trackPush2(I1, I2 int) {
 984	r.runtrackpos--
 985	r.runtrack[r.runtrackpos] = I1
 986	r.runtrackpos--
 987	r.runtrack[r.runtrackpos] = I2
 988	r.runtrackpos--
 989	r.runtrack[r.runtrackpos] = r.codepos
 990}
 991
 992func (r *runner) trackPush3(I1, I2, I3 int) {
 993	r.runtrackpos--
 994	r.runtrack[r.runtrackpos] = I1
 995	r.runtrackpos--
 996	r.runtrack[r.runtrackpos] = I2
 997	r.runtrackpos--
 998	r.runtrack[r.runtrackpos] = I3
 999	r.runtrackpos--
1000	r.runtrack[r.runtrackpos] = r.codepos
1001}
1002
1003func (r *runner) trackPushNeg1(I1 int) {
1004	r.runtrackpos--
1005	r.runtrack[r.runtrackpos] = I1
1006	r.runtrackpos--
1007	r.runtrack[r.runtrackpos] = -r.codepos
1008}
1009
1010func (r *runner) trackPushNeg2(I1, I2 int) {
1011	r.runtrackpos--
1012	r.runtrack[r.runtrackpos] = I1
1013	r.runtrackpos--
1014	r.runtrack[r.runtrackpos] = I2
1015	r.runtrackpos--
1016	r.runtrack[r.runtrackpos] = -r.codepos
1017}
1018
1019func (r *runner) backtrack() {
1020	newpos := r.runtrack[r.runtrackpos]
1021	r.runtrackpos++
1022
1023	if r.re.Debug() {
1024		if newpos < 0 {
1025			fmt.Printf("       Backtracking (back2) to code position %v\n", -newpos)
1026		} else {
1027			fmt.Printf("       Backtracking to code position %v\n", newpos)
1028		}
1029	}
1030
1031	if newpos < 0 {
1032		newpos = -newpos
1033		r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1034	} else {
1035		r.setOperator(r.code.Codes[newpos] | syntax.Back)
1036	}
1037
1038	// When branching backward, ensure storage
1039	if newpos < r.codepos {
1040		r.ensureStorage()
1041	}
1042
1043	r.codepos = newpos
1044}
1045
1046func (r *runner) setOperator(op int) {
1047	r.caseInsensitive = (0 != (op & syntax.Ci))
1048	r.rightToLeft = (0 != (op & syntax.Rtl))
1049	r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1050}
1051
1052func (r *runner) trackPop() {
1053	r.runtrackpos++
1054}
1055
1056// pop framesize items from the backtracking stack
1057func (r *runner) trackPopN(framesize int) {
1058	r.runtrackpos += framesize
1059}
1060
1061// Technically we are actually peeking at items already popped.  So if you want to
1062// get and pop the top item from the stack, you do
1063// r.trackPop();
1064// r.trackPeek();
1065func (r *runner) trackPeek() int {
1066	return r.runtrack[r.runtrackpos-1]
1067}
1068
1069// get the ith element down on the backtracking stack
1070func (r *runner) trackPeekN(i int) int {
1071	return r.runtrack[r.runtrackpos-i-1]
1072}
1073
1074// Push onto the grouping stack
1075func (r *runner) stackPush(I1 int) {
1076	r.runstackpos--
1077	r.runstack[r.runstackpos] = I1
1078}
1079
1080func (r *runner) stackPush2(I1, I2 int) {
1081	r.runstackpos--
1082	r.runstack[r.runstackpos] = I1
1083	r.runstackpos--
1084	r.runstack[r.runstackpos] = I2
1085}
1086
1087func (r *runner) stackPop() {
1088	r.runstackpos++
1089}
1090
1091// pop framesize items from the grouping stack
1092func (r *runner) stackPopN(framesize int) {
1093	r.runstackpos += framesize
1094}
1095
1096// Technically we are actually peeking at items already popped.  So if you want to
1097// get and pop the top item from the stack, you do
1098// r.stackPop();
1099// r.stackPeek();
1100func (r *runner) stackPeek() int {
1101	return r.runstack[r.runstackpos-1]
1102}
1103
1104// get the ith element down on the grouping stack
1105func (r *runner) stackPeekN(i int) int {
1106	return r.runstack[r.runstackpos-i-1]
1107}
1108
1109func (r *runner) operand(i int) int {
1110	return r.code.Codes[r.codepos+i+1]
1111}
1112
1113func (r *runner) leftchars() int {
1114	return r.runtextpos
1115}
1116
1117func (r *runner) rightchars() int {
1118	return r.runtextend - r.runtextpos
1119}
1120
1121func (r *runner) bump() int {
1122	if r.rightToLeft {
1123		return -1
1124	}
1125	return 1
1126}
1127
1128func (r *runner) forwardchars() int {
1129	if r.rightToLeft {
1130		return r.runtextpos
1131	}
1132	return r.runtextend - r.runtextpos
1133}
1134
1135func (r *runner) forwardcharnext() rune {
1136	var ch rune
1137	if r.rightToLeft {
1138		r.runtextpos--
1139		ch = r.runtext[r.runtextpos]
1140	} else {
1141		ch = r.runtext[r.runtextpos]
1142		r.runtextpos++
1143	}
1144
1145	if r.caseInsensitive {
1146		return unicode.ToLower(ch)
1147	}
1148	return ch
1149}
1150
1151func (r *runner) runematch(str []rune) bool {
1152	var pos int
1153
1154	c := len(str)
1155	if !r.rightToLeft {
1156		if r.runtextend-r.runtextpos < c {
1157			return false
1158		}
1159
1160		pos = r.runtextpos + c
1161	} else {
1162		if r.runtextpos-0 < c {
1163			return false
1164		}
1165
1166		pos = r.runtextpos
1167	}
1168
1169	if !r.caseInsensitive {
1170		for c != 0 {
1171			c--
1172			pos--
1173			if str[c] != r.runtext[pos] {
1174				return false
1175			}
1176		}
1177	} else {
1178		for c != 0 {
1179			c--
1180			pos--
1181			if str[c] != unicode.ToLower(r.runtext[pos]) {
1182				return false
1183			}
1184		}
1185	}
1186
1187	if !r.rightToLeft {
1188		pos += len(str)
1189	}
1190
1191	r.runtextpos = pos
1192
1193	return true
1194}
1195
1196func (r *runner) refmatch(index, len int) bool {
1197	var c, pos, cmpos int
1198
1199	if !r.rightToLeft {
1200		if r.runtextend-r.runtextpos < len {
1201			return false
1202		}
1203
1204		pos = r.runtextpos + len
1205	} else {
1206		if r.runtextpos-0 < len {
1207			return false
1208		}
1209
1210		pos = r.runtextpos
1211	}
1212	cmpos = index + len
1213
1214	c = len
1215
1216	if !r.caseInsensitive {
1217		for c != 0 {
1218			c--
1219			cmpos--
1220			pos--
1221			if r.runtext[cmpos] != r.runtext[pos] {
1222				return false
1223			}
1224
1225		}
1226	} else {
1227		for c != 0 {
1228			c--
1229			cmpos--
1230			pos--
1231
1232			if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1233				return false
1234			}
1235		}
1236	}
1237
1238	if !r.rightToLeft {
1239		pos += len
1240	}
1241
1242	r.runtextpos = pos
1243
1244	return true
1245}
1246
1247func (r *runner) backwardnext() {
1248	if r.rightToLeft {
1249		r.runtextpos++
1250	} else {
1251		r.runtextpos--
1252	}
1253}
1254
1255func (r *runner) charAt(j int) rune {
1256	return r.runtext[j]
1257}
1258
1259func (r *runner) findFirstChar() bool {
1260
1261	if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1262		if !r.code.RightToLeft {
1263			if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1264				(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1265				r.runtextpos = r.runtextend
1266				return false
1267			}
1268			if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1269				r.runtextpos = r.runtextend - 1
1270			} else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1271				r.runtextpos = r.runtextend
1272			}
1273		} else {
1274			if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1275				(0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1276					(r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1277				(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1278				r.runtextpos = 0
1279				return false
1280			}
1281			if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1282				r.runtextpos = 0
1283			}
1284		}
1285
1286		if r.code.BmPrefix != nil {
1287			return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1288		}
1289
1290		return true // found a valid start or end anchor
1291	} else if r.code.BmPrefix != nil {
1292		r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1293
1294		if r.runtextpos == -1 {
1295			if r.code.RightToLeft {
1296				r.runtextpos = 0
1297			} else {
1298				r.runtextpos = r.runtextend
1299			}
1300			return false
1301		}
1302
1303		return true
1304	} else if r.code.FcPrefix == nil {
1305		return true
1306	}
1307
1308	r.rightToLeft = r.code.RightToLeft
1309	r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1310
1311	set := r.code.FcPrefix.PrefixSet
1312	if set.IsSingleton() {
1313		ch := set.SingletonChar()
1314		for i := r.forwardchars(); i > 0; i-- {
1315			if ch == r.forwardcharnext() {
1316				r.backwardnext()
1317				return true
1318			}
1319		}
1320	} else {
1321		for i := r.forwardchars(); i > 0; i-- {
1322			n := r.forwardcharnext()
1323			//fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
1324			if set.CharIn(n) {
1325				r.backwardnext()
1326				return true
1327			}
1328		}
1329	}
1330
1331	return false
1332}
1333
1334func (r *runner) initMatch() {
1335	// Use a hashtable'ed Match object if the capture numbers are sparse
1336
1337	if r.runmatch == nil {
1338		if r.re.caps != nil {
1339			r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1340		} else {
1341			r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1342		}
1343	} else {
1344		r.runmatch.reset(r.runtext, r.runtextstart)
1345	}
1346
1347	// note we test runcrawl, because it is the last one to be allocated
1348	// If there is an alloc failure in the middle of the three allocations,
1349	// we may still return to reuse this instance, and we want to behave
1350	// as if the allocations didn't occur. (we used to test _trackcount != 0)
1351
1352	if r.runcrawl != nil {
1353		r.runtrackpos = len(r.runtrack)
1354		r.runstackpos = len(r.runstack)
1355		r.runcrawlpos = len(r.runcrawl)
1356		return
1357	}
1358
1359	r.initTrackCount()
1360
1361	tracksize := r.runtrackcount * 8
1362	stacksize := r.runtrackcount * 8
1363
1364	if tracksize < 32 {
1365		tracksize = 32
1366	}
1367	if stacksize < 16 {
1368		stacksize = 16
1369	}
1370
1371	r.runtrack = make([]int, tracksize)
1372	r.runtrackpos = tracksize
1373
1374	r.runstack = make([]int, stacksize)
1375	r.runstackpos = stacksize
1376
1377	r.runcrawl = make([]int, 32)
1378	r.runcrawlpos = 32
1379}
1380
1381func (r *runner) tidyMatch(quick bool) *Match {
1382	if !quick {
1383		match := r.runmatch
1384
1385		r.runmatch = nil
1386
1387		match.tidy(r.runtextpos)
1388		return match
1389	} else {
1390		// send back our match -- it's not leaving the package, so it's safe to not clean it up
1391		// this reduces allocs for frequent calls to the "IsMatch" bool-only functions
1392		return r.runmatch
1393	}
1394}
1395
1396// capture captures a subexpression. Note that the
1397// capnum used here has already been mapped to a non-sparse
1398// index (by the code generator RegexWriter).
1399func (r *runner) capture(capnum, start, end int) {
1400	if end < start {
1401		T := end
1402		end = start
1403		start = T
1404	}
1405
1406	r.crawl(capnum)
1407	r.runmatch.addMatch(capnum, start, end-start)
1408}
1409
1410// transferCapture captures a subexpression. Note that the
1411// capnum used here has already been mapped to a non-sparse
1412// index (by the code generator RegexWriter).
1413func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1414	var start2, end2 int
1415
1416	// these are the two intervals that are cancelling each other
1417
1418	if end < start {
1419		T := end
1420		end = start
1421		start = T
1422	}
1423
1424	start2 = r.runmatch.matchIndex(uncapnum)
1425	end2 = start2 + r.runmatch.matchLength(uncapnum)
1426
1427	// The new capture gets the innermost defined interval
1428
1429	if start >= end2 {
1430		end = start
1431		start = end2
1432	} else if end <= start2 {
1433		start = start2
1434	} else {
1435		if end > end2 {
1436			end = end2
1437		}
1438		if start2 > start {
1439			start = start2
1440		}
1441	}
1442
1443	r.crawl(uncapnum)
1444	r.runmatch.balanceMatch(uncapnum)
1445
1446	if capnum != -1 {
1447		r.crawl(capnum)
1448		r.runmatch.addMatch(capnum, start, end-start)
1449	}
1450}
1451
1452// revert the last capture
1453func (r *runner) uncapture() {
1454	capnum := r.popcrawl()
1455	r.runmatch.removeMatch(capnum)
1456}
1457
1458//debug
1459
1460func (r *runner) dumpState() {
1461	back := ""
1462	if r.operator&syntax.Back != 0 {
1463		back = " Back"
1464	}
1465	if r.operator&syntax.Back2 != 0 {
1466		back += " Back2"
1467	}
1468	fmt.Printf("Text:  %v\nTrack: %v\nStack: %v\n       %s%s\n\n",
1469		r.textposDescription(),
1470		r.stackDescription(r.runtrack, r.runtrackpos),
1471		r.stackDescription(r.runstack, r.runstackpos),
1472		r.code.OpcodeDescription(r.codepos),
1473		back)
1474}
1475
1476func (r *runner) stackDescription(a []int, index int) string {
1477	buf := &bytes.Buffer{}
1478
1479	fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1480	if buf.Len() < 8 {
1481		buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1482	}
1483
1484	buf.WriteRune('(')
1485	for i := index; i < len(a); i++ {
1486		if i > index {
1487			buf.WriteRune(' ')
1488		}
1489
1490		buf.WriteString(strconv.Itoa(a[i]))
1491	}
1492
1493	buf.WriteRune(')')
1494
1495	return buf.String()
1496}
1497
1498func (r *runner) textposDescription() string {
1499	buf := &bytes.Buffer{}
1500
1501	buf.WriteString(strconv.Itoa(r.runtextpos))
1502
1503	if buf.Len() < 8 {
1504		buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1505	}
1506
1507	if r.runtextpos > 0 {
1508		buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1509	} else {
1510		buf.WriteRune('^')
1511	}
1512
1513	buf.WriteRune('>')
1514
1515	for i := r.runtextpos; i < r.runtextend; i++ {
1516		buf.WriteString(syntax.CharDescription(r.runtext[i]))
1517	}
1518	if buf.Len() >= 64 {
1519		buf.Truncate(61)
1520		buf.WriteString("...")
1521	} else {
1522		buf.WriteRune('$')
1523	}
1524
1525	return buf.String()
1526}
1527
1528// decide whether the pos
1529// at the specified index is a boundary or not. It's just not worth
1530// emitting inline code for this logic.
1531func (r *runner) isBoundary(index, startpos, endpos int) bool {
1532	return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1533		(index < endpos && syntax.IsWordChar(r.runtext[index]))
1534}
1535
1536func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1537	return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1538		(index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1539}
1540
1541// this seems like a comment to justify randomly picking 1000 :-P
1542// We have determined this value in a series of experiments where x86 retail
1543// builds (ono-lab-optimized) were run on different pattern/input pairs. Larger values
1544// of TimeoutCheckFrequency did not tend to increase performance; smaller values
1545// of TimeoutCheckFrequency tended to slow down the execution.
1546const timeoutCheckFrequency int = 1000
1547
1548func (r *runner) startTimeoutWatch() {
1549	if r.ignoreTimeout {
1550		return
1551	}
1552
1553	r.timeoutChecksToSkip = timeoutCheckFrequency
1554	r.timeoutAt = time.Now().Add(r.timeout)
1555}
1556
1557func (r *runner) checkTimeout() error {
1558	if r.ignoreTimeout {
1559		return nil
1560	}
1561	r.timeoutChecksToSkip--
1562	if r.timeoutChecksToSkip != 0 {
1563		return nil
1564	}
1565
1566	r.timeoutChecksToSkip = timeoutCheckFrequency
1567	return r.doCheckTimeout()
1568}
1569
1570func (r *runner) doCheckTimeout() error {
1571	current := time.Now()
1572
1573	if current.Before(r.timeoutAt) {
1574		return nil
1575	}
1576
1577	if r.re.Debug() {
1578		//Debug.WriteLine("")
1579		//Debug.WriteLine("RegEx match timeout occurred!")
1580		//Debug.WriteLine("Specified timeout:       " + TimeSpan.FromMilliseconds(_timeout).ToString())
1581		//Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
1582		//Debug.WriteLine("Search pattern:          " + _runregex._pattern)
1583		//Debug.WriteLine("Input:                   " + r.runtext)
1584		//Debug.WriteLine("About to throw RegexMatchTimeoutException.")
1585	}
1586
1587	return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1588}
1589
1590func (r *runner) initTrackCount() {
1591	r.runtrackcount = r.code.TrackCount
1592}
1593
1594// getRunner returns a run to use for matching re.
1595// It uses the re's runner cache if possible, to avoid
1596// unnecessary allocation.
1597func (re *Regexp) getRunner() *runner {
1598	re.muRun.Lock()
1599	if n := len(re.runner); n > 0 {
1600		z := re.runner[n-1]
1601		re.runner = re.runner[:n-1]
1602		re.muRun.Unlock()
1603		return z
1604	}
1605	re.muRun.Unlock()
1606	z := &runner{
1607		re:   re,
1608		code: re.code,
1609	}
1610	return z
1611}
1612
1613// putRunner returns a runner to the re's cache.
1614// There is no attempt to limit the size of the cache, so it will
1615// grow to the maximum number of simultaneous matches
1616// run using re.  (The cache empties when re gets garbage collected.)
1617func (re *Regexp) putRunner(r *runner) {
1618	re.muRun.Lock()
1619	re.runner = append(re.runner, r)
1620	re.muRun.Unlock()
1621}