runner.go

   1package regexp2
   2
   3import (
   4	"bytes"
   5	"errors"
   6	"fmt"
   7	"math"
   8	"strconv"
   9	"strings"
  10	"time"
  11	"unicode"
  12
  13	"github.com/dlclark/regexp2/syntax"
  14)
  15
  16type runner struct {
  17	re   *Regexp
  18	code *syntax.Code
  19
  20	runtextstart int // starting point for search
  21
  22	runtext    []rune // text to search
  23	runtextpos int    // current position in text
  24	runtextend int
  25
  26	// The backtracking stack.  Opcodes use this to store data regarding
  27	// what they have matched and where to backtrack to.  Each "frame" on
  28	// the stack takes the form of [CodePosition Data1 Data2...], where
  29	// CodePosition is the position of the current opcode and
  30	// the data values are all optional.  The CodePosition can be negative, and
  31	// these values (also called "back2") are used by the BranchMark family of opcodes
  32	// to indicate whether they are backtracking after a successful or failed
  33	// match.
  34	// When we backtrack, we pop the CodePosition off the stack, set the current
  35	// instruction pointer to that code position, and mark the opcode
  36	// with a backtracking flag ("Back").  Each opcode then knows how to
  37	// handle its own data.
  38	runtrack    []int
  39	runtrackpos int
  40
  41	// This stack is used to track text positions across different opcodes.
  42	// For example, in /(a*b)+/, the parentheses result in a SetMark/CaptureMark
  43	// pair. SetMark records the text position before we match a*b.  Then
  44	// CaptureMark uses that position to figure out where the capture starts.
  45	// Opcodes which push onto this stack are always paired with other opcodes
  46	// which will pop the value from it later.  A successful match should mean
  47	// that this stack is empty.
  48	runstack    []int
  49	runstackpos int
  50
  51	// The crawl stack is used to keep track of captures.  Every time a group
  52	// has a capture, we push its group number onto the runcrawl stack.  In
  53	// the case of a balanced match, we push BOTH groups onto the stack.
  54	runcrawl    []int
  55	runcrawlpos int
  56
  57	runtrackcount int // count of states that may do backtracking
  58
  59	runmatch *Match // result object
  60
  61	ignoreTimeout bool
  62	timeout       time.Duration // timeout in milliseconds (needed for actual)
  63	deadline      fasttime
  64
  65	operator        syntax.InstOp
  66	codepos         int
  67	rightToLeft     bool
  68	caseInsensitive bool
  69}
  70
  71// run searches for matches and can continue from the previous match
  72//
  73// quick is usually false, but can be true to not return matches, just put it in caches
  74// textstart is -1 to start at the "beginning" (depending on Right-To-Left), otherwise an index in input
  75// input is the string to search for our regex pattern
  76func (re *Regexp) run(quick bool, textstart int, input []rune) (*Match, error) {
  77
  78	// get a cached runner
  79	runner := re.getRunner()
  80	defer re.putRunner(runner)
  81
  82	if textstart < 0 {
  83		if re.RightToLeft() {
  84			textstart = len(input)
  85		} else {
  86			textstart = 0
  87		}
  88	}
  89
  90	return runner.scan(input, textstart, quick, re.MatchTimeout)
  91}
  92
  93// Scans the string to find the first match. Uses the Match object
  94// both to feed text in and as a place to store matches that come out.
  95//
  96// All the action is in the Go() method. Our
  97// responsibility is to load up the class members before
  98// calling Go.
  99//
 100// The optimizer can compute a set of candidate starting characters,
 101// and we could use a separate method Skip() that will quickly scan past
 102// any characters that we know can't match.
 103func (r *runner) scan(rt []rune, textstart int, quick bool, timeout time.Duration) (*Match, error) {
 104	r.timeout = timeout
 105	r.ignoreTimeout = (time.Duration(math.MaxInt64) == timeout)
 106	r.runtextstart = textstart
 107	r.runtext = rt
 108	r.runtextend = len(rt)
 109
 110	stoppos := r.runtextend
 111	bump := 1
 112
 113	if r.re.RightToLeft() {
 114		bump = -1
 115		stoppos = 0
 116	}
 117
 118	r.runtextpos = textstart
 119	initted := false
 120
 121	r.startTimeoutWatch()
 122	for {
 123		if r.re.Debug() {
 124			//fmt.Printf("\nSearch content: %v\n", string(r.runtext))
 125			fmt.Printf("\nSearch range: from 0 to %v\n", r.runtextend)
 126			fmt.Printf("Firstchar search starting at %v stopping at %v\n", r.runtextpos, stoppos)
 127		}
 128
 129		if r.findFirstChar() {
 130			if err := r.checkTimeout(); err != nil {
 131				return nil, err
 132			}
 133
 134			if !initted {
 135				r.initMatch()
 136				initted = true
 137			}
 138
 139			if r.re.Debug() {
 140				fmt.Printf("Executing engine starting at %v\n\n", r.runtextpos)
 141			}
 142
 143			if err := r.execute(); err != nil {
 144				return nil, err
 145			}
 146
 147			if r.runmatch.matchcount[0] > 0 {
 148				// We'll return a match even if it touches a previous empty match
 149				return r.tidyMatch(quick), nil
 150			}
 151
 152			// reset state for another go
 153			r.runtrackpos = len(r.runtrack)
 154			r.runstackpos = len(r.runstack)
 155			r.runcrawlpos = len(r.runcrawl)
 156		}
 157
 158		// failure!
 159
 160		if r.runtextpos == stoppos {
 161			r.tidyMatch(true)
 162			return nil, nil
 163		}
 164
 165		// Recognize leading []* and various anchors, and bump on failure accordingly
 166
 167		// r.bump by one and start again
 168
 169		r.runtextpos += bump
 170	}
 171	// We never get here
 172}
 173
 174func (r *runner) execute() error {
 175
 176	r.goTo(0)
 177
 178	for {
 179
 180		if r.re.Debug() {
 181			r.dumpState()
 182		}
 183
 184		if err := r.checkTimeout(); err != nil {
 185			return err
 186		}
 187
 188		switch r.operator {
 189		case syntax.Stop:
 190			return nil
 191
 192		case syntax.Nothing:
 193			break
 194
 195		case syntax.Goto:
 196			r.goTo(r.operand(0))
 197			continue
 198
 199		case syntax.Testref:
 200			if !r.runmatch.isMatched(r.operand(0)) {
 201				break
 202			}
 203			r.advance(1)
 204			continue
 205
 206		case syntax.Lazybranch:
 207			r.trackPush1(r.textPos())
 208			r.advance(1)
 209			continue
 210
 211		case syntax.Lazybranch | syntax.Back:
 212			r.trackPop()
 213			r.textto(r.trackPeek())
 214			r.goTo(r.operand(0))
 215			continue
 216
 217		case syntax.Setmark:
 218			r.stackPush(r.textPos())
 219			r.trackPush()
 220			r.advance(0)
 221			continue
 222
 223		case syntax.Nullmark:
 224			r.stackPush(-1)
 225			r.trackPush()
 226			r.advance(0)
 227			continue
 228
 229		case syntax.Setmark | syntax.Back, syntax.Nullmark | syntax.Back:
 230			r.stackPop()
 231			break
 232
 233		case syntax.Getmark:
 234			r.stackPop()
 235			r.trackPush1(r.stackPeek())
 236			r.textto(r.stackPeek())
 237			r.advance(0)
 238			continue
 239
 240		case syntax.Getmark | syntax.Back:
 241			r.trackPop()
 242			r.stackPush(r.trackPeek())
 243			break
 244
 245		case syntax.Capturemark:
 246			if r.operand(1) != -1 && !r.runmatch.isMatched(r.operand(1)) {
 247				break
 248			}
 249			r.stackPop()
 250			if r.operand(1) != -1 {
 251				r.transferCapture(r.operand(0), r.operand(1), r.stackPeek(), r.textPos())
 252			} else {
 253				r.capture(r.operand(0), r.stackPeek(), r.textPos())
 254			}
 255			r.trackPush1(r.stackPeek())
 256
 257			r.advance(2)
 258
 259			continue
 260
 261		case syntax.Capturemark | syntax.Back:
 262			r.trackPop()
 263			r.stackPush(r.trackPeek())
 264			r.uncapture()
 265			if r.operand(0) != -1 && r.operand(1) != -1 {
 266				r.uncapture()
 267			}
 268
 269			break
 270
 271		case syntax.Branchmark:
 272			r.stackPop()
 273
 274			matched := r.textPos() - r.stackPeek()
 275
 276			if matched != 0 { // Nonempty match -> loop now
 277				r.trackPush2(r.stackPeek(), r.textPos()) // Save old mark, textpos
 278				r.stackPush(r.textPos())                 // Make new mark
 279				r.goTo(r.operand(0))                     // Loop
 280			} else { // Empty match -> straight now
 281				r.trackPushNeg1(r.stackPeek()) // Save old mark
 282				r.advance(1)                   // Straight
 283			}
 284			continue
 285
 286		case syntax.Branchmark | syntax.Back:
 287			r.trackPopN(2)
 288			r.stackPop()
 289			r.textto(r.trackPeekN(1))      // Recall position
 290			r.trackPushNeg1(r.trackPeek()) // Save old mark
 291			r.advance(1)                   // Straight
 292			continue
 293
 294		case syntax.Branchmark | syntax.Back2:
 295			r.trackPop()
 296			r.stackPush(r.trackPeek()) // Recall old mark
 297			break                      // Backtrack
 298
 299		case syntax.Lazybranchmark:
 300			{
 301				// We hit this the first time through a lazy loop and after each
 302				// successful match of the inner expression.  It simply continues
 303				// on and doesn't loop.
 304				r.stackPop()
 305
 306				oldMarkPos := r.stackPeek()
 307
 308				if r.textPos() != oldMarkPos { // Nonempty match -> try to loop again by going to 'back' state
 309					if oldMarkPos != -1 {
 310						r.trackPush2(oldMarkPos, r.textPos()) // Save old mark, textpos
 311					} else {
 312						r.trackPush2(r.textPos(), r.textPos())
 313					}
 314				} else {
 315					// The inner expression found an empty match, so we'll go directly to 'back2' if we
 316					// backtrack.  In this case, we need to push something on the stack, since back2 pops.
 317					// However, in the case of ()+? or similar, this empty match may be legitimate, so push the text
 318					// position associated with that empty match.
 319					r.stackPush(oldMarkPos)
 320
 321					r.trackPushNeg1(r.stackPeek()) // Save old mark
 322				}
 323				r.advance(1)
 324				continue
 325			}
 326
 327		case syntax.Lazybranchmark | syntax.Back:
 328
 329			// After the first time, Lazybranchmark | syntax.Back occurs
 330			// with each iteration of the loop, and therefore with every attempted
 331			// match of the inner expression.  We'll try to match the inner expression,
 332			// then go back to Lazybranchmark if successful.  If the inner expression
 333			// fails, we go to Lazybranchmark | syntax.Back2
 334
 335			r.trackPopN(2)
 336			pos := r.trackPeekN(1)
 337			r.trackPushNeg1(r.trackPeek()) // Save old mark
 338			r.stackPush(pos)               // Make new mark
 339			r.textto(pos)                  // Recall position
 340			r.goTo(r.operand(0))           // Loop
 341			continue
 342
 343		case syntax.Lazybranchmark | syntax.Back2:
 344			// The lazy loop has failed.  We'll do a true backtrack and
 345			// start over before the lazy loop.
 346			r.stackPop()
 347			r.trackPop()
 348			r.stackPush(r.trackPeek()) // Recall old mark
 349			break
 350
 351		case syntax.Setcount:
 352			r.stackPush2(r.textPos(), r.operand(0))
 353			r.trackPush()
 354			r.advance(1)
 355			continue
 356
 357		case syntax.Nullcount:
 358			r.stackPush2(-1, r.operand(0))
 359			r.trackPush()
 360			r.advance(1)
 361			continue
 362
 363		case syntax.Setcount | syntax.Back:
 364			r.stackPopN(2)
 365			break
 366
 367		case syntax.Nullcount | syntax.Back:
 368			r.stackPopN(2)
 369			break
 370
 371		case syntax.Branchcount:
 372			// r.stackPush:
 373			//  0: Mark
 374			//  1: Count
 375
 376			r.stackPopN(2)
 377			mark := r.stackPeek()
 378			count := r.stackPeekN(1)
 379			matched := r.textPos() - mark
 380
 381			if count >= r.operand(1) || (matched == 0 && count >= 0) { // Max loops or empty match -> straight now
 382				r.trackPushNeg2(mark, count) // Save old mark, count
 383				r.advance(2)                 // Straight
 384			} else { // Nonempty match -> count+loop now
 385				r.trackPush1(mark)                 // remember mark
 386				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
 387				r.goTo(r.operand(0))               // Loop
 388			}
 389			continue
 390
 391		case syntax.Branchcount | syntax.Back:
 392			// r.trackPush:
 393			//  0: Previous mark
 394			// r.stackPush:
 395			//  0: Mark (= current pos, discarded)
 396			//  1: Count
 397			r.trackPop()
 398			r.stackPopN(2)
 399			if r.stackPeekN(1) > 0 { // Positive -> can go straight
 400				r.textto(r.stackPeek())                           // Zap to mark
 401				r.trackPushNeg2(r.trackPeek(), r.stackPeekN(1)-1) // Save old mark, old count
 402				r.advance(2)                                      // Straight
 403				continue
 404			}
 405			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // recall old mark, old count
 406			break
 407
 408		case syntax.Branchcount | syntax.Back2:
 409			// r.trackPush:
 410			//  0: Previous mark
 411			//  1: Previous count
 412			r.trackPopN(2)
 413			r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, old count
 414			break                                        // Backtrack
 415
 416		case syntax.Lazybranchcount:
 417			// r.stackPush:
 418			//  0: Mark
 419			//  1: Count
 420
 421			r.stackPopN(2)
 422			mark := r.stackPeek()
 423			count := r.stackPeekN(1)
 424
 425			if count < 0 { // Negative count -> loop now
 426				r.trackPushNeg1(mark)              // Save old mark
 427				r.stackPush2(r.textPos(), count+1) // Make new mark, incr count
 428				r.goTo(r.operand(0))               // Loop
 429			} else { // Nonneg count -> straight now
 430				r.trackPush3(mark, count, r.textPos()) // Save mark, count, position
 431				r.advance(2)                           // Straight
 432			}
 433			continue
 434
 435		case syntax.Lazybranchcount | syntax.Back:
 436			// r.trackPush:
 437			//  0: Mark
 438			//  1: Count
 439			//  2: r.textPos
 440
 441			r.trackPopN(3)
 442			mark := r.trackPeek()
 443			textpos := r.trackPeekN(2)
 444
 445			if r.trackPeekN(1) < r.operand(1) && textpos != mark { // Under limit and not empty match -> loop
 446				r.textto(textpos)                        // Recall position
 447				r.stackPush2(textpos, r.trackPeekN(1)+1) // Make new mark, incr count
 448				r.trackPushNeg1(mark)                    // Save old mark
 449				r.goTo(r.operand(0))                     // Loop
 450				continue
 451			} else { // Max loops or empty match -> backtrack
 452				r.stackPush2(r.trackPeek(), r.trackPeekN(1)) // Recall old mark, count
 453				break                                        // backtrack
 454			}
 455
 456		case syntax.Lazybranchcount | syntax.Back2:
 457			// r.trackPush:
 458			//  0: Previous mark
 459			// r.stackPush:
 460			//  0: Mark (== current pos, discarded)
 461			//  1: Count
 462			r.trackPop()
 463			r.stackPopN(2)
 464			r.stackPush2(r.trackPeek(), r.stackPeekN(1)-1) // Recall old mark, count
 465			break                                          // Backtrack
 466
 467		case syntax.Setjump:
 468			r.stackPush2(r.trackpos(), r.crawlpos())
 469			r.trackPush()
 470			r.advance(0)
 471			continue
 472
 473		case syntax.Setjump | syntax.Back:
 474			r.stackPopN(2)
 475			break
 476
 477		case syntax.Backjump:
 478			// r.stackPush:
 479			//  0: Saved trackpos
 480			//  1: r.crawlpos
 481			r.stackPopN(2)
 482			r.trackto(r.stackPeek())
 483
 484			for r.crawlpos() != r.stackPeekN(1) {
 485				r.uncapture()
 486			}
 487
 488			break
 489
 490		case syntax.Forejump:
 491			// r.stackPush:
 492			//  0: Saved trackpos
 493			//  1: r.crawlpos
 494			r.stackPopN(2)
 495			r.trackto(r.stackPeek())
 496			r.trackPush1(r.stackPeekN(1))
 497			r.advance(0)
 498			continue
 499
 500		case syntax.Forejump | syntax.Back:
 501			// r.trackPush:
 502			//  0: r.crawlpos
 503			r.trackPop()
 504
 505			for r.crawlpos() != r.trackPeek() {
 506				r.uncapture()
 507			}
 508
 509			break
 510
 511		case syntax.Bol:
 512			if r.leftchars() > 0 && r.charAt(r.textPos()-1) != '\n' {
 513				break
 514			}
 515			r.advance(0)
 516			continue
 517
 518		case syntax.Eol:
 519			if r.rightchars() > 0 && r.charAt(r.textPos()) != '\n' {
 520				break
 521			}
 522			r.advance(0)
 523			continue
 524
 525		case syntax.Boundary:
 526			if !r.isBoundary(r.textPos(), 0, r.runtextend) {
 527				break
 528			}
 529			r.advance(0)
 530			continue
 531
 532		case syntax.Nonboundary:
 533			if r.isBoundary(r.textPos(), 0, r.runtextend) {
 534				break
 535			}
 536			r.advance(0)
 537			continue
 538
 539		case syntax.ECMABoundary:
 540			if !r.isECMABoundary(r.textPos(), 0, r.runtextend) {
 541				break
 542			}
 543			r.advance(0)
 544			continue
 545
 546		case syntax.NonECMABoundary:
 547			if r.isECMABoundary(r.textPos(), 0, r.runtextend) {
 548				break
 549			}
 550			r.advance(0)
 551			continue
 552
 553		case syntax.Beginning:
 554			if r.leftchars() > 0 {
 555				break
 556			}
 557			r.advance(0)
 558			continue
 559
 560		case syntax.Start:
 561			if r.textPos() != r.textstart() {
 562				break
 563			}
 564			r.advance(0)
 565			continue
 566
 567		case syntax.EndZ:
 568			rchars := r.rightchars()
 569			if rchars > 1 {
 570				break
 571			}
 572			// RE2 and EcmaScript define $ as "asserts position at the end of the string"
 573			// PCRE/.NET adds "or before the line terminator right at the end of the string (if any)"
 574			if (r.re.options & (RE2 | ECMAScript)) != 0 {
 575				// RE2/Ecmascript mode
 576				if rchars > 0 {
 577					break
 578				}
 579			} else if rchars == 1 && r.charAt(r.textPos()) != '\n' {
 580				// "regular" mode
 581				break
 582			}
 583
 584			r.advance(0)
 585			continue
 586
 587		case syntax.End:
 588			if r.rightchars() > 0 {
 589				break
 590			}
 591			r.advance(0)
 592			continue
 593
 594		case syntax.One:
 595			if r.forwardchars() < 1 || r.forwardcharnext() != rune(r.operand(0)) {
 596				break
 597			}
 598
 599			r.advance(1)
 600			continue
 601
 602		case syntax.Notone:
 603			if r.forwardchars() < 1 || r.forwardcharnext() == rune(r.operand(0)) {
 604				break
 605			}
 606
 607			r.advance(1)
 608			continue
 609
 610		case syntax.Set:
 611
 612			if r.forwardchars() < 1 || !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
 613				break
 614			}
 615
 616			r.advance(1)
 617			continue
 618
 619		case syntax.Multi:
 620			if !r.runematch(r.code.Strings[r.operand(0)]) {
 621				break
 622			}
 623
 624			r.advance(1)
 625			continue
 626
 627		case syntax.Ref:
 628
 629			capnum := r.operand(0)
 630
 631			if r.runmatch.isMatched(capnum) {
 632				if !r.refmatch(r.runmatch.matchIndex(capnum), r.runmatch.matchLength(capnum)) {
 633					break
 634				}
 635			} else {
 636				if (r.re.options & ECMAScript) == 0 {
 637					break
 638				}
 639			}
 640
 641			r.advance(1)
 642			continue
 643
 644		case syntax.Onerep:
 645
 646			c := r.operand(1)
 647
 648			if r.forwardchars() < c {
 649				break
 650			}
 651
 652			ch := rune(r.operand(0))
 653
 654			for c > 0 {
 655				if r.forwardcharnext() != ch {
 656					goto BreakBackward
 657				}
 658				c--
 659			}
 660
 661			r.advance(2)
 662			continue
 663
 664		case syntax.Notonerep:
 665
 666			c := r.operand(1)
 667
 668			if r.forwardchars() < c {
 669				break
 670			}
 671			ch := rune(r.operand(0))
 672
 673			for c > 0 {
 674				if r.forwardcharnext() == ch {
 675					goto BreakBackward
 676				}
 677				c--
 678			}
 679
 680			r.advance(2)
 681			continue
 682
 683		case syntax.Setrep:
 684
 685			c := r.operand(1)
 686
 687			if r.forwardchars() < c {
 688				break
 689			}
 690
 691			set := r.code.Sets[r.operand(0)]
 692
 693			for c > 0 {
 694				if !set.CharIn(r.forwardcharnext()) {
 695					goto BreakBackward
 696				}
 697				c--
 698			}
 699
 700			r.advance(2)
 701			continue
 702
 703		case syntax.Oneloop:
 704
 705			c := r.operand(1)
 706
 707			if c > r.forwardchars() {
 708				c = r.forwardchars()
 709			}
 710
 711			ch := rune(r.operand(0))
 712			i := c
 713
 714			for ; i > 0; i-- {
 715				if r.forwardcharnext() != ch {
 716					r.backwardnext()
 717					break
 718				}
 719			}
 720
 721			if c > i {
 722				r.trackPush2(c-i-1, r.textPos()-r.bump())
 723			}
 724
 725			r.advance(2)
 726			continue
 727
 728		case syntax.Notoneloop:
 729
 730			c := r.operand(1)
 731
 732			if c > r.forwardchars() {
 733				c = r.forwardchars()
 734			}
 735
 736			ch := rune(r.operand(0))
 737			i := c
 738
 739			for ; i > 0; i-- {
 740				if r.forwardcharnext() == ch {
 741					r.backwardnext()
 742					break
 743				}
 744			}
 745
 746			if c > i {
 747				r.trackPush2(c-i-1, r.textPos()-r.bump())
 748			}
 749
 750			r.advance(2)
 751			continue
 752
 753		case syntax.Setloop:
 754
 755			c := r.operand(1)
 756
 757			if c > r.forwardchars() {
 758				c = r.forwardchars()
 759			}
 760
 761			set := r.code.Sets[r.operand(0)]
 762			i := c
 763
 764			for ; i > 0; i-- {
 765				if !set.CharIn(r.forwardcharnext()) {
 766					r.backwardnext()
 767					break
 768				}
 769			}
 770
 771			if c > i {
 772				r.trackPush2(c-i-1, r.textPos()-r.bump())
 773			}
 774
 775			r.advance(2)
 776			continue
 777
 778		case syntax.Oneloop | syntax.Back, syntax.Notoneloop | syntax.Back:
 779
 780			r.trackPopN(2)
 781			i := r.trackPeek()
 782			pos := r.trackPeekN(1)
 783
 784			r.textto(pos)
 785
 786			if i > 0 {
 787				r.trackPush2(i-1, pos-r.bump())
 788			}
 789
 790			r.advance(2)
 791			continue
 792
 793		case syntax.Setloop | syntax.Back:
 794
 795			r.trackPopN(2)
 796			i := r.trackPeek()
 797			pos := r.trackPeekN(1)
 798
 799			r.textto(pos)
 800
 801			if i > 0 {
 802				r.trackPush2(i-1, pos-r.bump())
 803			}
 804
 805			r.advance(2)
 806			continue
 807
 808		case syntax.Onelazy, syntax.Notonelazy:
 809
 810			c := r.operand(1)
 811
 812			if c > r.forwardchars() {
 813				c = r.forwardchars()
 814			}
 815
 816			if c > 0 {
 817				r.trackPush2(c-1, r.textPos())
 818			}
 819
 820			r.advance(2)
 821			continue
 822
 823		case syntax.Setlazy:
 824
 825			c := r.operand(1)
 826
 827			if c > r.forwardchars() {
 828				c = r.forwardchars()
 829			}
 830
 831			if c > 0 {
 832				r.trackPush2(c-1, r.textPos())
 833			}
 834
 835			r.advance(2)
 836			continue
 837
 838		case syntax.Onelazy | syntax.Back:
 839
 840			r.trackPopN(2)
 841			pos := r.trackPeekN(1)
 842			r.textto(pos)
 843
 844			if r.forwardcharnext() != rune(r.operand(0)) {
 845				break
 846			}
 847
 848			i := r.trackPeek()
 849
 850			if i > 0 {
 851				r.trackPush2(i-1, pos+r.bump())
 852			}
 853
 854			r.advance(2)
 855			continue
 856
 857		case syntax.Notonelazy | syntax.Back:
 858
 859			r.trackPopN(2)
 860			pos := r.trackPeekN(1)
 861			r.textto(pos)
 862
 863			if r.forwardcharnext() == rune(r.operand(0)) {
 864				break
 865			}
 866
 867			i := r.trackPeek()
 868
 869			if i > 0 {
 870				r.trackPush2(i-1, pos+r.bump())
 871			}
 872
 873			r.advance(2)
 874			continue
 875
 876		case syntax.Setlazy | syntax.Back:
 877
 878			r.trackPopN(2)
 879			pos := r.trackPeekN(1)
 880			r.textto(pos)
 881
 882			if !r.code.Sets[r.operand(0)].CharIn(r.forwardcharnext()) {
 883				break
 884			}
 885
 886			i := r.trackPeek()
 887
 888			if i > 0 {
 889				r.trackPush2(i-1, pos+r.bump())
 890			}
 891
 892			r.advance(2)
 893			continue
 894
 895		default:
 896			return errors.New("unknown state in regex runner")
 897		}
 898
 899	BreakBackward:
 900		;
 901
 902		// "break Backward" comes here:
 903		r.backtrack()
 904	}
 905}
 906
 907// increase the size of stack and track storage
 908func (r *runner) ensureStorage() {
 909	if r.runstackpos < r.runtrackcount*4 {
 910		doubleIntSlice(&r.runstack, &r.runstackpos)
 911	}
 912	if r.runtrackpos < r.runtrackcount*4 {
 913		doubleIntSlice(&r.runtrack, &r.runtrackpos)
 914	}
 915}
 916
 917func doubleIntSlice(s *[]int, pos *int) {
 918	oldLen := len(*s)
 919	newS := make([]int, oldLen*2)
 920
 921	copy(newS[oldLen:], *s)
 922	*pos += oldLen
 923	*s = newS
 924}
 925
 926// Save a number on the longjump unrolling stack
 927func (r *runner) crawl(i int) {
 928	if r.runcrawlpos == 0 {
 929		doubleIntSlice(&r.runcrawl, &r.runcrawlpos)
 930	}
 931	r.runcrawlpos--
 932	r.runcrawl[r.runcrawlpos] = i
 933}
 934
 935// Remove a number from the longjump unrolling stack
 936func (r *runner) popcrawl() int {
 937	val := r.runcrawl[r.runcrawlpos]
 938	r.runcrawlpos++
 939	return val
 940}
 941
 942// Get the height of the stack
 943func (r *runner) crawlpos() int {
 944	return len(r.runcrawl) - r.runcrawlpos
 945}
 946
 947func (r *runner) advance(i int) {
 948	r.codepos += (i + 1)
 949	r.setOperator(r.code.Codes[r.codepos])
 950}
 951
 952func (r *runner) goTo(newpos int) {
 953	// when branching backward or in place, ensure storage
 954	if newpos <= r.codepos {
 955		r.ensureStorage()
 956	}
 957
 958	r.setOperator(r.code.Codes[newpos])
 959	r.codepos = newpos
 960}
 961
 962func (r *runner) textto(newpos int) {
 963	r.runtextpos = newpos
 964}
 965
 966func (r *runner) trackto(newpos int) {
 967	r.runtrackpos = len(r.runtrack) - newpos
 968}
 969
 970func (r *runner) textstart() int {
 971	return r.runtextstart
 972}
 973
 974func (r *runner) textPos() int {
 975	return r.runtextpos
 976}
 977
 978// push onto the backtracking stack
 979func (r *runner) trackpos() int {
 980	return len(r.runtrack) - r.runtrackpos
 981}
 982
 983func (r *runner) trackPush() {
 984	r.runtrackpos--
 985	r.runtrack[r.runtrackpos] = r.codepos
 986}
 987
 988func (r *runner) trackPush1(I1 int) {
 989	r.runtrackpos--
 990	r.runtrack[r.runtrackpos] = I1
 991	r.runtrackpos--
 992	r.runtrack[r.runtrackpos] = r.codepos
 993}
 994
 995func (r *runner) trackPush2(I1, I2 int) {
 996	r.runtrackpos--
 997	r.runtrack[r.runtrackpos] = I1
 998	r.runtrackpos--
 999	r.runtrack[r.runtrackpos] = I2
1000	r.runtrackpos--
1001	r.runtrack[r.runtrackpos] = r.codepos
1002}
1003
1004func (r *runner) trackPush3(I1, I2, I3 int) {
1005	r.runtrackpos--
1006	r.runtrack[r.runtrackpos] = I1
1007	r.runtrackpos--
1008	r.runtrack[r.runtrackpos] = I2
1009	r.runtrackpos--
1010	r.runtrack[r.runtrackpos] = I3
1011	r.runtrackpos--
1012	r.runtrack[r.runtrackpos] = r.codepos
1013}
1014
1015func (r *runner) trackPushNeg1(I1 int) {
1016	r.runtrackpos--
1017	r.runtrack[r.runtrackpos] = I1
1018	r.runtrackpos--
1019	r.runtrack[r.runtrackpos] = -r.codepos
1020}
1021
1022func (r *runner) trackPushNeg2(I1, I2 int) {
1023	r.runtrackpos--
1024	r.runtrack[r.runtrackpos] = I1
1025	r.runtrackpos--
1026	r.runtrack[r.runtrackpos] = I2
1027	r.runtrackpos--
1028	r.runtrack[r.runtrackpos] = -r.codepos
1029}
1030
1031func (r *runner) backtrack() {
1032	newpos := r.runtrack[r.runtrackpos]
1033	r.runtrackpos++
1034
1035	if r.re.Debug() {
1036		if newpos < 0 {
1037			fmt.Printf("       Backtracking (back2) to code position %v\n", -newpos)
1038		} else {
1039			fmt.Printf("       Backtracking to code position %v\n", newpos)
1040		}
1041	}
1042
1043	if newpos < 0 {
1044		newpos = -newpos
1045		r.setOperator(r.code.Codes[newpos] | syntax.Back2)
1046	} else {
1047		r.setOperator(r.code.Codes[newpos] | syntax.Back)
1048	}
1049
1050	// When branching backward, ensure storage
1051	if newpos < r.codepos {
1052		r.ensureStorage()
1053	}
1054
1055	r.codepos = newpos
1056}
1057
1058func (r *runner) setOperator(op int) {
1059	r.caseInsensitive = (0 != (op & syntax.Ci))
1060	r.rightToLeft = (0 != (op & syntax.Rtl))
1061	r.operator = syntax.InstOp(op & ^(syntax.Rtl | syntax.Ci))
1062}
1063
1064func (r *runner) trackPop() {
1065	r.runtrackpos++
1066}
1067
1068// pop framesize items from the backtracking stack
1069func (r *runner) trackPopN(framesize int) {
1070	r.runtrackpos += framesize
1071}
1072
1073// Technically we are actually peeking at items already popped.  So if you want to
1074// get and pop the top item from the stack, you do
1075// r.trackPop();
1076// r.trackPeek();
1077func (r *runner) trackPeek() int {
1078	return r.runtrack[r.runtrackpos-1]
1079}
1080
1081// get the ith element down on the backtracking stack
1082func (r *runner) trackPeekN(i int) int {
1083	return r.runtrack[r.runtrackpos-i-1]
1084}
1085
1086// Push onto the grouping stack
1087func (r *runner) stackPush(I1 int) {
1088	r.runstackpos--
1089	r.runstack[r.runstackpos] = I1
1090}
1091
1092func (r *runner) stackPush2(I1, I2 int) {
1093	r.runstackpos--
1094	r.runstack[r.runstackpos] = I1
1095	r.runstackpos--
1096	r.runstack[r.runstackpos] = I2
1097}
1098
1099func (r *runner) stackPop() {
1100	r.runstackpos++
1101}
1102
1103// pop framesize items from the grouping stack
1104func (r *runner) stackPopN(framesize int) {
1105	r.runstackpos += framesize
1106}
1107
1108// Technically we are actually peeking at items already popped.  So if you want to
1109// get and pop the top item from the stack, you do
1110// r.stackPop();
1111// r.stackPeek();
1112func (r *runner) stackPeek() int {
1113	return r.runstack[r.runstackpos-1]
1114}
1115
1116// get the ith element down on the grouping stack
1117func (r *runner) stackPeekN(i int) int {
1118	return r.runstack[r.runstackpos-i-1]
1119}
1120
1121func (r *runner) operand(i int) int {
1122	return r.code.Codes[r.codepos+i+1]
1123}
1124
1125func (r *runner) leftchars() int {
1126	return r.runtextpos
1127}
1128
1129func (r *runner) rightchars() int {
1130	return r.runtextend - r.runtextpos
1131}
1132
1133func (r *runner) bump() int {
1134	if r.rightToLeft {
1135		return -1
1136	}
1137	return 1
1138}
1139
1140func (r *runner) forwardchars() int {
1141	if r.rightToLeft {
1142		return r.runtextpos
1143	}
1144	return r.runtextend - r.runtextpos
1145}
1146
1147func (r *runner) forwardcharnext() rune {
1148	var ch rune
1149	if r.rightToLeft {
1150		r.runtextpos--
1151		ch = r.runtext[r.runtextpos]
1152	} else {
1153		ch = r.runtext[r.runtextpos]
1154		r.runtextpos++
1155	}
1156
1157	if r.caseInsensitive {
1158		return unicode.ToLower(ch)
1159	}
1160	return ch
1161}
1162
1163func (r *runner) runematch(str []rune) bool {
1164	var pos int
1165
1166	c := len(str)
1167	if !r.rightToLeft {
1168		if r.runtextend-r.runtextpos < c {
1169			return false
1170		}
1171
1172		pos = r.runtextpos + c
1173	} else {
1174		if r.runtextpos-0 < c {
1175			return false
1176		}
1177
1178		pos = r.runtextpos
1179	}
1180
1181	if !r.caseInsensitive {
1182		for c != 0 {
1183			c--
1184			pos--
1185			if str[c] != r.runtext[pos] {
1186				return false
1187			}
1188		}
1189	} else {
1190		for c != 0 {
1191			c--
1192			pos--
1193			if str[c] != unicode.ToLower(r.runtext[pos]) {
1194				return false
1195			}
1196		}
1197	}
1198
1199	if !r.rightToLeft {
1200		pos += len(str)
1201	}
1202
1203	r.runtextpos = pos
1204
1205	return true
1206}
1207
1208func (r *runner) refmatch(index, len int) bool {
1209	var c, pos, cmpos int
1210
1211	if !r.rightToLeft {
1212		if r.runtextend-r.runtextpos < len {
1213			return false
1214		}
1215
1216		pos = r.runtextpos + len
1217	} else {
1218		if r.runtextpos-0 < len {
1219			return false
1220		}
1221
1222		pos = r.runtextpos
1223	}
1224	cmpos = index + len
1225
1226	c = len
1227
1228	if !r.caseInsensitive {
1229		for c != 0 {
1230			c--
1231			cmpos--
1232			pos--
1233			if r.runtext[cmpos] != r.runtext[pos] {
1234				return false
1235			}
1236
1237		}
1238	} else {
1239		for c != 0 {
1240			c--
1241			cmpos--
1242			pos--
1243
1244			if unicode.ToLower(r.runtext[cmpos]) != unicode.ToLower(r.runtext[pos]) {
1245				return false
1246			}
1247		}
1248	}
1249
1250	if !r.rightToLeft {
1251		pos += len
1252	}
1253
1254	r.runtextpos = pos
1255
1256	return true
1257}
1258
1259func (r *runner) backwardnext() {
1260	if r.rightToLeft {
1261		r.runtextpos++
1262	} else {
1263		r.runtextpos--
1264	}
1265}
1266
1267func (r *runner) charAt(j int) rune {
1268	return r.runtext[j]
1269}
1270
1271func (r *runner) findFirstChar() bool {
1272
1273	if 0 != (r.code.Anchors & (syntax.AnchorBeginning | syntax.AnchorStart | syntax.AnchorEndZ | syntax.AnchorEnd)) {
1274		if !r.code.RightToLeft {
1275			if (0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0) ||
1276				(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos > r.runtextstart) {
1277				r.runtextpos = r.runtextend
1278				return false
1279			}
1280			if 0 != (r.code.Anchors&syntax.AnchorEndZ) && r.runtextpos < r.runtextend-1 {
1281				r.runtextpos = r.runtextend - 1
1282			} else if 0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend {
1283				r.runtextpos = r.runtextend
1284			}
1285		} else {
1286			if (0 != (r.code.Anchors&syntax.AnchorEnd) && r.runtextpos < r.runtextend) ||
1287				(0 != (r.code.Anchors&syntax.AnchorEndZ) && (r.runtextpos < r.runtextend-1 ||
1288					(r.runtextpos == r.runtextend-1 && r.charAt(r.runtextpos) != '\n'))) ||
1289				(0 != (r.code.Anchors&syntax.AnchorStart) && r.runtextpos < r.runtextstart) {
1290				r.runtextpos = 0
1291				return false
1292			}
1293			if 0 != (r.code.Anchors&syntax.AnchorBeginning) && r.runtextpos > 0 {
1294				r.runtextpos = 0
1295			}
1296		}
1297
1298		if r.code.BmPrefix != nil {
1299			return r.code.BmPrefix.IsMatch(r.runtext, r.runtextpos, 0, r.runtextend)
1300		}
1301
1302		return true // found a valid start or end anchor
1303	} else if r.code.BmPrefix != nil {
1304		r.runtextpos = r.code.BmPrefix.Scan(r.runtext, r.runtextpos, 0, r.runtextend)
1305
1306		if r.runtextpos == -1 {
1307			if r.code.RightToLeft {
1308				r.runtextpos = 0
1309			} else {
1310				r.runtextpos = r.runtextend
1311			}
1312			return false
1313		}
1314
1315		return true
1316	} else if r.code.FcPrefix == nil {
1317		return true
1318	}
1319
1320	r.rightToLeft = r.code.RightToLeft
1321	r.caseInsensitive = r.code.FcPrefix.CaseInsensitive
1322
1323	set := r.code.FcPrefix.PrefixSet
1324	if set.IsSingleton() {
1325		ch := set.SingletonChar()
1326		for i := r.forwardchars(); i > 0; i-- {
1327			if ch == r.forwardcharnext() {
1328				r.backwardnext()
1329				return true
1330			}
1331		}
1332	} else {
1333		for i := r.forwardchars(); i > 0; i-- {
1334			n := r.forwardcharnext()
1335			//fmt.Printf("%v in %v: %v\n", string(n), set.String(), set.CharIn(n))
1336			if set.CharIn(n) {
1337				r.backwardnext()
1338				return true
1339			}
1340		}
1341	}
1342
1343	return false
1344}
1345
1346func (r *runner) initMatch() {
1347	// Use a hashtable'ed Match object if the capture numbers are sparse
1348
1349	if r.runmatch == nil {
1350		if r.re.caps != nil {
1351			r.runmatch = newMatchSparse(r.re, r.re.caps, r.re.capsize, r.runtext, r.runtextstart)
1352		} else {
1353			r.runmatch = newMatch(r.re, r.re.capsize, r.runtext, r.runtextstart)
1354		}
1355	} else {
1356		r.runmatch.reset(r.runtext, r.runtextstart)
1357	}
1358
1359	// note we test runcrawl, because it is the last one to be allocated
1360	// If there is an alloc failure in the middle of the three allocations,
1361	// we may still return to reuse this instance, and we want to behave
1362	// as if the allocations didn't occur. (we used to test _trackcount != 0)
1363
1364	if r.runcrawl != nil {
1365		r.runtrackpos = len(r.runtrack)
1366		r.runstackpos = len(r.runstack)
1367		r.runcrawlpos = len(r.runcrawl)
1368		return
1369	}
1370
1371	r.initTrackCount()
1372
1373	tracksize := r.runtrackcount * 8
1374	stacksize := r.runtrackcount * 8
1375
1376	if tracksize < 32 {
1377		tracksize = 32
1378	}
1379	if stacksize < 16 {
1380		stacksize = 16
1381	}
1382
1383	r.runtrack = make([]int, tracksize)
1384	r.runtrackpos = tracksize
1385
1386	r.runstack = make([]int, stacksize)
1387	r.runstackpos = stacksize
1388
1389	r.runcrawl = make([]int, 32)
1390	r.runcrawlpos = 32
1391}
1392
1393func (r *runner) tidyMatch(quick bool) *Match {
1394	if !quick {
1395		match := r.runmatch
1396
1397		r.runmatch = nil
1398
1399		match.tidy(r.runtextpos)
1400		return match
1401	} else {
1402		// send back our match -- it's not leaving the package, so it's safe to not clean it up
1403		// this reduces allocs for frequent calls to the "IsMatch" bool-only functions
1404		return r.runmatch
1405	}
1406}
1407
1408// capture captures a subexpression. Note that the
1409// capnum used here has already been mapped to a non-sparse
1410// index (by the code generator RegexWriter).
1411func (r *runner) capture(capnum, start, end int) {
1412	if end < start {
1413		T := end
1414		end = start
1415		start = T
1416	}
1417
1418	r.crawl(capnum)
1419	r.runmatch.addMatch(capnum, start, end-start)
1420}
1421
1422// transferCapture captures a subexpression. Note that the
1423// capnum used here has already been mapped to a non-sparse
1424// index (by the code generator RegexWriter).
1425func (r *runner) transferCapture(capnum, uncapnum, start, end int) {
1426	var start2, end2 int
1427
1428	// these are the two intervals that are cancelling each other
1429
1430	if end < start {
1431		T := end
1432		end = start
1433		start = T
1434	}
1435
1436	start2 = r.runmatch.matchIndex(uncapnum)
1437	end2 = start2 + r.runmatch.matchLength(uncapnum)
1438
1439	// The new capture gets the innermost defined interval
1440
1441	if start >= end2 {
1442		end = start
1443		start = end2
1444	} else if end <= start2 {
1445		start = start2
1446	} else {
1447		if end > end2 {
1448			end = end2
1449		}
1450		if start2 > start {
1451			start = start2
1452		}
1453	}
1454
1455	r.crawl(uncapnum)
1456	r.runmatch.balanceMatch(uncapnum)
1457
1458	if capnum != -1 {
1459		r.crawl(capnum)
1460		r.runmatch.addMatch(capnum, start, end-start)
1461	}
1462}
1463
1464// revert the last capture
1465func (r *runner) uncapture() {
1466	capnum := r.popcrawl()
1467	r.runmatch.removeMatch(capnum)
1468}
1469
1470//debug
1471
1472func (r *runner) dumpState() {
1473	back := ""
1474	if r.operator&syntax.Back != 0 {
1475		back = " Back"
1476	}
1477	if r.operator&syntax.Back2 != 0 {
1478		back += " Back2"
1479	}
1480	fmt.Printf("Text:  %v\nTrack: %v\nStack: %v\n       %s%s\n\n",
1481		r.textposDescription(),
1482		r.stackDescription(r.runtrack, r.runtrackpos),
1483		r.stackDescription(r.runstack, r.runstackpos),
1484		r.code.OpcodeDescription(r.codepos),
1485		back)
1486}
1487
1488func (r *runner) stackDescription(a []int, index int) string {
1489	buf := &bytes.Buffer{}
1490
1491	fmt.Fprintf(buf, "%v/%v", len(a)-index, len(a))
1492	if buf.Len() < 8 {
1493		buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1494	}
1495
1496	buf.WriteRune('(')
1497	for i := index; i < len(a); i++ {
1498		if i > index {
1499			buf.WriteRune(' ')
1500		}
1501
1502		buf.WriteString(strconv.Itoa(a[i]))
1503	}
1504
1505	buf.WriteRune(')')
1506
1507	return buf.String()
1508}
1509
1510func (r *runner) textposDescription() string {
1511	buf := &bytes.Buffer{}
1512
1513	buf.WriteString(strconv.Itoa(r.runtextpos))
1514
1515	if buf.Len() < 8 {
1516		buf.WriteString(strings.Repeat(" ", 8-buf.Len()))
1517	}
1518
1519	if r.runtextpos > 0 {
1520		buf.WriteString(syntax.CharDescription(r.runtext[r.runtextpos-1]))
1521	} else {
1522		buf.WriteRune('^')
1523	}
1524
1525	buf.WriteRune('>')
1526
1527	for i := r.runtextpos; i < r.runtextend; i++ {
1528		buf.WriteString(syntax.CharDescription(r.runtext[i]))
1529	}
1530	if buf.Len() >= 64 {
1531		buf.Truncate(61)
1532		buf.WriteString("...")
1533	} else {
1534		buf.WriteRune('$')
1535	}
1536
1537	return buf.String()
1538}
1539
1540// decide whether the pos
1541// at the specified index is a boundary or not. It's just not worth
1542// emitting inline code for this logic.
1543func (r *runner) isBoundary(index, startpos, endpos int) bool {
1544	return (index > startpos && syntax.IsWordChar(r.runtext[index-1])) !=
1545		(index < endpos && syntax.IsWordChar(r.runtext[index]))
1546}
1547
1548func (r *runner) isECMABoundary(index, startpos, endpos int) bool {
1549	return (index > startpos && syntax.IsECMAWordChar(r.runtext[index-1])) !=
1550		(index < endpos && syntax.IsECMAWordChar(r.runtext[index]))
1551}
1552
1553func (r *runner) startTimeoutWatch() {
1554	if r.ignoreTimeout {
1555		return
1556	}
1557	r.deadline = makeDeadline(r.timeout)
1558}
1559
1560func (r *runner) checkTimeout() error {
1561	if r.ignoreTimeout || !r.deadline.reached() {
1562		return nil
1563	}
1564
1565	if r.re.Debug() {
1566		//Debug.WriteLine("")
1567		//Debug.WriteLine("RegEx match timeout occurred!")
1568		//Debug.WriteLine("Specified timeout:       " + TimeSpan.FromMilliseconds(_timeout).ToString())
1569		//Debug.WriteLine("Timeout check frequency: " + TimeoutCheckFrequency)
1570		//Debug.WriteLine("Search pattern:          " + _runregex._pattern)
1571		//Debug.WriteLine("Input:                   " + r.runtext)
1572		//Debug.WriteLine("About to throw RegexMatchTimeoutException.")
1573	}
1574
1575	return fmt.Errorf("match timeout after %v on input `%v`", r.timeout, string(r.runtext))
1576}
1577
1578func (r *runner) initTrackCount() {
1579	r.runtrackcount = r.code.TrackCount
1580}
1581
1582// getRunner returns a run to use for matching re.
1583// It uses the re's runner cache if possible, to avoid
1584// unnecessary allocation.
1585func (re *Regexp) getRunner() *runner {
1586	re.muRun.Lock()
1587	if n := len(re.runner); n > 0 {
1588		z := re.runner[n-1]
1589		re.runner = re.runner[:n-1]
1590		re.muRun.Unlock()
1591		return z
1592	}
1593	re.muRun.Unlock()
1594	z := &runner{
1595		re:   re,
1596		code: re.code,
1597	}
1598	return z
1599}
1600
1601// putRunner returns a runner to the re's cache.
1602// There is no attempt to limit the size of the cache, so it will
1603// grow to the maximum number of simultaneous matches
1604// run using re.  (The cache empties when re gets garbage collected.)
1605func (re *Regexp) putRunner(r *runner) {
1606	re.muRun.Lock()
1607	r.runtext = nil
1608	if r.runmatch != nil {
1609		r.runmatch.text = nil
1610	}
1611	re.runner = append(re.runner, r)
1612	re.muRun.Unlock()
1613}