match.go

  1package regexp2
  2
  3import (
  4	"bytes"
  5	"fmt"
  6)
  7
  8// Match is a single regex result match that contains groups and repeated captures
  9//
 10//		-Groups
 11//	   -Capture
 12type Match struct {
 13	Group //embeded group 0
 14
 15	regex       *Regexp
 16	otherGroups []Group
 17
 18	// input to the match
 19	textpos   int
 20	textstart int
 21
 22	capcount   int
 23	caps       []int
 24	sparseCaps map[int]int
 25
 26	// output from the match
 27	matches    [][]int
 28	matchcount []int
 29
 30	// whether we've done any balancing with this match.  If we
 31	// have done balancing, we'll need to do extra work in Tidy().
 32	balancing bool
 33}
 34
 35// Group is an explicit or implit (group 0) matched group within the pattern
 36type Group struct {
 37	Capture // the last capture of this group is embeded for ease of use
 38
 39	Name     string    // group name
 40	Captures []Capture // captures of this group
 41}
 42
 43// Capture is a single capture of text within the larger original string
 44type Capture struct {
 45	// the original string
 46	text []rune
 47	// Index is the position in the underlying rune slice where the first character of
 48	// captured substring was found. Even if you pass in a string this will be in Runes.
 49	Index int
 50	// Length is the number of runes in the captured substring.
 51	Length int
 52}
 53
 54// String returns the captured text as a String
 55func (c *Capture) String() string {
 56	return string(c.text[c.Index : c.Index+c.Length])
 57}
 58
 59// Runes returns the captured text as a rune slice
 60func (c *Capture) Runes() []rune {
 61	return c.text[c.Index : c.Index+c.Length]
 62}
 63
 64func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
 65	m := Match{
 66		regex:      regex,
 67		matchcount: make([]int, capcount),
 68		matches:    make([][]int, capcount),
 69		textstart:  startpos,
 70		balancing:  false,
 71	}
 72	m.Name = "0"
 73	m.text = text
 74	m.matches[0] = make([]int, 2)
 75	return &m
 76}
 77
 78func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
 79	m := newMatch(regex, capcount, text, startpos)
 80	m.sparseCaps = caps
 81	return m
 82}
 83
 84func (m *Match) reset(text []rune, textstart int) {
 85	m.text = text
 86	m.textstart = textstart
 87	for i := 0; i < len(m.matchcount); i++ {
 88		m.matchcount[i] = 0
 89	}
 90	m.balancing = false
 91}
 92
 93func (m *Match) tidy(textpos int) {
 94
 95	interval := m.matches[0]
 96	m.Index = interval[0]
 97	m.Length = interval[1]
 98	m.textpos = textpos
 99	m.capcount = m.matchcount[0]
100	//copy our root capture to the list
101	m.Group.Captures = []Capture{m.Group.Capture}
102
103	if m.balancing {
104		// The idea here is that we want to compact all of our unbalanced captures.  To do that we
105		// use j basically as a count of how many unbalanced captures we have at any given time
106		// (really j is an index, but j/2 is the count).  First we skip past all of the real captures
107		// until we find a balance captures.  Then we check each subsequent entry.  If it's a balance
108		// capture (it's negative), we decrement j.  If it's a real capture, we increment j and copy
109		// it down to the last free position.
110		for cap := 0; cap < len(m.matchcount); cap++ {
111			limit := m.matchcount[cap] * 2
112			matcharray := m.matches[cap]
113
114			var i, j int
115
116			for i = 0; i < limit; i++ {
117				if matcharray[i] < 0 {
118					break
119				}
120			}
121
122			for j = i; i < limit; i++ {
123				if matcharray[i] < 0 {
124					// skip negative values
125					j--
126				} else {
127					// but if we find something positive (an actual capture), copy it back to the last
128					// unbalanced position.
129					if i != j {
130						matcharray[j] = matcharray[i]
131					}
132					j++
133				}
134			}
135
136			m.matchcount[cap] = j / 2
137		}
138
139		m.balancing = false
140	}
141}
142
143// isMatched tells if a group was matched by capnum
144func (m *Match) isMatched(cap int) bool {
145	return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
146}
147
148// matchIndex returns the index of the last specified matched group by capnum
149func (m *Match) matchIndex(cap int) int {
150	i := m.matches[cap][m.matchcount[cap]*2-2]
151	if i >= 0 {
152		return i
153	}
154
155	return m.matches[cap][-3-i]
156}
157
158// matchLength returns the length of the last specified matched group by capnum
159func (m *Match) matchLength(cap int) int {
160	i := m.matches[cap][m.matchcount[cap]*2-1]
161	if i >= 0 {
162		return i
163	}
164
165	return m.matches[cap][-3-i]
166}
167
168// Nonpublic builder: add a capture to the group specified by "c"
169func (m *Match) addMatch(c, start, l int) {
170
171	if m.matches[c] == nil {
172		m.matches[c] = make([]int, 2)
173	}
174
175	capcount := m.matchcount[c]
176
177	if capcount*2+2 > len(m.matches[c]) {
178		oldmatches := m.matches[c]
179		newmatches := make([]int, capcount*8)
180		copy(newmatches, oldmatches[:capcount*2])
181		m.matches[c] = newmatches
182	}
183
184	m.matches[c][capcount*2] = start
185	m.matches[c][capcount*2+1] = l
186	m.matchcount[c] = capcount + 1
187	//log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
188}
189
190// Nonpublic builder: Add a capture to balance the specified group.  This is used by the
191//
192//	balanced match construct. (?<foo-foo2>...)
193//
194// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
195// However, since we have backtracking, we need to keep track of everything.
196func (m *Match) balanceMatch(c int) {
197	m.balancing = true
198
199	// we'll look at the last capture first
200	capcount := m.matchcount[c]
201	target := capcount*2 - 2
202
203	// first see if it is negative, and therefore is a reference to the next available
204	// capture group for balancing.  If it is, we'll reset target to point to that capture.
205	if m.matches[c][target] < 0 {
206		target = -3 - m.matches[c][target]
207	}
208
209	// move back to the previous capture
210	target -= 2
211
212	// if the previous capture is a reference, just copy that reference to the end.  Otherwise, point to it.
213	if target >= 0 && m.matches[c][target] < 0 {
214		m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
215	} else {
216		m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
217	}
218}
219
220// Nonpublic builder: removes a group match by capnum
221func (m *Match) removeMatch(c int) {
222	m.matchcount[c]--
223}
224
225// GroupCount returns the number of groups this match has matched
226func (m *Match) GroupCount() int {
227	return len(m.matchcount)
228}
229
230// GroupByName returns a group based on the name of the group, or nil if the group name does not exist
231func (m *Match) GroupByName(name string) *Group {
232	num := m.regex.GroupNumberFromName(name)
233	if num < 0 {
234		return nil
235	}
236	return m.GroupByNumber(num)
237}
238
239// GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
240func (m *Match) GroupByNumber(num int) *Group {
241	// check our sparse map
242	if m.sparseCaps != nil {
243		if newNum, ok := m.sparseCaps[num]; ok {
244			num = newNum
245		}
246	}
247	if num >= len(m.matchcount) || num < 0 {
248		return nil
249	}
250
251	if num == 0 {
252		return &m.Group
253	}
254
255	m.populateOtherGroups()
256
257	return &m.otherGroups[num-1]
258}
259
260// Groups returns all the capture groups, starting with group 0 (the full match)
261func (m *Match) Groups() []Group {
262	m.populateOtherGroups()
263	g := make([]Group, len(m.otherGroups)+1)
264	g[0] = m.Group
265	copy(g[1:], m.otherGroups)
266	return g
267}
268
269func (m *Match) populateOtherGroups() {
270	// Construct all the Group objects first time called
271	if m.otherGroups == nil {
272		m.otherGroups = make([]Group, len(m.matchcount)-1)
273		for i := 0; i < len(m.otherGroups); i++ {
274			m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
275		}
276	}
277}
278
279func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
280	c := m.matchcount[groupnum]
281	if c == 0 {
282		return
283	}
284
285	matches := m.matches[groupnum]
286
287	index := matches[(c-1)*2]
288	last := index + matches[(c*2)-1]
289
290	for ; index < last; index++ {
291		buf.WriteRune(m.text[index])
292	}
293}
294
295func newGroup(name string, text []rune, caps []int, capcount int) Group {
296	g := Group{}
297	g.text = text
298	if capcount > 0 {
299		g.Index = caps[(capcount-1)*2]
300		g.Length = caps[(capcount*2)-1]
301	}
302	g.Name = name
303	g.Captures = make([]Capture, capcount)
304	for i := 0; i < capcount; i++ {
305		g.Captures[i] = Capture{
306			text:   text,
307			Index:  caps[i*2],
308			Length: caps[i*2+1],
309		}
310	}
311	//log.Printf("newGroup! capcount %v, %+v", capcount, g)
312
313	return g
314}
315
316func (m *Match) dump() string {
317	buf := &bytes.Buffer{}
318	buf.WriteRune('\n')
319	if len(m.sparseCaps) > 0 {
320		for k, v := range m.sparseCaps {
321			fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
322		}
323	}
324
325	for i, g := range m.Groups() {
326		fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
327
328		for _, c := range g.Captures {
329			fmt.Fprintf(buf, "  (%v, %v) %v\n", c.Index, c.Length, c.String())
330		}
331	}
332	/*
333		for i := 0; i < len(m.matchcount); i++ {
334			fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
335
336			for j := 0; j < m.matchcount[i]; j++ {
337				text := ""
338
339				if m.matches[i][j*2] >= 0 {
340					start := m.matches[i][j*2]
341					text = m.text[start : start+m.matches[i][j*2+1]]
342				}
343
344				fmt.Fprintf(buf, "  (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
345			}
346		}
347	*/
348	return buf.String()
349}