1package regexp2
2
3import (
4 "bytes"
5 "fmt"
6)
7
// Match is a single regex result match. It embeds Group as group 0 (the whole
// match) and gives access to the other groups and their repeated captures via
// Groups, GroupByName and GroupByNumber.
type Match struct {
	Group // embedded group 0

	// regex is the Regexp handed to newMatch; it is used for group
	// name/number lookups.
	regex *Regexp
	// otherGroups holds every group except group 0. It is built on first
	// use by populateOtherGroups.
	otherGroups []Group

	// input to the match
	// textpos is the position handed to tidy.
	textpos int
	// textstart is the start position handed to newMatch or reset.
	textstart int

	// capcount is set by tidy to the capture count of group 0.
	capcount int
	// NOTE(review): caps is not read or written anywhere in this file's
	// visible code; confirm its purpose against the rest of the package.
	caps []int
	// sparseCaps, when non-nil, is consulted by GroupByNumber to translate a
	// group number into a slot index of matches/matchcount.
	sparseCaps map[int]int

	// output from the match
	// matches holds, per slot, a flat list of (index, length) pairs.
	// Negative entries are balancing references written by balanceMatch.
	matches [][]int
	// matchcount holds, per slot, how many pairs in matches are in use.
	matchcount []int

	// whether we've done any balancing with this match. If we
	// have done balancing, we'll need to do extra work in tidy().
	balancing bool
}
34
// Group is an explicit or implicit (group 0) matched group within the pattern.
// A group built by newGroup with no captures keeps a zero Index and Length and
// has an empty Captures slice.
type Group struct {
	Capture // the last capture of this group is embedded for ease of use

	Name     string    // group name
	Captures []Capture // captures of this group
}
42
// Capture is a single capture of text within the larger original string.
// Index and Length are measured in runes, not bytes.
type Capture struct {
	// the original text as a rune slice; String and Runes slice into it
	text []rune
	// Index is the position in the underlying rune slice where the first character of
	// captured substring was found. Even if the input was supplied as a string, this
	// offset counts runes.
	Index int
	// Length is the number of runes in the captured substring.
	Length int
}
53
54// String returns the captured text as a String
55func (c *Capture) String() string {
56 return string(c.text[c.Index : c.Index+c.Length])
57}
58
59// Runes returns the captured text as a rune slice
60func (c *Capture) Runes() []rune {
61 return c.text[c.Index : c.Index+c.Length]
62}
63
64func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
65 m := Match{
66 regex: regex,
67 matchcount: make([]int, capcount),
68 matches: make([][]int, capcount),
69 textstart: startpos,
70 balancing: false,
71 }
72 m.Name = "0"
73 m.text = text
74 m.matches[0] = make([]int, 2)
75 return &m
76}
77
78func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
79 m := newMatch(regex, capcount, text, startpos)
80 m.sparseCaps = caps
81 return m
82}
83
84func (m *Match) reset(text []rune, textstart int) {
85 m.text = text
86 m.textstart = textstart
87 for i := 0; i < len(m.matchcount); i++ {
88 m.matchcount[i] = 0
89 }
90 m.balancing = false
91}
92
93func (m *Match) tidy(textpos int) {
94
95 interval := m.matches[0]
96 m.Index = interval[0]
97 m.Length = interval[1]
98 m.textpos = textpos
99 m.capcount = m.matchcount[0]
100 //copy our root capture to the list
101 m.Group.Captures = []Capture{m.Group.Capture}
102
103 if m.balancing {
104 // The idea here is that we want to compact all of our unbalanced captures. To do that we
105 // use j basically as a count of how many unbalanced captures we have at any given time
106 // (really j is an index, but j/2 is the count). First we skip past all of the real captures
107 // until we find a balance captures. Then we check each subsequent entry. If it's a balance
108 // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy
109 // it down to the last free position.
110 for cap := 0; cap < len(m.matchcount); cap++ {
111 limit := m.matchcount[cap] * 2
112 matcharray := m.matches[cap]
113
114 var i, j int
115
116 for i = 0; i < limit; i++ {
117 if matcharray[i] < 0 {
118 break
119 }
120 }
121
122 for j = i; i < limit; i++ {
123 if matcharray[i] < 0 {
124 // skip negative values
125 j--
126 } else {
127 // but if we find something positive (an actual capture), copy it back to the last
128 // unbalanced position.
129 if i != j {
130 matcharray[j] = matcharray[i]
131 }
132 j++
133 }
134 }
135
136 m.matchcount[cap] = j / 2
137 }
138
139 m.balancing = false
140 }
141}
142
143// isMatched tells if a group was matched by capnum
144func (m *Match) isMatched(cap int) bool {
145 return cap < len(m.matchcount) && m.matchcount[cap] > 0 && m.matches[cap][m.matchcount[cap]*2-1] != (-3+1)
146}
147
148// matchIndex returns the index of the last specified matched group by capnum
149func (m *Match) matchIndex(cap int) int {
150 i := m.matches[cap][m.matchcount[cap]*2-2]
151 if i >= 0 {
152 return i
153 }
154
155 return m.matches[cap][-3-i]
156}
157
158// matchLength returns the length of the last specified matched group by capnum
159func (m *Match) matchLength(cap int) int {
160 i := m.matches[cap][m.matchcount[cap]*2-1]
161 if i >= 0 {
162 return i
163 }
164
165 return m.matches[cap][-3-i]
166}
167
168// Nonpublic builder: add a capture to the group specified by "c"
169func (m *Match) addMatch(c, start, l int) {
170
171 if m.matches[c] == nil {
172 m.matches[c] = make([]int, 2)
173 }
174
175 capcount := m.matchcount[c]
176
177 if capcount*2+2 > len(m.matches[c]) {
178 oldmatches := m.matches[c]
179 newmatches := make([]int, capcount*8)
180 copy(newmatches, oldmatches[:capcount*2])
181 m.matches[c] = newmatches
182 }
183
184 m.matches[c][capcount*2] = start
185 m.matches[c][capcount*2+1] = l
186 m.matchcount[c] = capcount + 1
187 //log.Printf("addMatch: c=%v, i=%v, l=%v ... matches: %v", c, start, l, m.matches)
188}
189
190// Nonpublic builder: Add a capture to balance the specified group. This is used by the
191//
192// balanced match construct. (?<foo-foo2>...)
193//
194// If there were no such thing as backtracking, this would be as simple as calling RemoveMatch(c).
195// However, since we have backtracking, we need to keep track of everything.
196func (m *Match) balanceMatch(c int) {
197 m.balancing = true
198
199 // we'll look at the last capture first
200 capcount := m.matchcount[c]
201 target := capcount*2 - 2
202
203 // first see if it is negative, and therefore is a reference to the next available
204 // capture group for balancing. If it is, we'll reset target to point to that capture.
205 if m.matches[c][target] < 0 {
206 target = -3 - m.matches[c][target]
207 }
208
209 // move back to the previous capture
210 target -= 2
211
212 // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it.
213 if target >= 0 && m.matches[c][target] < 0 {
214 m.addMatch(c, m.matches[c][target], m.matches[c][target+1])
215 } else {
216 m.addMatch(c, -3-target, -4-target /* == -3 - (target + 1) */)
217 }
218}
219
220// Nonpublic builder: removes a group match by capnum
221func (m *Match) removeMatch(c int) {
222 m.matchcount[c]--
223}
224
225// GroupCount returns the number of groups this match has matched
226func (m *Match) GroupCount() int {
227 return len(m.matchcount)
228}
229
230// GroupByName returns a group based on the name of the group, or nil if the group name does not exist
231func (m *Match) GroupByName(name string) *Group {
232 num := m.regex.GroupNumberFromName(name)
233 if num < 0 {
234 return nil
235 }
236 return m.GroupByNumber(num)
237}
238
239// GroupByNumber returns a group based on the number of the group, or nil if the group number does not exist
240func (m *Match) GroupByNumber(num int) *Group {
241 // check our sparse map
242 if m.sparseCaps != nil {
243 if newNum, ok := m.sparseCaps[num]; ok {
244 num = newNum
245 }
246 }
247 if num >= len(m.matchcount) || num < 0 {
248 return nil
249 }
250
251 if num == 0 {
252 return &m.Group
253 }
254
255 m.populateOtherGroups()
256
257 return &m.otherGroups[num-1]
258}
259
260// Groups returns all the capture groups, starting with group 0 (the full match)
261func (m *Match) Groups() []Group {
262 m.populateOtherGroups()
263 g := make([]Group, len(m.otherGroups)+1)
264 g[0] = m.Group
265 copy(g[1:], m.otherGroups)
266 return g
267}
268
269func (m *Match) populateOtherGroups() {
270 // Construct all the Group objects first time called
271 if m.otherGroups == nil {
272 m.otherGroups = make([]Group, len(m.matchcount)-1)
273 for i := 0; i < len(m.otherGroups); i++ {
274 m.otherGroups[i] = newGroup(m.regex.GroupNameFromNumber(i+1), m.text, m.matches[i+1], m.matchcount[i+1])
275 }
276 }
277}
278
279func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
280 c := m.matchcount[groupnum]
281 if c == 0 {
282 return
283 }
284
285 matches := m.matches[groupnum]
286
287 index := matches[(c-1)*2]
288 last := index + matches[(c*2)-1]
289
290 for ; index < last; index++ {
291 buf.WriteRune(m.text[index])
292 }
293}
294
295func newGroup(name string, text []rune, caps []int, capcount int) Group {
296 g := Group{}
297 g.text = text
298 if capcount > 0 {
299 g.Index = caps[(capcount-1)*2]
300 g.Length = caps[(capcount*2)-1]
301 }
302 g.Name = name
303 g.Captures = make([]Capture, capcount)
304 for i := 0; i < capcount; i++ {
305 g.Captures[i] = Capture{
306 text: text,
307 Index: caps[i*2],
308 Length: caps[i*2+1],
309 }
310 }
311 //log.Printf("newGroup! capcount %v, %+v", capcount, g)
312
313 return g
314}
315
316func (m *Match) dump() string {
317 buf := &bytes.Buffer{}
318 buf.WriteRune('\n')
319 if len(m.sparseCaps) > 0 {
320 for k, v := range m.sparseCaps {
321 fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
322 }
323 }
324
325 for i, g := range m.Groups() {
326 fmt.Fprintf(buf, "Group %v (%v), %v caps:\n", i, g.Name, len(g.Captures))
327
328 for _, c := range g.Captures {
329 fmt.Fprintf(buf, " (%v, %v) %v\n", c.Index, c.Length, c.String())
330 }
331 }
332 /*
333 for i := 0; i < len(m.matchcount); i++ {
334 fmt.Fprintf(buf, "\nGroup %v (%v):\n", i, m.regex.GroupNameFromNumber(i))
335
336 for j := 0; j < m.matchcount[i]; j++ {
337 text := ""
338
339 if m.matches[i][j*2] >= 0 {
340 start := m.matches[i][j*2]
341 text = m.text[start : start+m.matches[i][j*2+1]]
342 }
343
344 fmt.Fprintf(buf, " (%v, %v) %v\n", m.matches[i][j*2], m.matches[i][j*2+1], text)
345 }
346 }
347 */
348 return buf.String()
349}