1package cascadia
2
3import (
4 "fmt"
5 "regexp"
6 "strings"
7
8 "golang.org/x/net/html"
9)
10
// Matcher is the interface for basic selector functionality.
type Matcher interface {
	// Match reports whether the selector matches n.
	Match(n *html.Node) bool
}
16
// Sel is the interface for all the functionality provided by selectors.
// It extends Matcher with specificity, serialization and pseudo-element
// accessors.
type Sel interface {
	// Matcher contributes Match, which reports whether a node is selected.
	Matcher

	// Specificity returns the specificity of the selector.
	Specificity() Specificity

	// String returns a CSS input compiling to this selector.
	String() string

	// PseudoElement returns a pseudo-element, or an empty string.
	PseudoElement() string
}
28
29// Parse parses a selector. Use `ParseWithPseudoElement`
30// if you need support for pseudo-elements.
31func Parse(sel string) (Sel, error) {
32 p := &parser{s: sel}
33 compiled, err := p.parseSelector()
34 if err != nil {
35 return nil, err
36 }
37
38 if p.i < len(sel) {
39 return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
40 }
41
42 return compiled, nil
43}
44
45// ParseWithPseudoElement parses a single selector,
46// with support for pseudo-element.
47func ParseWithPseudoElement(sel string) (Sel, error) {
48 p := &parser{s: sel, acceptPseudoElements: true}
49 compiled, err := p.parseSelector()
50 if err != nil {
51 return nil, err
52 }
53
54 if p.i < len(sel) {
55 return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
56 }
57
58 return compiled, nil
59}
60
61// ParseGroup parses a selector, or a group of selectors separated by commas.
62// Use `ParseGroupWithPseudoElements`
63// if you need support for pseudo-elements.
64func ParseGroup(sel string) (SelectorGroup, error) {
65 p := &parser{s: sel}
66 compiled, err := p.parseSelectorGroup()
67 if err != nil {
68 return nil, err
69 }
70
71 if p.i < len(sel) {
72 return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
73 }
74
75 return compiled, nil
76}
77
78// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas.
79// It supports pseudo-elements.
80func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) {
81 p := &parser{s: sel, acceptPseudoElements: true}
82 compiled, err := p.parseSelectorGroup()
83 if err != nil {
84 return nil, err
85 }
86
87 if p.i < len(sel) {
88 return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
89 }
90
91 return compiled, nil
92}
93
// A Selector is a function which reports whether a node matches or not.
//
// This type is maintained for compatibility; prefer the newer and more
// idiomatic interfaces Sel and Matcher.
type Selector func(*html.Node) bool
99
100// Compile parses a selector and returns, if successful, a Selector object
101// that can be used to match against html.Node objects.
102func Compile(sel string) (Selector, error) {
103 compiled, err := ParseGroup(sel)
104 if err != nil {
105 return nil, err
106 }
107
108 return Selector(compiled.Match), nil
109}
110
111// MustCompile is like Compile, but panics instead of returning an error.
112func MustCompile(sel string) Selector {
113 compiled, err := Compile(sel)
114 if err != nil {
115 panic(err)
116 }
117 return compiled
118}
119
120// MatchAll returns a slice of the nodes that match the selector,
121// from n and its children.
122func (s Selector) MatchAll(n *html.Node) []*html.Node {
123 return s.matchAllInto(n, nil)
124}
125
126func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
127 if s(n) {
128 storage = append(storage, n)
129 }
130
131 for child := n.FirstChild; child != nil; child = child.NextSibling {
132 storage = s.matchAllInto(child, storage)
133 }
134
135 return storage
136}
137
138func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node {
139 for child := n.FirstChild; child != nil; child = child.NextSibling {
140 if m.Match(child) {
141 storage = append(storage, child)
142 }
143 storage = queryInto(child, m, storage)
144 }
145
146 return storage
147}
148
149// QueryAll returns a slice of all the nodes that match m, from the descendants
150// of n.
151func QueryAll(n *html.Node, m Matcher) []*html.Node {
152 return queryInto(n, m, nil)
153}
154
155// Match returns true if the node matches the selector.
156func (s Selector) Match(n *html.Node) bool {
157 return s(n)
158}
159
160// MatchFirst returns the first node that matches s, from n and its children.
161func (s Selector) MatchFirst(n *html.Node) *html.Node {
162 if s.Match(n) {
163 return n
164 }
165
166 for c := n.FirstChild; c != nil; c = c.NextSibling {
167 m := s.MatchFirst(c)
168 if m != nil {
169 return m
170 }
171 }
172 return nil
173}
174
175// Query returns the first node that matches m, from the descendants of n.
176// If none matches, it returns nil.
177func Query(n *html.Node, m Matcher) *html.Node {
178 for c := n.FirstChild; c != nil; c = c.NextSibling {
179 if m.Match(c) {
180 return c
181 }
182 if matched := Query(c, m); matched != nil {
183 return matched
184 }
185 }
186
187 return nil
188}
189
190// Filter returns the nodes in nodes that match the selector.
191func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
192 for _, n := range nodes {
193 if s(n) {
194 result = append(result, n)
195 }
196 }
197 return result
198}
199
200// Filter returns the nodes that match m.
201func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) {
202 for _, n := range nodes {
203 if m.Match(n) {
204 result = append(result, n)
205 }
206 }
207 return result
208}
209
210type tagSelector struct {
211 tag string
212}
213
214// Matches elements with a given tag name.
215func (t tagSelector) Match(n *html.Node) bool {
216 return n.Type == html.ElementNode && n.Data == t.tag
217}
218
219func (c tagSelector) Specificity() Specificity {
220 return Specificity{0, 0, 1}
221}
222
223func (c tagSelector) PseudoElement() string {
224 return ""
225}
226
227type classSelector struct {
228 class string
229}
230
231// Matches elements by class attribute.
232func (t classSelector) Match(n *html.Node) bool {
233 return matchAttribute(n, "class", func(s string) bool {
234 return matchInclude(t.class, s, false)
235 })
236}
237
238func (c classSelector) Specificity() Specificity {
239 return Specificity{0, 1, 0}
240}
241
242func (c classSelector) PseudoElement() string {
243 return ""
244}
245
246type idSelector struct {
247 id string
248}
249
250// Matches elements by id attribute.
251func (t idSelector) Match(n *html.Node) bool {
252 return matchAttribute(n, "id", func(s string) bool {
253 return s == t.id
254 })
255}
256
257func (c idSelector) Specificity() Specificity {
258 return Specificity{1, 0, 0}
259}
260
261func (c idSelector) PseudoElement() string {
262 return ""
263}
264
265type attrSelector struct {
266 key, val, operation string
267 regexp *regexp.Regexp
268 insensitive bool
269}
270
271// Matches elements by attribute value.
272func (t attrSelector) Match(n *html.Node) bool {
273 switch t.operation {
274 case "":
275 return matchAttribute(n, t.key, func(string) bool { return true })
276 case "=":
277 return matchAttribute(n, t.key, func(s string) bool { return matchInsensitiveValue(s, t.val, t.insensitive) })
278 case "!=":
279 return attributeNotEqualMatch(t.key, t.val, n, t.insensitive)
280 case "~=":
281 // matches elements where the attribute named key is a whitespace-separated list that includes val.
282 return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s, t.insensitive) })
283 case "|=":
284 return attributeDashMatch(t.key, t.val, n, t.insensitive)
285 case "^=":
286 return attributePrefixMatch(t.key, t.val, n, t.insensitive)
287 case "$=":
288 return attributeSuffixMatch(t.key, t.val, n, t.insensitive)
289 case "*=":
290 return attributeSubstringMatch(t.key, t.val, n, t.insensitive)
291 case "#=":
292 return attributeRegexMatch(t.key, t.regexp, n)
293 default:
294 panic(fmt.Sprintf("unsuported operation : %s", t.operation))
295 }
296}
297
// matchInsensitiveValue reports whether userAttr and realAttr are equal.
// When ignoreCase is true the comparison uses strings.EqualFold; otherwise
// the two strings must be byte-for-byte identical. The comparison is
// symmetric, so the order of the two values does not matter.
func matchInsensitiveValue(userAttr string, realAttr string, ignoreCase bool) bool {
	if !ignoreCase {
		return userAttr == realAttr
	}
	return strings.EqualFold(userAttr, realAttr)
}
308
309// matches elements where the attribute named key satisifes the function f.
310func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
311 if n.Type != html.ElementNode {
312 return false
313 }
314 for _, a := range n.Attr {
315 if a.Key == key && f(a.Val) {
316 return true
317 }
318 }
319 return false
320}
321
322// attributeNotEqualMatch matches elements where
323// the attribute named key does not have the value val.
324func attributeNotEqualMatch(key, val string, n *html.Node, ignoreCase bool) bool {
325 if n.Type != html.ElementNode {
326 return false
327 }
328 for _, a := range n.Attr {
329 if a.Key == key && matchInsensitiveValue(a.Val, val, ignoreCase) {
330 return false
331 }
332 }
333 return true
334}
335
336// returns true if s is a whitespace-separated list that includes val.
337func matchInclude(val string, s string, ignoreCase bool) bool {
338 for s != "" {
339 i := strings.IndexAny(s, " \t\r\n\f")
340 if i == -1 {
341 return matchInsensitiveValue(s, val, ignoreCase)
342 }
343 if matchInsensitiveValue(s[:i], val, ignoreCase) {
344 return true
345 }
346 s = s[i+1:]
347 }
348 return false
349}
350
351// matches elements where the attribute named key equals val or starts with val plus a hyphen.
352func attributeDashMatch(key, val string, n *html.Node, ignoreCase bool) bool {
353 return matchAttribute(n, key,
354 func(s string) bool {
355 if matchInsensitiveValue(s, val, ignoreCase) {
356 return true
357 }
358 if len(s) <= len(val) {
359 return false
360 }
361 if matchInsensitiveValue(s[:len(val)], val, ignoreCase) && s[len(val)] == '-' {
362 return true
363 }
364 return false
365 })
366}
367
368// attributePrefixMatch returns a Selector that matches elements where
369// the attribute named key starts with val.
370func attributePrefixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
371 return matchAttribute(n, key,
372 func(s string) bool {
373 if strings.TrimSpace(s) == "" {
374 return false
375 }
376 if ignoreCase {
377 return strings.HasPrefix(strings.ToLower(s), strings.ToLower(val))
378 }
379 return strings.HasPrefix(s, val)
380 })
381}
382
383// attributeSuffixMatch matches elements where
384// the attribute named key ends with val.
385func attributeSuffixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
386 return matchAttribute(n, key,
387 func(s string) bool {
388 if strings.TrimSpace(s) == "" {
389 return false
390 }
391 if ignoreCase {
392 return strings.HasSuffix(strings.ToLower(s), strings.ToLower(val))
393 }
394 return strings.HasSuffix(s, val)
395 })
396}
397
398// attributeSubstringMatch matches nodes where
399// the attribute named key contains val.
400func attributeSubstringMatch(key, val string, n *html.Node, ignoreCase bool) bool {
401 return matchAttribute(n, key,
402 func(s string) bool {
403 if strings.TrimSpace(s) == "" {
404 return false
405 }
406 if ignoreCase {
407 return strings.Contains(strings.ToLower(s), strings.ToLower(val))
408 }
409 return strings.Contains(s, val)
410 })
411}
412
413// attributeRegexMatch matches nodes where
414// the attribute named key matches the regular expression rx
415func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool {
416 return matchAttribute(n, key,
417 func(s string) bool {
418 return rx.MatchString(s)
419 })
420}
421
422func (c attrSelector) Specificity() Specificity {
423 return Specificity{0, 1, 0}
424}
425
426func (c attrSelector) PseudoElement() string {
427 return ""
428}
429
430// see pseudo_classes.go for pseudo classes selectors
431
432// on a static context, some selectors can't match anything
433type neverMatchSelector struct {
434 value string
435}
436
437func (s neverMatchSelector) Match(n *html.Node) bool {
438 return false
439}
440
441func (s neverMatchSelector) Specificity() Specificity {
442 return Specificity{0, 0, 0}
443}
444
445func (c neverMatchSelector) PseudoElement() string {
446 return ""
447}
448
449type compoundSelector struct {
450 selectors []Sel
451 pseudoElement string
452}
453
454// Matches elements if each sub-selectors matches.
455func (t compoundSelector) Match(n *html.Node) bool {
456 if len(t.selectors) == 0 {
457 return n.Type == html.ElementNode
458 }
459
460 for _, sel := range t.selectors {
461 if !sel.Match(n) {
462 return false
463 }
464 }
465 return true
466}
467
468func (s compoundSelector) Specificity() Specificity {
469 var out Specificity
470 for _, sel := range s.selectors {
471 out = out.Add(sel.Specificity())
472 }
473 if s.pseudoElement != "" {
474 // https://drafts.csswg.org/selectors-3/#specificity
475 out = out.Add(Specificity{0, 0, 1})
476 }
477 return out
478}
479
480func (c compoundSelector) PseudoElement() string {
481 return c.pseudoElement
482}
483
484type combinedSelector struct {
485 first Sel
486 combinator byte
487 second Sel
488}
489
490func (t combinedSelector) Match(n *html.Node) bool {
491 if t.first == nil {
492 return false // maybe we should panic
493 }
494 switch t.combinator {
495 case 0:
496 return t.first.Match(n)
497 case ' ':
498 return descendantMatch(t.first, t.second, n)
499 case '>':
500 return childMatch(t.first, t.second, n)
501 case '+':
502 return siblingMatch(t.first, t.second, true, n)
503 case '~':
504 return siblingMatch(t.first, t.second, false, n)
505 default:
506 panic("unknown combinator")
507 }
508}
509
510// matches an element if it matches d and has an ancestor that matches a.
511func descendantMatch(a, d Matcher, n *html.Node) bool {
512 if !d.Match(n) {
513 return false
514 }
515
516 for p := n.Parent; p != nil; p = p.Parent {
517 if a.Match(p) {
518 return true
519 }
520 }
521
522 return false
523}
524
525// matches an element if it matches d and its parent matches a.
526func childMatch(a, d Matcher, n *html.Node) bool {
527 return d.Match(n) && n.Parent != nil && a.Match(n.Parent)
528}
529
530// matches an element if it matches s2 and is preceded by an element that matches s1.
531// If adjacent is true, the sibling must be immediately before the element.
532func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool {
533 if !s2.Match(n) {
534 return false
535 }
536
537 if adjacent {
538 for n = n.PrevSibling; n != nil; n = n.PrevSibling {
539 if n.Type == html.TextNode || n.Type == html.CommentNode {
540 continue
541 }
542 return s1.Match(n)
543 }
544 return false
545 }
546
547 // Walk backwards looking for element that matches s1
548 for c := n.PrevSibling; c != nil; c = c.PrevSibling {
549 if s1.Match(c) {
550 return true
551 }
552 }
553
554 return false
555}
556
557func (s combinedSelector) Specificity() Specificity {
558 spec := s.first.Specificity()
559 if s.second != nil {
560 spec = spec.Add(s.second.Specificity())
561 }
562 return spec
563}
564
565// on combinedSelector, a pseudo-element only makes sens on the last
566// selector, although others increase specificity.
567func (c combinedSelector) PseudoElement() string {
568 if c.second == nil {
569 return ""
570 }
571 return c.second.PseudoElement()
572}
573
574// A SelectorGroup is a list of selectors, which matches if any of the
575// individual selectors matches.
576type SelectorGroup []Sel
577
578// Match returns true if the node matches one of the single selectors.
579func (s SelectorGroup) Match(n *html.Node) bool {
580 for _, sel := range s {
581 if sel.Match(n) {
582 return true
583 }
584 }
585 return false
586}