selector.go

  1package cascadia
  2
  3import (
  4	"fmt"
  5	"regexp"
  6	"strings"
  7
  8	"golang.org/x/net/html"
  9)
 10
// Matcher is the interface for basic selector functionality: deciding
// whether a single node is selected.
type Matcher interface {
	// Match returns whether a selector matches n.
	Match(n *html.Node) bool
}
 16
// Sel is the interface for all the functionality provided by selectors.
// It embeds Matcher and adds specificity, serialization and pseudo-element
// accessors.
type Sel interface {
	Matcher

	// Specificity returns the Specificity value of the selector.
	Specificity() Specificity

	// String returns a CSS input compiling to this selector.
	String() string

	// PseudoElement returns a pseudo-element, or an empty string.
	PseudoElement() string
}
 28
 29// Parse parses a selector. Use `ParseWithPseudoElement`
 30// if you need support for pseudo-elements.
 31func Parse(sel string) (Sel, error) {
 32	p := &parser{s: sel}
 33	compiled, err := p.parseSelector()
 34	if err != nil {
 35		return nil, err
 36	}
 37
 38	if p.i < len(sel) {
 39		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
 40	}
 41
 42	return compiled, nil
 43}
 44
 45// ParseWithPseudoElement parses a single selector,
 46// with support for pseudo-element.
 47func ParseWithPseudoElement(sel string) (Sel, error) {
 48	p := &parser{s: sel, acceptPseudoElements: true}
 49	compiled, err := p.parseSelector()
 50	if err != nil {
 51		return nil, err
 52	}
 53
 54	if p.i < len(sel) {
 55		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
 56	}
 57
 58	return compiled, nil
 59}
 60
 61// ParseGroup parses a selector, or a group of selectors separated by commas.
 62// Use `ParseGroupWithPseudoElements`
 63// if you need support for pseudo-elements.
 64func ParseGroup(sel string) (SelectorGroup, error) {
 65	p := &parser{s: sel}
 66	compiled, err := p.parseSelectorGroup()
 67	if err != nil {
 68		return nil, err
 69	}
 70
 71	if p.i < len(sel) {
 72		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
 73	}
 74
 75	return compiled, nil
 76}
 77
 78// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas.
 79// It supports pseudo-elements.
 80func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) {
 81	p := &parser{s: sel, acceptPseudoElements: true}
 82	compiled, err := p.parseSelectorGroup()
 83	if err != nil {
 84		return nil, err
 85	}
 86
 87	if p.i < len(sel) {
 88		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
 89	}
 90
 91	return compiled, nil
 92}
 93
// A Selector is a function which tells whether a node matches or not.
//
// This type is maintained for compatibility; the newer and more idiomatic
// interfaces Sel and Matcher are recommended instead.
type Selector func(*html.Node) bool
 99
100// Compile parses a selector and returns, if successful, a Selector object
101// that can be used to match against html.Node objects.
102func Compile(sel string) (Selector, error) {
103	compiled, err := ParseGroup(sel)
104	if err != nil {
105		return nil, err
106	}
107
108	return Selector(compiled.Match), nil
109}
110
111// MustCompile is like Compile, but panics instead of returning an error.
112func MustCompile(sel string) Selector {
113	compiled, err := Compile(sel)
114	if err != nil {
115		panic(err)
116	}
117	return compiled
118}
119
120// MatchAll returns a slice of the nodes that match the selector,
121// from n and its children.
122func (s Selector) MatchAll(n *html.Node) []*html.Node {
123	return s.matchAllInto(n, nil)
124}
125
126func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
127	if s(n) {
128		storage = append(storage, n)
129	}
130
131	for child := n.FirstChild; child != nil; child = child.NextSibling {
132		storage = s.matchAllInto(child, storage)
133	}
134
135	return storage
136}
137
138func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node {
139	for child := n.FirstChild; child != nil; child = child.NextSibling {
140		if m.Match(child) {
141			storage = append(storage, child)
142		}
143		storage = queryInto(child, m, storage)
144	}
145
146	return storage
147}
148
149// QueryAll returns a slice of all the nodes that match m, from the descendants
150// of n.
151func QueryAll(n *html.Node, m Matcher) []*html.Node {
152	return queryInto(n, m, nil)
153}
154
155// Match returns true if the node matches the selector.
156func (s Selector) Match(n *html.Node) bool {
157	return s(n)
158}
159
160// MatchFirst returns the first node that matches s, from n and its children.
161func (s Selector) MatchFirst(n *html.Node) *html.Node {
162	if s.Match(n) {
163		return n
164	}
165
166	for c := n.FirstChild; c != nil; c = c.NextSibling {
167		m := s.MatchFirst(c)
168		if m != nil {
169			return m
170		}
171	}
172	return nil
173}
174
175// Query returns the first node that matches m, from the descendants of n.
176// If none matches, it returns nil.
177func Query(n *html.Node, m Matcher) *html.Node {
178	for c := n.FirstChild; c != nil; c = c.NextSibling {
179		if m.Match(c) {
180			return c
181		}
182		if matched := Query(c, m); matched != nil {
183			return matched
184		}
185	}
186
187	return nil
188}
189
190// Filter returns the nodes in nodes that match the selector.
191func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
192	for _, n := range nodes {
193		if s(n) {
194			result = append(result, n)
195		}
196	}
197	return result
198}
199
200// Filter returns the nodes that match m.
201func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) {
202	for _, n := range nodes {
203		if m.Match(n) {
204			result = append(result, n)
205		}
206	}
207	return result
208}
209
210type tagSelector struct {
211	tag string
212}
213
214// Matches elements with a given tag name.
215func (t tagSelector) Match(n *html.Node) bool {
216	return n.Type == html.ElementNode && n.Data == t.tag
217}
218
219func (c tagSelector) Specificity() Specificity {
220	return Specificity{0, 0, 1}
221}
222
223func (c tagSelector) PseudoElement() string {
224	return ""
225}
226
227type classSelector struct {
228	class string
229}
230
231// Matches elements by class attribute.
232func (t classSelector) Match(n *html.Node) bool {
233	return matchAttribute(n, "class", func(s string) bool {
234		return matchInclude(t.class, s, false)
235	})
236}
237
238func (c classSelector) Specificity() Specificity {
239	return Specificity{0, 1, 0}
240}
241
242func (c classSelector) PseudoElement() string {
243	return ""
244}
245
246type idSelector struct {
247	id string
248}
249
250// Matches elements by id attribute.
251func (t idSelector) Match(n *html.Node) bool {
252	return matchAttribute(n, "id", func(s string) bool {
253		return s == t.id
254	})
255}
256
257func (c idSelector) Specificity() Specificity {
258	return Specificity{1, 0, 0}
259}
260
261func (c idSelector) PseudoElement() string {
262	return ""
263}
264
// attrSelector selects elements by attribute presence or value.
type attrSelector struct {
	// key is the attribute name to look up.
	key string
	// val is the value compared against the attribute value.
	val string
	// operation is the operator dispatched on by Match: "", "=", "!=",
	// "~=", "|=", "^=", "$=", "*=" or "#=".
	operation string
	// regexp is the expression used by the "#=" operation.
	regexp *regexp.Regexp
	// insensitive requests case-insensitive value comparison.
	insensitive bool
}
270
271// Matches elements by attribute value.
272func (t attrSelector) Match(n *html.Node) bool {
273	switch t.operation {
274	case "":
275		return matchAttribute(n, t.key, func(string) bool { return true })
276	case "=":
277		return matchAttribute(n, t.key, func(s string) bool { return matchInsensitiveValue(s, t.val, t.insensitive) })
278	case "!=":
279		return attributeNotEqualMatch(t.key, t.val, n, t.insensitive)
280	case "~=":
281		// matches elements where the attribute named key is a whitespace-separated list that includes val.
282		return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s, t.insensitive) })
283	case "|=":
284		return attributeDashMatch(t.key, t.val, n, t.insensitive)
285	case "^=":
286		return attributePrefixMatch(t.key, t.val, n, t.insensitive)
287	case "$=":
288		return attributeSuffixMatch(t.key, t.val, n, t.insensitive)
289	case "*=":
290		return attributeSubstringMatch(t.key, t.val, n, t.insensitive)
291	case "#=":
292		return attributeRegexMatch(t.key, t.regexp, n)
293	default:
294		panic(fmt.Sprintf("unsuported operation : %s", t.operation))
295	}
296}
297
// matchInsensitiveValue reports whether userAttr and realAttr are equal.
// When ignoreCase is true the comparison uses strings.EqualFold; otherwise
// it is an exact comparison. The check is symmetric, so the order of the
// two strings does not affect the result.
func matchInsensitiveValue(userAttr string, realAttr string, ignoreCase bool) bool {
	if !ignoreCase {
		return userAttr == realAttr
	}
	return strings.EqualFold(userAttr, realAttr)
}
308
309// matches elements where the attribute named key satisifes the function f.
310func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
311	if n.Type != html.ElementNode {
312		return false
313	}
314	for _, a := range n.Attr {
315		if a.Key == key && f(a.Val) {
316			return true
317		}
318	}
319	return false
320}
321
322// attributeNotEqualMatch matches elements where
323// the attribute named key does not have the value val.
324func attributeNotEqualMatch(key, val string, n *html.Node, ignoreCase bool) bool {
325	if n.Type != html.ElementNode {
326		return false
327	}
328	for _, a := range n.Attr {
329		if a.Key == key && matchInsensitiveValue(a.Val, val, ignoreCase) {
330			return false
331		}
332	}
333	return true
334}
335
336// returns true if s is a whitespace-separated list that includes val.
337func matchInclude(val string, s string, ignoreCase bool) bool {
338	for s != "" {
339		i := strings.IndexAny(s, " \t\r\n\f")
340		if i == -1 {
341			return matchInsensitiveValue(s, val, ignoreCase)
342		}
343		if matchInsensitiveValue(s[:i], val, ignoreCase) {
344			return true
345		}
346		s = s[i+1:]
347	}
348	return false
349}
350
351//  matches elements where the attribute named key equals val or starts with val plus a hyphen.
352func attributeDashMatch(key, val string, n *html.Node, ignoreCase bool) bool {
353	return matchAttribute(n, key,
354		func(s string) bool {
355			if matchInsensitiveValue(s, val, ignoreCase) {
356				return true
357			}
358			if len(s) <= len(val) {
359				return false
360			}
361			if matchInsensitiveValue(s[:len(val)], val, ignoreCase) && s[len(val)] == '-' {
362				return true
363			}
364			return false
365		})
366}
367
368// attributePrefixMatch returns a Selector that matches elements where
369// the attribute named key starts with val.
370func attributePrefixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
371	return matchAttribute(n, key,
372		func(s string) bool {
373			if strings.TrimSpace(s) == "" {
374				return false
375			}
376			if ignoreCase {
377				return strings.HasPrefix(strings.ToLower(s), strings.ToLower(val))
378			}
379			return strings.HasPrefix(s, val)
380		})
381}
382
383// attributeSuffixMatch matches elements where
384// the attribute named key ends with val.
385func attributeSuffixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
386	return matchAttribute(n, key,
387		func(s string) bool {
388			if strings.TrimSpace(s) == "" {
389				return false
390			}
391			if ignoreCase {
392				return strings.HasSuffix(strings.ToLower(s), strings.ToLower(val))
393			}
394			return strings.HasSuffix(s, val)
395		})
396}
397
398// attributeSubstringMatch matches nodes where
399// the attribute named key contains val.
400func attributeSubstringMatch(key, val string, n *html.Node, ignoreCase bool) bool {
401	return matchAttribute(n, key,
402		func(s string) bool {
403			if strings.TrimSpace(s) == "" {
404				return false
405			}
406			if ignoreCase {
407				return strings.Contains(strings.ToLower(s), strings.ToLower(val))
408			}
409			return strings.Contains(s, val)
410		})
411}
412
413// attributeRegexMatch  matches nodes where
414// the attribute named key matches the regular expression rx
415func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool {
416	return matchAttribute(n, key,
417		func(s string) bool {
418			return rx.MatchString(s)
419		})
420}
421
422func (c attrSelector) Specificity() Specificity {
423	return Specificity{0, 1, 0}
424}
425
426func (c attrSelector) PseudoElement() string {
427	return ""
428}
429
430// see pseudo_classes.go for pseudo classes selectors
431
432// on a static context, some selectors can't match anything
433type neverMatchSelector struct {
434	value string
435}
436
437func (s neverMatchSelector) Match(n *html.Node) bool {
438	return false
439}
440
441func (s neverMatchSelector) Specificity() Specificity {
442	return Specificity{0, 0, 0}
443}
444
445func (c neverMatchSelector) PseudoElement() string {
446	return ""
447}
448
// compoundSelector groups sub-selectors that must all match the same node,
// together with an optional pseudo-element.
type compoundSelector struct {
	selectors     []Sel  // sub-selectors; Match requires every one of them to match
	pseudoElement string // value returned by PseudoElement; "" when there is none
}
453
454// Matches elements if each sub-selectors matches.
455func (t compoundSelector) Match(n *html.Node) bool {
456	if len(t.selectors) == 0 {
457		return n.Type == html.ElementNode
458	}
459
460	for _, sel := range t.selectors {
461		if !sel.Match(n) {
462			return false
463		}
464	}
465	return true
466}
467
468func (s compoundSelector) Specificity() Specificity {
469	var out Specificity
470	for _, sel := range s.selectors {
471		out = out.Add(sel.Specificity())
472	}
473	if s.pseudoElement != "" {
474		// https://drafts.csswg.org/selectors-3/#specificity
475		out = out.Add(Specificity{0, 0, 1})
476	}
477	return out
478}
479
480func (c compoundSelector) PseudoElement() string {
481	return c.pseudoElement
482}
483
// combinedSelector joins two selectors with a combinator.
type combinedSelector struct {
	first      Sel  // left-hand selector; Match returns false when it is nil
	combinator byte // 0 (first only), ' ' (descendant), '>' (child), '+' (adjacent sibling) or '~' (sibling)
	second     Sel  // right-hand selector, tested against the candidate node itself
}
489
490func (t combinedSelector) Match(n *html.Node) bool {
491	if t.first == nil {
492		return false // maybe we should panic
493	}
494	switch t.combinator {
495	case 0:
496		return t.first.Match(n)
497	case ' ':
498		return descendantMatch(t.first, t.second, n)
499	case '>':
500		return childMatch(t.first, t.second, n)
501	case '+':
502		return siblingMatch(t.first, t.second, true, n)
503	case '~':
504		return siblingMatch(t.first, t.second, false, n)
505	default:
506		panic("unknown combinator")
507	}
508}
509
510// matches an element if it matches d and has an ancestor that matches a.
511func descendantMatch(a, d Matcher, n *html.Node) bool {
512	if !d.Match(n) {
513		return false
514	}
515
516	for p := n.Parent; p != nil; p = p.Parent {
517		if a.Match(p) {
518			return true
519		}
520	}
521
522	return false
523}
524
525// matches an element if it matches d and its parent matches a.
526func childMatch(a, d Matcher, n *html.Node) bool {
527	return d.Match(n) && n.Parent != nil && a.Match(n.Parent)
528}
529
530// matches an element if it matches s2 and is preceded by an element that matches s1.
531// If adjacent is true, the sibling must be immediately before the element.
532func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool {
533	if !s2.Match(n) {
534		return false
535	}
536
537	if adjacent {
538		for n = n.PrevSibling; n != nil; n = n.PrevSibling {
539			if n.Type == html.TextNode || n.Type == html.CommentNode {
540				continue
541			}
542			return s1.Match(n)
543		}
544		return false
545	}
546
547	// Walk backwards looking for element that matches s1
548	for c := n.PrevSibling; c != nil; c = c.PrevSibling {
549		if s1.Match(c) {
550			return true
551		}
552	}
553
554	return false
555}
556
557func (s combinedSelector) Specificity() Specificity {
558	spec := s.first.Specificity()
559	if s.second != nil {
560		spec = spec.Add(s.second.Specificity())
561	}
562	return spec
563}
564
565// on combinedSelector, a pseudo-element only makes sens on the last
566// selector, although others increase specificity.
567func (c combinedSelector) PseudoElement() string {
568	if c.second == nil {
569		return ""
570	}
571	return c.second.PseudoElement()
572}
573
// A SelectorGroup is a list of selectors, which matches if any of the
// individual selectors matches. An empty group matches nothing.
type SelectorGroup []Sel
577
578// Match returns true if the node matches one of the single selectors.
579func (s SelectorGroup) Match(n *html.Node) bool {
580	for _, sel := range s {
581		if sel.Match(n) {
582			return true
583		}
584	}
585	return false
586}