atom.go

 1// Copyright 2012 The Go Authors. All rights reserved.
 2// Use of this source code is governed by a BSD-style
 3// license that can be found in the LICENSE file.
 4
 5// Package atom provides integer codes (also known as atoms) for a fixed set of
 6// frequently occurring HTML strings: tag names and attribute keys such as "p"
 7// and "id".
 8//
 9// Sharing an atom's name between all elements with the same tag can result in
10// fewer string allocations when tokenizing and parsing HTML. Integer
11// comparisons are also generally faster than string comparisons.
12//
13// The value of an atom's particular code is not guaranteed to stay the same
14// between versions of this package. Neither is any ordering guaranteed:
15// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
16// be dense. The only guarantees are that e.g. looking up "div" will yield
17// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
18package atom // import "golang.org/x/net/html/atom"
19
// Atom is an integer code for a string. The zero value maps to "".
//
// As decoded by the String method, the upper 24 bits of the code hold the
// byte offset of the atom's name within atomText and the low 8 bits hold the
// name's length in bytes.
type Atom uint32
22
23// String returns the atom's name.
24func (a Atom) String() string {
25	start := uint32(a >> 8)
26	n := uint32(a & 0xff)
27	if start+n > uint32(len(atomText)) {
28		return ""
29	}
30	return atomText[start : start+n]
31}
32
33func (a Atom) string() string {
34	return atomText[a>>8 : a>>8+a&0xff]
35}
36
// fnv folds the bytes of s into the running 32-bit hash h and returns the
// result. Each byte is XORed into the hash before the hash is multiplied by
// the 32-bit FNV prime (the FNV-1a ordering). The caller chooses the starting
// value h; an empty s returns h unchanged.
func fnv(h uint32, s []byte) uint32 {
	const prime32 = 16777619
	for _, b := range s {
		h = (h ^ uint32(b)) * prime32
	}
	return h
}
45
// match reports whether every byte of t equals the byte of s at the same
// index. Only the first len(t) bytes of s are examined, so s must be at least
// as long as t; callers are expected to have compared the lengths already.
func match(s string, t []byte) bool {
	for i := 0; i < len(t); i++ {
		if t[i] != s[i] {
			return false
		}
	}
	return true
}
54
55// Lookup returns the atom whose name is s. It returns zero if there is no
56// such atom. The lookup is case sensitive.
57func Lookup(s []byte) Atom {
58	if len(s) == 0 || len(s) > maxAtomLen {
59		return 0
60	}
61	h := fnv(hash0, s)
62	if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
63		return a
64	}
65	if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
66		return a
67	}
68	return 0
69}
70
71// String returns a string whose contents are equal to s. In that sense, it is
72// equivalent to string(s) but may be more efficient.
73func String(s []byte) string {
74	if a := Lookup(s); a != 0 {
75		return a.String()
76	}
77	return string(s)
78}