util.go

  1// Package util provides utility functions for the goldmark.
  2package util
  3
  4import (
  5	"bytes"
  6	"io"
  7	"net/url"
  8	"regexp"
  9	"sort"
 10	"strconv"
 11	"unicode"
 12	"unicode/utf8"
 13)
 14
 15// A CopyOnWriteBuffer is a byte buffer that copies buffer when
 16// it need to be changed.
 17type CopyOnWriteBuffer struct {
 18	buffer []byte
 19	copied bool
 20}
 21
 22// NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer.
 23func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer {
 24	return CopyOnWriteBuffer{
 25		buffer: buffer,
 26		copied: false,
 27	}
 28}
 29
 30// Write writes given bytes to the buffer.
 31// Write allocate new buffer and clears it at the first time.
 32func (b *CopyOnWriteBuffer) Write(value []byte) {
 33	if !b.copied {
 34		b.buffer = make([]byte, 0, len(b.buffer)+20)
 35		b.copied = true
 36	}
 37	b.buffer = append(b.buffer, value...)
 38}
 39
 40// WriteString writes given string to the buffer.
 41// WriteString allocate new buffer and clears it at the first time.
 42func (b *CopyOnWriteBuffer) WriteString(value string) {
 43	b.Write(StringToReadOnlyBytes(value))
 44}
 45
 46// Append appends given bytes to the buffer.
 47// Append copy buffer at the first time.
 48func (b *CopyOnWriteBuffer) Append(value []byte) {
 49	if !b.copied {
 50		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
 51		copy(tmp, b.buffer)
 52		b.buffer = tmp
 53		b.copied = true
 54	}
 55	b.buffer = append(b.buffer, value...)
 56}
 57
 58// AppendString appends given string to the buffer.
 59// AppendString copy buffer at the first time.
 60func (b *CopyOnWriteBuffer) AppendString(value string) {
 61	b.Append(StringToReadOnlyBytes(value))
 62}
 63
 64// WriteByte writes the given byte to the buffer.
 65// WriteByte allocate new buffer and clears it at the first time.
 66func (b *CopyOnWriteBuffer) WriteByte(c byte) error {
 67	if !b.copied {
 68		b.buffer = make([]byte, 0, len(b.buffer)+20)
 69		b.copied = true
 70	}
 71	b.buffer = append(b.buffer, c)
 72	return nil
 73}
 74
 75// AppendByte appends given bytes to the buffer.
 76// AppendByte copy buffer at the first time.
 77func (b *CopyOnWriteBuffer) AppendByte(c byte) {
 78	if !b.copied {
 79		tmp := make([]byte, len(b.buffer), len(b.buffer)+20)
 80		copy(tmp, b.buffer)
 81		b.buffer = tmp
 82		b.copied = true
 83	}
 84	b.buffer = append(b.buffer, c)
 85}
 86
 87// Bytes returns bytes of this buffer.
 88func (b *CopyOnWriteBuffer) Bytes() []byte {
 89	return b.buffer
 90}
 91
 92// IsCopied returns true if buffer has been copied, otherwise false.
 93func (b *CopyOnWriteBuffer) IsCopied() bool {
 94	return b.copied
 95}
 96
 97// IsEscapedPunctuation returns true if character at a given index i
 98// is an escaped punctuation, otherwise false.
 99func IsEscapedPunctuation(source []byte, i int) bool {
100	return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1])
101}
102
// ReadWhile advances through source starting at index[0] and stopping at
// index[1] (exclusive) for as long as pred accepts the current byte.
// It returns the position where scanning stopped and whether at least one
// byte was accepted.
func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) {
	pos, end := index[0], index[1]
	for pos < end && pred(source[pos]) {
		pos++
	}
	return pos, pos > index[0]
}
117
118// IsBlank returns true if the given string is all space characters.
119func IsBlank(bs []byte) bool {
120	for _, b := range bs {
121		if !IsSpace(b) {
122			return false
123		}
124	}
125	return true
126}
127
// VisualizeSpaces returns a copy of bs in which invisible characters are
// replaced by bracketed markers such as [SPACE] or [TAB]. A newline keeps
// its line break after the [NEWLINE] marker.
func VisualizeSpaces(bs []byte) []byte {
	// Applied one after another, in this order.
	markers := [...][2]string{
		{" ", "[SPACE]"},
		{"\t", "[TAB]"},
		{"\n", "[NEWLINE]\n"},
		{"\r", "[CR]"},
		{"\v", "[VTAB]"},
		{"\x00", "[NUL]"},
		{"\ufffd", "[U+FFFD]"},
	}
	for _, m := range markers {
		bs = bytes.Replace(bs, []byte(m[0]), []byte(m[1]), -1)
	}
	return bs
}
139
// TabWidth returns how many columns a tab occupies when it starts at column
// currentPos, with tab stops every 4 columns.
func TabWidth(currentPos int) int {
	const tabStop = 4
	return tabStop - currentPos%tabStop
}
144
145// IndentPosition searches an indent position with the given width for the given line.
146// If the line contains tab characters, paddings may be not zero.
147// currentPos==0 and width==2:
148//
149//	position: 0    1
150//	          [TAB]aaaa
151//	width:    1234 5678
152//
153// width=2 is in the tab character. In this case, IndentPosition returns
154// (pos=1, padding=2).
155func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
156	return IndentPositionPadding(bs, currentPos, 0, width)
157}
158
159// IndentPositionPadding searches an indent position with the given width for the given line.
160// This function is mostly same as IndentPosition except this function
161// takes account into additional paddings.
162func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
163	if width == 0 {
164		return 0, paddingv
165	}
166	w := 0
167	i := 0
168	l := len(bs)
169	p := paddingv
170	for ; i < l; i++ {
171		if p > 0 {
172			p--
173			w++
174			continue
175		}
176		if bs[i] == '\t' && w < width {
177			w += TabWidth(currentPos + w)
178		} else if bs[i] == ' ' && w < width {
179			w++
180		} else {
181			break
182		}
183	}
184	if w >= width {
185		return i - paddingv, w - width
186	}
187	return -1, -1
188}
189
190// DedentPosition dedents lines by the given width.
191//
192// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
193func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
194	if width == 0 {
195		return 0, 0
196	}
197	w := 0
198	l := len(bs)
199	i := 0
200	for ; i < l; i++ {
201		if bs[i] == '\t' {
202			w += TabWidth(currentPos + w)
203		} else if bs[i] == ' ' {
204			w++
205		} else {
206			break
207		}
208	}
209	if w >= width {
210		return i, w - width
211	}
212	return i, 0
213}
214
215// DedentPositionPadding dedents lines by the given width.
216// This function is mostly same as DedentPosition except this function
217// takes account into additional paddings.
218//
219// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
220func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
221	if width == 0 {
222		return 0, paddingv
223	}
224
225	w := 0
226	i := 0
227	l := len(bs)
228	for ; i < l; i++ {
229		if bs[i] == '\t' {
230			w += TabWidth(currentPos + w)
231		} else if bs[i] == ' ' {
232			w++
233		} else {
234			break
235		}
236	}
237	if w >= width {
238		return i - paddingv, w - width
239	}
240	return i - paddingv, 0
241}
242
243// IndentWidth calculate an indent width for the given line.
244func IndentWidth(bs []byte, currentPos int) (width, pos int) {
245	l := len(bs)
246	for i := 0; i < l; i++ {
247		b := bs[i]
248		if b == ' ' {
249			width++
250			pos++
251		} else if b == '\t' {
252			width += TabWidth(currentPos + width)
253			pos++
254		} else {
255			break
256		}
257	}
258	return
259}
260
// FirstNonSpacePosition returns the index of the first byte in bs that is
// neither a space nor a tab. It returns -1 when that byte is a newline or
// when bs contains only spaces and tabs.
func FirstNonSpacePosition(bs []byte) int {
	for idx, c := range bs {
		switch c {
		case ' ', '\t':
			continue
		case '\n':
			return -1
		default:
			return idx
		}
	}
	return -1
}
277
278// FindClosure returns a position that closes the given opener.
279// If codeSpan is set true, it ignores characters in code spans.
280// If allowNesting is set true, closures correspond to nested opener will be
281// ignored.
282//
283// Deprecated: This function can not handle newlines. Many elements
284// can be existed over multiple lines(e.g. link labels).
285// Use text.Reader.FindClosure.
286func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int {
287	i := 0
288	opened := 1
289	codeSpanOpener := 0
290	for i < len(bs) {
291		c := bs[i]
292		if codeSpan && codeSpanOpener != 0 && c == '`' {
293			codeSpanCloser := 0
294			for ; i < len(bs); i++ {
295				if bs[i] == '`' {
296					codeSpanCloser++
297				} else {
298					i--
299					break
300				}
301			}
302			if codeSpanCloser == codeSpanOpener {
303				codeSpanOpener = 0
304			}
305		} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) {
306			i += 2
307			continue
308		} else if codeSpan && codeSpanOpener == 0 && c == '`' {
309			for ; i < len(bs); i++ {
310				if bs[i] == '`' {
311					codeSpanOpener++
312				} else {
313					i--
314					break
315				}
316			}
317		} else if (codeSpan && codeSpanOpener == 0) || !codeSpan {
318			if c == closure {
319				opened--
320				if opened == 0 {
321					return i
322				}
323			} else if c == opener {
324				if !allowNesting {
325					return -1
326				}
327				opened++
328			}
329		}
330		i++
331	}
332	return -1
333}
334
// TrimLeft strips from the head of source every leading byte that occurs
// in b. The result is a subslice of source; nothing is copied.
func TrimLeft(source, b []byte) []byte {
	start := 0
scan:
	for start < len(source) {
		for _, cut := range b {
			if source[start] == cut {
				start++
				continue scan
			}
		}
		break
	}
	return source[start:]
}
355
// TrimRight strips from the tail of source every trailing byte that occurs
// in b. The result is a subslice of source; nothing is copied.
func TrimRight(source, b []byte) []byte {
	end := len(source)
scan:
	for end > 0 {
		for _, cut := range b {
			if source[end-1] == cut {
				end--
				continue scan
			}
		}
		break
	}
	return source[:end]
}
374
375// TrimLeftLength returns a length of leading specified characters.
376func TrimLeftLength(source, s []byte) int {
377	return len(source) - len(TrimLeft(source, s))
378}
379
380// TrimRightLength returns a length of trailing specified characters.
381func TrimRightLength(source, s []byte) int {
382	return len(source) - len(TrimRight(source, s))
383}
384
385// TrimLeftSpaceLength returns a length of leading space characters.
386func TrimLeftSpaceLength(source []byte) int {
387	i := 0
388	for ; i < len(source); i++ {
389		if !IsSpace(source[i]) {
390			break
391		}
392	}
393	return i
394}
395
396// TrimRightSpaceLength returns a length of trailing space characters.
397func TrimRightSpaceLength(source []byte) int {
398	l := len(source)
399	i := l - 1
400	for ; i >= 0; i-- {
401		if !IsSpace(source[i]) {
402			break
403		}
404	}
405	if i < 0 {
406		return l
407	}
408	return l - 1 - i
409}
410
411// TrimLeftSpace returns a subslice of the given string by slicing off all leading
412// space characters.
413func TrimLeftSpace(source []byte) []byte {
414	return TrimLeft(source, spaces)
415}
416
417// TrimRightSpace returns a subslice of the given string by slicing off all trailing
418// space characters.
419func TrimRightSpace(source []byte) []byte {
420	return TrimRight(source, spaces)
421}
422
423// DoFullUnicodeCaseFolding performs full unicode case folding to given bytes.
424func DoFullUnicodeCaseFolding(v []byte) []byte {
425	var rbuf []byte
426	cob := NewCopyOnWriteBuffer(v)
427	n := 0
428	for i := 0; i < len(v); i++ {
429		c := v[i]
430		if c < 0xb5 {
431			if c >= 0x41 && c <= 0x5a {
432				// A-Z to a-z
433				cob.Write(v[n:i])
434				_ = cob.WriteByte(c + 32)
435				n = i + 1
436			}
437			continue
438		}
439
440		if !utf8.RuneStart(c) {
441			continue
442		}
443		r, length := utf8.DecodeRune(v[i:])
444		if r == utf8.RuneError {
445			continue
446		}
447		folded, ok := unicodeCaseFoldings[r]
448		if !ok {
449			continue
450		}
451
452		cob.Write(v[n:i])
453		if rbuf == nil {
454			rbuf = make([]byte, 4)
455		}
456		for _, f := range folded {
457			l := utf8.EncodeRune(rbuf, f)
458			cob.Write(rbuf[:l])
459		}
460		i += length - 1
461		n = i + 1
462	}
463	if cob.IsCopied() {
464		cob.Write(v[n:])
465	}
466	return cob.Bytes()
467}
468
469// ReplaceSpaces replaces sequence of spaces with the given repl.
470func ReplaceSpaces(source []byte, repl byte) []byte {
471	var ret []byte
472	start := -1
473	for i, c := range source {
474		iss := IsSpace(c)
475		if start < 0 && iss {
476			start = i
477			continue
478		} else if start >= 0 && iss {
479			continue
480		} else if start >= 0 {
481			if ret == nil {
482				ret = make([]byte, 0, len(source))
483				ret = append(ret, source[:start]...)
484			}
485			ret = append(ret, repl)
486			start = -1
487		}
488		if ret != nil {
489			ret = append(ret, c)
490		}
491	}
492	if start >= 0 && ret != nil {
493		ret = append(ret, repl)
494	}
495	if ret == nil {
496		return source
497	}
498	return ret
499}
500
// ToRune decodes the rune that covers the byte at pos. It walks backwards
// from pos to the nearest rune-start byte and decodes from there.
//
// If no rune-start byte exists at or before pos (the prefix consists of
// continuation bytes only), utf8.RuneError is returned instead of slicing
// with a negative index. pos must be a valid index into source.
func ToRune(source []byte, pos int) rune {
	start := pos
	for start >= 0 && !utf8.RuneStart(source[start]) {
		start--
	}
	if start < 0 {
		return utf8.RuneError
	}
	r, _ := utf8.DecodeRune(source[start:])
	return r
}
512
// ToValidRune returns v unchanged when it is a non-zero rune accepted by
// utf8.ValidRune, and U+FFFD otherwise.
func ToValidRune(v rune) rune {
	if v != 0 && utf8.ValidRune(v) {
		return v
	}
	return utf8.RuneError
}
520
521// ToLinkReference converts given bytes into a valid link reference string.
522// ToLinkReference performs unicode case folding, trims leading and trailing spaces,  converts into lower
523// case and replace spaces with a single space character.
524func ToLinkReference(v []byte) string {
525	v = TrimLeftSpace(v)
526	v = TrimRightSpace(v)
527	v = DoFullUnicodeCaseFolding(v)
528	return string(ReplaceSpaces(v, ' '))
529}
530
// htmlEscapeTable maps a byte to its HTML entity; entries for bytes that
// need no escaping are nil.
var htmlEscapeTable = [256][]byte{
	'"': []byte("&quot;"),
	'&': []byte("&amp;"),
	'<': []byte("&lt;"),
	'>': []byte("&gt;"),
}

// EscapeHTMLByte returns the HTML entity for b when b has to be escaped,
// and nil otherwise.
func EscapeHTMLByte(b byte) []byte {
	entity := htmlEscapeTable[b]
	return entity
}
538
539// EscapeHTML escapes characters that should be escaped in HTML text.
540func EscapeHTML(v []byte) []byte {
541	cob := NewCopyOnWriteBuffer(v)
542	n := 0
543	for i := 0; i < len(v); i++ {
544		c := v[i]
545		escaped := htmlEscapeTable[c]
546		if escaped != nil {
547			cob.Write(v[n:i])
548			cob.Write(escaped)
549			n = i + 1
550		}
551	}
552	if cob.IsCopied() {
553		cob.Write(v[n:])
554	}
555	return cob.Bytes()
556}
557
558// UnescapePunctuations unescapes blackslash escaped punctuations.
559func UnescapePunctuations(source []byte) []byte {
560	cob := NewCopyOnWriteBuffer(source)
561	limit := len(source)
562	n := 0
563	for i := 0; i < limit; {
564		c := source[i]
565		if i < limit-1 && c == '\\' && IsPunct(source[i+1]) {
566			cob.Write(source[n:i])
567			_ = cob.WriteByte(source[i+1])
568			i += 2
569			n = i
570			continue
571		}
572		i++
573	}
574	if cob.IsCopied() {
575		cob.Write(source[n:])
576	}
577	return cob.Bytes()
578}
579
580// ResolveNumericReferences resolve numeric references like '&#1234;" .
581func ResolveNumericReferences(source []byte) []byte {
582	cob := NewCopyOnWriteBuffer(source)
583	buf := make([]byte, 6)
584	limit := len(source)
585	var ok bool
586	n := 0
587	for i := 0; i < limit; i++ {
588		if source[i] == '&' {
589			pos := i
590			next := i + 1
591			if next < limit && source[next] == '#' {
592				nnext := next + 1
593				if nnext < limit {
594					nc := source[nnext]
595					// code point like #x22;
596					if nnext < limit && nc == 'x' || nc == 'X' {
597						start := nnext + 1
598						i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal)
599						if ok && i < limit && source[i] == ';' {
600							v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32)
601							cob.Write(source[n:pos])
602							n = i + 1
603							runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
604							cob.Write(buf[:runeSize])
605							continue
606						}
607						// code point like #1234;
608					} else if nc >= '0' && nc <= '9' {
609						start := nnext
610						i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric)
611						if ok && i < limit && i-start < 8 && source[i] == ';' {
612							v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32)
613							cob.Write(source[n:pos])
614							n = i + 1
615							runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
616							cob.Write(buf[:runeSize])
617							continue
618						}
619					}
620				}
621			}
622			i = next - 1
623		}
624	}
625	if cob.IsCopied() {
626		cob.Write(source[n:])
627	}
628	return cob.Bytes()
629}
630
631// ResolveEntityNames resolve entity references like '&ouml;" .
632func ResolveEntityNames(source []byte) []byte {
633	cob := NewCopyOnWriteBuffer(source)
634	limit := len(source)
635	var ok bool
636	n := 0
637	for i := 0; i < limit; i++ {
638		if source[i] == '&' {
639			pos := i
640			next := i + 1
641			if !(next < limit && source[next] == '#') {
642				start := next
643				i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric)
644				if ok && i < limit && source[i] == ';' {
645					name := BytesToReadOnlyString(source[start:i])
646					entity, ok := LookUpHTML5EntityByName(name)
647					if ok {
648						cob.Write(source[n:pos])
649						n = i + 1
650						cob.Write(entity.Characters)
651						continue
652					}
653				}
654			}
655			i = next - 1
656		}
657	}
658	if cob.IsCopied() {
659		cob.Write(source[n:])
660	}
661	return cob.Bytes()
662}
663
664var htmlSpace = []byte("%20")
665
666// URLEscape escape the given URL.
667// If resolveReference is set true:
668//  1. unescape punctuations
669//  2. resolve numeric references
670//  3. resolve entity references
671//
672// URL encoded values (%xx) are kept as is.
673func URLEscape(v []byte, resolveReference bool) []byte {
674	if resolveReference {
675		v = UnescapePunctuations(v)
676		v = ResolveNumericReferences(v)
677		v = ResolveEntityNames(v)
678	}
679	cob := NewCopyOnWriteBuffer(v)
680	limit := len(v)
681	n := 0
682
683	for i := 0; i < limit; {
684		c := v[i]
685		if urlEscapeTable[c] == 1 {
686			i++
687			continue
688		}
689		if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) {
690			i += 3
691			continue
692		}
693		u8len := utf8lenTable[c]
694		if u8len == 99 { // invalid utf8 leading byte, skip it
695			i++
696			continue
697		}
698		if c == ' ' {
699			cob.Write(v[n:i])
700			cob.Write(htmlSpace)
701			i++
702			n = i
703			continue
704		}
705		if int(u8len) > len(v) {
706			u8len = int8(len(v) - 1)
707		}
708		if u8len == 0 {
709			i++
710			n = i
711			continue
712		}
713		cob.Write(v[n:i])
714		stop := i + int(u8len)
715		if stop > len(v) {
716			i++
717			n = i
718			continue
719		}
720		cob.Write(StringToReadOnlyBytes(url.QueryEscape(string(v[i:stop]))))
721		i += int(u8len)
722		n = i
723	}
724	if cob.IsCopied() && n < limit {
725		cob.Write(v[n:])
726	}
727	return cob.Bytes()
728}
729
730// FindURLIndex returns a stop index value if the given bytes seem an URL.
731// This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* .
732func FindURLIndex(b []byte) int {
733	i := 0
734	if !(len(b) > 0 && urlTable[b[i]]&7 == 7) {
735		return -1
736	}
737	i++
738	for ; i < len(b); i++ {
739		c := b[i]
740		if urlTable[c]&4 != 4 {
741			break
742		}
743	}
744	if i == 1 || i > 33 || i >= len(b) {
745		return -1
746	}
747	if b[i] != ':' {
748		return -1
749	}
750	i++
751	for ; i < len(b); i++ {
752		c := b[i]
753		if urlTable[c]&1 != 1 {
754			break
755		}
756	}
757	return i
758}
759
760var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`) //nolint:golint,lll
761
762// FindEmailIndex returns a stop index value if the given bytes seem an email address.
763func FindEmailIndex(b []byte) int {
764	// TODO: eliminate regexps
765	i := 0
766	for ; i < len(b); i++ {
767		c := b[i]
768		if emailTable[c]&1 != 1 {
769			break
770		}
771	}
772	if i == 0 {
773		return -1
774	}
775	if i >= len(b) || b[i] != '@' {
776		return -1
777	}
778	i++
779	if i >= len(b) {
780		return -1
781	}
782	match := emailDomainRegexp.FindSubmatchIndex(b[i:])
783	if match == nil {
784		return -1
785	}
786	return i + match[1]
787}
788
789var spaces = []byte(" \t\n\x0b\x0c\x0d")
790
791var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
792
793var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
794
// urlEscapeTable marks with 1 the bytes that URLEscape passes through
// unescaped: a-z A-Z 0-9 ; / ? : @ & = + $ , - _ . ! ~ * ' ( ) #
796
797var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
798
799var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99} //nolint:golint,lll
800
801var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} //nolint:golint,lll
802
803var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} //nolint:golint,lll
804
805// UTF8Len returns a byte length of the utf-8 character.
806func UTF8Len(b byte) int8 {
807	return utf8lenTable[b]
808}
809
810// IsPunct returns true if the given character is a punctuation, otherwise false.
811func IsPunct(c byte) bool {
812	return punctTable[c] == 1
813}
814
// IsPunctRune reports whether r is punctuation, which here means either a
// Unicode punctuation character or a Unicode symbol.
func IsPunctRune(r rune) bool {
	if unicode.IsPunct(r) {
		return true
	}
	return unicode.IsSymbol(r)
}
819
820// IsSpace returns true if the given character is a space, otherwise false.
821func IsSpace(c byte) bool {
822	return spaceTable[c] == 1
823}
824
825// IsSpaceRune returns true if the given rune is a space, otherwise false.
826func IsSpaceRune(r rune) bool {
827	return int32(r) <= 256 && IsSpace(byte(r)) || unicode.IsSpace(r)
828}
829
// IsNumeric reports whether c is an ASCII decimal digit.
func IsNumeric(c byte) bool {
	return '0' <= c && c <= '9'
}
834
// IsHexDecimal reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func IsHexDecimal(c byte) bool {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		return true
	}
	return false
}
839
// IsAlphaNumeric reports whether c is an ASCII letter or an ASCII decimal
// digit.
func IsAlphaNumeric(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
		return true
	}
	return false
}
844
// BufWriter is the subset of the bufio.Writer method set used by this
// package: Write, WriteByte, WriteRune and WriteString for output, plus
// Available, Buffered and Flush for buffer management.
type BufWriter interface {
	io.Writer
	io.ByteWriter
	io.StringWriter

	// Available returns how many bytes are unused in the buffer.
	Available() int
	// Buffered returns how many bytes have been written into the buffer.
	Buffered() int
	// Flush writes any buffered data to the underlying writer.
	Flush() error
	// WriteRune writes a single rune and returns the number of bytes written.
	WriteRune(r rune) (size int, err error)
}
855
// PrioritizedValue pairs an arbitrary value with an integer priority.
type PrioritizedValue struct {
	// Value is the payload being prioritized.
	Value interface{}
	// Priority is the sort key of Value; Sort orders by it ascending.
	Priority int
}

// PrioritizedSlice is a sortable list of PrioritizedValues.
type PrioritizedSlice []PrioritizedValue

// Sort reorders the slice in place so that priorities are ascending.
// The sort is not stable.
func (s PrioritizedSlice) Sort() {
	byPriority := func(a, b int) bool {
		return s[a].Priority < s[b].Priority
	}
	sort.Slice(s, byPriority)
}

// Remove deletes the first element whose Value equals v and returns the
// shortened slice. The removal happens in place, so the receiver's backing
// array is modified. If no element matches, s is returned unchanged.
func (s PrioritizedSlice) Remove(v interface{}) PrioritizedSlice {
	for idx := range s {
		if s[idx].Value == v {
			return append(s[:idx], s[idx+1:]...)
		}
	}
	return s
}

// Prioritized builds a PrioritizedValue from a value and its priority.
func Prioritized(v interface{}, priority int) PrioritizedValue {
	return PrioritizedValue{Value: v, Priority: priority}
}
894
// bytesHash computes a djb2-style hash of b: starting from 5381, each byte
// is folded in as hash*33 + c.
func bytesHash(b []byte) uint64 {
	var hash uint64 = 5381
	for _, c := range b {
		hash = ((hash << 5) + hash) + uint64(c)
	}
	return hash
}

// BytesFilter is a efficient data structure for checking whether bytes exist or not.
//
// NOTE(review): the implementation in this file performs no
// synchronization. Contains only reads, but Add mutates the filter and must
// not run concurrently with other calls on the same filter.
type BytesFilter interface {
	// Add adds given bytes to this set.
	Add([]byte)

	// Contains return true if this set contains given bytes, otherwise false.
	Contains([]byte) bool

	// Extend copies this filter and adds given bytes to new filter.
	// The receiver is left unchanged.
	Extend(...[]byte) BytesFilter
}

// bytesFilter implements BytesFilter with a per-position byte prefilter and
// a fixed number of hash slots.
type bytesFilter struct {
	// chars[c] has bit i set when some element has byte c at position i
	// (only the first threshold positions are tracked).
	chars [256]uint8
	// threshold is the number of leading positions recorded in chars.
	threshold int
	// slots holds the elements, bucketed by bytesHash modulo len(slots).
	slots [][][]byte
}

// NewBytesFilter returns a new BytesFilter containing the given elements.
func NewBytesFilter(elements ...[]byte) BytesFilter {
	s := &bytesFilter{
		threshold: 3,
		slots:     make([][][]byte, 64),
	}
	for _, element := range elements {
		s.Add(element)
	}
	return s
}

// Add inserts b into the filter. The slice is stored as is, not copied.
func (s *bytesFilter) Add(b []byte) {
	m := s.threshold
	if len(b) < m {
		m = len(b)
	}
	for i := 0; i < m; i++ {
		s.chars[b[i]] |= 1 << uint8(i)
	}
	h := bytesHash(b) % uint64(len(s.slots))
	s.slots[h] = append(s.slots[h], b)
}

// Extend returns a new filter that contains everything in s plus bs.
// Every slot is copied, so later Add calls on either filter (or on other
// filters extended from s) cannot overwrite each other's entries.
func (s *bytesFilter) Extend(bs ...[]byte) BytesFilter {
	newFilter := NewBytesFilter().(*bytesFilter)
	newFilter.chars = s.chars
	newFilter.threshold = s.threshold
	for k, v := range s.slots {
		if len(v) == 0 {
			continue
		}
		newSlot := make([][]byte, len(v))
		copy(newSlot, v)
		newFilter.slots[k] = newSlot
	}
	for _, b := range bs {
		newFilter.Add(b)
	}
	return newFilter
}

// Contains reports whether b was added to the filter.
func (s *bytesFilter) Contains(b []byte) bool {
	m := s.threshold
	if len(b) < m {
		m = len(b)
	}
	// Cheap rejection: every leading byte must have been seen at its position.
	for i := 0; i < m; i++ {
		if (s.chars[b[i]] & (1 << uint8(i))) == 0 {
			return false
		}
	}
	h := bytesHash(b) % uint64(len(s.slots))
	for _, element := range s.slots[h] {
		if bytes.Equal(element, b) {
			return true
		}
	}
	return false
}