// Package util provides utility functions for goldmark.
2package util
3
4import (
5 "bytes"
6 "io"
7 "net/url"
8 "regexp"
9 "sort"
10 "strconv"
11 "unicode"
12 "unicode/utf8"
13)
14
// copyOnWriteExtraCap is the spare capacity reserved when a CopyOnWriteBuffer
// allocates its private slice.
const copyOnWriteExtraCap = 20

// A CopyOnWriteBuffer is a byte buffer that never modifies the slice it was
// created with: the first mutating call switches the buffer to a freshly
// allocated slice and later calls keep appending to that slice.
//
// Write, WriteString and WriteByte start from an EMPTY slice on the first
// mutation, while Append, AppendString and AppendByte start from a COPY of the
// original contents.
type CopyOnWriteBuffer struct {
	// buffer is the original slice until the first mutation and the private
	// slice afterwards.
	buffer []byte
	// copied reports whether buffer has been replaced by a private slice.
	copied bool
}

// NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer that wraps buffer
// without copying it.
func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer {
	return CopyOnWriteBuffer{
		buffer: buffer,
		copied: false,
	}
}

// discard replaces the wrapped slice with an empty private slice on the first
// mutation; it is a no-op afterwards.
func (b *CopyOnWriteBuffer) discard() {
	if b.copied {
		return
	}
	b.buffer = make([]byte, 0, len(b.buffer)+copyOnWriteExtraCap)
	b.copied = true
}

// detach replaces the wrapped slice with a private copy of its contents on the
// first mutation; it is a no-op afterwards.
func (b *CopyOnWriteBuffer) detach() {
	if b.copied {
		return
	}
	tmp := make([]byte, len(b.buffer), len(b.buffer)+copyOnWriteExtraCap)
	copy(tmp, b.buffer)
	b.buffer = tmp
	b.copied = true
}

// Write writes given bytes to the buffer.
// The first mutating call allocates a new, empty buffer before writing.
func (b *CopyOnWriteBuffer) Write(value []byte) {
	b.discard()
	b.buffer = append(b.buffer, value...)
}

// WriteString writes given string to the buffer.
// The first mutating call allocates a new, empty buffer before writing.
func (b *CopyOnWriteBuffer) WriteString(value string) {
	b.discard()
	// append accepts a string operand directly, so no []byte conversion is needed.
	b.buffer = append(b.buffer, value...)
}

// Append appends given bytes to the buffer.
// The first mutating call copies the original contents before appending.
func (b *CopyOnWriteBuffer) Append(value []byte) {
	b.detach()
	b.buffer = append(b.buffer, value...)
}

// AppendString appends given string to the buffer.
// The first mutating call copies the original contents before appending.
func (b *CopyOnWriteBuffer) AppendString(value string) {
	b.detach()
	b.buffer = append(b.buffer, value...)
}

// WriteByte writes the given byte to the buffer.
// The first mutating call allocates a new, empty buffer before writing.
// The returned error is always nil.
func (b *CopyOnWriteBuffer) WriteByte(c byte) error {
	b.discard()
	b.buffer = append(b.buffer, c)
	return nil
}

// AppendByte appends the given byte to the buffer.
// The first mutating call copies the original contents before appending.
func (b *CopyOnWriteBuffer) AppendByte(c byte) {
	b.detach()
	b.buffer = append(b.buffer, c)
}

// Bytes returns bytes of this buffer. Until the first mutation this is the
// slice the buffer was created with.
func (b *CopyOnWriteBuffer) Bytes() []byte {
	return b.buffer
}

// IsCopied returns true if buffer has been copied, otherwise false.
func (b *CopyOnWriteBuffer) IsCopied() bool {
	return b.copied
}
96
97// IsEscapedPunctuation returns true if character at a given index i
98// is an escaped punctuation, otherwise false.
99func IsEscapedPunctuation(source []byte, i int) bool {
100 return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1])
101}
102
// ReadWhile scans source from index[0] up to (but not including) index[1] for
// as long as pred accepts the current byte.
//
// It returns the position where scanning stopped and whether at least one byte
// was accepted.
func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) {
	start, stop := index[0], index[1]
	pos := start
	for pos < stop && pred(source[pos]) {
		pos++
	}
	// The scan halts at the first rejected byte, so any advance means a match.
	return pos, pos > start
}
117
118// IsBlank returns true if the given string is all space characters.
119func IsBlank(bs []byte) bool {
120 for _, b := range bs {
121 if !IsSpace(b) {
122 return false
123 }
124 }
125 return true
126}
127
// VisualizeSpaces returns a copy of bs in which invisible characters are
// replaced with bracketed labels such as [SPACE] or [TAB]. A newline keeps its
// line break after the [NEWLINE] label.
func VisualizeSpaces(bs []byte) []byte {
	// Replacements are applied in this order.
	replacements := [...][2]string{
		{" ", "[SPACE]"},
		{"\t", "[TAB]"},
		{"\n", "[NEWLINE]\n"},
		{"\r", "[CR]"},
		{"\v", "[VTAB]"},
		{"\x00", "[NUL]"},
		{"\ufffd", "[U+FFFD]"},
	}
	for _, r := range replacements {
		bs = bytes.Replace(bs, []byte(r[0]), []byte(r[1]), -1)
	}
	return bs
}
139
// TabWidth returns how many columns a tab occupies when it starts at column
// currentPos, using tab stops every four columns.
func TabWidth(currentPos int) int {
	const tabStop = 4
	return tabStop - currentPos%tabStop
}
144
145// IndentPosition searches an indent position with the given width for the given line.
146// If the line contains tab characters, paddings may be not zero.
147// currentPos==0 and width==2:
148//
149// position: 0 1
150// [TAB]aaaa
151// width: 1234 5678
152//
153// width=2 is in the tab character. In this case, IndentPosition returns
154// (pos=1, padding=2).
155func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
156 return IndentPositionPadding(bs, currentPos, 0, width)
157}
158
159// IndentPositionPadding searches an indent position with the given width for the given line.
160// This function is mostly same as IndentPosition except this function
161// takes account into additional paddings.
162func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
163 if width == 0 {
164 return 0, paddingv
165 }
166 w := 0
167 i := 0
168 l := len(bs)
169 p := paddingv
170 for ; i < l; i++ {
171 if p > 0 {
172 p--
173 w++
174 continue
175 }
176 if bs[i] == '\t' && w < width {
177 w += TabWidth(currentPos + w)
178 } else if bs[i] == ' ' && w < width {
179 w++
180 } else {
181 break
182 }
183 }
184 if w >= width {
185 return i - paddingv, w - width
186 }
187 return -1, -1
188}
189
190// DedentPosition dedents lines by the given width.
191//
192// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
193func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
194 if width == 0 {
195 return 0, 0
196 }
197 w := 0
198 l := len(bs)
199 i := 0
200 for ; i < l; i++ {
201 if bs[i] == '\t' {
202 w += TabWidth(currentPos + w)
203 } else if bs[i] == ' ' {
204 w++
205 } else {
206 break
207 }
208 }
209 if w >= width {
210 return i, w - width
211 }
212 return i, 0
213}
214
215// DedentPositionPadding dedents lines by the given width.
216// This function is mostly same as DedentPosition except this function
217// takes account into additional paddings.
218//
219// Deprecated: This function has bugs. Use util.IndentPositionPadding and util.FirstNonSpacePosition.
220func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
221 if width == 0 {
222 return 0, paddingv
223 }
224
225 w := 0
226 i := 0
227 l := len(bs)
228 for ; i < l; i++ {
229 if bs[i] == '\t' {
230 w += TabWidth(currentPos + w)
231 } else if bs[i] == ' ' {
232 w++
233 } else {
234 break
235 }
236 }
237 if w >= width {
238 return i - paddingv, w - width
239 }
240 return i - paddingv, 0
241}
242
243// IndentWidth calculate an indent width for the given line.
244func IndentWidth(bs []byte, currentPos int) (width, pos int) {
245 l := len(bs)
246 for i := 0; i < l; i++ {
247 b := bs[i]
248 if b == ' ' {
249 width++
250 pos++
251 } else if b == '\t' {
252 width += TabWidth(currentPos + width)
253 pos++
254 } else {
255 break
256 }
257 }
258 return
259}
260
// FirstNonSpacePosition returns the index of the first byte of bs that is
// neither a space nor a tab. It returns -1 if a newline is reached first or if
// bs contains only spaces and tabs.
func FirstNonSpacePosition(bs []byte) int {
	for i, c := range bs {
		switch c {
		case ' ', '\t':
			// still inside the leading whitespace
		case '\n':
			return -1
		default:
			return i
		}
	}
	return -1
}
277
// FindClosure returns a position that closes the given opener.
// If codeSpan is set true, it ignores characters in code spans.
// If allowNesting is set true, closures correspond to nested opener will be
// ignored.
//
// The search starts with one opener already counted, so bs is expected to
// begin just after the opener. It returns -1 if no closing byte is found, or
// if a nested opener is met while allowNesting is false.
//
// Deprecated: This function can not handle newlines. Many elements
// can be existed over multiple lines(e.g. link labels).
// Use text.Reader.FindClosure.
func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int {
	i := 0
	// opened counts unmatched openers; the caller's opener is counted up front.
	opened := 1
	// codeSpanOpener is the length of the backtick run that opened the current
	// code span, or 0 when not inside a code span.
	codeSpanOpener := 0
	for i < len(bs) {
		c := bs[i]
		if codeSpan && codeSpanOpener != 0 && c == '`' {
			// Inside a code span: measure this backtick run and close the span
			// only if its length matches the opening run.
			codeSpanCloser := 0
			for ; i < len(bs); i++ {
				if bs[i] == '`' {
					codeSpanCloser++
				} else {
					// Step back so the i++ at the end of the outer loop lands
					// on this non-backtick byte.
					i--
					break
				}
			}
			if codeSpanCloser == codeSpanOpener {
				codeSpanOpener = 0
			}
		} else if codeSpanOpener == 0 && c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) {
			// Backslash-escaped punctuation outside a code span: skip both bytes.
			i += 2
			continue
		} else if codeSpan && codeSpanOpener == 0 && c == '`' {
			// Start of a code span: record the length of the backtick run.
			for ; i < len(bs); i++ {
				if bs[i] == '`' {
					codeSpanOpener++
				} else {
					i--
					break
				}
			}
		} else if (codeSpan && codeSpanOpener == 0) || !codeSpan {
			// Ordinary byte outside any code span (or code spans are not tracked).
			if c == closure {
				opened--
				if opened == 0 {
					return i
				}
			} else if c == opener {
				if !allowNesting {
					return -1
				}
				opened++
			}
		}
		i++
	}
	return -1
}
334
// TrimLeft returns the subslice of source that remains after removing every
// leading byte contained in b. The result shares memory with source; nothing
// is allocated.
func TrimLeft(source, b []byte) []byte {
	start := 0
	for start < len(source) && bytes.IndexByte(b, source[start]) >= 0 {
		start++
	}
	return source[start:]
}
355
// TrimRight returns the subslice of source that remains after removing every
// trailing byte contained in b. The result shares memory with source.
func TrimRight(source, b []byte) []byte {
	end := len(source)
	for end > 0 && bytes.IndexByte(b, source[end-1]) >= 0 {
		end--
	}
	return source[:end]
}
374
375// TrimLeftLength returns a length of leading specified characters.
376func TrimLeftLength(source, s []byte) int {
377 return len(source) - len(TrimLeft(source, s))
378}
379
380// TrimRightLength returns a length of trailing specified characters.
381func TrimRightLength(source, s []byte) int {
382 return len(source) - len(TrimRight(source, s))
383}
384
385// TrimLeftSpaceLength returns a length of leading space characters.
386func TrimLeftSpaceLength(source []byte) int {
387 i := 0
388 for ; i < len(source); i++ {
389 if !IsSpace(source[i]) {
390 break
391 }
392 }
393 return i
394}
395
396// TrimRightSpaceLength returns a length of trailing space characters.
397func TrimRightSpaceLength(source []byte) int {
398 l := len(source)
399 i := l - 1
400 for ; i >= 0; i-- {
401 if !IsSpace(source[i]) {
402 break
403 }
404 }
405 if i < 0 {
406 return l
407 }
408 return l - 1 - i
409}
410
411// TrimLeftSpace returns a subslice of the given string by slicing off all leading
412// space characters.
413func TrimLeftSpace(source []byte) []byte {
414 return TrimLeft(source, spaces)
415}
416
417// TrimRightSpace returns a subslice of the given string by slicing off all trailing
418// space characters.
419func TrimRightSpace(source []byte) []byte {
420 return TrimRight(source, spaces)
421}
422
423// DoFullUnicodeCaseFolding performs full unicode case folding to given bytes.
424func DoFullUnicodeCaseFolding(v []byte) []byte {
425 var rbuf []byte
426 cob := NewCopyOnWriteBuffer(v)
427 n := 0
428 for i := 0; i < len(v); i++ {
429 c := v[i]
430 if c < 0xb5 {
431 if c >= 0x41 && c <= 0x5a {
432 // A-Z to a-z
433 cob.Write(v[n:i])
434 _ = cob.WriteByte(c + 32)
435 n = i + 1
436 }
437 continue
438 }
439
440 if !utf8.RuneStart(c) {
441 continue
442 }
443 r, length := utf8.DecodeRune(v[i:])
444 if r == utf8.RuneError {
445 continue
446 }
447 folded, ok := unicodeCaseFoldings[r]
448 if !ok {
449 continue
450 }
451
452 cob.Write(v[n:i])
453 if rbuf == nil {
454 rbuf = make([]byte, 4)
455 }
456 for _, f := range folded {
457 l := utf8.EncodeRune(rbuf, f)
458 cob.Write(rbuf[:l])
459 }
460 i += length - 1
461 n = i + 1
462 }
463 if cob.IsCopied() {
464 cob.Write(v[n:])
465 }
466 return cob.Bytes()
467}
468
469// ReplaceSpaces replaces sequence of spaces with the given repl.
470func ReplaceSpaces(source []byte, repl byte) []byte {
471 var ret []byte
472 start := -1
473 for i, c := range source {
474 iss := IsSpace(c)
475 if start < 0 && iss {
476 start = i
477 continue
478 } else if start >= 0 && iss {
479 continue
480 } else if start >= 0 {
481 if ret == nil {
482 ret = make([]byte, 0, len(source))
483 ret = append(ret, source[:start]...)
484 }
485 ret = append(ret, repl)
486 start = -1
487 }
488 if ret != nil {
489 ret = append(ret, c)
490 }
491 }
492 if start >= 0 && ret != nil {
493 ret = append(ret, repl)
494 }
495 if ret == nil {
496 return source
497 }
498 return ret
499}
500
// ToRune decodes the rune that contains the byte at pos.
//
// If pos points into the middle of a multi-byte sequence, ToRune walks back to
// the nearest rune-start byte before decoding. It returns utf8.RuneError
// (U+FFFD) when pos is out of range or when no rune-start byte exists at or
// before pos.
func ToRune(source []byte, pos int) rune {
	if pos < 0 || pos >= len(source) {
		return utf8.RuneError
	}
	i := pos
	for i >= 0 && !utf8.RuneStart(source[i]) {
		i--
	}
	if i < 0 {
		// Only continuation bytes precede pos; there is nothing to decode.
		return utf8.RuneError
	}
	r, _ := utf8.DecodeRune(source[i:])
	return r
}
512
// ToValidRune returns v unless it is zero or not a valid rune according to
// utf8.ValidRune, in which case it returns U+FFFD.
func ToValidRune(v rune) rune {
	if v != 0 && utf8.ValidRune(v) {
		return v
	}
	return utf8.RuneError // U+FFFD
}
520
521// ToLinkReference converts given bytes into a valid link reference string.
522// ToLinkReference performs unicode case folding, trims leading and trailing spaces, converts into lower
523// case and replace spaces with a single space character.
524func ToLinkReference(v []byte) string {
525 v = TrimLeftSpace(v)
526 v = TrimRightSpace(v)
527 v = DoFullUnicodeCaseFolding(v)
528 return string(ReplaceSpaces(v, ' '))
529}
530
// htmlEscapeTable maps a byte to its HTML entity, or nil when the byte does
// not need escaping. Only '"', '&', '<' and '>' have entries.
var htmlEscapeTable = [256][]byte{
	'"': []byte("&quot;"),
	'&': []byte("&amp;"),
	'<': []byte("&lt;"),
	'>': []byte("&gt;"),
}
532
533// EscapeHTMLByte returns HTML escaped bytes if the given byte should be escaped,
534// otherwise nil.
535func EscapeHTMLByte(b byte) []byte {
536 return htmlEscapeTable[b]
537}
538
539// EscapeHTML escapes characters that should be escaped in HTML text.
540func EscapeHTML(v []byte) []byte {
541 cob := NewCopyOnWriteBuffer(v)
542 n := 0
543 for i := 0; i < len(v); i++ {
544 c := v[i]
545 escaped := htmlEscapeTable[c]
546 if escaped != nil {
547 cob.Write(v[n:i])
548 cob.Write(escaped)
549 n = i + 1
550 }
551 }
552 if cob.IsCopied() {
553 cob.Write(v[n:])
554 }
555 return cob.Bytes()
556}
557
558// UnescapePunctuations unescapes blackslash escaped punctuations.
559func UnescapePunctuations(source []byte) []byte {
560 cob := NewCopyOnWriteBuffer(source)
561 limit := len(source)
562 n := 0
563 for i := 0; i < limit; {
564 c := source[i]
565 if i < limit-1 && c == '\\' && IsPunct(source[i+1]) {
566 cob.Write(source[n:i])
567 _ = cob.WriteByte(source[i+1])
568 i += 2
569 n = i
570 continue
571 }
572 i++
573 }
574 if cob.IsCopied() {
575 cob.Write(source[n:])
576 }
577 return cob.Bytes()
578}
579
580// ResolveNumericReferences resolve numeric references like 'Ӓ" .
581func ResolveNumericReferences(source []byte) []byte {
582 cob := NewCopyOnWriteBuffer(source)
583 buf := make([]byte, 6)
584 limit := len(source)
585 var ok bool
586 n := 0
587 for i := 0; i < limit; i++ {
588 if source[i] == '&' {
589 pos := i
590 next := i + 1
591 if next < limit && source[next] == '#' {
592 nnext := next + 1
593 if nnext < limit {
594 nc := source[nnext]
595 // code point like #x22;
596 if nnext < limit && nc == 'x' || nc == 'X' {
597 start := nnext + 1
598 i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal)
599 if ok && i < limit && source[i] == ';' {
600 v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32)
601 cob.Write(source[n:pos])
602 n = i + 1
603 runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
604 cob.Write(buf[:runeSize])
605 continue
606 }
607 // code point like #1234;
608 } else if nc >= '0' && nc <= '9' {
609 start := nnext
610 i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric)
611 if ok && i < limit && i-start < 8 && source[i] == ';' {
612 v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32)
613 cob.Write(source[n:pos])
614 n = i + 1
615 runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
616 cob.Write(buf[:runeSize])
617 continue
618 }
619 }
620 }
621 }
622 i = next - 1
623 }
624 }
625 if cob.IsCopied() {
626 cob.Write(source[n:])
627 }
628 return cob.Bytes()
629}
630
631// ResolveEntityNames resolve entity references like 'ö" .
632func ResolveEntityNames(source []byte) []byte {
633 cob := NewCopyOnWriteBuffer(source)
634 limit := len(source)
635 var ok bool
636 n := 0
637 for i := 0; i < limit; i++ {
638 if source[i] == '&' {
639 pos := i
640 next := i + 1
641 if !(next < limit && source[next] == '#') {
642 start := next
643 i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric)
644 if ok && i < limit && source[i] == ';' {
645 name := BytesToReadOnlyString(source[start:i])
646 entity, ok := LookUpHTML5EntityByName(name)
647 if ok {
648 cob.Write(source[n:pos])
649 n = i + 1
650 cob.Write(entity.Characters)
651 continue
652 }
653 }
654 }
655 i = next - 1
656 }
657 }
658 if cob.IsCopied() {
659 cob.Write(source[n:])
660 }
661 return cob.Bytes()
662}
663
664var htmlSpace = []byte("%20")
665
666// URLEscape escape the given URL.
667// If resolveReference is set true:
668// 1. unescape punctuations
669// 2. resolve numeric references
670// 3. resolve entity references
671//
672// URL encoded values (%xx) are kept as is.
673func URLEscape(v []byte, resolveReference bool) []byte {
674 if resolveReference {
675 v = UnescapePunctuations(v)
676 v = ResolveNumericReferences(v)
677 v = ResolveEntityNames(v)
678 }
679 cob := NewCopyOnWriteBuffer(v)
680 limit := len(v)
681 n := 0
682
683 for i := 0; i < limit; {
684 c := v[i]
685 if urlEscapeTable[c] == 1 {
686 i++
687 continue
688 }
689 if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) {
690 i += 3
691 continue
692 }
693 u8len := utf8lenTable[c]
694 if u8len == 99 { // invalid utf8 leading byte, skip it
695 i++
696 continue
697 }
698 if c == ' ' {
699 cob.Write(v[n:i])
700 cob.Write(htmlSpace)
701 i++
702 n = i
703 continue
704 }
705 if int(u8len) > len(v) {
706 u8len = int8(len(v) - 1)
707 }
708 if u8len == 0 {
709 i++
710 n = i
711 continue
712 }
713 cob.Write(v[n:i])
714 stop := i + int(u8len)
715 if stop > len(v) {
716 i++
717 n = i
718 continue
719 }
720 cob.Write(StringToReadOnlyBytes(url.QueryEscape(string(v[i:stop]))))
721 i += int(u8len)
722 n = i
723 }
724 if cob.IsCopied() && n < limit {
725 cob.Write(v[n:])
726 }
727 return cob.Bytes()
728}
729
730// FindURLIndex returns a stop index value if the given bytes seem an URL.
731// This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* .
732func FindURLIndex(b []byte) int {
733 i := 0
734 if !(len(b) > 0 && urlTable[b[i]]&7 == 7) {
735 return -1
736 }
737 i++
738 for ; i < len(b); i++ {
739 c := b[i]
740 if urlTable[c]&4 != 4 {
741 break
742 }
743 }
744 if i == 1 || i > 33 || i >= len(b) {
745 return -1
746 }
747 if b[i] != ':' {
748 return -1
749 }
750 i++
751 for ; i < len(b); i++ {
752 c := b[i]
753 if urlTable[c]&1 != 1 {
754 break
755 }
756 }
757 return i
758}
759
760var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`) //nolint:golint,lll
761
762// FindEmailIndex returns a stop index value if the given bytes seem an email address.
763func FindEmailIndex(b []byte) int {
764 // TODO: eliminate regexps
765 i := 0
766 for ; i < len(b); i++ {
767 c := b[i]
768 if emailTable[c]&1 != 1 {
769 break
770 }
771 }
772 if i == 0 {
773 return -1
774 }
775 if i >= len(b) || b[i] != '@' {
776 return -1
777 }
778 i++
779 if i >= len(b) {
780 return -1
781 }
782 match := emailDomainRegexp.FindSubmatchIndex(b[i:])
783 if match == nil {
784 return -1
785 }
786 return i + match[1]
787}
788
// spaces is the cutset used by TrimLeftSpace and TrimRightSpace: space, tab,
// line feed, vertical tab, form feed and carriage return.
var spaces = []byte{' ', '\t', '\n', '\v', '\f', '\r'}
790
// spaceTable flags (with 1) the bytes that IsSpace treats as a space:
// tab, line feed, carriage return and the space character.
var spaceTable = [256]int8{
	'\t': 1,
	'\n': 1,
	'\r': 1,
	' ':  1,
}
792
// punctTable flags (with 1) the ASCII punctuation bytes recognised by IsPunct.
var punctTable = [256]int8{
	'!': 1, '"': 1, '#': 1, '$': 1, '%': 1, '&': 1, '\'': 1, '(': 1,
	')': 1, '*': 1, '+': 1, ',': 1, '-': 1, '.': 1, '/': 1,
	':': 1, ';': 1, '<': 1, '=': 1, '>': 1, '?': 1, '@': 1,
	'[': 1, '\\': 1, ']': 1, '^': 1, '_': 1, '`': 1,
	'{': 1, '|': 1, '}': 1, '~': 1,
}
794
// urlEscapeTable flags (with 1) the bytes that URLEscape passes through
// unchanged: a-zA-Z0-9 and ;/?:@&=+$,-_.!~*'()#
var urlEscapeTable = [256]int8{
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
	0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20  !"#$%&'()*+,-./
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // 0x30 0123456789:;<=>?
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 @ABCDEFGHIJKLMNO
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 PQRSTUVWXYZ[\]^_
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 `abcdefghijklmno
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, // 0x70 pqrstuvwxyz{|}~
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0
}
798
// utf8lenTable maps a leading byte to the byte length of the UTF-8 sequence
// it starts. The value 99 marks bytes that cannot start a sequence.
var utf8lenTable = [256]int8{
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70
	99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, // 0x80
	99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, // 0x90
	99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, // 0xa0
	99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, // 0xb0
	99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0
	4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99, // 0xf0
}
800
// urlTable holds per-byte flags used by FindURLIndex:
// all of the low three bits (7) set marks a byte that may start a scheme,
// bit 4 marks a byte allowed inside a scheme, and bit 1 marks a byte allowed
// after the scheme's colon.
var urlTable = [256]uint8{
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, // 0x20  !"#$%&'()*+,-./
	5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, // 0x30 0123456789:;<=>?
	1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0x40 @ABCDEFGHIJKLMNO
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, // 0x50 PQRSTUVWXYZ[\]^_
	1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 0x60 `abcdefghijklmno
	7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, // 0x70 pqrstuvwxyz{|}~
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xe0
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xf0
}
802
// emailTable flags (bit 1) the bytes that FindEmailIndex accepts in the local
// part of an email address: a-zA-Z0-9 and .!#$%&'*+/=?^_`{|}~-
var emailTable = [256]uint8{
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
	0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, // 0x20  !"#$%&'()*+,-./
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, // 0x30 0123456789:;<=>?
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 @ABCDEFGHIJKLMNO
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, // 0x50 PQRSTUVWXYZ[\]^_
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 `abcdefghijklmno
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, // 0x70 pqrstuvwxyz{|}~
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0
}
804
805// UTF8Len returns a byte length of the utf-8 character.
806func UTF8Len(b byte) int8 {
807 return utf8lenTable[b]
808}
809
810// IsPunct returns true if the given character is a punctuation, otherwise false.
811func IsPunct(c byte) bool {
812 return punctTable[c] == 1
813}
814
// IsPunctRune reports whether r is a Unicode punctuation or symbol character.
func IsPunctRune(r rune) bool {
	if unicode.IsPunct(r) {
		return true
	}
	return unicode.IsSymbol(r)
}
819
820// IsSpace returns true if the given character is a space, otherwise false.
821func IsSpace(c byte) bool {
822 return spaceTable[c] == 1
823}
824
825// IsSpaceRune returns true if the given rune is a space, otherwise false.
826func IsSpaceRune(r rune) bool {
827 return int32(r) <= 256 && IsSpace(byte(r)) || unicode.IsSpace(r)
828}
829
// IsNumeric reports whether c is an ASCII digit ('0' through '9').
func IsNumeric(c byte) bool {
	// Byte subtraction wraps around for c < '0', so one comparison covers
	// both ends of the range.
	return c-'0' < 10
}
834
// IsHexDecimal reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func IsHexDecimal(c byte) bool {
	switch {
	case '0' <= c && c <= '9',
		'a' <= c && c <= 'f',
		'A' <= c && c <= 'F':
		return true
	}
	return false
}
839
// IsAlphaNumeric reports whether c is an ASCII letter or an ASCII digit.
func IsAlphaNumeric(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9':
		return true
	}
	return false
}
844
// A BufWriter is a subset of the bufio.Writer .
// Each method below has the same signature as the bufio.Writer method of the
// same name.
type BufWriter interface {
	// io.Writer supplies Write(p []byte) (n int, err error).
	io.Writer
	// Available has the signature of bufio.Writer.Available.
	Available() int
	// Buffered has the signature of bufio.Writer.Buffered.
	Buffered() int
	// Flush has the signature of bufio.Writer.Flush.
	Flush() error
	// WriteByte has the signature of bufio.Writer.WriteByte.
	WriteByte(c byte) error
	// WriteRune has the signature of bufio.Writer.WriteRune.
	WriteRune(r rune) (size int, err error)
	// WriteString has the signature of bufio.Writer.WriteString.
	WriteString(s string) (int, error)
}
855
// A PrioritizedValue pairs an arbitrary value with an integer priority.
type PrioritizedValue struct {
	// Value is an arbitrary value that you want to prioritize.
	Value interface{}
	// Priority is a priority of the value.
	Priority int
}

// PrioritizedSlice is a slice of the PrioritizedValues.
type PrioritizedSlice []PrioritizedValue

// Sort sorts the PrioritizedSlice in place, in ascending order of Priority.
func (s PrioritizedSlice) Sort() {
	byPriority := func(i, j int) bool {
		return s[i].Priority < s[j].Priority
	}
	sort.Slice(s, byPriority)
}

// Remove removes the first element whose Value equals v and returns the
// resulting slice. The removal is done in place, so the returned slice shares
// its backing array with s. If v is not present, s is returned unchanged.
func (s PrioritizedSlice) Remove(v interface{}) PrioritizedSlice {
	for i := range s {
		if s[i].Value == v {
			return append(s[:i], s[i+1:]...)
		}
	}
	return s
}

// Prioritized returns a new PrioritizedValue holding v with the given priority.
func Prioritized(v interface{}, priority int) PrioritizedValue {
	return PrioritizedValue{Value: v, Priority: priority}
}
894
// bytesHash returns a 64-bit hash of b: starting from 5381, each byte c
// updates the hash to hash*33 + c.
func bytesHash(b []byte) uint64 {
	var hash uint64 = 5381
	for _, c := range b {
		hash = ((hash << 5) + hash) + uint64(c)
	}
	return hash
}

// BytesFilter is a efficient data structure for checking whether bytes exist or not.
//
// The implementation in this file does no locking: Contains only reads the
// filter, while Add modifies it. Do not call Add concurrently with any other
// method on the same filter.
type BytesFilter interface {
	// Add adds given bytes to this set.
	Add([]byte)

	// Contains return true if this set contains given bytes, otherwise false.
	Contains([]byte) bool

	// Extend copies this filter and adds given bytes to new filter.
	Extend(...[]byte) BytesFilter
}

// bytesFilter implements BytesFilter with a per-position byte bitmap as a
// quick rejection test and hash-indexed slots holding the stored elements.
type bytesFilter struct {
	// chars[c] has bit i set if some stored element has byte c at index i
	// (only the first threshold indexes are tracked).
	chars [256]uint8
	// threshold is the number of leading bytes tracked in chars.
	threshold int
	// slots holds the stored elements, indexed by bytesHash modulo len(slots).
	slots [][][]byte
}

// NewBytesFilter returns a new BytesFilter containing the given elements.
func NewBytesFilter(elements ...[]byte) BytesFilter {
	s := &bytesFilter{
		threshold: 3,
		slots:     make([][][]byte, 64),
	}
	for _, element := range elements {
		s.Add(element)
	}
	return s
}

// Add adds b to the filter. The slice is stored as is (not copied).
func (s *bytesFilter) Add(b []byte) {
	l := len(b)
	m := s.threshold
	if l < s.threshold {
		m = l
	}
	for i := 0; i < m; i++ {
		s.chars[b[i]] |= 1 << uint8(i)
	}
	h := bytesHash(b) % uint64(len(s.slots))
	// append works on a nil slot, so no explicit initialisation is needed.
	s.slots[h] = append(s.slots[h], b)
}

// Extend returns a new filter that contains every element of s plus bs.
// The new filter has its own slot slices, so adding to it never affects s or
// other filters extended from s.
func (s *bytesFilter) Extend(bs ...[]byte) BytesFilter {
	newFilter := NewBytesFilter().(*bytesFilter)
	newFilter.chars = s.chars
	newFilter.threshold = s.threshold
	for k, v := range s.slots {
		if len(v) == 0 {
			continue
		}
		// Store the COPY. Storing v itself shared the backing array, so an
		// append in one extended filter could overwrite an element appended
		// by another filter extended from the same parent.
		newSlot := make([][]byte, len(v))
		copy(newSlot, v)
		newFilter.slots[k] = newSlot
	}
	for _, b := range bs {
		newFilter.Add(b)
	}
	return newFilter
}

// Contains reports whether b was added to the filter.
func (s *bytesFilter) Contains(b []byte) bool {
	l := len(b)
	m := s.threshold
	if l < s.threshold {
		m = l
	}
	// Quick rejection: every tracked leading byte must have been seen at the
	// same index in some stored element.
	for i := 0; i < m; i++ {
		if (s.chars[b[i]] & (1 << uint8(i))) == 0 {
			return false
		}
	}
	h := bytesHash(b) % uint64(len(s.slots))
	for _, element := range s.slots[h] {
		if bytes.Equal(element, b) {
			return true
		}
	}
	return false
}