1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse inline elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "regexp"
19 "strconv"
20)
21
var (
	// urlRe is a regular-expression fragment matching an http, https or ftp
	// URL, or a path that starts with '/'.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, anchored at the start of its input, a complete
	// <a href="URL"> element (with an optional title attribute) whose link
	// text is itself a URL. autoLink uses it to copy such anchors through
	// verbatim instead of auto-linking the URLs inside them.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)
)
26
27// Functions to parse text within a block
28// Each function returns the number of chars taken care of
29// data is the complete block being rendered
30// offset is the number of valid chars before the current cursor
31
32func (p *parser) inline(out *bytes.Buffer, data []byte) {
33 // this is called recursively: enforce a maximum depth
34 if p.nesting >= p.maxNesting {
35 return
36 }
37 p.nesting++
38
39 i, end := 0, 0
40 for i < len(data) {
41 // copy inactive chars into the output
42 for end < len(data) && p.inlineCallback[data[end]] == nil {
43 end++
44 }
45
46 p.r.NormalText(out, data[i:end])
47
48 if end >= len(data) {
49 break
50 }
51 i = end
52
53 // call the trigger
54 handler := p.inlineCallback[data[end]]
55 if consumed := handler(p, out, data, i); consumed == 0 {
56 // no action from the callback; buffer the byte for later
57 end = i + 1
58 } else {
59 // skip past whatever the callback used
60 i += consumed
61 end = i
62 }
63 }
64
65 p.nesting--
66}
67
68// single and double emphasis parsing
69func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int {
70 data = data[offset:]
71 c := data[0]
72 ret := 0
73
74 if len(data) > 2 && data[1] != c {
75 // whitespace cannot follow an opening emphasis;
76 // strikethrough only takes two characters '~~'
77 if c == '~' || isspace(data[1]) {
78 return 0
79 }
80 if ret = helperEmphasis(p, out, data[1:], c); ret == 0 {
81 return 0
82 }
83
84 return ret + 1
85 }
86
87 if len(data) > 3 && data[1] == c && data[2] != c {
88 if isspace(data[2]) {
89 return 0
90 }
91 if ret = helperDoubleEmphasis(p, out, data[2:], c); ret == 0 {
92 return 0
93 }
94
95 return ret + 2
96 }
97
98 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
99 if c == '~' || isspace(data[3]) {
100 return 0
101 }
102 if ret = helperTripleEmphasis(p, out, data, 3, c); ret == 0 {
103 return 0
104 }
105
106 return ret + 3
107 }
108
109 return 0
110}
111
112func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int {
113 data = data[offset:]
114
115 nb := 0
116
117 // count the number of backticks in the delimiter
118 for nb < len(data) && data[nb] == '`' {
119 nb++
120 }
121
122 // find the next delimiter
123 i, end := 0, 0
124 for end = nb; end < len(data) && i < nb; end++ {
125 if data[end] == '`' {
126 i++
127 } else {
128 i = 0
129 }
130 }
131
132 // no matching delimiter?
133 if i < nb && end >= len(data) {
134 return 0
135 }
136
137 // trim outside whitespace
138 fBegin := nb
139 for fBegin < end && data[fBegin] == ' ' {
140 fBegin++
141 }
142
143 fEnd := end - nb
144 for fEnd > fBegin && data[fEnd-1] == ' ' {
145 fEnd--
146 }
147
148 // render the code span
149 if fBegin != fEnd {
150 p.r.CodeSpan(out, data[fBegin:fEnd])
151 }
152
153 return end
154
155}
156
157// newline preceded by two spaces becomes <br>
158// newline without two spaces works when EXTENSION_HARD_LINE_BREAK is enabled
159func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int {
160 // remove trailing spaces from out
161 outBytes := out.Bytes()
162 end := len(outBytes)
163 eol := end
164 for eol > 0 && outBytes[eol-1] == ' ' {
165 eol--
166 }
167 out.Truncate(eol)
168
169 precededByTwoSpaces := offset >= 2 && data[offset-2] == ' ' && data[offset-1] == ' '
170 precededByBackslash := offset >= 1 && data[offset-1] == '\\' // see http://spec.commonmark.org/0.18/#example-527
171 precededByBackslash = precededByBackslash && p.flags&EXTENSION_BACKSLASH_LINE_BREAK != 0
172
173 if p.flags&EXTENSION_JOIN_LINES != 0 {
174 return 1
175 }
176
177 // should there be a hard line break here?
178 if p.flags&EXTENSION_HARD_LINE_BREAK == 0 && !precededByTwoSpaces && !precededByBackslash {
179 return 0
180 }
181
182 if precededByBackslash && eol > 0 {
183 out.Truncate(eol - 1)
184 }
185 p.r.LineBreak(out)
186 return 1
187}
188
// linkType identifies which bracketed construct the link parser is handling.
type linkType int

const (
	linkNormal           linkType = iota // [text](url), [text][id], [text]
	linkImg                              // ![alt](url)
	linkDeferredFootnote                 // [^id]
	linkInlineFootnote                   // ^[text]
)

// isReferenceStyleLink reports whether data[pos] opens the "[id]" part of a
// reference-style link: a '[' that is followed by at least one more byte
// which is not '^'. Deferred footnotes never take a reference part.
func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
	if t == linkDeferredFootnote || pos+1 >= len(data) {
		return false
	}
	return data[pos] == '[' && data[pos+1] != '^'
}
204
// '[': parse a link or an image or a footnote
//
// link is the inline callback for '['. data is the block being rendered and
// offset is the index of the '[' inside it. It recognises inline links
// ([text](url "title")), reference links ([text][id] and [text][]), shortcut
// references ([text]), images (a '[' preceded by '!') and, when
// EXTENSION_FOOTNOTES is set, inline footnotes (^[text]) and deferred
// footnotes ([^id]). On success the element is rendered into out and the
// number of bytes consumed, counted from the '[', is returned. A return
// value of 0 means no element was recognised here.
func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0
	}

	// classify the construct from the bytes around the '['
	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.flags&EXTENSION_FOOTNOTES != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset > 0 && data[offset-1] == '!':
		t = linkImg
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.flags&EXTENSION_FOOTNOTES != 0:
		if offset > 0 && data[offset-1] == '^' {
			t = linkInlineFootnote
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
		// neither form matched: t keeps its zero value, linkNormal
	// [text] == regular link
	default:
		t = linkNormal
	}

	// from here on all indices are relative to the '['
	data = data[offset:]

	var (
		i                       = 1 // scan position; starts just after the '['
		noteId                  int
		title, link, altContent []byte
		textHasNl               = false // the bracketed text spans several lines
	)

	// step over the '^' of a deferred footnote
	if t == linkDeferredFootnote {
		i++
	}

	// nesting depth of unescaped '(' inside an inline link destination
	brace := 0

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		// a byte preceded by a backslash never opens or closes a bracket
		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	// ran off the end without finding the closing bracket
	if i >= len(data) {
		return 0
	}

	// txtE is the index of the closing ']' of the text part
	txtE := i
	i++

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	switch {
	// inline style link
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " ), check for new opening braces and take this
		// into account, this may lead for overshooting and probably will require
		// some fine-tuning.
	findlinkend:
		for i < len(data) {
			switch {
			// a backslash escapes the following byte; skip both
			case data[i] == '\\':
				i += 2

			case data[i] == '(':
				brace++
				i++

			case data[i] == ')':
				// only an unbalanced ')' ends the destination
				if brace <= 0 {
					break findlinkend
				}
				brace--
				i++

			// a quote starts the optional title
			case data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: there is no title after all, and
				// everything up to the ')' belongs to the destination
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty id ("[text][]"): the link text doubles as the id
			if textHasNl {
				var b bytes.Buffer

				// replace each newline with a single space, unless the
				// byte before it is already a space
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step past the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// same newline folding as in the reference-style case above
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		if t == linkInlineFootnote {
			// create a new reference
			noteId = len(p.notes) + 1

			// the anchor fragment is the slugified text, limited to 16
			// bytes, or "footnote-N" when the text is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				// NOTE(review): copy fills at most len(fragment) bytes; if
				// slugify returns fewer, the tail stays zero-filled — confirm.
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
			}

			ref := &reference{
				noteId:   noteId,
				hasBlock: false,
				link:     fragment,
				title:    id,
			}

			p.notes = append(p.notes, ref)
			p.notesRecord[string(ref.link)] = struct{}{}

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0
			}

			// register the footnote the first time it is referenced
			if t == linkDeferredFootnote && !p.isFootnote(lr) {
				lr.noteId = len(p.notes) + 1
				p.notes = append(p.notes, lr)
				p.notesRecord[string(lr.link)] = struct{}{}
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteId = lr.noteId
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// build content: img alt is escaped, link content is parsed
	var content bytes.Buffer
	if txtE > 1 {
		if t == linkImg {
			content.Write(data[1:txtE])
		} else {
			// links cannot contain other links, so turn off link parsing temporarily
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(&content, data[1:txtE])
			p.insideLink = insideLink
		}
	}

	// for links and images, resolve backslash escapes in the destination
	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) {
			return 0
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		// text taken from the reference, when there is any, replaces the
		// parsed content
		if len(altContent) > 0 {
			p.r.Link(out, uLink, title, altContent)
		} else {
			p.r.Link(out, uLink, title, content.Bytes())
		}

	case linkImg:
		// the '!' before the '[' was already emitted as text; take it back
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '!' {
			out.Truncate(outSize - 1)
		}

		p.r.Image(out, uLink, title, content.Bytes())

	case linkInlineFootnote:
		// likewise remove the '^' that preceded the '['
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '^' {
			out.Truncate(outSize - 1)
		}

		p.r.FootnoteRef(out, link, noteId)

	case linkDeferredFootnote:
		p.r.FootnoteRef(out, link, noteId)

	default:
		return 0
	}

	return i
}
583
584func (p *parser) inlineHTMLComment(out *bytes.Buffer, data []byte) int {
585 if len(data) < 5 {
586 return 0
587 }
588 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
589 return 0
590 }
591 i := 5
592 // scan for an end-of-comment marker, across lines if necessary
593 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
594 i++
595 }
596 // no end-of-comment marker
597 if i >= len(data) {
598 return 0
599 }
600 return i + 1
601}
602
603// '<' when tags or autolinks are allowed
604func leftAngle(p *parser, out *bytes.Buffer, data []byte, offset int) int {
605 data = data[offset:]
606 altype := LINK_TYPE_NOT_AUTOLINK
607 end := tagLength(data, &altype)
608 if size := p.inlineHTMLComment(out, data); size > 0 {
609 end = size
610 }
611 if end > 2 {
612 if altype != LINK_TYPE_NOT_AUTOLINK {
613 var uLink bytes.Buffer
614 unescapeText(&uLink, data[1:end+1-2])
615 if uLink.Len() > 0 {
616 p.r.AutoLink(out, uLink.Bytes(), altype)
617 }
618 } else {
619 p.r.RawHtmlTag(out, data[:end])
620 }
621 }
622
623 return end
624}
625
626// '\\' backslash escape
627var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
628
629func escape(p *parser, out *bytes.Buffer, data []byte, offset int) int {
630 data = data[offset:]
631
632 if len(data) > 1 {
633 if bytes.IndexByte(escapeChars, data[1]) < 0 {
634 return 0
635 }
636
637 p.r.NormalText(out, data[1:2])
638 }
639
640 return 2
641}
642
// unescapeText appends src to ob with backslash escapes resolved: every
// backslash is dropped and the byte after it is copied unchanged, whatever it
// is. A backslash that is the last byte of src is dropped.
func unescapeText(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		slash := bytes.IndexByte(src, '\\')
		if slash < 0 {
			// no more escapes: the rest is literal
			ob.Write(src)
			return
		}

		// literal text in front of the backslash
		if slash > 0 {
			ob.Write(src[:slash])
		}

		// nothing follows the backslash
		if slash+1 >= len(src) {
			return
		}

		ob.WriteByte(src[slash+1])
		src = src[slash+2:]
	}
}
663
664// '&' escaped when it doesn't belong to an entity
665// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
666func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int {
667 data = data[offset:]
668
669 end := 1
670
671 if end < len(data) && data[end] == '#' {
672 end++
673 }
674
675 for end < len(data) && isalnum(data[end]) {
676 end++
677 }
678
679 if end < len(data) && data[end] == ';' {
680 end++ // real entity
681 } else {
682 return 0 // lone '&'
683 }
684
685 p.r.Entity(out, data[:end])
686
687 return end
688}
689
690func linkEndsWithEntity(data []byte, linkEnd int) bool {
691 entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
692 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
693}
694
// autoLink is the inline callback for ':'. It turns a bare URL such as
// http://example.com into a link. data is the block being rendered and
// offset is the index of the ':'. The scheme letters in front of the ':'
// have already been written to out as normal text, so they are removed from
// out again before the link is rendered. It returns the number of bytes
// consumed starting at the ':', or 0 when no autolink starts here.
func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// quick check to rule out most false hits on ':'
	if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
		return 0
	}

	// Now a more expensive check to see if we're not inside an anchor element
	// (walk back to the closest '<', or to the start of data)
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	// if a complete <a href="url">url</a> element starts there, copy the rest
	// of it, from the ':' onwards, to out untouched
	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		out.Write(anchorStr[offsetFromAnchor:])
		return len(anchorStr) - offsetFromAnchor
	}

	// scan backward for a word boundary
	// (rewind ends up as the number of letters directly before the ':')
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0
	}

	// keep the full block for the delimiter scan below; data now starts at
	// the first letter of the scheme
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0
	}

	// the candidate link runs up to the first end-of-link byte
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	// copen is the opening counterpart of the final byte, or 0 if it has none.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the final delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backwards through the current line, counting closing
		// delimiters up and opening delimiters down, until they balance
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced: the final delimiter is dropped from the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	// we were triggered on the ':', so we need to rewind the output a bit
	if out.Len() >= rewind {
		out.Truncate(len(out.Bytes()) - rewind)
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		p.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
	}

	// linkEnd counts from the start of the scheme; the caller's cursor is on
	// the ':', rewind bytes further on
	return linkEnd - rewind
}
819
820func isEndOfLink(char byte) bool {
821 return isspace(char) || char == '<'
822}
823
824var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
825var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
826
827func isSafeLink(link []byte) bool {
828 for _, path := range validPaths {
829 if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
830 if len(link) == len(path) {
831 return true
832 } else if isalnum(link[len(path)]) {
833 return true
834 }
835 }
836 }
837
838 for _, prefix := range validUris {
839 // TODO: handle unicode here
840 // case-insensitive prefix test
841 if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
842 return true
843 }
844 }
845
846 return false
847}
848
849// return the length of the given tag, or 0 is it's not valid
850func tagLength(data []byte, autolink *int) int {
851 var i, j int
852
853 // a valid tag can't be shorter than 3 chars
854 if len(data) < 3 {
855 return 0
856 }
857
858 // begins with a '<' optionally followed by '/', followed by letter or number
859 if data[0] != '<' {
860 return 0
861 }
862 if data[1] == '/' {
863 i = 2
864 } else {
865 i = 1
866 }
867
868 if !isalnum(data[i]) {
869 return 0
870 }
871
872 // scheme test
873 *autolink = LINK_TYPE_NOT_AUTOLINK
874
875 // try to find the beginning of an URI
876 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
877 i++
878 }
879
880 if i > 1 && i < len(data) && data[i] == '@' {
881 if j = isMailtoAutoLink(data[i:]); j != 0 {
882 *autolink = LINK_TYPE_EMAIL
883 return i + j
884 }
885 }
886
887 if i > 2 && i < len(data) && data[i] == ':' {
888 *autolink = LINK_TYPE_NORMAL
889 i++
890 }
891
892 // complete autolink test: no whitespace or ' or "
893 switch {
894 case i >= len(data):
895 *autolink = LINK_TYPE_NOT_AUTOLINK
896 case *autolink != 0:
897 j = i
898
899 for i < len(data) {
900 if data[i] == '\\' {
901 i += 2
902 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
903 break
904 } else {
905 i++
906 }
907
908 }
909
910 if i >= len(data) {
911 return 0
912 }
913 if i > j && data[i] == '>' {
914 return i + 1
915 }
916
917 // one of the forbidden chars has been found
918 *autolink = LINK_TYPE_NOT_AUTOLINK
919 }
920
921 // look for something looking like a tag end
922 for i < len(data) && data[i] != '>' {
923 i++
924 }
925 if i >= len(data) {
926 return 0
927 }
928 return i + 1
929}
930
931// look for the address part of a mail autolink and '>'
932// this is less strict than the original markdown e-mail address matching
933func isMailtoAutoLink(data []byte) int {
934 nb := 0
935
936 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
937 for i := 0; i < len(data); i++ {
938 if isalnum(data[i]) {
939 continue
940 }
941
942 switch data[i] {
943 case '@':
944 nb++
945
946 case '-', '.', '_':
947 // Do nothing.
948
949 case '>':
950 if nb == 1 {
951 return i + 1
952 } else {
953 return 0
954 }
955 default:
956 return 0
957 }
958 }
959
960 return 0
961}
962
963// look for the next emph char, skipping other constructs
964func helperFindEmphChar(data []byte, c byte) int {
965 i := 0
966
967 for i < len(data) {
968 for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
969 i++
970 }
971 if i >= len(data) {
972 return 0
973 }
974 // do not count escaped chars
975 if i != 0 && data[i-1] == '\\' {
976 i++
977 continue
978 }
979 if data[i] == c {
980 return i
981 }
982
983 if data[i] == '`' {
984 // skip a code span
985 tmpI := 0
986 i++
987 for i < len(data) && data[i] != '`' {
988 if tmpI == 0 && data[i] == c {
989 tmpI = i
990 }
991 i++
992 }
993 if i >= len(data) {
994 return tmpI
995 }
996 i++
997 } else if data[i] == '[' {
998 // skip a link
999 tmpI := 0
1000 i++
1001 for i < len(data) && data[i] != ']' {
1002 if tmpI == 0 && data[i] == c {
1003 tmpI = i
1004 }
1005 i++
1006 }
1007 i++
1008 for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
1009 i++
1010 }
1011 if i >= len(data) {
1012 return tmpI
1013 }
1014 if data[i] != '[' && data[i] != '(' { // not a link
1015 if tmpI > 0 {
1016 return tmpI
1017 } else {
1018 continue
1019 }
1020 }
1021 cc := data[i]
1022 i++
1023 for i < len(data) && data[i] != cc {
1024 if tmpI == 0 && data[i] == c {
1025 return i
1026 }
1027 i++
1028 }
1029 if i >= len(data) {
1030 return tmpI
1031 }
1032 i++
1033 }
1034 }
1035 return 0
1036}
1037
1038func helperEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
1039 i := 0
1040
1041 // skip one symbol if coming from emph3
1042 if len(data) > 1 && data[0] == c && data[1] == c {
1043 i = 1
1044 }
1045
1046 for i < len(data) {
1047 length := helperFindEmphChar(data[i:], c)
1048 if length == 0 {
1049 return 0
1050 }
1051 i += length
1052 if i >= len(data) {
1053 return 0
1054 }
1055
1056 if i+1 < len(data) && data[i+1] == c {
1057 i++
1058 continue
1059 }
1060
1061 if data[i] == c && !isspace(data[i-1]) {
1062
1063 if p.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 {
1064 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1065 continue
1066 }
1067 }
1068
1069 var work bytes.Buffer
1070 p.inline(&work, data[:i])
1071 p.r.Emphasis(out, work.Bytes())
1072 return i + 1
1073 }
1074 }
1075
1076 return 0
1077}
1078
1079func helperDoubleEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
1080 i := 0
1081
1082 for i < len(data) {
1083 length := helperFindEmphChar(data[i:], c)
1084 if length == 0 {
1085 return 0
1086 }
1087 i += length
1088
1089 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1090 var work bytes.Buffer
1091 p.inline(&work, data[:i])
1092
1093 if work.Len() > 0 {
1094 // pick the right renderer
1095 if c == '~' {
1096 p.r.StrikeThrough(out, work.Bytes())
1097 } else {
1098 p.r.DoubleEmphasis(out, work.Bytes())
1099 }
1100 }
1101 return i + 2
1102 }
1103 i++
1104 }
1105 return 0
1106}
1107
1108func helperTripleEmphasis(p *parser, out *bytes.Buffer, data []byte, offset int, c byte) int {
1109 i := 0
1110 origData := data
1111 data = data[offset:]
1112
1113 for i < len(data) {
1114 length := helperFindEmphChar(data[i:], c)
1115 if length == 0 {
1116 return 0
1117 }
1118 i += length
1119
1120 // skip whitespace preceded symbols
1121 if data[i] != c || isspace(data[i-1]) {
1122 continue
1123 }
1124
1125 switch {
1126 case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1127 // triple symbol found
1128 var work bytes.Buffer
1129
1130 p.inline(&work, data[:i])
1131 if work.Len() > 0 {
1132 p.r.TripleEmphasis(out, work.Bytes())
1133 }
1134 return i + 3
1135 case (i+1 < len(data) && data[i+1] == c):
1136 // double symbol found, hand over to emph1
1137 length = helperEmphasis(p, out, origData[offset-2:], c)
1138 if length == 0 {
1139 return 0
1140 } else {
1141 return length - 2
1142 }
1143 default:
1144 // single symbol found, hand over to emph2
1145 length = helperDoubleEmphasis(p, out, origData[offset-1:], c)
1146 if length == 0 {
1147 return 0
1148 } else {
1149 return length - 1
1150 }
1151 }
1152 }
1153 return 0
1154}