1package parser
2
3import (
4 "bytes"
5 "regexp"
6 "strconv"
7
8 "github.com/gomarkdown/markdown/ast"
9)
10
11// Parsing of inline elements
12
var (
	// urlRe matches an absolute http/https/ftp URL, or an absolute path,
	// using a conservative character set for the remainder of the URL.
	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches a complete, already-rendered HTML anchor whose href
	// (and visible text) look like the URLs above; autoLink uses it to
	// avoid re-linking text that is already inside an <a> element.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// TODO: improve this regexp to catch all possible entities:
	htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
)
20
// Inline parses text within a block.
// Each function returns the number of consumed chars.
//
// The scanner walks data byte by byte; beg marks the start of the current
// run of plain text, end is the scan position. When a registered handler
// consumes input, the pending plain-text run is flushed as a Text node
// followed by the handler's node, and scanning resumes after the consumed
// bytes.
func (p *Parser) Inline(currBlock ast.Node, data []byte) {
	// handlers might call us recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting || len(data) == 0 {
		return
	}
	p.nesting++
	beg, end := 0, 0

	n := len(data)
	for end < n {
		// inlineCallback is indexed by the trigger byte; nil means the
		// byte has no inline meaning
		handler := p.inlineCallback[data[end]]
		if handler == nil {
			end++
			continue
		}
		consumed, node := handler(p, data, end)
		if consumed == 0 {
			// no action from the callback
			end++
			continue
		}
		// copy inactive chars into the output
		ast.AppendChild(currBlock, newTextNode(data[beg:end]))
		if node != nil {
			ast.AppendChild(currBlock, node)
		}
		beg = end + consumed
		end = beg
	}

	// flush any trailing plain text, dropping a single final newline
	if beg < n {
		if data[end-1] == '\n' {
			end--
		}
		ast.AppendChild(currBlock, newTextNode(data[beg:end]))
	}
	p.nesting--
}
61
// single and double emphasis parsing
//
// emphasis handles one of the emphasis delimiters ('*', '_', '~') found at
// data[offset]. Based on how many times the delimiter repeats it
// dispatches to the single/double/triple emphasis helpers (or parses a
// subscript for a lone '~' when SuperSubscript is on), returning the total
// bytes consumed and the node built, or (0, nil) when nothing opens here.
func emphasis(p *Parser, data []byte, offset int) (int, ast.Node) {
	data = data[offset:]
	c := data[0]

	n := len(data)
	// single delimiter: emphasis (or subscript for '~')
	if n > 2 && data[1] != c {
		// whitespace cannot follow an opening emphasis;
		// strikethrough only takes two characters '~~'
		if isSpace(data[1]) {
			return 0, nil
		}
		if p.extensions&SuperSubscript != 0 && c == '~' {
			// potential subscript, no spaces, except when escaped, helperEmphasis does
			// not check that for us, so walk the bytes and check.
			ret := skipUntilChar(data[1:], 0, c)
			if ret == 0 {
				return 0, nil
			}
			ret++ // we started with data[1:] above.
			for i := 1; i < ret; i++ {
				if isSpace(data[i]) && !isEscape(data, i) {
					return 0, nil
				}
			}
			sub := &ast.Subscript{}
			sub.Literal = data[1:ret]
			return ret + 1, sub
		}
		ret, node := helperEmphasis(p, data[1:], c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 1, node
	}

	// double delimiter: strong emphasis (or strikethrough for '~~')
	if n > 3 && data[1] == c && data[2] != c {
		if isSpace(data[2]) {
			return 0, nil
		}
		ret, node := helperDoubleEmphasis(p, data[2:], c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 2, node
	}

	// triple delimiter: strong+em ('~~~' has no inline meaning)
	if n > 4 && data[1] == c && data[2] == c && data[3] != c {
		if c == '~' || isSpace(data[3]) {
			return 0, nil
		}
		ret, node := helperTripleEmphasis(p, data, 3, c)
		if ret == 0 {
			return 0, nil
		}

		return ret + 3, node
	}

	return 0, nil
}
125
// codeSpan handles '`': a code span delimited by runs of backticks of
// equal length. One leading/trailing space layer inside the span is
// trimmed. When the opening run is never matched, nothing is consumed.
func codeSpan(p *Parser, data []byte, offset int) (int, ast.Node) {
	data = data[offset:]

	// count the number of backticks in the delimiter
	nb := skipChar(data, 0, '`')

	// find the next delimiter
	// i counts consecutive backticks seen; a non-backtick resets the run
	i, end := 0, 0
	for end = nb; end < len(data) && i < nb; end++ {
		if data[end] == '`' {
			i++
		} else {
			i = 0
		}
	}

	// no matching delimiter?
	if i < nb && end >= len(data) {
		return 0, nil
	}

	// trim outside whitespace
	fBegin := nb
	for fBegin < end && data[fBegin] == ' ' {
		fBegin++
	}

	fEnd := end - nb
	for fEnd > fBegin && data[fEnd-1] == ' ' {
		fEnd--
	}

	// render the code span
	if fBegin != fEnd {
		code := &ast.Code{}
		code.Literal = data[fBegin:fEnd]
		return end, code
	}

	// span contained only whitespace: consume it but emit no node
	return end, nil
}
167
168// newline preceded by two spaces becomes <br>
169func maybeLineBreak(p *Parser, data []byte, offset int) (int, ast.Node) {
170 origOffset := offset
171 offset = skipChar(data, offset, ' ')
172
173 if offset < len(data) && data[offset] == '\n' {
174 if offset-origOffset >= 2 {
175 return offset - origOffset + 1, &ast.Hardbreak{}
176 }
177 return offset - origOffset, nil
178 }
179 return 0, nil
180}
181
182// newline without two spaces works when HardLineBreak is enabled
183func lineBreak(p *Parser, data []byte, offset int) (int, ast.Node) {
184 if p.extensions&HardLineBreak != 0 {
185 return 1, &ast.Hardbreak{}
186 }
187 return 0, nil
188}
189
// linkType classifies what kind of construct a '[' (or '![', '^[') opens.
type linkType int

const (
	linkNormal           linkType = iota // [text](url) or [text][ref]
	linkImg                              // ![alt](url)
	linkDeferredFootnote                 // [^id] referencing a footnote defined elsewhere
	linkInlineFootnote                   // ^[text] with the footnote content inline
	linkCitation                         // [@id] Mmark citation
)
199
200func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
201 if t == linkDeferredFootnote {
202 return false
203 }
204 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
205}
206
207func maybeImage(p *Parser, data []byte, offset int) (int, ast.Node) {
208 if offset < len(data)-1 && data[offset+1] == '[' {
209 return link(p, data, offset)
210 }
211 return 0, nil
212}
213
// maybeInlineFootnoteOrSuper handles '^': "^[...]" opens an inline
// footnote (delegated to link); otherwise, with the SuperSubscript
// extension, "^text^" becomes a superscript, provided the text contains no
// unescaped whitespace.
func maybeInlineFootnoteOrSuper(p *Parser, data []byte, offset int) (int, ast.Node) {
	if offset < len(data)-1 && data[offset+1] == '[' {
		return link(p, data, offset)
	}

	if p.extensions&SuperSubscript != 0 {
		// find the closing '^'; ret is its distance from offset
		ret := skipUntilChar(data[offset:], 1, '^')
		if ret == 0 {
			return 0, nil
		}
		// unescaped whitespace is not allowed inside a superscript
		for i := offset; i < offset+ret; i++ {
			if isSpace(data[i]) && !isEscape(data, i) {
				return 0, nil
			}
		}
		sup := &ast.Superscript{}
		sup.Literal = data[offset+1 : offset+ret]
		return offset + ret, sup
	}

	return 0, nil
}
236
// '[': parse a link or an image or a footnote or a citation
//
// link dispatches on the bracket construct starting at data[offset]:
// inline links [text](url "title"), reference links [text][id], images
// ![alt](url), inline footnotes ^[text], deferred footnotes [^id], and
// Mmark citations [@id]. It returns the number of bytes consumed and the
// node built, or (0, nil) when no valid construct is found.
func link(p *Parser, data []byte, offset int) (int, ast.Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	// classify the construct; offset may be advanced past a '!' or '^'
	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// [@citation], [@-citation], [@?citation], [@!citation]
	case p.extensions&Mmark != 0 && len(data)-1 > offset && data[offset+1] == '@':
		t = linkCitation
	// [text] == regular link
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	default:
		t = linkNormal
	}

	data = data[offset:]

	if t == linkCitation {
		return citation(p, data, 0)
	}

	var (
		i                               = 1
		noteID                          int
		title, link, linkID, altContent []byte
		textHasNl                       = false
	)

	// skip the '^' that introduces a deferred footnote id
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the closing ']' of the text part
	txtE := i
	i++
	var footnoteNode ast.Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	i = skipSpace(data, i)

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		i = skipSpace(data, i)

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isSpace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isSpace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		i = skipUntilChar(data, i, ']')

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty second bracket ("[text][]"): the id is the link text,
			// with newlines collapsed to single spaces
			if textHasNl {
				var b bytes.Buffer

				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		linkID = id
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = &ast.ListItem{}
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			// derive an anchor fragment from the slugified id,
			// capped at 16 bytes; fall back to "footnote-N"
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)
			p.refsRecord[string(ref.link)] = struct{}{}

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			// first sighting of this deferred footnote: register it
			if t == linkDeferredFootnote && !p.isFootnote(lr) {
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
				p.refsRecord[string(lr.link)] = struct{}{}
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// unescape the destination for real links/images
	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		link := &ast.Link{
			Destination: normalizeURI(uLink),
			Title:       title,
			DeferredID:  linkID,
		}
		if len(altContent) > 0 {
			ast.AppendChild(link, newTextNode(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.Inline(link, data[1:txtE])
			p.insideLink = insideLink
		}
		return i, link

	case linkImg:
		image := &ast.Image{
			Destination: uLink,
			Title:       title,
		}
		ast.AppendChild(image, newTextNode(data[1:txtE]))
		return i + 1, image

	case linkInlineFootnote, linkDeferredFootnote:
		link := &ast.Link{
			Destination: link,
			Title:       title,
			NoteID:      noteID,
			Footnote:    footnoteNode,
		}
		if t == linkDeferredFootnote {
			link.DeferredID = data[2:txtE]
		}
		if t == linkInlineFootnote {
			i++
		}
		return i, link

	default:
		return 0, nil
	}
}
606
607func (p *Parser) inlineHTMLComment(data []byte) int {
608 if len(data) < 5 {
609 return 0
610 }
611 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
612 return 0
613 }
614 i := 5
615 // scan for an end-of-comment marker, across lines if necessary
616 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
617 i++
618 }
619 // no end-of-comment marker
620 if i >= len(data) {
621 return 0
622 }
623 return i + 1
624}
625
// stripMailto removes a leading "mailto://" or "mailto:" scheme from link,
// returning the bare address; any other link is returned unchanged.
func stripMailto(link []byte) []byte {
	// try the longer scheme first since "mailto:" is a prefix of it
	for _, scheme := range []string{"mailto://", "mailto:"} {
		if bytes.HasPrefix(link, []byte(scheme)) {
			return link[len(scheme):]
		}
	}
	return link
}
635
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	notAutolink    autolinkType = iota // not an autolink
	normalAutolink                     // scheme-based autolink (<http://...>)
	emailAutolink                      // e-mail address autolink (<user@host>)
)
645
// '<' when tags or autolinks are allowed
//
// leftAngle recognizes, in order: Mmark callouts, HTML comments, raw HTML
// span tags, and angle-bracket autolinks (<http://...> or <user@host>).
func leftAngle(p *Parser, data []byte, offset int) (int, ast.Node) {
	data = data[offset:]

	if p.extensions&Mmark != 0 {
		id, consumed := IsCallout(data)
		if consumed > 0 {
			node := &ast.Callout{}
			node.ID = id
			return consumed, node
		}
	}

	altype, end := tagLength(data)
	// a full HTML comment overrides whatever tagLength saw
	if size := p.inlineHTMLComment(data); size > 0 {
		end = size
	}
	if end <= 2 {
		return end, nil
	}
	if altype == notAutolink {
		// plain HTML tag: pass it through verbatim
		htmlTag := &ast.HTMLSpan{}
		htmlTag.Literal = data[:end]
		return end, htmlTag
	}

	// autolink: strip the surrounding '<' and '>' and unescape the body
	var uLink bytes.Buffer
	unescapeText(&uLink, data[1:end+1-2])
	if uLink.Len() <= 0 {
		return end, nil
	}
	link := uLink.Bytes()
	node := &ast.Link{
		Destination: link,
	}
	if altype == emailAutolink {
		node.Destination = append([]byte("mailto:"), link...)
	}
	ast.AppendChild(node, newTextNode(stripMailto(link)))
	return end, node
}
687
// '\\' backslash escape
// escapeChars lists every character that may be backslash-escaped in
// markdown text; a backslash before any other character is left alone.
var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
690
691func escape(p *Parser, data []byte, offset int) (int, ast.Node) {
692 data = data[offset:]
693
694 if len(data) <= 1 {
695 return 2, nil
696 }
697
698 if p.extensions&NonBlockingSpace != 0 && data[1] == ' ' {
699 return 2, &ast.NonBlockingSpace{}
700 }
701
702 if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
703 return 2, &ast.Hardbreak{}
704 }
705
706 if bytes.IndexByte(escapeChars, data[1]) < 0 {
707 return 0, nil
708 }
709
710 return 2, newTextNode(data[1:2])
711}
712
713func unescapeText(ob *bytes.Buffer, src []byte) {
714 i := 0
715 for i < len(src) {
716 org := i
717 for i < len(src) && src[i] != '\\' {
718 i++
719 }
720
721 if i > org {
722 ob.Write(src[org:i])
723 }
724
725 if i+1 >= len(src) {
726 break
727 }
728
729 ob.WriteByte(src[i+1])
730 i += 2
731 }
732}
733
734// '&' escaped when it doesn't belong to an entity
735// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
736func entity(p *Parser, data []byte, offset int) (int, ast.Node) {
737 data = data[offset:]
738
739 end := skipCharN(data, 1, '#', 1)
740 end = skipAlnum(data, end)
741
742 if end < len(data) && data[end] == ';' {
743 end++ // real entity
744 } else {
745 return 0, nil // lone '&'
746 }
747
748 ent := data[:end]
749 // undo & escaping or it will be converted to &amp; by another
750 // escaper in the renderer
751 if bytes.Equal(ent, []byte("&")) {
752 ent = []byte{'&'}
753 }
754
755 return end, newTextNode(ent)
756}
757
758func linkEndsWithEntity(data []byte, linkEnd int) bool {
759 entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
760 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
761}
762
// hasPrefixCaseInsensitive is a custom implementation of
// strings.HasPrefix(strings.ToLower(s), prefix)
// we rolled our own because ToLower pulls in a huge machinery of lowercasing
// anything from Unicode and that's very slow. Since this func will only be
// used on ASCII protocol prefixes, we can take shortcuts.
func hasPrefixCaseInsensitive(s, prefix []byte) bool {
	if len(prefix) > len(s) {
		return false
	}
	const caseBit = byte('a' - 'A')
	for i := 0; i < len(prefix); i++ {
		// prefix is lowercase ASCII: accept the source byte as-is or
		// its uppercase form
		want, got := prefix[i], s[i]
		if want != got && want != got+caseBit {
			return false
		}
	}
	return true
}
780
// protocolPrefixes are the (lowercase) URL schemes that can open a bare
// autolink; maybeAutoLink matches them case-insensitively.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is a cheap lower bound on the bytes an autolink needs.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
790
791func maybeAutoLink(p *Parser, data []byte, offset int) (int, ast.Node) {
792 // quick check to rule out most false hits
793 if p.insideLink || len(data) < offset+shortestPrefix {
794 return 0, nil
795 }
796 for _, prefix := range protocolPrefixes {
797 endOfHead := offset + 8 // 8 is the len() of the longest prefix
798 if endOfHead > len(data) {
799 endOfHead = len(data)
800 }
801 if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
802 return autoLink(p, data, offset)
803 }
804 }
805 return 0, nil
806}
807
// autoLink turns a bare URL in running text into a link node. offset
// points at the position maybeAutoLink matched; the function first checks
// that we are not inside an already-rendered <a> element, rewinds to the
// start of the protocol word, then finds where the URL ends, trimming
// trailing punctuation and unbalanced closing delimiters.
func autoLink(p *Parser, data []byte, offset int) (int, ast.Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// already inside an <a> tag: emit the remainder as raw HTML
		anchorClose := &ast.HTMLSpan{}
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isLetter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	// find the end of the URL: whitespace or '<'
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// scan backward from just before the closer, counting
		// opener/closer balance within the current line
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 * foo http://www.pokemon.com/Pikachu_(Electric) bar
		 * => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 * foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 * => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		node := &ast.Link{
			Destination: uLink.Bytes(),
		}
		ast.AppendChild(node, newTextNode(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
927
928func isEndOfLink(char byte) bool {
929 return isSpace(char) || char == '<'
930}
931
// validUris lists the URI schemes isSafeLink accepts for absolute links;
// validPaths lists the accepted relative/absolute path prefixes.
var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
934
935func isSafeLink(link []byte) bool {
936 nLink := len(link)
937 for _, path := range validPaths {
938 nPath := len(path)
939 linkPrefix := link[:nPath]
940 if nLink >= nPath && bytes.Equal(linkPrefix, path) {
941 if nLink == nPath {
942 return true
943 } else if isAlnum(link[nPath]) {
944 return true
945 }
946 }
947 }
948
949 for _, prefix := range validUris {
950 // TODO: handle unicode here
951 // case-insensitive prefix test
952 nPrefix := len(prefix)
953 if nLink > nPrefix {
954 linkPrefix := bytes.ToLower(link[:nPrefix])
955 if bytes.Equal(linkPrefix, prefix) && isAlnum(link[nPrefix]) {
956 return true
957 }
958 }
959 }
960
961 return false
962}
963
964// return the length of the given tag, or 0 is it's not valid
965func tagLength(data []byte) (autolink autolinkType, end int) {
966 var i, j int
967
968 // a valid tag can't be shorter than 3 chars
969 if len(data) < 3 {
970 return notAutolink, 0
971 }
972
973 // begins with a '<' optionally followed by '/', followed by letter or number
974 if data[0] != '<' {
975 return notAutolink, 0
976 }
977 if data[1] == '/' {
978 i = 2
979 } else {
980 i = 1
981 }
982
983 if !isAlnum(data[i]) {
984 return notAutolink, 0
985 }
986
987 // scheme test
988 autolink = notAutolink
989
990 // try to find the beginning of an URI
991 for i < len(data) && (isAlnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
992 i++
993 }
994
995 if i > 1 && i < len(data) && data[i] == '@' {
996 if j = isMailtoAutoLink(data[i:]); j != 0 {
997 return emailAutolink, i + j
998 }
999 }
1000
1001 if i > 2 && i < len(data) && data[i] == ':' {
1002 autolink = normalAutolink
1003 i++
1004 }
1005
1006 // complete autolink test: no whitespace or ' or "
1007 switch {
1008 case i >= len(data):
1009 autolink = notAutolink
1010 case autolink != notAutolink:
1011 j = i
1012
1013 for i < len(data) {
1014 if data[i] == '\\' {
1015 i += 2
1016 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isSpace(data[i]) {
1017 break
1018 } else {
1019 i++
1020 }
1021
1022 }
1023
1024 if i >= len(data) {
1025 return autolink, 0
1026 }
1027 if i > j && data[i] == '>' {
1028 return autolink, i + 1
1029 }
1030
1031 // one of the forbidden chars has been found
1032 autolink = notAutolink
1033 }
1034 i += bytes.IndexByte(data[i:], '>')
1035 if i < 0 {
1036 return autolink, 0
1037 }
1038 return autolink, i + 1
1039}
1040
1041// look for the address part of a mail autolink and '>'
1042// this is less strict than the original markdown e-mail address matching
1043func isMailtoAutoLink(data []byte) int {
1044 nb := 0
1045
1046 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1047 for i, c := range data {
1048 if isAlnum(c) {
1049 continue
1050 }
1051
1052 switch c {
1053 case '@':
1054 nb++
1055
1056 case '-', '.', '_':
1057 break
1058
1059 case '>':
1060 if nb == 1 {
1061 return i + 1
1062 }
1063 return 0
1064 default:
1065 return 0
1066 }
1067 }
1068
1069 return 0
1070}
1071
// look for the next emph char, skipping other constructs
//
// helperFindEmphChar returns the index of the next unescaped occurrence of
// c in data, skipping over code spans and links whose contents must not
// close emphasis. Returns 0 when no usable occurrence is found.
func helperFindEmphChar(data []byte, c byte) int {
	i := 0

	for i < len(data) {
		// advance to the next interesting byte: c itself, a code-span
		// opener, or a link opener
		for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		// do not count escaped chars
		if i != 0 && data[i-1] == '\\' {
			i++
			continue
		}
		if data[i] == c {
			return i
		}

		if data[i] == '`' {
			// skip a code span
			// tmpI remembers the first c seen inside, as a fallback if the
			// span turns out to be unterminated
			tmpI := 0
			i++
			for i < len(data) && data[i] != '`' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		} else if data[i] == '[' {
			// skip a link
			tmpI := 0
			i++
			for i < len(data) && data[i] != ']' {
				if tmpI == 0 && data[i] == c {
					tmpI = i
				}
				i++
			}
			i++
			// allow whitespace between the ']' and the '(' / '[' part
			for i < len(data) && (data[i] == ' ' || data[i] == '\n') {
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			if data[i] != '[' && data[i] != '(' { // not a link
				if tmpI > 0 {
					return tmpI
				}
				continue
			}
			// skip the destination/reference part up to the matching closer
			cc := data[i]
			i++
			for i < len(data) && data[i] != cc {
				if tmpI == 0 && data[i] == c {
					return i
				}
				i++
			}
			if i >= len(data) {
				return tmpI
			}
			i++
		}
	}
	return 0
}
1145
// helperEmphasis parses single-delimiter emphasis. data starts just after
// the opening c; the function finds the matching closer and emits an
// ast.Emph whose children are the parsed inner text. Returns (0, nil)
// when no valid closer exists.
func helperEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
	i := 0

	// skip one symbol if coming from emph3
	if len(data) > 1 && data[0] == c && data[1] == c {
		i = 1
	}

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length
		if i >= len(data) {
			return 0, nil
		}

		// a doubled delimiter is not a closer; step over it
		if i+1 < len(data) && data[i+1] == c {
			i++
			continue
		}

		// the closer must not be preceded by whitespace
		if data[i] == c && !isSpace(data[i-1]) {

			if p.extensions&NoIntraEmphasis != 0 {
				// intra-word emphasis disabled: the closer must be at the
				// end of the text or followed by space/punctuation
				if !(i+1 == len(data) || isSpace(data[i+1]) || isPunctuation(data[i+1])) {
					continue
				}
			}

			emph := &ast.Emph{}
			p.Inline(emph, data[:i])
			return i + 1, emph
		}
	}

	return 0, nil
}
1185
// helperDoubleEmphasis parses double-delimiter emphasis. data starts just
// after the opening pair; '~~' produces ast.Del (strikethrough), any other
// delimiter ast.Strong. Returns (0, nil) when no closing pair exists.
func helperDoubleEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
	i := 0

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length

		// a closer is a cc pair not preceded by whitespace
		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isSpace(data[i-1]) {
			var node ast.Node = &ast.Strong{}
			if c == '~' {
				node = &ast.Del{}
			}
			p.Inline(node, data[:i])
			return i + 2, node
		}
		i++
	}
	return 0, nil
}
1208
// helperTripleEmphasis parses triple-delimiter emphasis starting at
// origData[offset] (just past the three opening delimiters). Depending on
// how many delimiters close the run it builds strong+em directly, or
// re-enters the single/double helpers with the start shifted so they see
// part of the opening run.
func helperTripleEmphasis(p *Parser, data []byte, offset int, c byte) (int, ast.Node) {
	i := 0
	origData := data
	data = data[offset:]

	for i < len(data) {
		length := helperFindEmphChar(data[i:], c)
		if length == 0 {
			return 0, nil
		}
		i += length

		// skip whitespace preceded symbols
		if data[i] != c || isSpace(data[i-1]) {
			continue
		}

		switch {
		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
			// triple symbol found
			strong := &ast.Strong{}
			em := &ast.Emph{}
			ast.AppendChild(strong, em)
			p.Inline(em, data[:i])
			return i + 3, strong
		case i+1 < len(data) && data[i+1] == c:
			// double symbol found, hand over to emph1
			length, node := helperEmphasis(p, origData[offset-2:], c)
			if length == 0 {
				return 0, nil
			}
			return length - 2, node
		default:
			// single symbol found, hand over to emph2
			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
			if length == 0 {
				return 0, nil
			}
			return length - 1, node
		}
	}
	return 0, nil
}
1252
1253// math handle inline math wrapped with '$'
1254func math(p *Parser, data []byte, offset int) (int, ast.Node) {
1255 data = data[offset:]
1256
1257 // too short, or block math
1258 if len(data) <= 2 || data[1] == '$' {
1259 return 0, nil
1260 }
1261
1262 // find next '$'
1263 var end int
1264 for end = 1; end < len(data) && data[end] != '$'; end++ {
1265 }
1266
1267 // $ not match
1268 if end == len(data) {
1269 return 0, nil
1270 }
1271
1272 // create inline math node
1273 math := &ast.Math{}
1274 math.Literal = data[1:end]
1275 return end + 1, math
1276}
1277
1278func newTextNode(d []byte) *ast.Text {
1279 return &ast.Text{ast.Leaf{Literal: d}}
1280}
1281
// normalizeURI normalizes a link destination before rendering.
// Currently a no-op that returns s unchanged.
func normalizeURI(s []byte) []byte {
	return s // TODO: implement
}