1package parser
2
3import (
4 "bytes"
5 "html"
6 "regexp"
7 "strconv"
8 "unicode"
9
10 "github.com/gomarkdown/markdown/ast"
11)
12
13// Parsing block-level elements.
14
const (
	// charEntity is a regular expression fragment matching an HTML character
	// reference: hexadecimal (&#x...;), decimal (&#...;) or named (&name;).
	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
	// escapable is a regular expression character class listing the ASCII
	// punctuation characters that reEntityOrEscapedChar accepts after a
	// backslash.
	escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
)

var (
	// reBackslashOrAmp is the cheap pre-check used by unescapeString: it
	// matches any input containing a backslash or an ampersand.
	//
	// The previous pattern "[\\&]" compiled to the regex [\&], an escaped
	// ampersand, so inputs containing only backslash escapes were never
	// unescaped.
	reBackslashOrAmp = regexp.MustCompile(`[\\&]`)
	// reEntityOrEscapedChar matches, case-insensitively, either a
	// backslash-escaped punctuation character or an HTML character reference.
	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)

	// blockTags is a set of tags that are recognized as HTML block tags.
	// Any of these can be included in markdown text without special escaping.
	blockTags = map[string]struct{}{
		"blockquote": {},
		"del":        {},
		"div":        {},
		"dl":         {},
		"fieldset":   {},
		"form":       {},
		"h1":         {},
		"h2":         {},
		"h3":         {},
		"h4":         {},
		"h5":         {},
		"h6":         {},
		"iframe":     {},
		"ins":        {},
		"math":       {},
		"noscript":   {},
		"ol":         {},
		"pre":        {},
		"p":          {},
		"script":     {},
		"style":      {},
		"table":      {},
		"ul":         {},

		// HTML5
		"address":    {},
		"article":    {},
		"aside":      {},
		"canvas":     {},
		"figcaption": {},
		"figure":     {},
		"footer":     {},
		"header":     {},
		"hgroup":     {},
		"main":       {},
		"nav":        {},
		"output":     {},
		"progress":   {},
		"section":    {},
		"video":      {},
	}
)
69
// sanitizeAnchorName returns a sanitized anchor name for the given text:
// letters and numbers are lower-cased and kept, and every run of other
// characters between two kept characters collapses into a single '-'.
// Leading and trailing runs of other characters are dropped.
// Taken from https://github.com/shurcooL/sanitized_anchor_name/blob/master/main.go#L14:1
func sanitizeAnchorName(text string) string {
	var out []rune
	pendingDash := false
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			// remember the separator; it is only emitted if more text follows
			pendingDash = true
			continue
		}
		if pendingDash && len(out) > 0 {
			out = append(out, '-')
		}
		pendingDash = false
		out = append(out, unicode.ToLower(r))
	}
	return string(out)
}
89
90// Parse block-level data.
91// Note: this function and many that it calls assume that
92// the input buffer ends with a newline.
93func (p *Parser) block(data []byte) {
94 // this is called recursively: enforce a maximum depth
95 if p.nesting >= p.maxNesting {
96 return
97 }
98 p.nesting++
99
100 // parse out one block-level construct at a time
101 for len(data) > 0 {
102 // attributes that can be specific before a block element:
103 //
104 // {#id .class1 .class2 key="value"}
105 if p.extensions&Attributes != 0 {
106 data = p.attribute(data)
107 }
108
109 if p.extensions&Includes != 0 {
110 f := p.readInclude
111 path, address, consumed := p.isInclude(data)
112 if consumed == 0 {
113 path, address, consumed = p.isCodeInclude(data)
114 f = p.readCodeInclude
115 }
116 if consumed > 0 {
117 included := f(p.includeStack.Last(), path, address)
118 p.includeStack.Push(path)
119 p.block(included)
120 p.includeStack.Pop()
121 data = data[consumed:]
122 continue
123 }
124 }
125
126 // user supplied parser function
127 if p.Opts.ParserHook != nil {
128 node, blockdata, consumed := p.Opts.ParserHook(data)
129 if consumed > 0 {
130 data = data[consumed:]
131
132 if node != nil {
133 p.addBlock(node)
134 if blockdata != nil {
135 p.block(blockdata)
136 p.finalize(node)
137 }
138 }
139 continue
140 }
141 }
142
143 // prefixed heading:
144 //
145 // # Heading 1
146 // ## Heading 2
147 // ...
148 // ###### Heading 6
149 if p.isPrefixHeading(data) {
150 data = data[p.prefixHeading(data):]
151 continue
152 }
153
154 // prefixed special heading:
155 // (there are no levels.)
156 //
157 // .# Abstract
158 if p.isPrefixSpecialHeading(data) {
159 data = data[p.prefixSpecialHeading(data):]
160 continue
161 }
162
163 // block of preformatted HTML:
164 //
165 // <div>
166 // ...
167 // </div>
168 if data[0] == '<' {
169 if i := p.html(data, true); i > 0 {
170 data = data[i:]
171 continue
172 }
173 }
174
175 // title block
176 //
177 // % stuff
178 // % more stuff
179 // % even more stuff
180 if p.extensions&Titleblock != 0 {
181 if data[0] == '%' {
182 if i := p.titleBlock(data, true); i > 0 {
183 data = data[i:]
184 continue
185 }
186 }
187 }
188
189 // blank lines. note: returns the # of bytes to skip
190 if i := p.isEmpty(data); i > 0 {
191 data = data[i:]
192 continue
193 }
194
195 // indented code block:
196 //
197 // func max(a, b int) int {
198 // if a > b {
199 // return a
200 // }
201 // return b
202 // }
203 if p.codePrefix(data) > 0 {
204 data = data[p.code(data):]
205 continue
206 }
207
208 // fenced code block:
209 //
210 // ``` go
211 // func fact(n int) int {
212 // if n <= 1 {
213 // return n
214 // }
215 // return n * fact(n-1)
216 // }
217 // ```
218 if p.extensions&FencedCode != 0 {
219 if i := p.fencedCodeBlock(data, true); i > 0 {
220 data = data[i:]
221 continue
222 }
223 }
224
225 // horizontal rule:
226 //
227 // ------
228 // or
229 // ******
230 // or
231 // ______
232 if p.isHRule(data) {
233 p.addBlock(&ast.HorizontalRule{})
234 i := skipUntilChar(data, 0, '\n')
235 data = data[i:]
236 continue
237 }
238
239 // block quote:
240 //
241 // > A big quote I found somewhere
242 // > on the web
243 if p.quotePrefix(data) > 0 {
244 data = data[p.quote(data):]
245 continue
246 }
247
248 // aside:
249 //
250 // A> The proof is too large to fit
251 // A> in the margin.
252 if p.extensions&Mmark != 0 {
253 if p.asidePrefix(data) > 0 {
254 data = data[p.aside(data):]
255 continue
256 }
257 }
258
259 // figure block:
260 //
261 // !---
262 // 
263 // 
264 // !---
265 if p.extensions&Mmark != 0 {
266 if i := p.figureBlock(data, true); i > 0 {
267 data = data[i:]
268 continue
269 }
270 }
271
272 // table:
273 //
274 // Name | Age | Phone
275 // ------|-----|---------
276 // Bob | 31 | 555-1234
277 // Alice | 27 | 555-4321
278 if p.extensions&Tables != 0 {
279 if i := p.table(data); i > 0 {
280 data = data[i:]
281 continue
282 }
283 }
284
285 // an itemized/unordered list:
286 //
287 // * Item 1
288 // * Item 2
289 //
290 // also works with + or -
291 if p.uliPrefix(data) > 0 {
292 data = data[p.list(data, 0, 0):]
293 continue
294 }
295
296 // a numbered/ordered list:
297 //
298 // 1. Item 1
299 // 2. Item 2
300 if i := p.oliPrefix(data); i > 0 {
301 start := 0
302 if i > 2 && p.extensions&OrderedListStart != 0 {
303 s := string(data[:i-2])
304 start, _ = strconv.Atoi(s)
305 if start == 1 {
306 start = 0
307 }
308 }
309 data = data[p.list(data, ast.ListTypeOrdered, start):]
310 continue
311 }
312
313 // definition lists:
314 //
315 // Term 1
316 // : Definition a
317 // : Definition b
318 //
319 // Term 2
320 // : Definition c
321 if p.extensions&DefinitionLists != 0 {
322 if p.dliPrefix(data) > 0 {
323 data = data[p.list(data, ast.ListTypeDefinition, 0):]
324 continue
325 }
326 }
327
328 if p.extensions&MathJax != 0 {
329 if i := p.blockMath(data); i > 0 {
330 data = data[i:]
331 continue
332 }
333 }
334
335 // document matters:
336 //
337 // {frontmatter}/{mainmatter}/{backmatter}
338 if p.extensions&Mmark != 0 {
339 if i := p.documentMatter(data); i > 0 {
340 data = data[i:]
341 continue
342 }
343 }
344
345 // anything else must look like a normal paragraph
346 // note: this finds underlined headings, too
347 idx := p.paragraph(data)
348 data = data[idx:]
349 }
350
351 p.nesting--
352}
353
354func (p *Parser) addBlock(n ast.Node) ast.Node {
355 p.closeUnmatchedBlocks()
356
357 if p.attr != nil {
358 if c := n.AsContainer(); c != nil {
359 c.Attribute = p.attr
360 }
361 if l := n.AsLeaf(); l != nil {
362 l.Attribute = p.attr
363 }
364 p.attr = nil
365 }
366 return p.addChild(n)
367}
368
369func (p *Parser) isPrefixHeading(data []byte) bool {
370 if data[0] != '#' {
371 return false
372 }
373
374 if p.extensions&SpaceHeadings != 0 {
375 level := skipCharN(data, 0, '#', 6)
376 if level == len(data) || data[level] != ' ' {
377 return false
378 }
379 }
380 return true
381}
382
383func (p *Parser) prefixHeading(data []byte) int {
384 level := skipCharN(data, 0, '#', 6)
385 i := skipChar(data, level, ' ')
386 end := skipUntilChar(data, i, '\n')
387 skip := end
388 id := ""
389 if p.extensions&HeadingIDs != 0 {
390 j, k := 0, 0
391 // find start/end of heading id
392 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
393 }
394 for k = j + 1; k < end && data[k] != '}'; k++ {
395 }
396 // extract heading id iff found
397 if j < end && k < end {
398 id = string(data[j+2 : k])
399 end = j
400 skip = k + 1
401 for end > 0 && data[end-1] == ' ' {
402 end--
403 }
404 }
405 }
406 for end > 0 && data[end-1] == '#' {
407 if isBackslashEscaped(data, end-1) {
408 break
409 }
410 end--
411 }
412 for end > 0 && data[end-1] == ' ' {
413 end--
414 }
415 if end > i {
416 if id == "" && p.extensions&AutoHeadingIDs != 0 {
417 id = sanitizeAnchorName(string(data[i:end]))
418 }
419 block := &ast.Heading{
420 HeadingID: id,
421 Level: level,
422 }
423 block.Content = data[i:end]
424 p.addBlock(block)
425 }
426 return skip
427}
428
429func (p *Parser) isPrefixSpecialHeading(data []byte) bool {
430 if p.extensions|Mmark == 0 {
431 return false
432 }
433 if len(data) < 4 {
434 return false
435 }
436 if data[0] != '.' {
437 return false
438 }
439 if data[1] != '#' {
440 return false
441 }
442 if data[2] == '#' { // we don't support level, so nack this.
443 return false
444 }
445
446 if p.extensions&SpaceHeadings != 0 {
447 if data[2] != ' ' {
448 return false
449 }
450 }
451 return true
452}
453
454func (p *Parser) prefixSpecialHeading(data []byte) int {
455 i := skipChar(data, 2, ' ') // ".#" skipped
456 end := skipUntilChar(data, i, '\n')
457 skip := end
458 id := ""
459 if p.extensions&HeadingIDs != 0 {
460 j, k := 0, 0
461 // find start/end of heading id
462 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
463 }
464 for k = j + 1; k < end && data[k] != '}'; k++ {
465 }
466 // extract heading id iff found
467 if j < end && k < end {
468 id = string(data[j+2 : k])
469 end = j
470 skip = k + 1
471 for end > 0 && data[end-1] == ' ' {
472 end--
473 }
474 }
475 }
476 for end > 0 && data[end-1] == '#' {
477 if isBackslashEscaped(data, end-1) {
478 break
479 }
480 end--
481 }
482 for end > 0 && data[end-1] == ' ' {
483 end--
484 }
485 if end > i {
486 if id == "" && p.extensions&AutoHeadingIDs != 0 {
487 id = sanitizeAnchorName(string(data[i:end]))
488 }
489 block := &ast.Heading{
490 HeadingID: id,
491 IsSpecial: true,
492 Level: 1, // always level 1.
493 }
494 block.Literal = data[i:end]
495 block.Content = data[i:end]
496 p.addBlock(block)
497 }
498 return skip
499}
500
501func (p *Parser) isUnderlinedHeading(data []byte) int {
502 // test of level 1 heading
503 if data[0] == '=' {
504 i := skipChar(data, 1, '=')
505 i = skipChar(data, i, ' ')
506 if i < len(data) && data[i] == '\n' {
507 return 1
508 }
509 return 0
510 }
511
512 // test of level 2 heading
513 if data[0] == '-' {
514 i := skipChar(data, 1, '-')
515 i = skipChar(data, i, ' ')
516 if i < len(data) && data[i] == '\n' {
517 return 2
518 }
519 return 0
520 }
521
522 return 0
523}
524
525func (p *Parser) titleBlock(data []byte, doRender bool) int {
526 if data[0] != '%' {
527 return 0
528 }
529 splitData := bytes.Split(data, []byte("\n"))
530 var i int
531 for idx, b := range splitData {
532 if !bytes.HasPrefix(b, []byte("%")) {
533 i = idx // - 1
534 break
535 }
536 }
537
538 data = bytes.Join(splitData[0:i], []byte("\n"))
539 consumed := len(data)
540 data = bytes.TrimPrefix(data, []byte("% "))
541 data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
542 block := &ast.Heading{
543 Level: 1,
544 IsTitleblock: true,
545 }
546 block.Content = data
547 p.addBlock(block)
548
549 return consumed
550}
551
552func (p *Parser) html(data []byte, doRender bool) int {
553 var i, j int
554
555 // identify the opening tag
556 if data[0] != '<' {
557 return 0
558 }
559 curtag, tagfound := p.htmlFindTag(data[1:])
560
561 // handle special cases
562 if !tagfound {
563 // check for an HTML comment
564 if size := p.htmlComment(data, doRender); size > 0 {
565 return size
566 }
567
568 // check for an <hr> tag
569 if size := p.htmlHr(data, doRender); size > 0 {
570 return size
571 }
572
573 // no special case recognized
574 return 0
575 }
576
577 // look for an unindented matching closing tag
578 // followed by a blank line
579 found := false
580 /*
581 closetag := []byte("\n</" + curtag + ">")
582 j = len(curtag) + 1
583 for !found {
584 // scan for a closing tag at the beginning of a line
585 if skip := bytes.Index(data[j:], closetag); skip >= 0 {
586 j += skip + len(closetag)
587 } else {
588 break
589 }
590
591 // see if it is the only thing on the line
592 if skip := p.isEmpty(data[j:]); skip > 0 {
593 // see if it is followed by a blank line/eof
594 j += skip
595 if j >= len(data) {
596 found = true
597 i = j
598 } else {
599 if skip := p.isEmpty(data[j:]); skip > 0 {
600 j += skip
601 found = true
602 i = j
603 }
604 }
605 }
606 }
607 */
608
609 // if not found, try a second pass looking for indented match
610 // but not if tag is "ins" or "del" (following original Markdown.pl)
611 if !found && curtag != "ins" && curtag != "del" {
612 i = 1
613 for i < len(data) {
614 i++
615 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
616 i++
617 }
618
619 if i+2+len(curtag) >= len(data) {
620 break
621 }
622
623 j = p.htmlFindEnd(curtag, data[i-1:])
624
625 if j > 0 {
626 i += j - 1
627 found = true
628 break
629 }
630 }
631 }
632
633 if !found {
634 return 0
635 }
636
637 // the end of the block has been found
638 if doRender {
639 // trim newlines
640 end := backChar(data, i, '\n')
641 htmlBLock := &ast.HTMLBlock{ast.Leaf{Content: data[:end]}}
642 p.addBlock(htmlBLock)
643 finalizeHTMLBlock(htmlBLock)
644 }
645
646 return i
647}
648
649func finalizeHTMLBlock(block *ast.HTMLBlock) {
650 block.Literal = block.Content
651 block.Content = nil
652}
653
654// HTML comment, lax form
655func (p *Parser) htmlComment(data []byte, doRender bool) int {
656 i := p.inlineHTMLComment(data)
657 // needs to end with a blank line
658 if j := p.isEmpty(data[i:]); j > 0 {
659 size := i + j
660 if doRender {
661 // trim trailing newlines
662 end := backChar(data, size, '\n')
663 htmlBLock := &ast.HTMLBlock{ast.Leaf{Content: data[:end]}}
664 p.addBlock(htmlBLock)
665 finalizeHTMLBlock(htmlBLock)
666 }
667 return size
668 }
669 return 0
670}
671
672// HR, which is the only self-closing block tag considered
673func (p *Parser) htmlHr(data []byte, doRender bool) int {
674 if len(data) < 4 {
675 return 0
676 }
677 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
678 return 0
679 }
680 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
681 // not an <hr> tag after all; at least not a valid one
682 return 0
683 }
684 i := 3
685 for i < len(data) && data[i] != '>' && data[i] != '\n' {
686 i++
687 }
688 if i < len(data) && data[i] == '>' {
689 i++
690 if j := p.isEmpty(data[i:]); j > 0 {
691 size := i + j
692 if doRender {
693 // trim newlines
694 end := backChar(data, size, '\n')
695 htmlBlock := &ast.HTMLBlock{ast.Leaf{Content: data[:end]}}
696 p.addBlock(htmlBlock)
697 finalizeHTMLBlock(htmlBlock)
698 }
699 return size
700 }
701 }
702 return 0
703}
704
705func (p *Parser) htmlFindTag(data []byte) (string, bool) {
706 i := skipAlnum(data, 0)
707 key := string(data[:i])
708 if _, ok := blockTags[key]; ok {
709 return key, true
710 }
711 return "", false
712}
713
714func (p *Parser) htmlFindEnd(tag string, data []byte) int {
715 // assume data[0] == '<' && data[1] == '/' already tested
716 if tag == "hr" {
717 return 2
718 }
719 // check if tag is a match
720 closetag := []byte("</" + tag + ">")
721 if !bytes.HasPrefix(data, closetag) {
722 return 0
723 }
724 i := len(closetag)
725
726 // check that the rest of the line is blank
727 skip := 0
728 if skip = p.isEmpty(data[i:]); skip == 0 {
729 return 0
730 }
731 i += skip
732 skip = 0
733
734 if i >= len(data) {
735 return i
736 }
737
738 if p.extensions&LaxHTMLBlocks != 0 {
739 return i
740 }
741 if skip = p.isEmpty(data[i:]); skip == 0 {
742 // following line must be blank
743 return 0
744 }
745
746 return i + skip
747}
748
749func (*Parser) isEmpty(data []byte) int {
750 // it is okay to call isEmpty on an empty buffer
751 if len(data) == 0 {
752 return 0
753 }
754
755 var i int
756 for i = 0; i < len(data) && data[i] != '\n'; i++ {
757 if data[i] != ' ' && data[i] != '\t' {
758 return 0
759 }
760 }
761 i = skipCharN(data, i, '\n', 1)
762 return i
763}
764
765func (*Parser) isHRule(data []byte) bool {
766 i := 0
767
768 // skip up to three spaces
769 for i < 3 && data[i] == ' ' {
770 i++
771 }
772
773 // look at the hrule char
774 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
775 return false
776 }
777 c := data[i]
778
779 // the whole line must be the char or whitespace
780 n := 0
781 for i < len(data) && data[i] != '\n' {
782 switch {
783 case data[i] == c:
784 n++
785 case data[i] != ' ':
786 return false
787 }
788 i++
789 }
790
791 return n >= 3
792}
793
794// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
795// and returns the end index if so, or 0 otherwise. It also returns the marker found.
796// If syntax is not nil, it gets set to the syntax specified in the fence line.
797func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) {
798 i, size := 0, 0
799
800 n := len(data)
801 // skip up to three spaces
802 for i < n && i < 3 && data[i] == ' ' {
803 i++
804 }
805
806 // check for the marker characters: ~ or `
807 if i >= n {
808 return 0, ""
809 }
810 if data[i] != '~' && data[i] != '`' {
811 return 0, ""
812 }
813
814 c := data[i]
815
816 // the whole line must be the same char or whitespace
817 for i < n && data[i] == c {
818 size++
819 i++
820 }
821
822 // the marker char must occur at least 3 times
823 if size < 3 {
824 return 0, ""
825 }
826 marker = string(data[i-size : i])
827
828 // if this is the end marker, it must match the beginning marker
829 if oldmarker != "" && marker != oldmarker {
830 return 0, ""
831 }
832
833 // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
834 // into one, always get the syntax, and discard it if the caller doesn't care.
835 if syntax != nil {
836 syn := 0
837 i = skipChar(data, i, ' ')
838
839 if i >= n {
840 if i == n {
841 return i, marker
842 }
843 return 0, ""
844 }
845
846 syntaxStart := i
847
848 if data[i] == '{' {
849 i++
850 syntaxStart++
851
852 for i < n && data[i] != '}' && data[i] != '\n' {
853 syn++
854 i++
855 }
856
857 if i >= n || data[i] != '}' {
858 return 0, ""
859 }
860
861 // strip all whitespace at the beginning and the end
862 // of the {} block
863 for syn > 0 && isSpace(data[syntaxStart]) {
864 syntaxStart++
865 syn--
866 }
867
868 for syn > 0 && isSpace(data[syntaxStart+syn-1]) {
869 syn--
870 }
871
872 i++
873 } else {
874 for i < n && !isSpace(data[i]) {
875 syn++
876 i++
877 }
878 }
879
880 *syntax = string(data[syntaxStart : syntaxStart+syn])
881 }
882
883 i = skipChar(data, i, ' ')
884 if i >= n || data[i] != '\n' {
885 if i == n {
886 return i, marker
887 }
888 return 0, ""
889 }
890 return i + 1, marker // Take newline into account.
891}
892
893// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
894// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
895// If doRender is true, a final newline is mandatory to recognize the fenced code block.
896func (p *Parser) fencedCodeBlock(data []byte, doRender bool) int {
897 var syntax string
898 beg, marker := isFenceLine(data, &syntax, "")
899 if beg == 0 || beg >= len(data) {
900 return 0
901 }
902
903 var work bytes.Buffer
904 work.WriteString(syntax)
905 work.WriteByte('\n')
906
907 for {
908 // safe to assume beg < len(data)
909
910 // check for the end of the code block
911 fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
912 if fenceEnd != 0 {
913 beg += fenceEnd
914 break
915 }
916
917 // copy the current line
918 end := skipUntilChar(data, beg, '\n') + 1
919
920 // did we reach the end of the buffer without a closing marker?
921 if end >= len(data) {
922 return 0
923 }
924
925 // verbatim copy to the working buffer
926 if doRender {
927 work.Write(data[beg:end])
928 }
929 beg = end
930 }
931
932 if doRender {
933 codeBlock := &ast.CodeBlock{
934 IsFenced: true,
935 }
936 codeBlock.Content = work.Bytes() // TODO: get rid of temp buffer
937
938 if p.extensions&Mmark == 0 {
939 p.addBlock(codeBlock)
940 finalizeCodeBlock(codeBlock)
941 return beg
942 }
943
944 // Check for caption and if found make it a figure.
945 if captionContent, id, consumed := p.caption(data[beg:], []byte("Figure: ")); consumed > 0 {
946 figure := &ast.CaptionFigure{}
947 caption := &ast.Caption{}
948 figure.HeadingID = id
949 p.Inline(caption, captionContent)
950
951 p.addBlock(figure)
952 codeBlock.AsLeaf().Attribute = figure.AsContainer().Attribute
953 p.addChild(codeBlock)
954 finalizeCodeBlock(codeBlock)
955 p.addChild(caption)
956 p.finalize(figure)
957
958 beg += consumed
959
960 return beg
961 }
962
963 // Still here, normal block
964 p.addBlock(codeBlock)
965 finalizeCodeBlock(codeBlock)
966 }
967
968 return beg
969}
970
// unescapeChar decodes a single escape sequence: a backslash followed by a
// character yields that character, anything else is passed through
// html.UnescapeString. str must hold at least one byte, and at least two
// when it starts with a backslash.
func unescapeChar(str []byte) []byte {
	if str[0] != '\\' {
		return []byte(html.UnescapeString(string(str)))
	}
	return []byte{str[1]}
}
977
978func unescapeString(str []byte) []byte {
979 if reBackslashOrAmp.Match(str) {
980 return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
981 }
982 return str
983}
984
985func finalizeCodeBlock(code *ast.CodeBlock) {
986 c := code.Content
987 if code.IsFenced {
988 newlinePos := bytes.IndexByte(c, '\n')
989 firstLine := c[:newlinePos]
990 rest := c[newlinePos+1:]
991 code.Info = unescapeString(bytes.Trim(firstLine, "\n"))
992 code.Literal = rest
993 } else {
994 code.Literal = c
995 }
996 code.Content = nil
997}
998
999func (p *Parser) table(data []byte) int {
1000 i, columns, table := p.tableHeader(data)
1001 if i == 0 {
1002 return 0
1003 }
1004
1005 p.addBlock(&ast.TableBody{})
1006
1007 for i < len(data) {
1008 pipes, rowStart := 0, i
1009 for ; i < len(data) && data[i] != '\n'; i++ {
1010 if data[i] == '|' {
1011 pipes++
1012 }
1013 }
1014
1015 if pipes == 0 {
1016 i = rowStart
1017 break
1018 }
1019
1020 // include the newline in data sent to tableRow
1021 i = skipCharN(data, i, '\n', 1)
1022
1023 if p.tableFooter(data[rowStart:i]) {
1024 continue
1025 }
1026
1027 p.tableRow(data[rowStart:i], columns, false)
1028 }
1029 if captionContent, id, consumed := p.caption(data[i:], []byte("Table: ")); consumed > 0 {
1030 caption := &ast.Caption{}
1031 p.Inline(caption, captionContent)
1032
1033 // Some switcheroo to re-insert the parsed table as a child of the captionfigure.
1034 figure := &ast.CaptionFigure{}
1035 figure.HeadingID = id
1036 table2 := &ast.Table{}
1037 // Retain any block level attributes.
1038 table2.AsContainer().Attribute = table.AsContainer().Attribute
1039 children := table.GetChildren()
1040 ast.RemoveFromTree(table)
1041
1042 table2.SetChildren(children)
1043 ast.AppendChild(figure, table2)
1044 ast.AppendChild(figure, caption)
1045
1046 p.addChild(figure)
1047 p.finalize(figure)
1048
1049 i += consumed
1050 }
1051
1052 return i
1053}
1054
// isBackslashEscaped reports whether the byte at position i is preceded by
// an odd number of consecutive backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	for j := i - 1; j >= 0 && data[j] == '\\'; j-- {
		count++
	}
	return count%2 == 1
}
1063
1064// tableHeaders parses the header. If recognized it will also add a table.
1065func (p *Parser) tableHeader(data []byte) (size int, columns []ast.CellAlignFlags, table ast.Node) {
1066 i := 0
1067 colCount := 1
1068 for i = 0; i < len(data) && data[i] != '\n'; i++ {
1069 if data[i] == '|' && !isBackslashEscaped(data, i) {
1070 colCount++
1071 }
1072 }
1073
1074 // doesn't look like a table header
1075 if colCount == 1 {
1076 return
1077 }
1078
1079 // include the newline in the data sent to tableRow
1080 j := skipCharN(data, i, '\n', 1)
1081 header := data[:j]
1082
1083 // column count ignores pipes at beginning or end of line
1084 if data[0] == '|' {
1085 colCount--
1086 }
1087 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
1088 colCount--
1089 }
1090
1091 columns = make([]ast.CellAlignFlags, colCount)
1092
1093 // move on to the header underline
1094 i++
1095 if i >= len(data) {
1096 return
1097 }
1098
1099 if data[i] == '|' && !isBackslashEscaped(data, i) {
1100 i++
1101 }
1102 i = skipChar(data, i, ' ')
1103
1104 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
1105 // and trailing | optional on last column
1106 col := 0
1107 n := len(data)
1108 for i < n && data[i] != '\n' {
1109 dashes := 0
1110
1111 if data[i] == ':' {
1112 i++
1113 columns[col] |= ast.TableAlignmentLeft
1114 dashes++
1115 }
1116 for i < n && data[i] == '-' {
1117 i++
1118 dashes++
1119 }
1120 if i < n && data[i] == ':' {
1121 i++
1122 columns[col] |= ast.TableAlignmentRight
1123 dashes++
1124 }
1125 for i < n && data[i] == ' ' {
1126 i++
1127 }
1128 if i == n {
1129 return
1130 }
1131 // end of column test is messy
1132 switch {
1133 case dashes < 3:
1134 // not a valid column
1135 return
1136
1137 case data[i] == '|' && !isBackslashEscaped(data, i):
1138 // marker found, now skip past trailing whitespace
1139 col++
1140 i++
1141 for i < n && data[i] == ' ' {
1142 i++
1143 }
1144
1145 // trailing junk found after last column
1146 if col >= colCount && i < len(data) && data[i] != '\n' {
1147 return
1148 }
1149
1150 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
1151 // something else found where marker was required
1152 return
1153
1154 case data[i] == '\n':
1155 // marker is optional for the last column
1156 col++
1157
1158 default:
1159 // trailing junk found after last column
1160 return
1161 }
1162 }
1163 if col != colCount {
1164 return
1165 }
1166
1167 table = &ast.Table{}
1168 p.addBlock(table)
1169 p.addBlock(&ast.TableHeader{})
1170 p.tableRow(header, columns, true)
1171 size = skipCharN(data, i, '\n', 1)
1172 return
1173}
1174
1175func (p *Parser) tableRow(data []byte, columns []ast.CellAlignFlags, header bool) {
1176 p.addBlock(&ast.TableRow{})
1177 i, col := 0, 0
1178
1179 if data[i] == '|' && !isBackslashEscaped(data, i) {
1180 i++
1181 }
1182
1183 n := len(data)
1184 for col = 0; col < len(columns) && i < n; col++ {
1185 for i < n && data[i] == ' ' {
1186 i++
1187 }
1188
1189 cellStart := i
1190
1191 for i < n && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
1192 i++
1193 }
1194
1195 cellEnd := i
1196
1197 // skip the end-of-cell marker, possibly taking us past end of buffer
1198 i++
1199
1200 for cellEnd > cellStart && cellEnd-1 < n && data[cellEnd-1] == ' ' {
1201 cellEnd--
1202 }
1203
1204 block := &ast.TableCell{
1205 IsHeader: header,
1206 Align: columns[col],
1207 }
1208 block.Content = data[cellStart:cellEnd]
1209 p.addBlock(block)
1210 }
1211
1212 // pad it out with empty columns to get the right number
1213 for ; col < len(columns); col++ {
1214 block := &ast.TableCell{
1215 IsHeader: header,
1216 Align: columns[col],
1217 }
1218 p.addBlock(block)
1219 }
1220
1221 // silently ignore rows with too many cells
1222}
1223
1224// tableFooter parses the (optional) table footer.
1225func (p *Parser) tableFooter(data []byte) bool {
1226 colCount := 1
1227 for i := 0; i < len(data) && data[i] != '\n'; i++ {
1228 if data[i] == '|' && !isBackslashEscaped(data, i) {
1229 colCount++
1230 continue
1231 }
1232 // remaining data must be the = character
1233 if data[i] != '=' {
1234 return false
1235 }
1236 }
1237
1238 // doesn't look like a table footer
1239 if colCount == 1 {
1240 return false
1241 }
1242
1243 p.addBlock(&ast.TableFooter{})
1244
1245 return true
1246}
1247
1248// returns blockquote prefix length
1249func (p *Parser) quotePrefix(data []byte) int {
1250 i := 0
1251 n := len(data)
1252 for i < 3 && i < n && data[i] == ' ' {
1253 i++
1254 }
1255 if i < n && data[i] == '>' {
1256 if i+1 < n && data[i+1] == ' ' {
1257 return i + 2
1258 }
1259 return i + 1
1260 }
1261 return 0
1262}
1263
1264// blockquote ends with at least one blank line
1265// followed by something without a blockquote prefix
1266func (p *Parser) terminateBlockquote(data []byte, beg, end int) bool {
1267 if p.isEmpty(data[beg:]) <= 0 {
1268 return false
1269 }
1270 if end >= len(data) {
1271 return true
1272 }
1273 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
1274}
1275
1276// parse a blockquote fragment
1277func (p *Parser) quote(data []byte) int {
1278 var raw bytes.Buffer
1279 beg, end := 0, 0
1280 for beg < len(data) {
1281 end = beg
1282 // Step over whole lines, collecting them. While doing that, check for
1283 // fenced code and if one's found, incorporate it altogether,
1284 // irregardless of any contents inside it
1285 for end < len(data) && data[end] != '\n' {
1286 if p.extensions&FencedCode != 0 {
1287 if i := p.fencedCodeBlock(data[end:], false); i > 0 {
1288 // -1 to compensate for the extra end++ after the loop:
1289 end += i - 1
1290 break
1291 }
1292 }
1293 end++
1294 }
1295 end = skipCharN(data, end, '\n', 1)
1296 if pre := p.quotePrefix(data[beg:]); pre > 0 {
1297 // skip the prefix
1298 beg += pre
1299 } else if p.terminateBlockquote(data, beg, end) {
1300 break
1301 }
1302 // this line is part of the blockquote
1303 raw.Write(data[beg:end])
1304 beg = end
1305 }
1306
1307 if p.extensions&Mmark == 0 {
1308 block := p.addBlock(&ast.BlockQuote{})
1309 p.block(raw.Bytes())
1310 p.finalize(block)
1311 return end
1312 }
1313
1314 if captionContent, id, consumed := p.caption(data[end:], []byte("Quote: ")); consumed > 0 {
1315 figure := &ast.CaptionFigure{}
1316 caption := &ast.Caption{}
1317 figure.HeadingID = id
1318 p.Inline(caption, captionContent)
1319
1320 p.addBlock(figure) // this discard any attributes
1321 block := &ast.BlockQuote{}
1322 block.AsContainer().Attribute = figure.AsContainer().Attribute
1323 p.addChild(block)
1324 p.block(raw.Bytes())
1325 p.finalize(block)
1326
1327 p.addChild(caption)
1328 p.finalize(figure)
1329
1330 end += consumed
1331
1332 return end
1333 }
1334
1335 block := p.addBlock(&ast.BlockQuote{})
1336 p.block(raw.Bytes())
1337 p.finalize(block)
1338
1339 return end
1340}
1341
1342// returns prefix length for block code
1343func (p *Parser) codePrefix(data []byte) int {
1344 n := len(data)
1345 if n >= 1 && data[0] == '\t' {
1346 return 1
1347 }
1348 if n >= 4 && data[3] == ' ' && data[2] == ' ' && data[1] == ' ' && data[0] == ' ' {
1349 return 4
1350 }
1351 return 0
1352}
1353
1354func (p *Parser) code(data []byte) int {
1355 var work bytes.Buffer
1356
1357 i := 0
1358 for i < len(data) {
1359 beg := i
1360
1361 i = skipUntilChar(data, i, '\n')
1362 i = skipCharN(data, i, '\n', 1)
1363
1364 blankline := p.isEmpty(data[beg:i]) > 0
1365 if pre := p.codePrefix(data[beg:i]); pre > 0 {
1366 beg += pre
1367 } else if !blankline {
1368 // non-empty, non-prefixed line breaks the pre
1369 i = beg
1370 break
1371 }
1372
1373 // verbatim copy to the working buffer
1374 if blankline {
1375 work.WriteByte('\n')
1376 } else {
1377 work.Write(data[beg:i])
1378 }
1379 }
1380
1381 // trim all the \n off the end of work
1382 workbytes := work.Bytes()
1383
1384 eol := backChar(workbytes, len(workbytes), '\n')
1385
1386 if eol != len(workbytes) {
1387 work.Truncate(eol)
1388 }
1389
1390 work.WriteByte('\n')
1391
1392 codeBlock := &ast.CodeBlock{
1393 IsFenced: false,
1394 }
1395 // TODO: get rid of temp buffer
1396 codeBlock.Content = work.Bytes()
1397 p.addBlock(codeBlock)
1398 finalizeCodeBlock(codeBlock)
1399
1400 return i
1401}
1402
1403// returns unordered list item prefix
1404func (p *Parser) uliPrefix(data []byte) int {
1405 // start with up to 3 spaces
1406 i := skipCharN(data, 0, ' ', 3)
1407
1408 if i >= len(data)-1 {
1409 return 0
1410 }
1411 // need one of {'*', '+', '-'} followed by a space or a tab
1412 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1413 (data[i+1] != ' ' && data[i+1] != '\t') {
1414 return 0
1415 }
1416 return i + 2
1417}
1418
1419// returns ordered list item prefix
1420func (p *Parser) oliPrefix(data []byte) int {
1421 // start with up to 3 spaces
1422 i := skipCharN(data, 0, ' ', 3)
1423
1424 // count the digits
1425 start := i
1426 for i < len(data) && data[i] >= '0' && data[i] <= '9' {
1427 i++
1428 }
1429 if start == i || i >= len(data)-1 {
1430 return 0
1431 }
1432
1433 // we need >= 1 digits followed by a dot and a space or a tab
1434 if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1435 return 0
1436 }
1437 return i + 2
1438}
1439
1440// returns definition list item prefix
1441func (p *Parser) dliPrefix(data []byte) int {
1442 if len(data) < 2 {
1443 return 0
1444 }
1445 // need a ':' followed by a space or a tab
1446 if data[0] != ':' || !(data[1] == ' ' || data[1] == '\t') {
1447 return 0
1448 }
1449 i := skipChar(data, 0, ' ')
1450 return i + 2
1451}
1452
1453// parse ordered or unordered list block
1454func (p *Parser) list(data []byte, flags ast.ListType, start int) int {
1455 i := 0
1456 flags |= ast.ListItemBeginningOfList
1457 list := &ast.List{
1458 ListFlags: flags,
1459 Tight: true,
1460 Start: start,
1461 }
1462 block := p.addBlock(list)
1463
1464 for i < len(data) {
1465 skip := p.listItem(data[i:], &flags)
1466 if flags&ast.ListItemContainsBlock != 0 {
1467 list.Tight = false
1468 }
1469 i += skip
1470 if skip == 0 || flags&ast.ListItemEndOfList != 0 {
1471 break
1472 }
1473 flags &= ^ast.ListItemBeginningOfList
1474 }
1475
1476 above := block.GetParent()
1477 finalizeList(list)
1478 p.tip = above
1479 return i
1480}
1481
1482// Returns true if the list item is not the same type as its parent list
1483func (p *Parser) listTypeChanged(data []byte, flags *ast.ListType) bool {
1484 if p.dliPrefix(data) > 0 && *flags&ast.ListTypeDefinition == 0 {
1485 return true
1486 } else if p.oliPrefix(data) > 0 && *flags&ast.ListTypeOrdered == 0 {
1487 return true
1488 } else if p.uliPrefix(data) > 0 && (*flags&ast.ListTypeOrdered != 0 || *flags&ast.ListTypeDefinition != 0) {
1489 return true
1490 }
1491 return false
1492}
1493
1494// Returns true if block ends with a blank line, descending if needed
1495// into lists and sublists.
1496func endsWithBlankLine(block ast.Node) bool {
1497 // TODO: figure this out. Always false now.
1498 for block != nil {
1499 //if block.lastLineBlank {
1500 //return true
1501 //}
1502 switch block.(type) {
1503 case *ast.List, *ast.ListItem:
1504 block = ast.GetLastChild(block)
1505 default:
1506 return false
1507 }
1508 }
1509 return false
1510}
1511
1512func finalizeList(list *ast.List) {
1513 items := list.Parent.GetChildren()
1514 lastItemIdx := len(items) - 1
1515 for i, item := range items {
1516 isLastItem := i == lastItemIdx
1517 // check for non-final list item ending with blank line:
1518 if !isLastItem && endsWithBlankLine(item) {
1519 list.Tight = false
1520 break
1521 }
1522 // recurse into children of list item, to see if there are spaces
1523 // between any of them:
1524 subItems := item.GetParent().GetChildren()
1525 lastSubItemIdx := len(subItems) - 1
1526 for j, subItem := range subItems {
1527 isLastSubItem := j == lastSubItemIdx
1528 if (!isLastItem || !isLastSubItem) && endsWithBlankLine(subItem) {
1529 list.Tight = false
1530 break
1531 }
1532 }
1533 }
1534}
1535
// listItem parses a single list item from the start of data and adds it to
// the tree as an *ast.ListItem.
// Assumes initial prefix is already removed if this is a sublist.
//
// flags is both input and output: it tells which kind of list is being
// parsed and is updated with ListTypeTerm, ListItemContainsBlock and
// ListItemEndOfList as the item is scanned.
//
// It returns the offset in data of the first line that does not belong to
// this item, or 0 when data does not start with an item prefix and the list
// is not a definition list.
func (p *Parser) listItem(data []byte, flags *ast.ListType) int {
	// keep track of the indentation of the first line
	// (a leading tab counts as 4, otherwise up to 3 leading spaces)
	itemIndent := 0
	if data[0] == '\t' {
		itemIndent += 4
	} else {
		for itemIndent < 3 && data[itemIndent] == ' ' {
			itemIndent++
		}
	}

	// try the prefixes in order: unordered, ordered, definition.
	// bulletChar stays '*' unless an unordered marker is found, in which
	// case it is the marker byte itself (two bytes before the prefix end).
	var bulletChar byte = '*'
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	} else {
		bulletChar = data[i-2]
	}
	if i == 0 {
		i = p.dliPrefix(data)
		// reset definition term flag
		if i > 0 {
			*flags &= ^ast.ListTypeTerm
		}
	}
	if i == 0 {
		// if in definition list, set term flag and continue
		if *flags&ast.ListTypeDefinition != 0 {
			*flags |= ast.ListTypeTerm
		} else {
			// no prefix at all: not a list item
			return 0
		}
	}

	// skip leading whitespace on first line
	i = skipChar(data, i, ' ')

	// find the end of the line
	// (i ends up just past the newline, or at len(data))
	line := i
	for i > 0 && i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	// containsBlankLine: at least one blank line was seen since the last
	// line copied into raw.
	// sublist: offset in raw where the first nested list starts; 0 means no
	// nested list has been seen.
	containsBlankLine := false
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		// indent is the visual width (a tab counts as 4, spaces are capped
		// at 4); indentIndex is the number of bytes that make it up.
		indent := 0
		indentIndex := 0
		if data[line] == '\t' {
			indentIndex++
			indent += 4
		} else {
			for indent < 4 && line+indent < i && data[line+indent] == ' ' {
				indent++
				indentIndex++
			}
		}

		// the current line with its indentation removed
		chunk := data[line+indentIndex : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) || p.oliPrefix(chunk) > 0 || p.dliPrefix(chunk) > 0:

			// to be a nested list, it must be indented more
			// if not, it is either a different kind of list
			// or the next item in the same list
			if indent <= itemIndent {
				if p.listTypeChanged(chunk, flags) {
					*flags |= ast.ListItemEndOfList
				} else if containsBlankLine {
					*flags |= ast.ListItemContainsBlock
				}

				break gatherlines
			}

			if containsBlankLine {
				*flags |= ast.ListItemContainsBlock
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
				// in the case of dliPrefix we are too late and need to search back for the definition item, which
				// should be on the previous line, we then adjust sublist to start there.
				if p.dliPrefix(chunk) > 0 {
					sublist = backUntilChar(raw.Bytes(), raw.Len()-1, '\n')
				}
			}

		// is this a nested prefix heading?
		case p.isPrefixHeading(chunk), p.isPrefixSpecialHeading(chunk):
			// if the heading is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= ast.ListItemEndOfList
				break gatherlines
			}
			*flags |= ast.ListItemContainsBlock

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			if *flags&ast.ListTypeDefinition != 0 && i < len(data)-1 {
				// is the next item still a part of this list?
				// next is moved to the end of the line starting at i and
				// then past the newline(s) that follow it
				next := i
				for next < len(data) && data[next] != '\n' {
					next++
				}
				for next < len(data)-1 && data[next] == '\n' {
					next++
				}
				// the list ends only when neither data[i] nor data[next]
				// is a ':'
				if i < len(data)-1 && data[i] != ':' && next < len(data)-1 && data[next] != ':' {
					*flags |= ast.ListItemEndOfList
				}
			} else {
				*flags |= ast.ListItemEndOfList
			}
			break gatherlines

		// a blank line means this should be parsed as a block
		// (this newline is written in addition to the one re-introduced
		// just below the switch)
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= ast.ListItemContainsBlock
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indentIndex : i])

		line = i
	}

	rawBytes := raw.Bytes()

	listItem := &ast.ListItem{
		ListFlags:  *flags,
		Tight:      false,
		BulletChar: bulletChar,
		Delimiter:  '.', // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
	}
	p.addBlock(listItem)

	// render the contents of the list item
	if *flags&ast.ListItemContainsBlock != 0 && *flags&ast.ListTypeTerm == 0 {
		// intermediate render of block item, except for definition term
		// (the item text and the nested list are parsed separately)
		if sublist > 0 {
			p.block(rawBytes[:sublist])
			p.block(rawBytes[sublist:])
		} else {
			p.block(rawBytes)
		}
	} else {
		// intermediate render of inline item
		// (the text before any nested list becomes one paragraph; the
		// nested list itself is still parsed as blocks)
		para := &ast.Paragraph{}
		if sublist > 0 {
			para.Content = rawBytes[:sublist]
		} else {
			para.Content = rawBytes
		}
		p.addChild(para)
		if sublist > 0 {
			p.block(rawBytes[sublist:])
		}
	}
	return line
}
1740
1741// render a single paragraph that has already been parsed out
1742func (p *Parser) renderParagraph(data []byte) {
1743 if len(data) == 0 {
1744 return
1745 }
1746
1747 // trim leading spaces
1748 beg := skipChar(data, 0, ' ')
1749
1750 end := len(data)
1751 // trim trailing newline
1752 if data[len(data)-1] == '\n' {
1753 end--
1754 }
1755
1756 // trim trailing spaces
1757 for end > beg && data[end-1] == ' ' {
1758 end--
1759 }
1760 para := &ast.Paragraph{}
1761 para.Content = data[beg:end]
1762 p.addBlock(para)
1763}
1764
1765// blockMath handle block surround with $$
1766func (p *Parser) blockMath(data []byte) int {
1767 if len(data) <= 4 || data[0] != '$' || data[1] != '$' || data[2] == '$' {
1768 return 0
1769 }
1770
1771 // find next $$
1772 var end int
1773 for end = 2; end+1 < len(data) && (data[end] != '$' || data[end+1] != '$'); end++ {
1774 }
1775
1776 // $$ not match
1777 if end+1 == len(data) {
1778 return 0
1779 }
1780
1781 // render the display math
1782 mathBlock := &ast.MathBlock{}
1783 mathBlock.Literal = data[2:end]
1784 p.addBlock(mathBlock)
1785
1786 return end + 2
1787}
1788
// paragraph scans data one line at a time until it meets something that ends
// the paragraph: a reference or footnote, a blank line, a heading underline,
// or the start of another block. It adds the resulting block(s) to the tree
// and returns the number of bytes of data consumed. Several of the checks
// only run when the matching parser extension is enabled.
func (p *Parser) paragraph(data []byte) int {
	// prev: index of 1st char of previous line
	// line: index of 1st char of current line
	// i: index of cursor/end of current line
	var prev, line, i int
	tabSize := tabSizeDefault
	if p.extensions&TabSizeEight != 0 {
		tabSize = tabSizeDouble
	}
	// keep going until we find something to mark the end of the paragraph
	for i < len(data) {
		// mark the beginning of the current line
		prev = line
		current := data[i:]
		line = i

		// did we find a reference or a footnote? If so, end a paragraph
		// preceding it and report that we have consumed up to the end of that
		// reference:
		if refEnd := isReference(p, current, tabSize); refEnd > 0 {
			p.renderParagraph(data[:i])
			return i + refEnd
		}

		// did we find a blank line marking the end of the paragraph?
		if n := p.isEmpty(current); n > 0 {
			// is this blank line followed by a definition list item?
			// NOTE(review): this inspects data[i+1], so it appears to assume
			// the blank line is a single newline byte; confirm.
			if p.extensions&DefinitionLists != 0 {
				if i < len(data)-1 && data[i+1] == ':' {
					// the previous line is handed to the list parser
					// together with the definition
					listLen := p.list(data[prev:], ast.ListTypeDefinition, 0)
					return prev + listLen
				}
			}

			p.renderParagraph(data[:i])
			return i + n
		}

		// an underline under some text marks a heading, so our paragraph ended on prev line
		if i > 0 {
			if level := p.isUnderlinedHeading(current); level > 0 {
				// render the paragraph
				// (everything before the heading text line)
				p.renderParagraph(data[:prev])

				// ignore leading and trailing whitespace
				// (eol starts at i-1, the last byte of the heading text line)
				eol := i - 1
				for prev < eol && data[prev] == ' ' {
					prev++
				}
				for eol > prev && data[eol-1] == ' ' {
					eol--
				}

				id := ""
				if p.extensions&AutoHeadingIDs != 0 {
					id = sanitizeAnchorName(string(data[prev:eol]))
				}

				block := &ast.Heading{
					Level:     level,
					HeadingID: id,
				}
				block.Content = data[prev:eol]
				p.addBlock(block)

				// find the end of the underline
				// (the returned index is that of the newline, or len(data))
				return skipUntilChar(data, i, '\n')
			}
		}

		// if the next line starts a block of HTML, then the paragraph ends here
		if p.extensions&LaxHTMLBlocks != 0 {
			if data[i] == '<' && p.html(current, false) > 0 {
				// rewind to before the HTML block
				p.renderParagraph(data[:i])
				return i
			}
		}

		// if there's a prefixed heading or a horizontal rule after this, paragraph is over
		if p.isPrefixHeading(current) || p.isPrefixSpecialHeading(current) || p.isHRule(current) {
			p.renderParagraph(data[:i])
			return i
		}

		// if there's a fenced code block, paragraph is over
		if p.extensions&FencedCode != 0 {
			if p.fencedCodeBlock(current, false) > 0 {
				p.renderParagraph(data[:i])
				return i
			}
		}

		// if there's a figure block, paragraph is over
		if p.extensions&Mmark != 0 {
			if p.figureBlock(current, false) > 0 {
				p.renderParagraph(data[:i])
				return i
			}
		}

		// if there's a definition list item, prev line is a definition term
		if p.extensions&DefinitionLists != 0 {
			if p.dliPrefix(current) != 0 {
				// parse the list starting at the term line
				ret := p.list(data[prev:], ast.ListTypeDefinition, 0)
				return ret + prev
			}
		}

		// if there's a list, block quote or indented code after this,
		// paragraph is over
		if p.extensions&NoEmptyLineBeforeBlock != 0 {
			if p.uliPrefix(current) != 0 ||
				p.oliPrefix(current) != 0 ||
				p.quotePrefix(current) != 0 ||
				p.codePrefix(current) != 0 {
				p.renderParagraph(data[:i])
				return i
			}
		}

		// otherwise, scan to the beginning of the next line
		// (or to the end of data when there is no further newline)
		nl := bytes.IndexByte(data[i:], '\n')
		if nl >= 0 {
			i += nl + 1
		} else {
			i += len(data[i:])
		}
	}

	// reached the end of data without a terminator: it is all one paragraph
	p.renderParagraph(data[:i])
	return i
}
1921
// skipChar returns the index of the first byte at or after i that is not c,
// or len(data) if every remaining byte equals c. If i is already at or past
// the end of data it is returned unchanged.
func skipChar(data []byte, i int, c byte) int {
	for ; i < len(data); i++ {
		if data[i] != c {
			break
		}
	}
	return i
}
1930
// skipCharN behaves like skipChar but advances over at most max
// occurrences of c.
func skipCharN(data []byte, i int, c byte, max int) int {
	for budget := max; budget > 0 && i < len(data) && data[i] == c; budget-- {
		i++
	}
	return i
}
1940
// skipUntilChar returns the index of the first occurrence of c at or after
// i, or len(data) when c does not occur. If i is already at or past the end
// of data it is returned unchanged.
func skipUntilChar(data []byte, i int, c byte) int {
	for i < len(data) {
		if data[i] == c {
			return i
		}
		i++
	}
	return i
}
1949
1950func skipAlnum(data []byte, i int) int {
1951 n := len(data)
1952 for i < n && isAlnum(data[i]) {
1953 i++
1954 }
1955 return i
1956}
1957
1958func skipSpace(data []byte, i int) int {
1959 n := len(data)
1960 for i < n && isSpace(data[i]) {
1961 i++
1962 }
1963 return i
1964}
1965
// backChar moves i backwards while the byte just before it (data[i-1])
// equals c and returns the resulting index; it stops at 0.
func backChar(data []byte, i int, c byte) int {
	for ; i > 0; i-- {
		if data[i-1] != c {
			break
		}
	}
	return i
}
1972
// backUntilChar moves i backwards until the byte just before it (data[i-1])
// equals c and returns the resulting index, or 0 when c is not found.
func backUntilChar(data []byte, i int, c byte) int {
	for ; i > 0; i-- {
		if data[i-1] == c {
			break
		}
	}
	return i
}