1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse block-level elements.
12//
13
14package blackfriday
15
16import (
17 "bytes"
18 "unicode"
19)
20
// block parses block-level data and renders every construct it recognizes
// into out.
//
// Note: this function and many that it calls assume that
// the input buffer ends with a newline. block panics when data is empty or
// does not end with '\n'.
//
// block is called recursively (for example on the contents of block quotes
// and list items). p.nesting is incremented for the duration of the call;
// a call made once p.nesting has reached p.maxNesting returns immediately
// without writing anything.
//
// The checks inside the loop are tried in a fixed order and the first one
// that matches consumes its bytes, so the order determines precedence.
func (p *parser) block(out *bytes.Buffer, data []byte) {
	if len(data) == 0 || data[len(data)-1] != '\n' {
		panic("block input is missing terminating newline")
	}

	// this is called recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	// parse out one block-level construct at a time
	for len(data) > 0 {
		// prefixed header:
		//
		//     # Header 1
		//     ## Header 2
		//     ...
		//     ###### Header 6
		if p.isPrefixHeader(data) {
			data = data[p.prefixHeader(out, data):]
			continue
		}

		// block of preformatted HTML:
		//
		//     <div>
		//         ...
		//     </div>
		//
		// html returns 0 when no block was recognized, in which case the
		// remaining checks get a chance at the same input.
		if data[0] == '<' {
			if i := p.html(out, data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// title block
		//
		//     % stuff
		//     % more stuff
		//     % even more stuff
		if p.flags&EXTENSION_TITLEBLOCK != 0 {
			if data[0] == '%' {
				if i := p.titleBlock(out, data, true); i > 0 {
					data = data[i:]
					continue
				}
			}
		}

		// blank lines. note: returns the # of bytes to skip
		if i := p.isEmpty(data); i > 0 {
			data = data[i:]
			continue
		}

		// indented code block:
		//
		//     func max(a, b int) int {
		//         if a > b {
		//             return a
		//         }
		//         return b
		//     }
		if p.codePrefix(data) > 0 {
			data = data[p.code(out, data):]
			continue
		}

		// fenced code block:
		//
		//     ``` go
		//     func fact(n int) int {
		//         if n <= 1 {
		//             return n
		//         }
		//         return n * fact(n-1)
		//     }
		//     ```
		if p.flags&EXTENSION_FENCED_CODE != 0 {
			if i := p.fencedCodeBlock(out, data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// horizontal rule:
		//
		//     ------
		// or
		//     ******
		// or
		//     ______
		if p.isHRule(data) {
			p.r.HRule(out)
			// advance to (but not past) the newline ending the rule; the
			// remaining "\n" is consumed as a blank line on the next pass
			var i int
			for i = 0; data[i] != '\n'; i++ {
			}
			data = data[i:]
			continue
		}

		// block quote:
		//
		//     > A big quote I found somewhere
		//     > on the web
		if p.quotePrefix(data) > 0 {
			data = data[p.quote(out, data):]
			continue
		}

		// table:
		//
		//     Name  | Age | Phone
		//     ------|-----|---------
		//     Bob   | 31  | 555-1234
		//     Alice | 27  | 555-4321
		if p.flags&EXTENSION_TABLES != 0 {
			if i := p.table(out, data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// an itemized/unordered list:
		//
		//     * Item 1
		//     * Item 2
		//
		// also works with + or -
		if p.uliPrefix(data) > 0 {
			data = data[p.list(out, data, 0):]
			continue
		}

		// a numbered/ordered list:
		//
		//     1. Item 1
		//     2. Item 2
		if p.oliPrefix(data) > 0 {
			data = data[p.list(out, data, LIST_TYPE_ORDERED):]
			continue
		}

		// definition lists:
		//
		//     Term 1
		//     :   Definition a
		//     :   Definition b
		//
		//     Term 2
		//     :   Definition c
		if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
			if p.dliPrefix(data) > 0 {
				data = data[p.list(out, data, LIST_TYPE_DEFINITION):]
				continue
			}
		}

		// anything else must look like a normal paragraph
		// note: this finds underlined headers, too
		data = data[p.paragraph(out, data):]
	}

	p.nesting--
}
190
191func (p *parser) isPrefixHeader(data []byte) bool {
192 if data[0] != '#' {
193 return false
194 }
195
196 if p.flags&EXTENSION_SPACE_HEADERS != 0 {
197 level := 0
198 for level < 6 && data[level] == '#' {
199 level++
200 }
201 if data[level] != ' ' {
202 return false
203 }
204 }
205 return true
206}
207
208func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
209 level := 0
210 for level < 6 && data[level] == '#' {
211 level++
212 }
213 i := skipChar(data, level, ' ')
214 end := skipUntilChar(data, i, '\n')
215 skip := end
216 id := ""
217 if p.flags&EXTENSION_HEADER_IDS != 0 {
218 j, k := 0, 0
219 // find start/end of header id
220 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
221 }
222 for k = j + 1; k < end && data[k] != '}'; k++ {
223 }
224 // extract header id iff found
225 if j < end && k < end {
226 id = string(data[j+2 : k])
227 end = j
228 skip = k + 1
229 for end > 0 && data[end-1] == ' ' {
230 end--
231 }
232 }
233 }
234 for end > 0 && data[end-1] == '#' {
235 if isBackslashEscaped(data, end-1) {
236 break
237 }
238 end--
239 }
240 for end > 0 && data[end-1] == ' ' {
241 end--
242 }
243 if end > i {
244 if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
245 id = SanitizedAnchorName(string(data[i:end]))
246 }
247 work := func() bool {
248 p.inline(out, data[i:end])
249 return true
250 }
251 p.r.Header(out, work, level, id)
252 }
253 return skip
254}
255
256func (p *parser) isUnderlinedHeader(data []byte) int {
257 // test of level 1 header
258 if data[0] == '=' {
259 i := skipChar(data, 1, '=')
260 i = skipChar(data, i, ' ')
261 if data[i] == '\n' {
262 return 1
263 } else {
264 return 0
265 }
266 }
267
268 // test of level 2 header
269 if data[0] == '-' {
270 i := skipChar(data, 1, '-')
271 i = skipChar(data, i, ' ')
272 if data[i] == '\n' {
273 return 2
274 } else {
275 return 0
276 }
277 }
278
279 return 0
280}
281
282func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
283 if data[0] != '%' {
284 return 0
285 }
286 splitData := bytes.Split(data, []byte("\n"))
287 var i int
288 for idx, b := range splitData {
289 if !bytes.HasPrefix(b, []byte("%")) {
290 i = idx // - 1
291 break
292 }
293 }
294
295 data = bytes.Join(splitData[0:i], []byte("\n"))
296 p.r.TitleBlock(out, data)
297
298 return len(data)
299}
300
// html tries to recognize a block of raw HTML at the start of data.
//
// It returns the number of bytes the block occupies (including the blank
// line(s) accepted after it), or 0 if data does not start with a recognized
// HTML block. When doRender is true the block, with trailing newlines
// trimmed, is passed to the renderer's BlockHtml; when false nothing is
// written to out by this function itself.
//
// If the opening tag is not one of the known block tags, the special cases
// (HTML comment, <hr>, CDATA section) are tried instead, in that order.
func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
	var i, j int

	// identify the opening tag
	if data[0] != '<' {
		return 0
	}
	curtag, tagfound := p.htmlFindTag(data[1:])

	// handle special cases
	if !tagfound {
		// check for an HTML comment
		if size := p.htmlComment(out, data, doRender); size > 0 {
			return size
		}

		// check for an <hr> tag
		if size := p.htmlHr(out, data, doRender); size > 0 {
			return size
		}

		// check for HTML CDATA
		if size := p.htmlCDATA(out, data, doRender); size > 0 {
			return size
		}

		// no special case recognized
		return 0
	}

	// look for an unindented matching closing tag
	// followed by a blank line
	//
	// NOTE: the first pass below is commented out, so found is always false
	// when the second pass is reached.
	found := false
	/*
		closetag := []byte("\n</" + curtag + ">")
		j = len(curtag) + 1
		for !found {
			// scan for a closing tag at the beginning of a line
			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
				j += skip + len(closetag)
			} else {
				break
			}

			// see if it is the only thing on the line
			if skip := p.isEmpty(data[j:]); skip > 0 {
				// see if it is followed by a blank line/eof
				j += skip
				if j >= len(data) {
					found = true
					i = j
				} else {
					if skip := p.isEmpty(data[j:]); skip > 0 {
						j += skip
						found = true
						i = j
					}
				}
			}
		}
	*/

	// if not found, try a second pass looking for indented match
	// but not if tag is "ins" or "del" (following original Markdown.pl)
	if !found && curtag != "ins" && curtag != "del" {
		i = 1
		for i < len(data) {
			// advance until data[i-1:i+1] is "</", i.e. the start of some
			// closing tag (or until the input is exhausted)
			i++
			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
				i++
			}

			// not enough room left for "</" + curtag + ">"
			if i+2+len(curtag) >= len(data) {
				break
			}

			// htmlFindEnd returns the length of an acceptable closing tag
			// (plus trailing blank text) measured from the '<', or 0
			j = p.htmlFindEnd(curtag, data[i-1:])

			if j > 0 {
				// i-1 is the index of '<', so the block ends at (i-1)+j
				i += j - 1
				found = true
				break
			}
		}
	}

	if !found {
		return 0
	}

	// the end of the block has been found
	if doRender {
		// trim newlines
		end := i
		for end > 0 && data[end-1] == '\n' {
			end--
		}
		p.r.BlockHtml(out, data[:end])
	}

	return i
}
403
404func (p *parser) renderHTMLBlock(out *bytes.Buffer, data []byte, start int, doRender bool) int {
405 // html block needs to end with a blank line
406 if i := p.isEmpty(data[start:]); i > 0 {
407 size := start + i
408 if doRender {
409 // trim trailing newlines
410 end := size
411 for end > 0 && data[end-1] == '\n' {
412 end--
413 }
414 p.r.BlockHtml(out, data[:end])
415 }
416 return size
417 }
418 return 0
419}
420
421// HTML comment, lax form
422func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
423 i := p.inlineHTMLComment(out, data)
424 return p.renderHTMLBlock(out, data, i, doRender)
425}
426
427// HTML CDATA section
428func (p *parser) htmlCDATA(out *bytes.Buffer, data []byte, doRender bool) int {
429 const cdataTag = "<![cdata["
430 const cdataTagLen = len(cdataTag)
431 if len(data) < cdataTagLen+1 {
432 return 0
433 }
434 if !bytes.Equal(bytes.ToLower(data[:cdataTagLen]), []byte(cdataTag)) {
435 return 0
436 }
437 i := cdataTagLen
438 // scan for an end-of-comment marker, across lines if necessary
439 for i < len(data) && !(data[i-2] == ']' && data[i-1] == ']' && data[i] == '>') {
440 i++
441 }
442 i++
443 // no end-of-comment marker
444 if i >= len(data) {
445 return 0
446 }
447 return p.renderHTMLBlock(out, data, i, doRender)
448}
449
450// HR, which is the only self-closing block tag considered
451func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
452 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
453 return 0
454 }
455 if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
456 // not an <hr> tag after all; at least not a valid one
457 return 0
458 }
459
460 i := 3
461 for data[i] != '>' && data[i] != '\n' {
462 i++
463 }
464
465 if data[i] == '>' {
466 return p.renderHTMLBlock(out, data, i+1, doRender)
467 }
468
469 return 0
470}
471
472func (p *parser) htmlFindTag(data []byte) (string, bool) {
473 i := 0
474 for isalnum(data[i]) {
475 i++
476 }
477 key := string(data[:i])
478 if _, ok := blockTags[key]; ok {
479 return key, true
480 }
481 return "", false
482}
483
484func (p *parser) htmlFindEnd(tag string, data []byte) int {
485 // assume data[0] == '<' && data[1] == '/' already tested
486
487 // check if tag is a match
488 closetag := []byte("</" + tag + ">")
489 if !bytes.HasPrefix(data, closetag) {
490 return 0
491 }
492 i := len(closetag)
493
494 // check that the rest of the line is blank
495 skip := 0
496 if skip = p.isEmpty(data[i:]); skip == 0 {
497 return 0
498 }
499 i += skip
500 skip = 0
501
502 if i >= len(data) {
503 return i
504 }
505
506 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
507 return i
508 }
509 if skip = p.isEmpty(data[i:]); skip == 0 {
510 // following line must be blank
511 return 0
512 }
513
514 return i + skip
515}
516
517func (*parser) isEmpty(data []byte) int {
518 // it is okay to call isEmpty on an empty buffer
519 if len(data) == 0 {
520 return 0
521 }
522
523 var i int
524 for i = 0; i < len(data) && data[i] != '\n'; i++ {
525 if data[i] != ' ' && data[i] != '\t' {
526 return 0
527 }
528 }
529 return i + 1
530}
531
532func (*parser) isHRule(data []byte) bool {
533 i := 0
534
535 // skip up to three spaces
536 for i < 3 && data[i] == ' ' {
537 i++
538 }
539
540 // look at the hrule char
541 if data[i] != '*' && data[i] != '-' && data[i] != '_' {
542 return false
543 }
544 c := data[i]
545
546 // the whole line must be the char or whitespace
547 n := 0
548 for data[i] != '\n' {
549 switch {
550 case data[i] == c:
551 n++
552 case data[i] != ' ':
553 return false
554 }
555 i++
556 }
557
558 return n >= 3
559}
560
// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
// and returns the end index if so, or 0 otherwise. It also returns the marker found.
// If syntax is not nil, it gets set to the syntax specified in the fence line.
// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
//
// If oldmarker is non-empty, the line is only accepted when its marker is
// exactly equal to oldmarker (used to match a closing fence to its opener).
// On success the returned end index points just past the terminating
// newline, or is len(data) when the newline is absent and newlineOptional
// allowed that.
func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
	i, size := 0, 0

	// skip up to three spaces
	for i < len(data) && i < 3 && data[i] == ' ' {
		i++
	}

	// check for the marker characters: ~ or `
	if i >= len(data) {
		return 0, ""
	}
	if data[i] != '~' && data[i] != '`' {
		return 0, ""
	}

	c := data[i]

	// count the run of consecutive marker characters
	for i < len(data) && data[i] == c {
		size++
		i++
	}

	// the marker char must occur at least 3 times
	if size < 3 {
		return 0, ""
	}
	marker = string(data[i-size : i])

	// if this is the end marker, it must match the beginning marker
	if oldmarker != "" && marker != oldmarker {
		return 0, ""
	}

	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
	// into one, always get the syntax, and discard it if the caller doesn't care.
	if syntax != nil {
		// syn is the length of the syntax text starting at syntaxStart
		syn := 0
		i = skipChar(data, i, ' ')

		if i >= len(data) {
			if newlineOptional && i == len(data) {
				return i, marker
			}
			return 0, ""
		}

		syntaxStart := i

		if data[i] == '{' {
			// braced form: ``` {syntax}
			i++
			syntaxStart++

			for i < len(data) && data[i] != '}' && data[i] != '\n' {
				syn++
				i++
			}

			// the closing brace must be on the same line
			if i >= len(data) || data[i] != '}' {
				return 0, ""
			}

			// strip all whitespace at the beginning and the end
			// of the {} block
			for syn > 0 && isspace(data[syntaxStart]) {
				syntaxStart++
				syn--
			}

			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
				syn--
			}

			// step past the '}'
			i++
		} else {
			// bare form: the syntax runs up to the next whitespace
			for i < len(data) && !isspace(data[i]) {
				syn++
				i++
			}
		}

		*syntax = string(data[syntaxStart : syntaxStart+syn])
	}

	// only spaces may remain before the end of the line
	i = skipChar(data, i, ' ')
	if i >= len(data) || data[i] != '\n' {
		if newlineOptional && i == len(data) {
			return i, marker
		}
		return 0, ""
	}

	return i + 1, marker // Take newline into account.
}
660
661// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
662// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
663// If doRender is true, a final newline is mandatory to recognize the fenced code block.
664func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int {
665 var syntax string
666 beg, marker := isFenceLine(data, &syntax, "", false)
667 if beg == 0 || beg >= len(data) {
668 return 0
669 }
670
671 var work bytes.Buffer
672
673 for {
674 // safe to assume beg < len(data)
675
676 // check for the end of the code block
677 newlineOptional := !doRender
678 fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
679 if fenceEnd != 0 {
680 beg += fenceEnd
681 break
682 }
683
684 // copy the current line
685 end := skipUntilChar(data, beg, '\n') + 1
686
687 // did we reach the end of the buffer without a closing marker?
688 if end >= len(data) {
689 return 0
690 }
691
692 // verbatim copy to the working buffer
693 if doRender {
694 work.Write(data[beg:end])
695 }
696 beg = end
697 }
698
699 if doRender {
700 p.r.BlockCode(out, work.Bytes(), syntax)
701 }
702
703 return beg
704}
705
706func (p *parser) table(out *bytes.Buffer, data []byte) int {
707 var header bytes.Buffer
708 i, columns := p.tableHeader(&header, data)
709 if i == 0 {
710 return 0
711 }
712
713 var body bytes.Buffer
714
715 for i < len(data) {
716 pipes, rowStart := 0, i
717 for ; data[i] != '\n'; i++ {
718 if data[i] == '|' {
719 pipes++
720 }
721 }
722
723 if pipes == 0 {
724 i = rowStart
725 break
726 }
727
728 // include the newline in data sent to tableRow
729 i++
730 p.tableRow(&body, data[rowStart:i], columns, false)
731 }
732
733 p.r.Table(out, header.Bytes(), body.Bytes(), columns)
734
735 return i
736}
737
// isBackslashEscaped reports whether the byte at position i is escaped,
// i.e. whether it is immediately preceded by an odd number of backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	for pos := i - 1; pos >= 0 && data[pos] == '\\'; pos-- {
		count++
	}
	return count%2 == 1
}
746
// tableHeader checks whether data starts with a table header line followed
// by a valid underline line and, if so, renders the header row into out.
//
// On success size is the number of bytes occupied by both lines and columns
// holds one alignment flag set per column. On failure size is 0; callers
// must test size, because columns may already have been allocated when the
// underline turns out to be invalid.
func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
	i := 0
	// count unescaped pipes on the header line; n pipes separate n+1 columns
	colCount := 1
	for i = 0; data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	header := data[:i+1]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]int, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// an optional leading pipe and spaces precede the first column
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	i = skipChar(data, i, ' ')

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	for data[i] != '\n' {
		dashes := 0

		// a leading colon requests left alignment
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_LEFT
			dashes++
		}
		for data[i] == '-' {
			i++
			dashes++
		}
		// a trailing colon requests right alignment
		if data[i] == ':' {
			i++
			columns[col] |= TABLE_ALIGNMENT_RIGHT
			dashes++
		}
		for data[i] == ' ' {
			i++
		}

		// end of column test is messy
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline must describe exactly as many columns as the header
	if col != colCount {
		return
	}

	p.tableRow(out, header, columns, true)
	// i is at the underline's newline; consume it as well
	size = i + 1
	return
}
849
850func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
851 i, col := 0, 0
852 var rowWork bytes.Buffer
853
854 if data[i] == '|' && !isBackslashEscaped(data, i) {
855 i++
856 }
857
858 for col = 0; col < len(columns) && i < len(data); col++ {
859 for data[i] == ' ' {
860 i++
861 }
862
863 cellStart := i
864
865 for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
866 i++
867 }
868
869 cellEnd := i
870
871 // skip the end-of-cell marker, possibly taking us past end of buffer
872 i++
873
874 for cellEnd > cellStart && data[cellEnd-1] == ' ' {
875 cellEnd--
876 }
877
878 var cellWork bytes.Buffer
879 p.inline(&cellWork, data[cellStart:cellEnd])
880
881 if header {
882 p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
883 } else {
884 p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
885 }
886 }
887
888 // pad it out with empty columns to get the right number
889 for ; col < len(columns); col++ {
890 if header {
891 p.r.TableHeaderCell(&rowWork, nil, columns[col])
892 } else {
893 p.r.TableCell(&rowWork, nil, columns[col])
894 }
895 }
896
897 // silently ignore rows with too many cells
898
899 p.r.TableRow(out, rowWork.Bytes())
900}
901
902// returns blockquote prefix length
903func (p *parser) quotePrefix(data []byte) int {
904 i := 0
905 for i < 3 && data[i] == ' ' {
906 i++
907 }
908 if data[i] == '>' {
909 if data[i+1] == ' ' {
910 return i + 2
911 }
912 return i + 1
913 }
914 return 0
915}
916
917// blockquote ends with at least one blank line
918// followed by something without a blockquote prefix
919func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
920 if p.isEmpty(data[beg:]) <= 0 {
921 return false
922 }
923 if end >= len(data) {
924 return true
925 }
926 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
927}
928
// quote parses a blockquote fragment at the start of data, renders it to
// out, and returns the number of bytes consumed.
//
// Lines are collected with their quote prefix removed until
// terminateBlockquote says the quote is over. The collected text is then
// parsed recursively with p.block and wrapped by the renderer's BlockQuote.
func (p *parser) quote(out *bytes.Buffer, data []byte) int {
	var raw bytes.Buffer
	beg, end := 0, 0
	for beg < len(data) {
		end = beg
		// Step over whole lines, collecting them. While doing that, check for
		// fenced code and if one's found, incorporate it altogether,
		// regardless of any contents inside it.
		// fencedCodeBlock is called with doRender=false, i.e. only to
		// measure the block.
		for data[end] != '\n' {
			if p.flags&EXTENSION_FENCED_CODE != 0 {
				if i := p.fencedCodeBlock(out, data[end:], false); i > 0 {
					// -1 to compensate for the extra end++ after the loop:
					end += i - 1
					break
				}
			}
			end++
		}
		end++

		if pre := p.quotePrefix(data[beg:]); pre > 0 {
			// skip the prefix
			beg += pre
		} else if p.terminateBlockquote(data, beg, end) {
			break
		}

		// this line is part of the blockquote
		raw.Write(data[beg:end])
		beg = end
	}

	// parse the unprefixed contents as block-level markup of their own
	var cooked bytes.Buffer
	p.block(&cooked, raw.Bytes())
	p.r.BlockQuote(out, cooked.Bytes())
	return end
}
967
968// returns prefix length for block code
969func (p *parser) codePrefix(data []byte) int {
970 if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
971 return 4
972 }
973 return 0
974}
975
976func (p *parser) code(out *bytes.Buffer, data []byte) int {
977 var work bytes.Buffer
978
979 i := 0
980 for i < len(data) {
981 beg := i
982 for data[i] != '\n' {
983 i++
984 }
985 i++
986
987 blankline := p.isEmpty(data[beg:i]) > 0
988 if pre := p.codePrefix(data[beg:i]); pre > 0 {
989 beg += pre
990 } else if !blankline {
991 // non-empty, non-prefixed line breaks the pre
992 i = beg
993 break
994 }
995
996 // verbatim copy to the working buffeu
997 if blankline {
998 work.WriteByte('\n')
999 } else {
1000 work.Write(data[beg:i])
1001 }
1002 }
1003
1004 // trim all the \n off the end of work
1005 workbytes := work.Bytes()
1006 eol := len(workbytes)
1007 for eol > 0 && workbytes[eol-1] == '\n' {
1008 eol--
1009 }
1010 if eol != len(workbytes) {
1011 work.Truncate(eol)
1012 }
1013
1014 work.WriteByte('\n')
1015
1016 p.r.BlockCode(out, work.Bytes(), "")
1017
1018 return i
1019}
1020
1021// returns unordered list item prefix
1022func (p *parser) uliPrefix(data []byte) int {
1023 i := 0
1024
1025 // start with up to 3 spaces
1026 for i < 3 && data[i] == ' ' {
1027 i++
1028 }
1029
1030 // need a *, +, or - followed by a space
1031 if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1032 data[i+1] != ' ' {
1033 return 0
1034 }
1035 return i + 2
1036}
1037
1038// returns ordered list item prefix
1039func (p *parser) oliPrefix(data []byte) int {
1040 i := 0
1041
1042 // start with up to 3 spaces
1043 for i < 3 && data[i] == ' ' {
1044 i++
1045 }
1046
1047 // count the digits
1048 start := i
1049 for data[i] >= '0' && data[i] <= '9' {
1050 i++
1051 }
1052
1053 // we need >= 1 digits followed by a dot and a space
1054 if start == i || data[i] != '.' || data[i+1] != ' ' {
1055 return 0
1056 }
1057 return i + 2
1058}
1059
1060// returns definition list item prefix
1061func (p *parser) dliPrefix(data []byte) int {
1062 i := 0
1063
1064 // need a : followed by a spaces
1065 if data[i] != ':' || data[i+1] != ' ' {
1066 return 0
1067 }
1068 for data[i] == ' ' {
1069 i++
1070 }
1071 return i + 2
1072}
1073
1074// parse ordered or unordered list block
1075func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1076 i := 0
1077 flags |= LIST_ITEM_BEGINNING_OF_LIST
1078 work := func() bool {
1079 for i < len(data) {
1080 skip := p.listItem(out, data[i:], &flags)
1081 i += skip
1082
1083 if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1084 break
1085 }
1086 flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1087 }
1088 return true
1089 }
1090
1091 p.r.List(out, work, flags)
1092 return i
1093}
1094
// listItem parses a single list item at the start of data, renders it to
// out, and returns the number of bytes consumed (0 if data does not start
// with a list item).
// Assumes initial prefix is already removed if this is a sublist.
//
// flags is both input and output: it carries the list type in, and this
// function sets or clears LIST_TYPE_TERM, LIST_ITEM_CONTAINS_BLOCK and
// LIST_ITEM_END_OF_LIST on it as it learns more about the item.
func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
	// keep track of the indentation of the first line
	itemIndent := 0
	for itemIndent < 3 && data[itemIndent] == ' ' {
		itemIndent++
	}

	// try each kind of item prefix in turn
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	}
	if i == 0 {
		i = p.dliPrefix(data)
		// reset definition term flag
		if i > 0 {
			*flags &= ^LIST_TYPE_TERM
		}
	}
	if i == 0 {
		// if in definition list, set term flag and continue
		if *flags&LIST_TYPE_DEFINITION != 0 {
			*flags |= LIST_TYPE_TERM
		} else {
			return 0
		}
	}

	// skip leading whitespace on first line
	for data[i] == ' ' {
		i++
	}

	// find the end of the line
	// (when i is 0 this loop does not run; the first line is then picked up
	// by the gatherlines loop below instead)
	line := i
	for i > 0 && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	containsBlankLine := false
	// sublist is the offset in raw at which the first nested list item
	// starts, or 0 if no nested list has been seen
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			raw.Write(data[line:i])
			line = i
			continue
		}

		// calculate the indentation
		indent := 0
		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
			indent++
		}

		chunk := data[line+indent : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
			p.oliPrefix(chunk) > 0 ||
			p.dliPrefix(chunk) > 0:

			if containsBlankLine {
				// end the list if the type changed after a blank line
				if indent <= itemIndent &&
					((*flags&LIST_TYPE_ORDERED != 0 && p.uliPrefix(chunk) > 0) ||
						(*flags&LIST_TYPE_ORDERED == 0 && p.oliPrefix(chunk) > 0)) {

					*flags |= LIST_ITEM_END_OF_LIST
					break gatherlines
				}
				*flags |= LIST_ITEM_CONTAINS_BLOCK
			}

			// to be a nested list, it must be indented more
			// if not, it is the next item in the same list
			if indent <= itemIndent {
				break gatherlines
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
			}

		// is this a nested prefix header?
		case p.isPrefixHeader(chunk):
			// if the header is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= LIST_ITEM_END_OF_LIST
				break gatherlines
			}
			*flags |= LIST_ITEM_CONTAINS_BLOCK

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			if *flags&LIST_TYPE_DEFINITION != 0 && i < len(data)-1 {
				// is the next item still a part of this list?
				// next is advanced past the following line and any newlines
				// after it, to the first byte of whatever comes next
				next := i
				for data[next] != '\n' {
					next++
				}
				for next < len(data)-1 && data[next] == '\n' {
					next++
				}
				// the list ends unless one of those two positions holds a ':'
				if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
					*flags |= LIST_ITEM_END_OF_LIST
				}
			} else {
				*flags |= LIST_ITEM_END_OF_LIST
			}
			break gatherlines

		// a blank line means this should be parsed as a block
		case containsBlankLine:
			*flags |= LIST_ITEM_CONTAINS_BLOCK
		}

		containsBlankLine = false

		// add the line into the working buffer without prefix
		raw.Write(data[line+indent : i])

		line = i
	}

	// If reached end of data, the Renderer.ListItem call we're going to make below
	// is definitely the last in the list.
	if line >= len(data) {
		*flags |= LIST_ITEM_END_OF_LIST
	}

	rawBytes := raw.Bytes()

	// render the contents of the list item
	var cooked bytes.Buffer
	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 && *flags&LIST_TYPE_TERM == 0 {
		// intermediate render of block item, except for definition term
		if sublist > 0 {
			p.block(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.block(&cooked, rawBytes)
		}
	} else {
		// intermediate render of inline item
		// (a nested list, if any, is still parsed as block-level content)
		if sublist > 0 {
			p.inline(&cooked, rawBytes[:sublist])
			p.block(&cooked, rawBytes[sublist:])
		} else {
			p.inline(&cooked, rawBytes)
		}
	}

	// render the actual list item
	cookedBytes := cooked.Bytes()
	parsedEnd := len(cookedBytes)

	// strip trailing newlines
	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
		parsedEnd--
	}
	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)

	return line
}
1286
1287// render a single paragraph that has already been parsed out
1288func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1289 if len(data) == 0 {
1290 return
1291 }
1292
1293 // trim leading spaces
1294 beg := 0
1295 for data[beg] == ' ' {
1296 beg++
1297 }
1298
1299 // trim trailing newline
1300 end := len(data) - 1
1301
1302 // trim trailing spaces
1303 for end > beg && data[end-1] == ' ' {
1304 end--
1305 }
1306
1307 work := func() bool {
1308 p.inline(out, data[beg:end])
1309 return true
1310 }
1311 p.r.Paragraph(out, work)
1312}
1313
1314func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1315 // prev: index of 1st char of previous line
1316 // line: index of 1st char of current line
1317 // i: index of cursor/end of current line
1318 var prev, line, i int
1319
1320 // keep going until we find something to mark the end of the paragraph
1321 for i < len(data) {
1322 // mark the beginning of the current line
1323 prev = line
1324 current := data[i:]
1325 line = i
1326
1327 // did we find a blank line marking the end of the paragraph?
1328 if n := p.isEmpty(current); n > 0 {
1329 // did this blank line followed by a definition list item?
1330 if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
1331 if i < len(data)-1 && data[i+1] == ':' {
1332 return p.list(out, data[prev:], LIST_TYPE_DEFINITION)
1333 }
1334 }
1335
1336 p.renderParagraph(out, data[:i])
1337 return i + n
1338 }
1339
1340 // an underline under some text marks a header, so our paragraph ended on prev line
1341 if i > 0 {
1342 if level := p.isUnderlinedHeader(current); level > 0 {
1343 // render the paragraph
1344 p.renderParagraph(out, data[:prev])
1345
1346 // ignore leading and trailing whitespace
1347 eol := i - 1
1348 for prev < eol && data[prev] == ' ' {
1349 prev++
1350 }
1351 for eol > prev && data[eol-1] == ' ' {
1352 eol--
1353 }
1354
1355 // render the header
1356 // this ugly double closure avoids forcing variables onto the heap
1357 work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
1358 return func() bool {
1359 pp.inline(o, d)
1360 return true
1361 }
1362 }(out, p, data[prev:eol])
1363
1364 id := ""
1365 if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
1366 id = SanitizedAnchorName(string(data[prev:eol]))
1367 }
1368
1369 p.r.Header(out, work, level, id)
1370
1371 // find the end of the underline
1372 for data[i] != '\n' {
1373 i++
1374 }
1375 return i
1376 }
1377 }
1378
1379 // if the next line starts a block of HTML, then the paragraph ends here
1380 if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1381 if data[i] == '<' && p.html(out, current, false) > 0 {
1382 // rewind to before the HTML block
1383 p.renderParagraph(out, data[:i])
1384 return i
1385 }
1386 }
1387
1388 // if there's a prefixed header or a horizontal rule after this, paragraph is over
1389 if p.isPrefixHeader(current) || p.isHRule(current) {
1390 p.renderParagraph(out, data[:i])
1391 return i
1392 }
1393
1394 // if there's a fenced code block, paragraph is over
1395 if p.flags&EXTENSION_FENCED_CODE != 0 {
1396 if p.fencedCodeBlock(out, current, false) > 0 {
1397 p.renderParagraph(out, data[:i])
1398 return i
1399 }
1400 }
1401
1402 // if there's a definition list item, prev line is a definition term
1403 if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
1404 if p.dliPrefix(current) != 0 {
1405 return p.list(out, data[prev:], LIST_TYPE_DEFINITION)
1406 }
1407 }
1408
1409 // if there's a list after this, paragraph is over
1410 if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
1411 if p.uliPrefix(current) != 0 ||
1412 p.oliPrefix(current) != 0 ||
1413 p.quotePrefix(current) != 0 ||
1414 p.codePrefix(current) != 0 {
1415 p.renderParagraph(out, data[:i])
1416 return i
1417 }
1418 }
1419
1420 // otherwise, scan to the beginning of the next line
1421 for data[i] != '\n' {
1422 i++
1423 }
1424 i++
1425 }
1426
1427 p.renderParagraph(out, data[:i])
1428 return i
1429}
1430
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment.
//
// Letters and numbers are kept and lowercased. Every run of other
// characters between two kept characters becomes a single '-'. Such runs at
// the start or end of text produce nothing.
func SanitizedAnchorName(text string) string {
	var name []rune
	pendingDash := false
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			// remember the separator; emit it only if more text follows
			pendingDash = true
			continue
		}
		if pendingDash && len(name) > 0 {
			name = append(name, '-')
		}
		pendingDash = false
		name = append(name, unicode.ToLower(r))
	}
	return string(name)
}