block.go

   1package parser
   2
   3import (
   4	"bytes"
   5	"html"
   6	"regexp"
   7	"strconv"
   8	"unicode"
   9
  10	"github.com/gomarkdown/markdown/ast"
  11)
  12
  13// Parsing block-level elements.
  14
  15const (
  16	charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"
  17	escapable  = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]"
  18)
  19
  20var (
  21	reBackslashOrAmp      = regexp.MustCompile("[\\&]")
  22	reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity)
  23
  24	// blockTags is a set of tags that are recognized as HTML block tags.
  25	// Any of these can be included in markdown text without special escaping.
  26	blockTags = map[string]struct{}{
  27		"blockquote": struct{}{},
  28		"del":        struct{}{},
  29		"div":        struct{}{},
  30		"dl":         struct{}{},
  31		"fieldset":   struct{}{},
  32		"form":       struct{}{},
  33		"h1":         struct{}{},
  34		"h2":         struct{}{},
  35		"h3":         struct{}{},
  36		"h4":         struct{}{},
  37		"h5":         struct{}{},
  38		"h6":         struct{}{},
  39		"iframe":     struct{}{},
  40		"ins":        struct{}{},
  41		"math":       struct{}{},
  42		"noscript":   struct{}{},
  43		"ol":         struct{}{},
  44		"pre":        struct{}{},
  45		"p":          struct{}{},
  46		"script":     struct{}{},
  47		"style":      struct{}{},
  48		"table":      struct{}{},
  49		"ul":         struct{}{},
  50
  51		// HTML5
  52		"address":    struct{}{},
  53		"article":    struct{}{},
  54		"aside":      struct{}{},
  55		"canvas":     struct{}{},
  56		"figcaption": struct{}{},
  57		"figure":     struct{}{},
  58		"footer":     struct{}{},
  59		"header":     struct{}{},
  60		"hgroup":     struct{}{},
  61		"main":       struct{}{},
  62		"nav":        struct{}{},
  63		"output":     struct{}{},
  64		"progress":   struct{}{},
  65		"section":    struct{}{},
  66		"video":      struct{}{},
  67	}
  68)
  69
  70// sanitizeAnchorName returns a sanitized anchor name for the given text.
  71// Taken from https://github.com/shurcooL/sanitized_anchor_name/blob/master/main.go#L14:1
  72func sanitizeAnchorName(text string) string {
  73	var anchorName []rune
  74	var futureDash = false
  75	for _, r := range text {
  76		switch {
  77		case unicode.IsLetter(r) || unicode.IsNumber(r):
  78			if futureDash && len(anchorName) > 0 {
  79				anchorName = append(anchorName, '-')
  80			}
  81			futureDash = false
  82			anchorName = append(anchorName, unicode.ToLower(r))
  83		default:
  84			futureDash = true
  85		}
  86	}
  87	return string(anchorName)
  88}
  89
// block parses block-level data. It repeatedly tries each block construct in
// a fixed order against the start of data, lets the first one that matches
// add its node(s), and advances data past the bytes that construct consumed.
// Anything that matches no construct is handed to p.paragraph.
//
// The function is recursive (includes, parser hooks, and the container
// blocks call back into it); p.nesting tracks the depth and input is silently
// dropped once p.maxNesting is reached.
//
// Note: this function and many that it calls assume that
// the input buffer ends with a newline.
func (p *Parser) block(data []byte) {
	// this is called recursively: enforce a maximum depth
	if p.nesting >= p.maxNesting {
		return
	}
	p.nesting++

	// parse out one block-level construct at a time
	for len(data) > 0 {
		// attributes that can be specified before a block element:
		//
		// {#id .class1 .class2 key="value"}
		if p.extensions&Attributes != 0 {
			data = p.attribute(data)
		}

		// includes: try a plain include first, then a code include; the
		// included bytes are parsed recursively with the path pushed on
		// the include stack for the duration of that parse.
		if p.extensions&Includes != 0 {
			f := p.readInclude
			path, address, consumed := p.isInclude(data)
			if consumed == 0 {
				path, address, consumed = p.isCodeInclude(data)
				f = p.readCodeInclude
			}
			if consumed > 0 {
				included := f(p.includeStack.Last(), path, address)
				p.includeStack.Push(path)
				p.block(included)
				p.includeStack.Pop()
				data = data[consumed:]
				continue
			}
		}

		// user supplied parser function
		if p.Opts.ParserHook != nil {
			node, blockdata, consumed := p.Opts.ParserHook(data)
			if consumed > 0 {
				data = data[consumed:]

				if node != nil {
					p.addBlock(node)
					// blockdata, when returned, is parsed as the
					// node's children before the node is finalized
					if blockdata != nil {
						p.block(blockdata)
						p.finalize(node)
					}
				}
				continue
			}
		}

		// prefixed heading:
		//
		// # Heading 1
		// ## Heading 2
		// ...
		// ###### Heading 6
		if p.isPrefixHeading(data) {
			data = data[p.prefixHeading(data):]
			continue
		}

		// prefixed special heading:
		// (there are no levels.)
		//
		// .# Abstract
		if p.isPrefixSpecialHeading(data) {
			data = data[p.prefixSpecialHeading(data):]
			continue
		}

		// block of preformatted HTML:
		//
		// <div>
		//     ...
		// </div>
		if data[0] == '<' {
			if i := p.html(data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// title block
		//
		// % stuff
		// % more stuff
		// % even more stuff
		if p.extensions&Titleblock != 0 {
			if data[0] == '%' {
				if i := p.titleBlock(data, true); i > 0 {
					data = data[i:]
					continue
				}
			}
		}

		// blank lines.  note: returns the # of bytes to skip
		if i := p.isEmpty(data); i > 0 {
			data = data[i:]
			continue
		}

		// indented code block:
		//
		//     func max(a, b int) int {
		//         if a > b {
		//             return a
		//         }
		//         return b
		//      }
		if p.codePrefix(data) > 0 {
			data = data[p.code(data):]
			continue
		}

		// fenced code block:
		//
		// ``` go
		// func fact(n int) int {
		//     if n <= 1 {
		//         return n
		//     }
		//     return n * fact(n-1)
		// }
		// ```
		if p.extensions&FencedCode != 0 {
			if i := p.fencedCodeBlock(data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// horizontal rule:
		//
		// ------
		// or
		// ******
		// or
		// ______
		if p.isHRule(data) {
			p.addBlock(&ast.HorizontalRule{})
			// drop the rest of the rule line; the newline itself is
			// left for the blank-line check on the next iteration
			i := skipUntilChar(data, 0, '\n')
			data = data[i:]
			continue
		}

		// block quote:
		//
		// > A big quote I found somewhere
		// > on the web
		if p.quotePrefix(data) > 0 {
			data = data[p.quote(data):]
			continue
		}

		// aside:
		//
		// A> The proof is too large to fit
		// A> in the margin.
		if p.extensions&Mmark != 0 {
			if p.asidePrefix(data) > 0 {
				data = data[p.aside(data):]
				continue
			}
		}

		// figure block:
		//
		// !---
		// ![Alt Text](img.jpg "This is an image")
		// ![Alt Text](img2.jpg "This is a second image")
		// !---
		if p.extensions&Mmark != 0 {
			if i := p.figureBlock(data, true); i > 0 {
				data = data[i:]
				continue
			}
		}

		// table:
		//
		// Name  | Age | Phone
		// ------|-----|---------
		// Bob   | 31  | 555-1234
		// Alice | 27  | 555-4321
		if p.extensions&Tables != 0 {
			if i := p.table(data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// an itemized/unordered list:
		//
		// * Item 1
		// * Item 2
		//
		// also works with + or -
		if p.uliPrefix(data) > 0 {
			data = data[p.list(data, 0, 0):]
			continue
		}

		// a numbered/ordered list:
		//
		// 1. Item 1
		// 2. Item 2
		if i := p.oliPrefix(data); i > 0 {
			start := 0
			if i > 2 && p.extensions&OrderedListStart != 0 {
				// data[:i-2] is taken to be the number in front of the
				// list marker; a conversion error leaves start at 0
				s := string(data[:i-2])
				start, _ = strconv.Atoi(s)
				// a list starting at 1 is passed as 0, the same value
				// used when no explicit start is requested
				if start == 1 {
					start = 0
				}
			}
			data = data[p.list(data, ast.ListTypeOrdered, start):]
			continue
		}

		// definition lists:
		//
		// Term 1
		// :   Definition a
		// :   Definition b
		//
		// Term 2
		// :   Definition c
		if p.extensions&DefinitionLists != 0 {
			if p.dliPrefix(data) > 0 {
				data = data[p.list(data, ast.ListTypeDefinition, 0):]
				continue
			}
		}

		// block-level math, recognized by p.blockMath (MathJax extension)
		if p.extensions&MathJax != 0 {
			if i := p.blockMath(data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// document matters:
		//
		// {frontmatter}/{mainmatter}/{backmatter}
		if p.extensions&Mmark != 0 {
			if i := p.documentMatter(data); i > 0 {
				data = data[i:]
				continue
			}
		}

		// anything else must look like a normal paragraph
		// note: this finds underlined headings, too
		idx := p.paragraph(data)
		data = data[idx:]
	}

	p.nesting--
}
 353
 354func (p *Parser) addBlock(n ast.Node) ast.Node {
 355	p.closeUnmatchedBlocks()
 356
 357	if p.attr != nil {
 358		if c := n.AsContainer(); c != nil {
 359			c.Attribute = p.attr
 360		}
 361		if l := n.AsLeaf(); l != nil {
 362			l.Attribute = p.attr
 363		}
 364		p.attr = nil
 365	}
 366	return p.addChild(n)
 367}
 368
 369func (p *Parser) isPrefixHeading(data []byte) bool {
 370	if data[0] != '#' {
 371		return false
 372	}
 373
 374	if p.extensions&SpaceHeadings != 0 {
 375		level := skipCharN(data, 0, '#', 6)
 376		if level == len(data) || data[level] != ' ' {
 377			return false
 378		}
 379	}
 380	return true
 381}
 382
// prefixHeading parses a '#'-prefixed heading at the start of data, adds an
// ast.Heading block when the heading has non-empty text, and returns the
// number of bytes the caller should skip.
//
// With the HeadingIDs extension a trailing "{#id}" is extracted as the
// heading ID; otherwise, with AutoHeadingIDs, the ID is derived from the
// heading text via sanitizeAnchorName.
func (p *Parser) prefixHeading(data []byte) int {
	// level: number of leading '#' (skipCharN is asked for at most 6)
	level := skipCharN(data, 0, '#', 6)
	// i: start of the heading text, after the spaces following the '#'s
	i := skipChar(data, level, ' ')
	// end: end of the heading text (initially the end of the line)
	end := skipUntilChar(data, i, '\n')
	// skip: value returned to the caller
	skip := end
	id := ""
	if p.extensions&HeadingIDs != 0 {
		j, k := 0, 0
		// find start/end of heading id
		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
		}
		for k = j + 1; k < end && data[k] != '}'; k++ {
		}
		// extract heading id iff found
		if j < end && k < end {
			id = string(data[j+2 : k])
			// the heading text now stops where "{#" begins, and
			// parsing resumes right after the closing '}'
			end = j
			skip = k + 1
			for end > 0 && data[end-1] == ' ' {
				end--
			}
		}
	}
	// strip the optional closing run of '#', stopping at an escaped one
	for end > 0 && data[end-1] == '#' {
		if isBackslashEscaped(data, end-1) {
			break
		}
		end--
	}
	// strip spaces left in front of the closing '#'s
	for end > 0 && data[end-1] == ' ' {
		end--
	}
	// only emit a heading when some text remains
	if end > i {
		if id == "" && p.extensions&AutoHeadingIDs != 0 {
			id = sanitizeAnchorName(string(data[i:end]))
		}
		block := &ast.Heading{
			HeadingID: id,
			Level:     level,
		}
		block.Content = data[i:end]
		p.addBlock(block)
	}
	return skip
}
 428
 429func (p *Parser) isPrefixSpecialHeading(data []byte) bool {
 430	if p.extensions|Mmark == 0 {
 431		return false
 432	}
 433	if len(data) < 4 {
 434		return false
 435	}
 436	if data[0] != '.' {
 437		return false
 438	}
 439	if data[1] != '#' {
 440		return false
 441	}
 442	if data[2] == '#' { // we don't support level, so nack this.
 443		return false
 444	}
 445
 446	if p.extensions&SpaceHeadings != 0 {
 447		if data[2] != ' ' {
 448			return false
 449		}
 450	}
 451	return true
 452}
 453
// prefixSpecialHeading parses a special heading (".# Title") at the start of
// data, adds an ast.Heading marked IsSpecial (always level 1) when the
// heading has non-empty text, and returns the number of bytes the caller
// should skip.
//
// With the HeadingIDs extension a trailing "{#id}" is extracted as the
// heading ID; otherwise, with AutoHeadingIDs, the ID is derived from the
// heading text via sanitizeAnchorName.
func (p *Parser) prefixSpecialHeading(data []byte) int {
	// i: start of the heading text
	i := skipChar(data, 2, ' ') // ".#" skipped
	// end: end of the heading text (initially the end of the line)
	end := skipUntilChar(data, i, '\n')
	// skip: value returned to the caller
	skip := end
	id := ""
	if p.extensions&HeadingIDs != 0 {
		j, k := 0, 0
		// find start/end of heading id
		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
		}
		for k = j + 1; k < end && data[k] != '}'; k++ {
		}
		// extract heading id iff found
		if j < end && k < end {
			id = string(data[j+2 : k])
			// the heading text now stops where "{#" begins, and
			// parsing resumes right after the closing '}'
			end = j
			skip = k + 1
			for end > 0 && data[end-1] == ' ' {
				end--
			}
		}
	}
	// strip the optional closing run of '#', stopping at an escaped one
	for end > 0 && data[end-1] == '#' {
		if isBackslashEscaped(data, end-1) {
			break
		}
		end--
	}
	// strip spaces left in front of the closing '#'s
	for end > 0 && data[end-1] == ' ' {
		end--
	}
	// only emit a heading when some text remains
	if end > i {
		if id == "" && p.extensions&AutoHeadingIDs != 0 {
			id = sanitizeAnchorName(string(data[i:end]))
		}
		block := &ast.Heading{
			HeadingID: id,
			IsSpecial: true,
			Level:     1, // always level 1.
		}
		// the same text is stored both as Literal and as Content
		block.Literal = data[i:end]
		block.Content = data[i:end]
		p.addBlock(block)
	}
	return skip
}
 500
 501func (p *Parser) isUnderlinedHeading(data []byte) int {
 502	// test of level 1 heading
 503	if data[0] == '=' {
 504		i := skipChar(data, 1, '=')
 505		i = skipChar(data, i, ' ')
 506		if i < len(data) && data[i] == '\n' {
 507			return 1
 508		}
 509		return 0
 510	}
 511
 512	// test of level 2 heading
 513	if data[0] == '-' {
 514		i := skipChar(data, 1, '-')
 515		i = skipChar(data, i, ' ')
 516		if i < len(data) && data[i] == '\n' {
 517			return 2
 518		}
 519		return 0
 520	}
 521
 522	return 0
 523}
 524
 525func (p *Parser) titleBlock(data []byte, doRender bool) int {
 526	if data[0] != '%' {
 527		return 0
 528	}
 529	splitData := bytes.Split(data, []byte("\n"))
 530	var i int
 531	for idx, b := range splitData {
 532		if !bytes.HasPrefix(b, []byte("%")) {
 533			i = idx // - 1
 534			break
 535		}
 536	}
 537
 538	data = bytes.Join(splitData[0:i], []byte("\n"))
 539	consumed := len(data)
 540	data = bytes.TrimPrefix(data, []byte("% "))
 541	data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1)
 542	block := &ast.Heading{
 543		Level:        1,
 544		IsTitleblock: true,
 545	}
 546	block.Content = data
 547	p.addBlock(block)
 548
 549	return consumed
 550}
 551
// html tries to parse a block of raw HTML at the start of data. It returns
// the number of bytes that belong to the block, or 0 if data does not start
// with a recognized HTML block. When doRender is true an ast.HTMLBlock holding
// the block (without trailing newlines) is added.
//
// If the opening tag is not one of blockTags, only the two special cases are
// tried: an HTML comment and an <hr> tag. Otherwise the block ends at the
// closing tag accepted by htmlFindEnd. "ins" and "del" blocks are currently
// never matched, because the only active search pass excludes them.
func (p *Parser) html(data []byte, doRender bool) int {
	var i, j int

	// identify the opening tag
	if data[0] != '<' {
		return 0
	}
	curtag, tagfound := p.htmlFindTag(data[1:])

	// handle special cases
	if !tagfound {
		// check for an HTML comment
		if size := p.htmlComment(data, doRender); size > 0 {
			return size
		}

		// check for an <hr> tag
		if size := p.htmlHr(data, doRender); size > 0 {
			return size
		}

		// no special case recognized
		return 0
	}

	// look for an unindented matching closing tag
	// followed by a blank line
	// (this first pass is disabled: the code is commented out, so found
	// is always false when the second pass below is reached)
	found := false
	/*
		closetag := []byte("\n</" + curtag + ">")
		j = len(curtag) + 1
		for !found {
			// scan for a closing tag at the beginning of a line
			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
				j += skip + len(closetag)
			} else {
				break
			}

			// see if it is the only thing on the line
			if skip := p.isEmpty(data[j:]); skip > 0 {
				// see if it is followed by a blank line/eof
				j += skip
				if j >= len(data) {
					found = true
					i = j
				} else {
					if skip := p.isEmpty(data[j:]); skip > 0 {
						j += skip
						found = true
						i = j
					}
				}
			}
		}
	*/

	// if not found, try a second pass looking for indented match
	// but not if tag is "ins" or "del" (following original Markdown.pl)
	if !found && curtag != "ins" && curtag != "del" {
		i = 1
		for i < len(data) {
			// advance until data[i-1:i+1] is "</"
			i++
			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
				i++
			}

			// not enough room left for "</curtag>"
			if i+2+len(curtag) >= len(data) {
				break
			}

			// j is the length of the closing construct starting at
			// data[i-1], or 0 if this "</" does not end the block
			j = p.htmlFindEnd(curtag, data[i-1:])

			if j > 0 {
				i += j - 1
				found = true
				break
			}
		}
	}

	if !found {
		return 0
	}

	// the end of the block has been found
	if doRender {
		// trim newlines
		end := backChar(data, i, '\n')
		htmlBLock := &ast.HTMLBlock{ast.Leaf{Content: data[:end]}}
		p.addBlock(htmlBLock)
		finalizeHTMLBlock(htmlBLock)
	}

	return i
}
 648
 649func finalizeHTMLBlock(block *ast.HTMLBlock) {
 650	block.Literal = block.Content
 651	block.Content = nil
 652}
 653
 654// HTML comment, lax form
 655func (p *Parser) htmlComment(data []byte, doRender bool) int {
 656	i := p.inlineHTMLComment(data)
 657	// needs to end with a blank line
 658	if j := p.isEmpty(data[i:]); j > 0 {
 659		size := i + j
 660		if doRender {
 661			// trim trailing newlines
 662			end := backChar(data, size, '\n')
 663			htmlBLock := &ast.HTMLBlock{ast.Leaf{Content: data[:end]}}
 664			p.addBlock(htmlBLock)
 665			finalizeHTMLBlock(htmlBLock)
 666		}
 667		return size
 668	}
 669	return 0
 670}
 671
 672// HR, which is the only self-closing block tag considered
 673func (p *Parser) htmlHr(data []byte, doRender bool) int {
 674	if len(data) < 4 {
 675		return 0
 676	}
 677	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 678		return 0
 679	}
 680	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 681		// not an <hr> tag after all; at least not a valid one
 682		return 0
 683	}
 684	i := 3
 685	for i < len(data) && data[i] != '>' && data[i] != '\n' {
 686		i++
 687	}
 688	if i < len(data) && data[i] == '>' {
 689		i++
 690		if j := p.isEmpty(data[i:]); j > 0 {
 691			size := i + j
 692			if doRender {
 693				// trim newlines
 694				end := backChar(data, size, '\n')
 695				htmlBlock := &ast.HTMLBlock{ast.Leaf{Content: data[:end]}}
 696				p.addBlock(htmlBlock)
 697				finalizeHTMLBlock(htmlBlock)
 698			}
 699			return size
 700		}
 701	}
 702	return 0
 703}
 704
 705func (p *Parser) htmlFindTag(data []byte) (string, bool) {
 706	i := skipAlnum(data, 0)
 707	key := string(data[:i])
 708	if _, ok := blockTags[key]; ok {
 709		return key, true
 710	}
 711	return "", false
 712}
 713
 714func (p *Parser) htmlFindEnd(tag string, data []byte) int {
 715	// assume data[0] == '<' && data[1] == '/' already tested
 716	if tag == "hr" {
 717		return 2
 718	}
 719	// check if tag is a match
 720	closetag := []byte("</" + tag + ">")
 721	if !bytes.HasPrefix(data, closetag) {
 722		return 0
 723	}
 724	i := len(closetag)
 725
 726	// check that the rest of the line is blank
 727	skip := 0
 728	if skip = p.isEmpty(data[i:]); skip == 0 {
 729		return 0
 730	}
 731	i += skip
 732	skip = 0
 733
 734	if i >= len(data) {
 735		return i
 736	}
 737
 738	if p.extensions&LaxHTMLBlocks != 0 {
 739		return i
 740	}
 741	if skip = p.isEmpty(data[i:]); skip == 0 {
 742		// following line must be blank
 743		return 0
 744	}
 745
 746	return i + skip
 747}
 748
 749func (*Parser) isEmpty(data []byte) int {
 750	// it is okay to call isEmpty on an empty buffer
 751	if len(data) == 0 {
 752		return 0
 753	}
 754
 755	var i int
 756	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 757		if data[i] != ' ' && data[i] != '\t' {
 758			return 0
 759		}
 760	}
 761	i = skipCharN(data, i, '\n', 1)
 762	return i
 763}
 764
 765func (*Parser) isHRule(data []byte) bool {
 766	i := 0
 767
 768	// skip up to three spaces
 769	for i < 3 && data[i] == ' ' {
 770		i++
 771	}
 772
 773	// look at the hrule char
 774	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 775		return false
 776	}
 777	c := data[i]
 778
 779	// the whole line must be the char or whitespace
 780	n := 0
 781	for i < len(data) && data[i] != '\n' {
 782		switch {
 783		case data[i] == c:
 784			n++
 785		case data[i] != ' ':
 786			return false
 787		}
 788		i++
 789	}
 790
 791	return n >= 3
 792}
 793
// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
// and returns the end index if so, or 0 otherwise. It also returns the marker found.
// If syntax is not nil, it gets set to the syntax specified in the fence line.
//
// A fence line is up to three spaces, then a run of at least three '~' or
// '`' characters, then (only when syntax is non-nil) an optional info string,
// either a bare word or a {...} group, then optional spaces and a newline or
// the end of the buffer. When oldmarker is non-empty the marker found must be
// identical to it (same character and same length); this is how a closing
// fence is matched against its opening fence.
func isFenceLine(data []byte, syntax *string, oldmarker string) (end int, marker string) {
	i, size := 0, 0

	n := len(data)
	// skip up to three spaces
	for i < n && i < 3 && data[i] == ' ' {
		i++
	}

	// check for the marker characters: ~ or `
	if i >= n {
		return 0, ""
	}
	if data[i] != '~' && data[i] != '`' {
		return 0, ""
	}

	c := data[i]

	// count the run of marker characters
	for i < n && data[i] == c {
		size++
		i++
	}

	// the marker char must occur at least 3 times
	if size < 3 {
		return 0, ""
	}
	marker = string(data[i-size : i])

	// if this is the end marker, it must match the beginning marker
	if oldmarker != "" && marker != oldmarker {
		return 0, ""
	}

	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
	// into one, always get the syntax, and discard it if the caller doesn't care.
	if syntax != nil {
		// syn is the length of the syntax string starting at syntaxStart
		syn := 0
		i = skipChar(data, i, ' ')

		// the buffer ends right after the marker: still a fence line,
		// and *syntax is left untouched
		if i >= n {
			if i == n {
				return i, marker
			}
			return 0, ""
		}

		syntaxStart := i

		if data[i] == '{' {
			// {...} form: the syntax is everything between the braces
			i++
			syntaxStart++

			for i < n && data[i] != '}' && data[i] != '\n' {
				syn++
				i++
			}

			// the closing brace must be on the same line
			if i >= n || data[i] != '}' {
				return 0, ""
			}

			// strip all whitespace at the beginning and the end
			// of the {} block
			for syn > 0 && isSpace(data[syntaxStart]) {
				syntaxStart++
				syn--
			}

			for syn > 0 && isSpace(data[syntaxStart+syn-1]) {
				syn--
			}

			// step over the closing '}'
			i++
		} else {
			// bare form: the syntax runs up to the next whitespace
			for i < n && !isSpace(data[i]) {
				syn++
				i++
			}
		}

		*syntax = string(data[syntaxStart : syntaxStart+syn])
	}

	// only spaces may follow, up to the newline or the end of the buffer
	i = skipChar(data, i, ' ')
	if i >= n || data[i] != '\n' {
		if i == n {
			return i, marker
		}
		return 0, ""
	}
	return i + 1, marker // Take newline into account.
}
 892
// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
// or 0 otherwise. When doRender is true it adds an ast.CodeBlock (wrapped in an
// ast.CaptionFigure if the Mmark extension is on and a "Figure: " caption follows);
// otherwise it adds no nodes.
//
// A block is only recognized when a closing fence with the same marker as the
// opening fence is found; running out of input first yields 0.
func (p *Parser) fencedCodeBlock(data []byte, doRender bool) int {
	var syntax string
	beg, marker := isFenceLine(data, &syntax, "")
	// no opening fence, or nothing follows the opening fence
	if beg == 0 || beg >= len(data) {
		return 0
	}

	// work holds the syntax on its first line, followed by the code lines;
	// finalizeCodeBlock later splits it at the first newline
	var work bytes.Buffer
	work.WriteString(syntax)
	work.WriteByte('\n')

	for {
		// safe to assume beg < len(data)

		// check for the end of the code block
		fenceEnd, _ := isFenceLine(data[beg:], nil, marker)
		if fenceEnd != 0 {
			beg += fenceEnd
			break
		}

		// copy the current line
		end := skipUntilChar(data, beg, '\n') + 1

		// did we reach the end of the buffer without a closing marker?
		if end >= len(data) {
			return 0
		}

		// verbatim copy to the working buffer
		if doRender {
			work.Write(data[beg:end])
		}
		beg = end
	}

	if doRender {
		codeBlock := &ast.CodeBlock{
			IsFenced: true,
		}
		codeBlock.Content = work.Bytes() // TODO: get rid of temp buffer

		// without Mmark there are no captions: plain code block
		if p.extensions&Mmark == 0 {
			p.addBlock(codeBlock)
			finalizeCodeBlock(codeBlock)
			return beg
		}

		// Check for caption and if found make it a figure.
		if captionContent, id, consumed := p.caption(data[beg:], []byte("Figure: ")); consumed > 0 {
			figure := &ast.CaptionFigure{}
			caption := &ast.Caption{}
			figure.HeadingID = id
			p.Inline(caption, captionContent)

			// addBlock hands any pending block attribute to the
			// figure; copy it onto the code block as well
			p.addBlock(figure)
			codeBlock.AsLeaf().Attribute = figure.AsContainer().Attribute
			p.addChild(codeBlock)
			finalizeCodeBlock(codeBlock)
			p.addChild(caption)
			p.finalize(figure)

			// the caption is part of what was consumed
			beg += consumed

			return beg
		}

		// Still here, normal block
		p.addBlock(codeBlock)
		finalizeCodeBlock(codeBlock)
	}

	return beg
}
 970
 971func unescapeChar(str []byte) []byte {
 972	if str[0] == '\\' {
 973		return []byte{str[1]}
 974	}
 975	return []byte(html.UnescapeString(string(str)))
 976}
 977
 978func unescapeString(str []byte) []byte {
 979	if reBackslashOrAmp.Match(str) {
 980		return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar)
 981	}
 982	return str
 983}
 984
 985func finalizeCodeBlock(code *ast.CodeBlock) {
 986	c := code.Content
 987	if code.IsFenced {
 988		newlinePos := bytes.IndexByte(c, '\n')
 989		firstLine := c[:newlinePos]
 990		rest := c[newlinePos+1:]
 991		code.Info = unescapeString(bytes.Trim(firstLine, "\n"))
 992		code.Literal = rest
 993	} else {
 994		code.Literal = c
 995	}
 996	code.Content = nil
 997}
 998
// table parses a table at the start of data and returns the number of bytes
// consumed, or 0 if data does not start with a table header (as decided by
// tableHeader, which also adds the Table and TableHeader nodes).
//
// Body rows are read until the first line that contains no '|'. A row
// accepted by tableFooter starts the footer instead of becoming a data row.
// If a "Table: " caption follows, the table is re-parented into an
// ast.CaptionFigure together with the caption.
func (p *Parser) table(data []byte) int {
	i, columns, table := p.tableHeader(data)
	if i == 0 {
		return 0
	}

	p.addBlock(&ast.TableBody{})

	for i < len(data) {
		// scan one line, counting pipes
		// NOTE(review): escaped pipes are counted here too
		pipes, rowStart := 0, i
		for ; i < len(data) && data[i] != '\n'; i++ {
			if data[i] == '|' {
				pipes++
			}
		}

		// a line without any pipe ends the table; it is not consumed
		if pipes == 0 {
			i = rowStart
			break
		}

		// include the newline in data sent to tableRow
		i = skipCharN(data, i, '\n', 1)

		// a footer separator line adds a TableFooter and is not a row itself
		if p.tableFooter(data[rowStart:i]) {
			continue
		}

		p.tableRow(data[rowStart:i], columns, false)
	}
	if captionContent, id, consumed := p.caption(data[i:], []byte("Table: ")); consumed > 0 {
		caption := &ast.Caption{}
		p.Inline(caption, captionContent)

		// Some switcheroo to re-insert the parsed table as a child of the captionfigure.
		figure := &ast.CaptionFigure{}
		figure.HeadingID = id
		table2 := &ast.Table{}
		// Retain any block level attributes.
		table2.AsContainer().Attribute = table.AsContainer().Attribute
		children := table.GetChildren()
		ast.RemoveFromTree(table)

		table2.SetChildren(children)
		ast.AppendChild(figure, table2)
		ast.AppendChild(figure, caption)

		p.addChild(figure)
		p.finalize(figure)

		// the caption is part of what was consumed
		i += consumed
	}

	return i
}
1054
// isBackslashEscaped reports whether the byte at index i is escaped, that is,
// whether it is directly preceded by an odd number of backslashes.
func isBackslashEscaped(data []byte, i int) bool {
	count := 0
	for pos := i - 1; pos >= 0 && data[pos] == '\\'; pos-- {
		count++
	}
	return count%2 == 1
}
1063
// tableHeader parses the header. If recognized it will also add a table.
//
// A header is two lines: a row of column titles containing at least one
// unescaped '|', and an underline row that gives, for each column, a run of
// dashes with optional ':' alignment markers. On success it adds the Table
// and TableHeader nodes plus the header row, and returns the number of bytes
// consumed, the per-column alignment flags and the Table node. On failure it
// adds nothing and size is 0 (columns may already be allocated).
func (p *Parser) tableHeader(data []byte) (size int, columns []ast.CellAlignFlags, table ast.Node) {
	// count the columns on the first line: one more than the number of
	// unescaped pipes
	i := 0
	colCount := 1
	for i = 0; i < len(data) && data[i] != '\n'; i++ {
		if data[i] == '|' && !isBackslashEscaped(data, i) {
			colCount++
		}
	}

	// doesn't look like a table header
	if colCount == 1 {
		return
	}

	// include the newline in the data sent to tableRow
	j := skipCharN(data, i, '\n', 1)
	header := data[:j]

	// column count ignores pipes at beginning or end of line
	if data[0] == '|' {
		colCount--
	}
	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
		colCount--
	}

	columns = make([]ast.CellAlignFlags, colCount)

	// move on to the header underline
	i++
	if i >= len(data) {
		return
	}

	// optional leading pipe and spaces on the underline row
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}
	i = skipChar(data, i, ' ')

	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
	// and trailing | optional on last column
	col := 0
	n := len(data)
	for i < n && data[i] != '\n' {
		// dashes counts the dashes and the colons of this column
		dashes := 0

		// leading ':' requests left alignment
		if data[i] == ':' {
			i++
			columns[col] |= ast.TableAlignmentLeft
			dashes++
		}
		for i < n && data[i] == '-' {
			i++
			dashes++
		}
		// trailing ':' requests right alignment (both flags may be set)
		if i < n && data[i] == ':' {
			i++
			columns[col] |= ast.TableAlignmentRight
			dashes++
		}
		for i < n && data[i] == ' ' {
			i++
		}
		// ran off the buffer before the underline row ended
		if i == n {
			return
		}
		// end of column test is messy
		switch {
		case dashes < 3:
			// not a valid column
			return

		case data[i] == '|' && !isBackslashEscaped(data, i):
			// marker found, now skip past trailing whitespace
			col++
			i++
			for i < n && data[i] == ' ' {
				i++
			}

			// trailing junk found after last column
			if col >= colCount && i < len(data) && data[i] != '\n' {
				return
			}

		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
			// something else found where marker was required
			return

		case data[i] == '\n':
			// marker is optional for the last column
			col++

		default:
			// trailing junk found after last column
			return
		}
	}
	// the underline row must describe exactly as many columns as the title row
	if col != colCount {
		return
	}

	table = &ast.Table{}
	p.addBlock(table)
	p.addBlock(&ast.TableHeader{})
	p.tableRow(header, columns, true)
	size = skipCharN(data, i, '\n', 1)
	return
}
1174
// tableRow adds an ast.TableRow followed by one ast.TableCell per entry in
// columns. data is a single row including its newline and must be non-empty.
// Cells are separated by unescaped '|'; surrounding spaces are trimmed. If
// the row has fewer cells than columns, empty cells are added; extra cells
// are ignored. header marks the cells as header cells.
func (p *Parser) tableRow(data []byte, columns []ast.CellAlignFlags, header bool) {
	p.addBlock(&ast.TableRow{})
	i, col := 0, 0

	// optional leading pipe
	if data[i] == '|' && !isBackslashEscaped(data, i) {
		i++
	}

	n := len(data)
	for col = 0; col < len(columns) && i < n; col++ {
		// skip spaces in front of the cell content
		for i < n && data[i] == ' ' {
			i++
		}

		cellStart := i

		// the cell runs up to the next unescaped pipe or the newline
		for i < n && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
			i++
		}

		cellEnd := i

		// skip the end-of-cell marker, possibly taking us past end of buffer
		i++

		// trim trailing spaces from the cell content
		for cellEnd > cellStart && cellEnd-1 < n && data[cellEnd-1] == ' ' {
			cellEnd--
		}

		block := &ast.TableCell{
			IsHeader: header,
			Align:    columns[col],
		}
		block.Content = data[cellStart:cellEnd]
		p.addBlock(block)
	}

	// pad it out with empty columns to get the right number
	for ; col < len(columns); col++ {
		block := &ast.TableCell{
			IsHeader: header,
			Align:    columns[col],
		}
		p.addBlock(block)
	}

	// silently ignore rows with too many cells
}
1223
1224// tableFooter parses the (optional) table footer.
1225func (p *Parser) tableFooter(data []byte) bool {
1226	colCount := 1
1227	for i := 0; i < len(data) && data[i] != '\n'; i++ {
1228		if data[i] == '|' && !isBackslashEscaped(data, i) {
1229			colCount++
1230			continue
1231		}
1232		// remaining data must be the = character
1233		if data[i] != '=' {
1234			return false
1235		}
1236	}
1237
1238	// doesn't look like a table footer
1239	if colCount == 1 {
1240		return false
1241	}
1242
1243	p.addBlock(&ast.TableFooter{})
1244
1245	return true
1246}
1247
1248// returns blockquote prefix length
1249func (p *Parser) quotePrefix(data []byte) int {
1250	i := 0
1251	n := len(data)
1252	for i < 3 && i < n && data[i] == ' ' {
1253		i++
1254	}
1255	if i < n && data[i] == '>' {
1256		if i+1 < n && data[i+1] == ' ' {
1257			return i + 2
1258		}
1259		return i + 1
1260	}
1261	return 0
1262}
1263
1264// blockquote ends with at least one blank line
1265// followed by something without a blockquote prefix
1266func (p *Parser) terminateBlockquote(data []byte, beg, end int) bool {
1267	if p.isEmpty(data[beg:]) <= 0 {
1268		return false
1269	}
1270	if end >= len(data) {
1271		return true
1272	}
1273	return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
1274}
1275
// quote parses a blockquote fragment at the start of data and returns the
// number of bytes consumed.
//
// The quoted lines are collected with their '>' prefix removed and parsed
// recursively as the children of an ast.BlockQuote. Lines without a prefix
// still belong to the quote unless terminateBlockquote says the quote ends
// there. With the Mmark extension, a following "Quote: " caption wraps the
// quote in an ast.CaptionFigure.
func (p *Parser) quote(data []byte) int {
	// raw accumulates the quote's content with the prefixes stripped
	var raw bytes.Buffer
	beg, end := 0, 0
	for beg < len(data) {
		end = beg
		// Step over whole lines, collecting them. While doing that, check for
		// fenced code and if one's found, incorporate it altogether,
		// regardless of any contents inside it
		for end < len(data) && data[end] != '\n' {
			if p.extensions&FencedCode != 0 {
				if i := p.fencedCodeBlock(data[end:], false); i > 0 {
					// -1 to compensate for the extra end++ after the loop:
					end += i - 1
					break
				}
			}
			end++
		}
		end = skipCharN(data, end, '\n', 1)
		if pre := p.quotePrefix(data[beg:]); pre > 0 {
			// skip the prefix
			beg += pre
		} else if p.terminateBlockquote(data, beg, end) {
			break
		}
		// this line is part of the blockquote
		raw.Write(data[beg:end])
		beg = end
	}

	// without Mmark there are no captions: plain blockquote
	if p.extensions&Mmark == 0 {
		block := p.addBlock(&ast.BlockQuote{})
		p.block(raw.Bytes())
		p.finalize(block)
		return end
	}

	if captionContent, id, consumed := p.caption(data[end:], []byte("Quote: ")); consumed > 0 {
		figure := &ast.CaptionFigure{}
		caption := &ast.Caption{}
		figure.HeadingID = id
		p.Inline(caption, captionContent)

		// addBlock hands any pending block attribute to the figure and
		// clears it; copy it onto the blockquote as well
		p.addBlock(figure)
		block := &ast.BlockQuote{}
		block.AsContainer().Attribute = figure.AsContainer().Attribute
		p.addChild(block)
		p.block(raw.Bytes())
		p.finalize(block)

		p.addChild(caption)
		p.finalize(figure)

		// the caption is part of what was consumed
		end += consumed

		return end
	}

	// Mmark is on but no caption follows: plain blockquote
	block := p.addBlock(&ast.BlockQuote{})
	p.block(raw.Bytes())
	p.finalize(block)

	return end
}
1341
1342// returns prefix length for block code
1343func (p *Parser) codePrefix(data []byte) int {
1344	n := len(data)
1345	if n >= 1 && data[0] == '\t' {
1346		return 1
1347	}
1348	if n >= 4 && data[3] == ' ' && data[2] == ' ' && data[1] == ' ' && data[0] == ' ' {
1349		return 4
1350	}
1351	return 0
1352}
1353
1354func (p *Parser) code(data []byte) int {
1355	var work bytes.Buffer
1356
1357	i := 0
1358	for i < len(data) {
1359		beg := i
1360
1361		i = skipUntilChar(data, i, '\n')
1362		i = skipCharN(data, i, '\n', 1)
1363
1364		blankline := p.isEmpty(data[beg:i]) > 0
1365		if pre := p.codePrefix(data[beg:i]); pre > 0 {
1366			beg += pre
1367		} else if !blankline {
1368			// non-empty, non-prefixed line breaks the pre
1369			i = beg
1370			break
1371		}
1372
1373		// verbatim copy to the working buffer
1374		if blankline {
1375			work.WriteByte('\n')
1376		} else {
1377			work.Write(data[beg:i])
1378		}
1379	}
1380
1381	// trim all the \n off the end of work
1382	workbytes := work.Bytes()
1383
1384	eol := backChar(workbytes, len(workbytes), '\n')
1385
1386	if eol != len(workbytes) {
1387		work.Truncate(eol)
1388	}
1389
1390	work.WriteByte('\n')
1391
1392	codeBlock := &ast.CodeBlock{
1393		IsFenced: false,
1394	}
1395	// TODO: get rid of temp buffer
1396	codeBlock.Content = work.Bytes()
1397	p.addBlock(codeBlock)
1398	finalizeCodeBlock(codeBlock)
1399
1400	return i
1401}
1402
1403// returns unordered list item prefix
1404func (p *Parser) uliPrefix(data []byte) int {
1405	// start with up to 3 spaces
1406	i := skipCharN(data, 0, ' ', 3)
1407
1408	if i >= len(data)-1 {
1409		return 0
1410	}
1411	// need one of {'*', '+', '-'} followed by a space or a tab
1412	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1413		(data[i+1] != ' ' && data[i+1] != '\t') {
1414		return 0
1415	}
1416	return i + 2
1417}
1418
1419// returns ordered list item prefix
1420func (p *Parser) oliPrefix(data []byte) int {
1421	// start with up to 3 spaces
1422	i := skipCharN(data, 0, ' ', 3)
1423
1424	// count the digits
1425	start := i
1426	for i < len(data) && data[i] >= '0' && data[i] <= '9' {
1427		i++
1428	}
1429	if start == i || i >= len(data)-1 {
1430		return 0
1431	}
1432
1433	// we need >= 1 digits followed by a dot and a space or a tab
1434	if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') {
1435		return 0
1436	}
1437	return i + 2
1438}
1439
1440// returns definition list item prefix
1441func (p *Parser) dliPrefix(data []byte) int {
1442	if len(data) < 2 {
1443		return 0
1444	}
1445	// need a ':' followed by a space or a tab
1446	if data[0] != ':' || !(data[1] == ' ' || data[1] == '\t') {
1447		return 0
1448	}
1449	i := skipChar(data, 0, ' ')
1450	return i + 2
1451}
1452
1453// parse ordered or unordered list block
1454func (p *Parser) list(data []byte, flags ast.ListType, start int) int {
1455	i := 0
1456	flags |= ast.ListItemBeginningOfList
1457	list := &ast.List{
1458		ListFlags: flags,
1459		Tight:     true,
1460		Start:     start,
1461	}
1462	block := p.addBlock(list)
1463
1464	for i < len(data) {
1465		skip := p.listItem(data[i:], &flags)
1466		if flags&ast.ListItemContainsBlock != 0 {
1467			list.Tight = false
1468		}
1469		i += skip
1470		if skip == 0 || flags&ast.ListItemEndOfList != 0 {
1471			break
1472		}
1473		flags &= ^ast.ListItemBeginningOfList
1474	}
1475
1476	above := block.GetParent()
1477	finalizeList(list)
1478	p.tip = above
1479	return i
1480}
1481
1482// Returns true if the list item is not the same type as its parent list
1483func (p *Parser) listTypeChanged(data []byte, flags *ast.ListType) bool {
1484	if p.dliPrefix(data) > 0 && *flags&ast.ListTypeDefinition == 0 {
1485		return true
1486	} else if p.oliPrefix(data) > 0 && *flags&ast.ListTypeOrdered == 0 {
1487		return true
1488	} else if p.uliPrefix(data) > 0 && (*flags&ast.ListTypeOrdered != 0 || *flags&ast.ListTypeDefinition != 0) {
1489		return true
1490	}
1491	return false
1492}
1493
1494// Returns true if block ends with a blank line, descending if needed
1495// into lists and sublists.
1496func endsWithBlankLine(block ast.Node) bool {
1497	// TODO: figure this out. Always false now.
1498	for block != nil {
1499		//if block.lastLineBlank {
1500		//return true
1501		//}
1502		switch block.(type) {
1503		case *ast.List, *ast.ListItem:
1504			block = ast.GetLastChild(block)
1505		default:
1506			return false
1507		}
1508	}
1509	return false
1510}
1511
1512func finalizeList(list *ast.List) {
1513	items := list.Parent.GetChildren()
1514	lastItemIdx := len(items) - 1
1515	for i, item := range items {
1516		isLastItem := i == lastItemIdx
1517		// check for non-final list item ending with blank line:
1518		if !isLastItem && endsWithBlankLine(item) {
1519			list.Tight = false
1520			break
1521		}
1522		// recurse into children of list item, to see if there are spaces
1523		// between any of them:
1524		subItems := item.GetParent().GetChildren()
1525		lastSubItemIdx := len(subItems) - 1
1526		for j, subItem := range subItems {
1527			isLastSubItem := j == lastSubItemIdx
1528			if (!isLastItem || !isLastSubItem) && endsWithBlankLine(subItem) {
1529				list.Tight = false
1530				break
1531			}
1532		}
1533	}
1534}
1535
// listItem parses a single list item at the start of data, adds it to the
// tree as an ast.ListItem and returns the number of bytes consumed.
// Assumes initial prefix is already removed if this is a sublist.
//
// data must not be empty: data[0] is read unconditionally.
//
// flags is both input and output. On input it describes the enclosing list.
// On output the function may have set or cleared ast.ListTypeTerm (definition
// lists), and may have set ast.ListItemContainsBlock and/or
// ast.ListItemEndOfList.
//
// It returns 0, without adding anything, when data does not start with a list
// item marker and the enclosing list is not a definition list.
func (p *Parser) listItem(data []byte, flags *ast.ListType) int {
	// keep track of the indentation of the first line
	// (a leading tab counts as 4, otherwise up to 3 leading spaces)
	itemIndent := 0
	if data[0] == '\t' {
		itemIndent += 4
	} else {
		for itemIndent < 3 && data[itemIndent] == ' ' {
			itemIndent++
		}
	}

	// Find the item marker: try unordered, then ordered, then definition.
	// i ends up as the offset just past the marker, or 0 if none matched.
	var bulletChar byte = '*'
	i := p.uliPrefix(data)
	if i == 0 {
		i = p.oliPrefix(data)
	} else {
		// uliPrefix returns the offset past "<bullet><space>", so the bullet
		// itself sits two bytes back
		bulletChar = data[i-2]
	}
	if i == 0 {
		i = p.dliPrefix(data)
		// reset definition term flag
		if i > 0 {
			*flags &= ^ast.ListTypeTerm
		}
	}
	if i == 0 {
		// if in definition list, set term flag and continue
		if *flags&ast.ListTypeDefinition != 0 {
			*flags |= ast.ListTypeTerm
		} else {
			return 0
		}
	}

	// skip leading whitespace on first line
	i = skipChar(data, i, ' ')

	// find the end of the line
	// (when i is still 0 here, i.e. an unindented definition term without a
	// marker, this loop does not run; the first line is then picked up by the
	// gatherlines loop below)
	line := i
	for i > 0 && i < len(data) && data[i-1] != '\n' {
		i++
	}

	// get working buffer
	var raw bytes.Buffer

	// put the first line into the working buffer
	raw.Write(data[line:i])
	line = i

	// process the following lines
	// containsBlankLine: one or more blank lines were seen since the last
	// line that was copied into raw.
	// sublist: offset in raw where nested list content starts, 0 if none.
	containsBlankLine := false
	sublist := 0

gatherlines:
	for line < len(data) {
		i++

		// find the end of this line
		for i < len(data) && data[i-1] != '\n' {
			i++
		}

		// if it is an empty line, guess that it is part of this item
		// and move on to the next line
		if p.isEmpty(data[line:i]) > 0 {
			containsBlankLine = true
			line = i
			continue
		}

		// calculate the indentation
		// indent is the visual width (a tab counts as 4, spaces are capped at
		// 4); indentIndex is the number of bytes that indentation occupies
		indent := 0
		indentIndex := 0
		if data[line] == '\t' {
			indentIndex++
			indent += 4
		} else {
			for indent < 4 && line+indent < i && data[line+indent] == ' ' {
				indent++
				indentIndex++
			}
		}

		// the current line with its indentation removed
		chunk := data[line+indentIndex : i]

		// evaluate how this line fits in
		switch {
		// is this a nested list item?
		// (an unordered marker that also forms a horizontal rule does not count)
		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) || p.oliPrefix(chunk) > 0 || p.dliPrefix(chunk) > 0:

			// to be a nested list, it must be indented more
			// if not, it is either a different kind of list
			// or the next item in the same list
			if indent <= itemIndent {
				if p.listTypeChanged(chunk, flags) {
					*flags |= ast.ListItemEndOfList
				} else if containsBlankLine {
					*flags |= ast.ListItemContainsBlock
				}

				break gatherlines
			}

			if containsBlankLine {
				*flags |= ast.ListItemContainsBlock
			}

			// is this the first item in the nested list?
			if sublist == 0 {
				sublist = raw.Len()
				// in the case of dliPrefix we are too late and need to search back for the definition item, which
				// should be on the previous line, we then adjust sublist to start there.
				if p.dliPrefix(chunk) > 0 {
					sublist = backUntilChar(raw.Bytes(), raw.Len()-1, '\n')
				}
			}

			// is this a nested prefix heading?
		case p.isPrefixHeading(chunk), p.isPrefixSpecialHeading(chunk):
			// if the heading is not indented, it is not nested in the list
			// and thus ends the list
			if containsBlankLine && indent < 4 {
				*flags |= ast.ListItemEndOfList
				break gatherlines
			}
			*flags |= ast.ListItemContainsBlock

		// anything following an empty line is only part
		// of this item if it is indented 4 spaces
		// (regardless of the indentation of the beginning of the item)
		case containsBlankLine && indent < 4:
			if *flags&ast.ListTypeDefinition != 0 && i < len(data)-1 {
				// is the next item still a part of this list?
				// next: advance to the end of the line that starts at i, then
				// over the newline(s) that follow it
				next := i
				for next < len(data) && data[next] != '\n' {
					next++
				}
				for next < len(data)-1 && data[next] == '\n' {
					next++
				}
				// neither the line at i nor the one at next starts with ':',
				// so the definition list ends here
				if i < len(data)-1 && data[i] != ':' && next < len(data)-1 && data[next] != ':' {
					*flags |= ast.ListItemEndOfList
				}
			} else {
				*flags |= ast.ListItemEndOfList
			}
			break gatherlines

		// a blank line means this should be parsed as a block
		// (this newline is in addition to the one re-introduced below)
		case containsBlankLine:
			raw.WriteByte('\n')
			*flags |= ast.ListItemContainsBlock
		}

		// if this line was preceded by one or more blanks,
		// re-introduce the blank into the buffer
		if containsBlankLine {
			containsBlankLine = false
			raw.WriteByte('\n')
		}

		// add the line into the working buffer without prefix
		raw.Write(data[line+indentIndex : i])

		line = i
	}

	rawBytes := raw.Bytes()

	listItem := &ast.ListItem{
		ListFlags:  *flags,
		Tight:      false,
		BulletChar: bulletChar,
		Delimiter:  '.', // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark
	}
	p.addBlock(listItem)

	// render the contents of the list item
	if *flags&ast.ListItemContainsBlock != 0 && *flags&ast.ListTypeTerm == 0 {
		// intermediate render of block item, except for definition term
		// (the item's own text and the nested list are parsed separately)
		if sublist > 0 {
			p.block(rawBytes[:sublist])
			p.block(rawBytes[sublist:])
		} else {
			p.block(rawBytes)
		}
	} else {
		// intermediate render of inline item
		// (the item's text becomes one paragraph; a nested list, if any, is
		// parsed as blocks after it)
		para := &ast.Paragraph{}
		if sublist > 0 {
			para.Content = rawBytes[:sublist]
		} else {
			para.Content = rawBytes
		}
		p.addChild(para)
		if sublist > 0 {
			p.block(rawBytes[sublist:])
		}
	}
	// line is the offset of the first line that does not belong to this item
	return line
}
1740
1741// render a single paragraph that has already been parsed out
1742func (p *Parser) renderParagraph(data []byte) {
1743	if len(data) == 0 {
1744		return
1745	}
1746
1747	// trim leading spaces
1748	beg := skipChar(data, 0, ' ')
1749
1750	end := len(data)
1751	// trim trailing newline
1752	if data[len(data)-1] == '\n' {
1753		end--
1754	}
1755
1756	// trim trailing spaces
1757	for end > beg && data[end-1] == ' ' {
1758		end--
1759	}
1760	para := &ast.Paragraph{}
1761	para.Content = data[beg:end]
1762	p.addBlock(para)
1763}
1764
1765// blockMath handle block surround with $$
1766func (p *Parser) blockMath(data []byte) int {
1767	if len(data) <= 4 || data[0] != '$' || data[1] != '$' || data[2] == '$' {
1768		return 0
1769	}
1770
1771	// find next $$
1772	var end int
1773	for end = 2; end+1 < len(data) && (data[end] != '$' || data[end+1] != '$'); end++ {
1774	}
1775
1776	// $$ not match
1777	if end+1 == len(data) {
1778		return 0
1779	}
1780
1781	// render the display math
1782	mathBlock := &ast.MathBlock{}
1783	mathBlock.Literal = data[2:end]
1784	p.addBlock(mathBlock)
1785
1786	return end + 2
1787}
1788
// paragraph parses a paragraph at the start of data and returns the number of
// bytes consumed.
//
// The input is scanned line by line until something ends the paragraph: a
// reference or footnote definition, a blank line, a heading underline, or
// (depending on the enabled extensions) the start of an HTML block, a
// prefixed heading, a horizontal rule, a fenced code block, a figure block, a
// definition list item, or another list/quote/code prefix. Except on the
// definition list paths, the text gathered so far is added with
// renderParagraph. An underlined heading additionally adds an ast.Heading
// built from the line above the underline.
func (p *Parser) paragraph(data []byte) int {
	// prev: index of 1st char of previous line
	// line: index of 1st char of current line
	// i: index of cursor/end of current line
	var prev, line, i int
	tabSize := tabSizeDefault
	if p.extensions&TabSizeEight != 0 {
		tabSize = tabSizeDouble
	}
	// keep going until we find something to mark the end of the paragraph
	for i < len(data) {
		// mark the beginning of the current line
		prev = line
		current := data[i:]
		line = i

		// did we find a reference or a footnote? If so, end a paragraph
		// preceding it and report that we have consumed up to the end of that
		// reference:
		if refEnd := isReference(p, current, tabSize); refEnd > 0 {
			p.renderParagraph(data[:i])
			return i + refEnd
		}

		// did we find a blank line marking the end of the paragraph?
		if n := p.isEmpty(current); n > 0 {
			// is this blank line followed by a definition list item?
			// (only data[i+1] is inspected, i.e. the byte right after the
			// one at i)
			if p.extensions&DefinitionLists != 0 {
				if i < len(data)-1 && data[i+1] == ':' {
					// NOTE(review): the list is parsed starting at the
					// previous line (the term); data[:prev] is not rendered
					// on this path - confirm that this is intended.
					listLen := p.list(data[prev:], ast.ListTypeDefinition, 0)
					return prev + listLen
				}
			}

			p.renderParagraph(data[:i])
			return i + n
		}

		// an underline under some text marks a heading, so our paragraph ended on prev line
		if i > 0 {
			if level := p.isUnderlinedHeading(current); level > 0 {
				// render the paragraph
				p.renderParagraph(data[:prev])

				// ignore leading and trailing whitespace
				// (eol starts at the newline that ends the heading text)
				eol := i - 1
				for prev < eol && data[prev] == ' ' {
					prev++
				}
				for eol > prev && data[eol-1] == ' ' {
					eol--
				}

				id := ""
				if p.extensions&AutoHeadingIDs != 0 {
					id = sanitizeAnchorName(string(data[prev:eol]))
				}

				block := &ast.Heading{
					Level:     level,
					HeadingID: id,
				}
				block.Content = data[prev:eol]
				p.addBlock(block)

				// find the end of the underline
				// (the offset returned is that of the newline ending the
				// underline, or len(data) if there is none)
				return skipUntilChar(data, i, '\n')
			}
		}

		// if the next line starts a block of HTML, then the paragraph ends here
		if p.extensions&LaxHTMLBlocks != 0 {
			if data[i] == '<' && p.html(current, false) > 0 {
				// rewind to before the HTML block
				p.renderParagraph(data[:i])
				return i
			}
		}

		// if there's a prefixed heading or a horizontal rule after this, paragraph is over
		if p.isPrefixHeading(current) || p.isPrefixSpecialHeading(current) || p.isHRule(current) {
			p.renderParagraph(data[:i])
			return i
		}

		// if there's a fenced code block, paragraph is over
		if p.extensions&FencedCode != 0 {
			if p.fencedCodeBlock(current, false) > 0 {
				p.renderParagraph(data[:i])
				return i
			}
		}

		// if there's a figure block, paragraph is over
		if p.extensions&Mmark != 0 {
			if p.figureBlock(current, false) > 0 {
				p.renderParagraph(data[:i])
				return i
			}
		}

		// if there's a definition list item, prev line is a definition term
		if p.extensions&DefinitionLists != 0 {
			if p.dliPrefix(current) != 0 {
				// NOTE(review): as above, data[:prev] is not rendered on this
				// path - confirm that this is intended.
				ret := p.list(data[prev:], ast.ListTypeDefinition, 0)
				return ret + prev
			}
		}

		// if there's a list, a blockquote or an indented code block after
		// this, paragraph is over
		if p.extensions&NoEmptyLineBeforeBlock != 0 {
			if p.uliPrefix(current) != 0 ||
				p.oliPrefix(current) != 0 ||
				p.quotePrefix(current) != 0 ||
				p.codePrefix(current) != 0 {
				p.renderParagraph(data[:i])
				return i
			}
		}

		// otherwise, scan to the beginning of the next line
		nl := bytes.IndexByte(data[i:], '\n')
		if nl >= 0 {
			i += nl + 1
		} else {
			i += len(data[i:])
		}
	}

	// ran out of input: everything belongs to the paragraph
	p.renderParagraph(data[:i])
	return i
}
1921
// skipChar returns the index of the first byte at or after i that differs
// from c, or i itself when i is already at or past the end of data.
func skipChar(data []byte, i int, c byte) int {
	for ; i < len(data); i++ {
		if data[i] != c {
			break
		}
	}
	return i
}
1930
// skipCharN works like skipChar but steps over at most max occurrences of c.
// With max <= 0 it returns i unchanged.
func skipCharN(data []byte, i int, c byte, max int) int {
	for left := max; left > 0 && i < len(data) && data[i] == c; left-- {
		i++
	}
	return i
}
1940
// skipUntilChar returns the index of the first occurrence of c at or after i,
// or len(data) when there is none. When i is already at or past the end of
// data, i is returned unchanged.
func skipUntilChar(data []byte, i int, c byte) int {
	for ; i < len(data); i++ {
		if data[i] == c {
			break
		}
	}
	return i
}
1949
1950func skipAlnum(data []byte, i int) int {
1951	n := len(data)
1952	for i < n && isAlnum(data[i]) {
1953		i++
1954	}
1955	return i
1956}
1957
1958func skipSpace(data []byte, i int) int {
1959	n := len(data)
1960	for i < n && isSpace(data[i]) {
1961		i++
1962	}
1963	return i
1964}
1965
// backChar moves i backwards for as long as the byte just before it equals c
// and returns the resulting index. It never goes below 0.
func backChar(data []byte, i int, c byte) int {
	for ; i > 0; i-- {
		if data[i-1] != c {
			break
		}
	}
	return i
}
1972
// backUntilChar moves i backwards until the byte just before it equals c and
// returns the resulting index, i.e. the position right after the last c found
// before i, or 0 when there is none.
func backUntilChar(data []byte, i int, c byte) int {
	for ; i > 0; i-- {
		if data[i-1] == c {
			break
		}
	}
	return i
}