block.go

   1//
   2// Blackfriday Markdown Processor
   3// Available at http://github.com/russross/blackfriday
   4//
   5// Copyright © 2011 Russ Ross <russ@russross.com>.
   6// Distributed under the Simplified BSD License.
   7// See README.md for details.
   8//
   9
  10//
  11// Functions to parse block-level elements.
  12//
  13
  14package blackfriday
  15
  16import (
  17	"bytes"
  18	"unicode"
  19)
  20
  21// Parse block-level data.
  22// Note: this function and many that it calls assume that
  23// the input buffer ends with a newline.
  24func (p *parser) block(out *bytes.Buffer, data []byte) {
  25	if len(data) == 0 || data[len(data)-1] != '\n' {
  26		panic("block input is missing terminating newline")
  27	}
  28
  29	// this is called recursively: enforce a maximum depth
  30	if p.nesting >= p.maxNesting {
  31		return
  32	}
  33	p.nesting++
  34
  35	// parse out one block-level construct at a time
  36	for len(data) > 0 {
  37		// prefixed header:
  38		//
  39		// # Header 1
  40		// ## Header 2
  41		// ...
  42		// ###### Header 6
  43		if p.isPrefixHeader(data) {
  44			data = data[p.prefixHeader(out, data):]
  45			continue
  46		}
  47
  48		// block of preformatted HTML:
  49		//
  50		// <div>
  51		//     ...
  52		// </div>
  53		if data[0] == '<' {
  54			if i := p.html(out, data, true); i > 0 {
  55				data = data[i:]
  56				continue
  57			}
  58		}
  59
  60		// title block
  61		//
  62		// % stuff
  63		// % more stuff
  64		// % even more stuff
  65		if p.flags&EXTENSION_TITLEBLOCK != 0 {
  66			if data[0] == '%' {
  67				if i := p.titleBlock(out, data, true); i > 0 {
  68					data = data[i:]
  69					continue
  70				}
  71			}
  72		}
  73
  74		// blank lines.  note: returns the # of bytes to skip
  75		if i := p.isEmpty(data); i > 0 {
  76			data = data[i:]
  77			continue
  78		}
  79
  80		// indented code block:
  81		//
  82		//     func max(a, b int) int {
  83		//         if a > b {
  84		//             return a
  85		//         }
  86		//         return b
  87		//      }
  88		if p.codePrefix(data) > 0 {
  89			data = data[p.code(out, data):]
  90			continue
  91		}
  92
  93		// fenced code block:
  94		//
  95		// ``` go
  96		// func fact(n int) int {
  97		//     if n <= 1 {
  98		//         return n
  99		//     }
 100		//     return n * fact(n-1)
 101		// }
 102		// ```
 103		if p.flags&EXTENSION_FENCED_CODE != 0 {
 104			if i := p.fencedCodeBlock(out, data, true); i > 0 {
 105				data = data[i:]
 106				continue
 107			}
 108		}
 109
 110		// horizontal rule:
 111		//
 112		// ------
 113		// or
 114		// ******
 115		// or
 116		// ______
 117		if p.isHRule(data) {
 118			p.r.HRule(out)
 119			var i int
 120			for i = 0; data[i] != '\n'; i++ {
 121			}
 122			data = data[i:]
 123			continue
 124		}
 125
 126		// block quote:
 127		//
 128		// > A big quote I found somewhere
 129		// > on the web
 130		if p.quotePrefix(data) > 0 {
 131			data = data[p.quote(out, data):]
 132			continue
 133		}
 134
 135		// table:
 136		//
 137		// Name  | Age | Phone
 138		// ------|-----|---------
 139		// Bob   | 31  | 555-1234
 140		// Alice | 27  | 555-4321
 141		if p.flags&EXTENSION_TABLES != 0 {
 142			if i := p.table(out, data); i > 0 {
 143				data = data[i:]
 144				continue
 145			}
 146		}
 147
 148		// an itemized/unordered list:
 149		//
 150		// * Item 1
 151		// * Item 2
 152		//
 153		// also works with + or -
 154		if p.uliPrefix(data) > 0 {
 155			data = data[p.list(out, data, 0):]
 156			continue
 157		}
 158
 159		// a numbered/ordered list:
 160		//
 161		// 1. Item 1
 162		// 2. Item 2
 163		if p.oliPrefix(data) > 0 {
 164			data = data[p.list(out, data, LIST_TYPE_ORDERED):]
 165			continue
 166		}
 167
 168		// definition lists:
 169		//
 170		// Term 1
 171		// :   Definition a
 172		// :   Definition b
 173		//
 174		// Term 2
 175		// :   Definition c
 176		if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
 177			if p.dliPrefix(data) > 0 {
 178				data = data[p.list(out, data, LIST_TYPE_DEFINITION):]
 179				continue
 180			}
 181		}
 182
 183		// anything else must look like a normal paragraph
 184		// note: this finds underlined headers, too
 185		data = data[p.paragraph(out, data):]
 186	}
 187
 188	p.nesting--
 189}
 190
 191func (p *parser) isPrefixHeader(data []byte) bool {
 192	if data[0] != '#' {
 193		return false
 194	}
 195
 196	if p.flags&EXTENSION_SPACE_HEADERS != 0 {
 197		level := 0
 198		for level < 6 && data[level] == '#' {
 199			level++
 200		}
 201		if data[level] != ' ' {
 202			return false
 203		}
 204	}
 205	return true
 206}
 207
 208func (p *parser) prefixHeader(out *bytes.Buffer, data []byte) int {
 209	level := 0
 210	for level < 6 && data[level] == '#' {
 211		level++
 212	}
 213	i := skipChar(data, level, ' ')
 214	end := skipUntilChar(data, i, '\n')
 215	skip := end
 216	id := ""
 217	if p.flags&EXTENSION_HEADER_IDS != 0 {
 218		j, k := 0, 0
 219		// find start/end of header id
 220		for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ {
 221		}
 222		for k = j + 1; k < end && data[k] != '}'; k++ {
 223		}
 224		// extract header id iff found
 225		if j < end && k < end {
 226			id = string(data[j+2 : k])
 227			end = j
 228			skip = k + 1
 229			for end > 0 && data[end-1] == ' ' {
 230				end--
 231			}
 232		}
 233	}
 234	for end > 0 && data[end-1] == '#' {
 235		if isBackslashEscaped(data, end-1) {
 236			break
 237		}
 238		end--
 239	}
 240	for end > 0 && data[end-1] == ' ' {
 241		end--
 242	}
 243	if end > i {
 244		if id == "" && p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
 245			id = SanitizedAnchorName(string(data[i:end]))
 246		}
 247		work := func() bool {
 248			p.inline(out, data[i:end])
 249			return true
 250		}
 251		p.r.Header(out, work, level, id)
 252	}
 253	return skip
 254}
 255
 256func (p *parser) isUnderlinedHeader(data []byte) int {
 257	// test of level 1 header
 258	if data[0] == '=' {
 259		i := skipChar(data, 1, '=')
 260		i = skipChar(data, i, ' ')
 261		if data[i] == '\n' {
 262			return 1
 263		} else {
 264			return 0
 265		}
 266	}
 267
 268	// test of level 2 header
 269	if data[0] == '-' {
 270		i := skipChar(data, 1, '-')
 271		i = skipChar(data, i, ' ')
 272		if data[i] == '\n' {
 273			return 2
 274		} else {
 275			return 0
 276		}
 277	}
 278
 279	return 0
 280}
 281
 282func (p *parser) titleBlock(out *bytes.Buffer, data []byte, doRender bool) int {
 283	if data[0] != '%' {
 284		return 0
 285	}
 286	splitData := bytes.Split(data, []byte("\n"))
 287	var i int
 288	for idx, b := range splitData {
 289		if !bytes.HasPrefix(b, []byte("%")) {
 290			i = idx // - 1
 291			break
 292		}
 293	}
 294
 295	data = bytes.Join(splitData[0:i], []byte("\n"))
 296	p.r.TitleBlock(out, data)
 297
 298	return len(data)
 299}
 300
 301func (p *parser) html(out *bytes.Buffer, data []byte, doRender bool) int {
 302	var i, j int
 303
 304	// identify the opening tag
 305	if data[0] != '<' {
 306		return 0
 307	}
 308	curtag, tagfound := p.htmlFindTag(data[1:])
 309
 310	// handle special cases
 311	if !tagfound {
 312		// check for an HTML comment
 313		if size := p.htmlComment(out, data, doRender); size > 0 {
 314			return size
 315		}
 316
 317		// check for an <hr> tag
 318		if size := p.htmlHr(out, data, doRender); size > 0 {
 319			return size
 320		}
 321
 322		// check for HTML CDATA
 323		if size := p.htmlCDATA(out, data, doRender); size > 0 {
 324			return size
 325		}
 326
 327		// no special case recognized
 328		return 0
 329	}
 330
 331	// look for an unindented matching closing tag
 332	// followed by a blank line
 333	found := false
 334	/*
 335		closetag := []byte("\n</" + curtag + ">")
 336		j = len(curtag) + 1
 337		for !found {
 338			// scan for a closing tag at the beginning of a line
 339			if skip := bytes.Index(data[j:], closetag); skip >= 0 {
 340				j += skip + len(closetag)
 341			} else {
 342				break
 343			}
 344
 345			// see if it is the only thing on the line
 346			if skip := p.isEmpty(data[j:]); skip > 0 {
 347				// see if it is followed by a blank line/eof
 348				j += skip
 349				if j >= len(data) {
 350					found = true
 351					i = j
 352				} else {
 353					if skip := p.isEmpty(data[j:]); skip > 0 {
 354						j += skip
 355						found = true
 356						i = j
 357					}
 358				}
 359			}
 360		}
 361	*/
 362
 363	// if not found, try a second pass looking for indented match
 364	// but not if tag is "ins" or "del" (following original Markdown.pl)
 365	if !found && curtag != "ins" && curtag != "del" {
 366		i = 1
 367		for i < len(data) {
 368			i++
 369			for i < len(data) && !(data[i-1] == '<' && data[i] == '/') {
 370				i++
 371			}
 372
 373			if i+2+len(curtag) >= len(data) {
 374				break
 375			}
 376
 377			j = p.htmlFindEnd(curtag, data[i-1:])
 378
 379			if j > 0 {
 380				i += j - 1
 381				found = true
 382				break
 383			}
 384		}
 385	}
 386
 387	if !found {
 388		return 0
 389	}
 390
 391	// the end of the block has been found
 392	if doRender {
 393		// trim newlines
 394		end := i
 395		for end > 0 && data[end-1] == '\n' {
 396			end--
 397		}
 398		p.r.BlockHtml(out, data[:end])
 399	}
 400
 401	return i
 402}
 403
 404func (p *parser) renderHTMLBlock(out *bytes.Buffer, data []byte, start int, doRender bool) int {
 405	// html block needs to end with a blank line
 406	if i := p.isEmpty(data[start:]); i > 0 {
 407		size := start + i
 408		if doRender {
 409			// trim trailing newlines
 410			end := size
 411			for end > 0 && data[end-1] == '\n' {
 412				end--
 413			}
 414			p.r.BlockHtml(out, data[:end])
 415		}
 416		return size
 417	}
 418	return 0
 419}
 420
 421// HTML comment, lax form
 422func (p *parser) htmlComment(out *bytes.Buffer, data []byte, doRender bool) int {
 423	i := p.inlineHTMLComment(out, data)
 424	return p.renderHTMLBlock(out, data, i, doRender)
 425}
 426
 427// HTML CDATA section
 428func (p *parser) htmlCDATA(out *bytes.Buffer, data []byte, doRender bool) int {
 429	const cdataTag = "<![cdata["
 430	const cdataTagLen = len(cdataTag)
 431	if len(data) < cdataTagLen+1 {
 432		return 0
 433	}
 434	if !bytes.Equal(bytes.ToLower(data[:cdataTagLen]), []byte(cdataTag)) {
 435		return 0
 436	}
 437	i := cdataTagLen
 438	// scan for an end-of-comment marker, across lines if necessary
 439	for i < len(data) && !(data[i-2] == ']' && data[i-1] == ']' && data[i] == '>') {
 440		i++
 441	}
 442	i++
 443	// no end-of-comment marker
 444	if i >= len(data) {
 445		return 0
 446	}
 447	return p.renderHTMLBlock(out, data, i, doRender)
 448}
 449
 450// HR, which is the only self-closing block tag considered
 451func (p *parser) htmlHr(out *bytes.Buffer, data []byte, doRender bool) int {
 452	if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') {
 453		return 0
 454	}
 455	if data[3] != ' ' && data[3] != '/' && data[3] != '>' {
 456		// not an <hr> tag after all; at least not a valid one
 457		return 0
 458	}
 459
 460	i := 3
 461	for data[i] != '>' && data[i] != '\n' {
 462		i++
 463	}
 464
 465	if data[i] == '>' {
 466		return p.renderHTMLBlock(out, data, i+1, doRender)
 467	}
 468
 469	return 0
 470}
 471
 472func (p *parser) htmlFindTag(data []byte) (string, bool) {
 473	i := 0
 474	for isalnum(data[i]) {
 475		i++
 476	}
 477	key := string(data[:i])
 478	if _, ok := blockTags[key]; ok {
 479		return key, true
 480	}
 481	return "", false
 482}
 483
 484func (p *parser) htmlFindEnd(tag string, data []byte) int {
 485	// assume data[0] == '<' && data[1] == '/' already tested
 486
 487	// check if tag is a match
 488	closetag := []byte("</" + tag + ">")
 489	if !bytes.HasPrefix(data, closetag) {
 490		return 0
 491	}
 492	i := len(closetag)
 493
 494	// check that the rest of the line is blank
 495	skip := 0
 496	if skip = p.isEmpty(data[i:]); skip == 0 {
 497		return 0
 498	}
 499	i += skip
 500	skip = 0
 501
 502	if i >= len(data) {
 503		return i
 504	}
 505
 506	if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
 507		return i
 508	}
 509	if skip = p.isEmpty(data[i:]); skip == 0 {
 510		// following line must be blank
 511		return 0
 512	}
 513
 514	return i + skip
 515}
 516
 517func (*parser) isEmpty(data []byte) int {
 518	// it is okay to call isEmpty on an empty buffer
 519	if len(data) == 0 {
 520		return 0
 521	}
 522
 523	var i int
 524	for i = 0; i < len(data) && data[i] != '\n'; i++ {
 525		if data[i] != ' ' && data[i] != '\t' {
 526			return 0
 527		}
 528	}
 529	return i + 1
 530}
 531
 532func (*parser) isHRule(data []byte) bool {
 533	i := 0
 534
 535	// skip up to three spaces
 536	for i < 3 && data[i] == ' ' {
 537		i++
 538	}
 539
 540	// look at the hrule char
 541	if data[i] != '*' && data[i] != '-' && data[i] != '_' {
 542		return false
 543	}
 544	c := data[i]
 545
 546	// the whole line must be the char or whitespace
 547	n := 0
 548	for data[i] != '\n' {
 549		switch {
 550		case data[i] == c:
 551			n++
 552		case data[i] != ' ':
 553			return false
 554		}
 555		i++
 556	}
 557
 558	return n >= 3
 559}
 560
 561// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data,
 562// and returns the end index if so, or 0 otherwise. It also returns the marker found.
 563// If syntax is not nil, it gets set to the syntax specified in the fence line.
 564// A final newline is mandatory to recognize the fence line, unless newlineOptional is true.
 565func isFenceLine(data []byte, syntax *string, oldmarker string, newlineOptional bool) (end int, marker string) {
 566	i, size := 0, 0
 567
 568	// skip up to three spaces
 569	for i < len(data) && i < 3 && data[i] == ' ' {
 570		i++
 571	}
 572
 573	// check for the marker characters: ~ or `
 574	if i >= len(data) {
 575		return 0, ""
 576	}
 577	if data[i] != '~' && data[i] != '`' {
 578		return 0, ""
 579	}
 580
 581	c := data[i]
 582
 583	// the whole line must be the same char or whitespace
 584	for i < len(data) && data[i] == c {
 585		size++
 586		i++
 587	}
 588
 589	// the marker char must occur at least 3 times
 590	if size < 3 {
 591		return 0, ""
 592	}
 593	marker = string(data[i-size : i])
 594
 595	// if this is the end marker, it must match the beginning marker
 596	if oldmarker != "" && marker != oldmarker {
 597		return 0, ""
 598	}
 599
 600	// TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here
 601	// into one, always get the syntax, and discard it if the caller doesn't care.
 602	if syntax != nil {
 603		syn := 0
 604		i = skipChar(data, i, ' ')
 605
 606		if i >= len(data) {
 607			if newlineOptional && i == len(data) {
 608				return i, marker
 609			}
 610			return 0, ""
 611		}
 612
 613		syntaxStart := i
 614
 615		if data[i] == '{' {
 616			i++
 617			syntaxStart++
 618
 619			for i < len(data) && data[i] != '}' && data[i] != '\n' {
 620				syn++
 621				i++
 622			}
 623
 624			if i >= len(data) || data[i] != '}' {
 625				return 0, ""
 626			}
 627
 628			// strip all whitespace at the beginning and the end
 629			// of the {} block
 630			for syn > 0 && isspace(data[syntaxStart]) {
 631				syntaxStart++
 632				syn--
 633			}
 634
 635			for syn > 0 && isspace(data[syntaxStart+syn-1]) {
 636				syn--
 637			}
 638
 639			i++
 640		} else {
 641			for i < len(data) && !isspace(data[i]) {
 642				syn++
 643				i++
 644			}
 645		}
 646
 647		*syntax = string(data[syntaxStart : syntaxStart+syn])
 648	}
 649
 650	i = skipChar(data, i, ' ')
 651	if i >= len(data) || data[i] != '\n' {
 652		if newlineOptional && i == len(data) {
 653			return i, marker
 654		}
 655		return 0, ""
 656	}
 657
 658	return i + 1, marker // Take newline into account.
 659}
 660
 661// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning,
 662// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects.
 663// If doRender is true, a final newline is mandatory to recognize the fenced code block.
 664func (p *parser) fencedCodeBlock(out *bytes.Buffer, data []byte, doRender bool) int {
 665	var syntax string
 666	beg, marker := isFenceLine(data, &syntax, "", false)
 667	if beg == 0 || beg >= len(data) {
 668		return 0
 669	}
 670
 671	var work bytes.Buffer
 672
 673	for {
 674		// safe to assume beg < len(data)
 675
 676		// check for the end of the code block
 677		newlineOptional := !doRender
 678		fenceEnd, _ := isFenceLine(data[beg:], nil, marker, newlineOptional)
 679		if fenceEnd != 0 {
 680			beg += fenceEnd
 681			break
 682		}
 683
 684		// copy the current line
 685		end := skipUntilChar(data, beg, '\n') + 1
 686
 687		// did we reach the end of the buffer without a closing marker?
 688		if end >= len(data) {
 689			return 0
 690		}
 691
 692		// verbatim copy to the working buffer
 693		if doRender {
 694			work.Write(data[beg:end])
 695		}
 696		beg = end
 697	}
 698
 699	if doRender {
 700		p.r.BlockCode(out, work.Bytes(), syntax)
 701	}
 702
 703	return beg
 704}
 705
 706func (p *parser) table(out *bytes.Buffer, data []byte) int {
 707	var header bytes.Buffer
 708	i, columns := p.tableHeader(&header, data)
 709	if i == 0 {
 710		return 0
 711	}
 712
 713	var body bytes.Buffer
 714
 715	for i < len(data) {
 716		pipes, rowStart := 0, i
 717		for ; data[i] != '\n'; i++ {
 718			if data[i] == '|' {
 719				pipes++
 720			}
 721		}
 722
 723		if pipes == 0 {
 724			i = rowStart
 725			break
 726		}
 727
 728		// include the newline in data sent to tableRow
 729		i++
 730		p.tableRow(&body, data[rowStart:i], columns, false)
 731	}
 732
 733	p.r.Table(out, header.Bytes(), body.Bytes(), columns)
 734
 735	return i
 736}
 737
 738// check if the specified position is preceded by an odd number of backslashes
 739func isBackslashEscaped(data []byte, i int) bool {
 740	backslashes := 0
 741	for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' {
 742		backslashes++
 743	}
 744	return backslashes&1 == 1
 745}
 746
 747func (p *parser) tableHeader(out *bytes.Buffer, data []byte) (size int, columns []int) {
 748	i := 0
 749	colCount := 1
 750	for i = 0; data[i] != '\n'; i++ {
 751		if data[i] == '|' && !isBackslashEscaped(data, i) {
 752			colCount++
 753		}
 754	}
 755
 756	// doesn't look like a table header
 757	if colCount == 1 {
 758		return
 759	}
 760
 761	// include the newline in the data sent to tableRow
 762	header := data[:i+1]
 763
 764	// column count ignores pipes at beginning or end of line
 765	if data[0] == '|' {
 766		colCount--
 767	}
 768	if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) {
 769		colCount--
 770	}
 771
 772	columns = make([]int, colCount)
 773
 774	// move on to the header underline
 775	i++
 776	if i >= len(data) {
 777		return
 778	}
 779
 780	if data[i] == '|' && !isBackslashEscaped(data, i) {
 781		i++
 782	}
 783	i = skipChar(data, i, ' ')
 784
 785	// each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3
 786	// and trailing | optional on last column
 787	col := 0
 788	for data[i] != '\n' {
 789		dashes := 0
 790
 791		if data[i] == ':' {
 792			i++
 793			columns[col] |= TABLE_ALIGNMENT_LEFT
 794			dashes++
 795		}
 796		for data[i] == '-' {
 797			i++
 798			dashes++
 799		}
 800		if data[i] == ':' {
 801			i++
 802			columns[col] |= TABLE_ALIGNMENT_RIGHT
 803			dashes++
 804		}
 805		for data[i] == ' ' {
 806			i++
 807		}
 808
 809		// end of column test is messy
 810		switch {
 811		case dashes < 3:
 812			// not a valid column
 813			return
 814
 815		case data[i] == '|' && !isBackslashEscaped(data, i):
 816			// marker found, now skip past trailing whitespace
 817			col++
 818			i++
 819			for data[i] == ' ' {
 820				i++
 821			}
 822
 823			// trailing junk found after last column
 824			if col >= colCount && data[i] != '\n' {
 825				return
 826			}
 827
 828		case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount:
 829			// something else found where marker was required
 830			return
 831
 832		case data[i] == '\n':
 833			// marker is optional for the last column
 834			col++
 835
 836		default:
 837			// trailing junk found after last column
 838			return
 839		}
 840	}
 841	if col != colCount {
 842		return
 843	}
 844
 845	p.tableRow(out, header, columns, true)
 846	size = i + 1
 847	return
 848}
 849
 850func (p *parser) tableRow(out *bytes.Buffer, data []byte, columns []int, header bool) {
 851	i, col := 0, 0
 852	var rowWork bytes.Buffer
 853
 854	if data[i] == '|' && !isBackslashEscaped(data, i) {
 855		i++
 856	}
 857
 858	for col = 0; col < len(columns) && i < len(data); col++ {
 859		for data[i] == ' ' {
 860			i++
 861		}
 862
 863		cellStart := i
 864
 865		for (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' {
 866			i++
 867		}
 868
 869		cellEnd := i
 870
 871		// skip the end-of-cell marker, possibly taking us past end of buffer
 872		i++
 873
 874		for cellEnd > cellStart && data[cellEnd-1] == ' ' {
 875			cellEnd--
 876		}
 877
 878		var cellWork bytes.Buffer
 879		p.inline(&cellWork, data[cellStart:cellEnd])
 880
 881		if header {
 882			p.r.TableHeaderCell(&rowWork, cellWork.Bytes(), columns[col])
 883		} else {
 884			p.r.TableCell(&rowWork, cellWork.Bytes(), columns[col])
 885		}
 886	}
 887
 888	// pad it out with empty columns to get the right number
 889	for ; col < len(columns); col++ {
 890		if header {
 891			p.r.TableHeaderCell(&rowWork, nil, columns[col])
 892		} else {
 893			p.r.TableCell(&rowWork, nil, columns[col])
 894		}
 895	}
 896
 897	// silently ignore rows with too many cells
 898
 899	p.r.TableRow(out, rowWork.Bytes())
 900}
 901
 902// returns blockquote prefix length
 903func (p *parser) quotePrefix(data []byte) int {
 904	i := 0
 905	for i < 3 && data[i] == ' ' {
 906		i++
 907	}
 908	if data[i] == '>' {
 909		if data[i+1] == ' ' {
 910			return i + 2
 911		}
 912		return i + 1
 913	}
 914	return 0
 915}
 916
 917// blockquote ends with at least one blank line
 918// followed by something without a blockquote prefix
 919func (p *parser) terminateBlockquote(data []byte, beg, end int) bool {
 920	if p.isEmpty(data[beg:]) <= 0 {
 921		return false
 922	}
 923	if end >= len(data) {
 924		return true
 925	}
 926	return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0
 927}
 928
 929// parse a blockquote fragment
 930func (p *parser) quote(out *bytes.Buffer, data []byte) int {
 931	var raw bytes.Buffer
 932	beg, end := 0, 0
 933	for beg < len(data) {
 934		end = beg
 935		// Step over whole lines, collecting them. While doing that, check for
 936		// fenced code and if one's found, incorporate it altogether,
 937		// irregardless of any contents inside it
 938		for data[end] != '\n' {
 939			if p.flags&EXTENSION_FENCED_CODE != 0 {
 940				if i := p.fencedCodeBlock(out, data[end:], false); i > 0 {
 941					// -1 to compensate for the extra end++ after the loop:
 942					end += i - 1
 943					break
 944				}
 945			}
 946			end++
 947		}
 948		end++
 949
 950		if pre := p.quotePrefix(data[beg:]); pre > 0 {
 951			// skip the prefix
 952			beg += pre
 953		} else if p.terminateBlockquote(data, beg, end) {
 954			break
 955		}
 956
 957		// this line is part of the blockquote
 958		raw.Write(data[beg:end])
 959		beg = end
 960	}
 961
 962	var cooked bytes.Buffer
 963	p.block(&cooked, raw.Bytes())
 964	p.r.BlockQuote(out, cooked.Bytes())
 965	return end
 966}
 967
 968// returns prefix length for block code
 969func (p *parser) codePrefix(data []byte) int {
 970	if data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' {
 971		return 4
 972	}
 973	return 0
 974}
 975
 976func (p *parser) code(out *bytes.Buffer, data []byte) int {
 977	var work bytes.Buffer
 978
 979	i := 0
 980	for i < len(data) {
 981		beg := i
 982		for data[i] != '\n' {
 983			i++
 984		}
 985		i++
 986
 987		blankline := p.isEmpty(data[beg:i]) > 0
 988		if pre := p.codePrefix(data[beg:i]); pre > 0 {
 989			beg += pre
 990		} else if !blankline {
 991			// non-empty, non-prefixed line breaks the pre
 992			i = beg
 993			break
 994		}
 995
 996		// verbatim copy to the working buffeu
 997		if blankline {
 998			work.WriteByte('\n')
 999		} else {
1000			work.Write(data[beg:i])
1001		}
1002	}
1003
1004	// trim all the \n off the end of work
1005	workbytes := work.Bytes()
1006	eol := len(workbytes)
1007	for eol > 0 && workbytes[eol-1] == '\n' {
1008		eol--
1009	}
1010	if eol != len(workbytes) {
1011		work.Truncate(eol)
1012	}
1013
1014	work.WriteByte('\n')
1015
1016	p.r.BlockCode(out, work.Bytes(), "")
1017
1018	return i
1019}
1020
1021// returns unordered list item prefix
1022func (p *parser) uliPrefix(data []byte) int {
1023	i := 0
1024
1025	// start with up to 3 spaces
1026	for i < 3 && data[i] == ' ' {
1027		i++
1028	}
1029
1030	// need a *, +, or - followed by a space
1031	if (data[i] != '*' && data[i] != '+' && data[i] != '-') ||
1032		data[i+1] != ' ' {
1033		return 0
1034	}
1035	return i + 2
1036}
1037
1038// returns ordered list item prefix
1039func (p *parser) oliPrefix(data []byte) int {
1040	i := 0
1041
1042	// start with up to 3 spaces
1043	for i < 3 && data[i] == ' ' {
1044		i++
1045	}
1046
1047	// count the digits
1048	start := i
1049	for data[i] >= '0' && data[i] <= '9' {
1050		i++
1051	}
1052
1053	// we need >= 1 digits followed by a dot and a space
1054	if start == i || data[i] != '.' || data[i+1] != ' ' {
1055		return 0
1056	}
1057	return i + 2
1058}
1059
1060// returns definition list item prefix
1061func (p *parser) dliPrefix(data []byte) int {
1062	i := 0
1063
1064	// need a : followed by a spaces
1065	if data[i] != ':' || data[i+1] != ' ' {
1066		return 0
1067	}
1068	for data[i] == ' ' {
1069		i++
1070	}
1071	return i + 2
1072}
1073
1074// parse ordered or unordered list block
1075func (p *parser) list(out *bytes.Buffer, data []byte, flags int) int {
1076	i := 0
1077	flags |= LIST_ITEM_BEGINNING_OF_LIST
1078	work := func() bool {
1079		for i < len(data) {
1080			skip := p.listItem(out, data[i:], &flags)
1081			i += skip
1082
1083			if skip == 0 || flags&LIST_ITEM_END_OF_LIST != 0 {
1084				break
1085			}
1086			flags &= ^LIST_ITEM_BEGINNING_OF_LIST
1087		}
1088		return true
1089	}
1090
1091	p.r.List(out, work, flags)
1092	return i
1093}
1094
1095// Parse a single list item.
1096// Assumes initial prefix is already removed if this is a sublist.
1097func (p *parser) listItem(out *bytes.Buffer, data []byte, flags *int) int {
1098	// keep track of the indentation of the first line
1099	itemIndent := 0
1100	for itemIndent < 3 && data[itemIndent] == ' ' {
1101		itemIndent++
1102	}
1103
1104	i := p.uliPrefix(data)
1105	if i == 0 {
1106		i = p.oliPrefix(data)
1107	}
1108	if i == 0 {
1109		i = p.dliPrefix(data)
1110		// reset definition term flag
1111		if i > 0 {
1112			*flags &= ^LIST_TYPE_TERM
1113		}
1114	}
1115	if i == 0 {
1116		// if in defnition list, set term flag and continue
1117		if *flags&LIST_TYPE_DEFINITION != 0 {
1118			*flags |= LIST_TYPE_TERM
1119		} else {
1120			return 0
1121		}
1122	}
1123
1124	// skip leading whitespace on first line
1125	for data[i] == ' ' {
1126		i++
1127	}
1128
1129	// find the end of the line
1130	line := i
1131	for i > 0 && data[i-1] != '\n' {
1132		i++
1133	}
1134
1135	// get working buffer
1136	var raw bytes.Buffer
1137
1138	// put the first line into the working buffer
1139	raw.Write(data[line:i])
1140	line = i
1141
1142	// process the following lines
1143	containsBlankLine := false
1144	sublist := 0
1145
1146gatherlines:
1147	for line < len(data) {
1148		i++
1149
1150		// find the end of this line
1151		for data[i-1] != '\n' {
1152			i++
1153		}
1154
1155		// if it is an empty line, guess that it is part of this item
1156		// and move on to the next line
1157		if p.isEmpty(data[line:i]) > 0 {
1158			containsBlankLine = true
1159			raw.Write(data[line:i])
1160			line = i
1161			continue
1162		}
1163
1164		// calculate the indentation
1165		indent := 0
1166		for indent < 4 && line+indent < i && data[line+indent] == ' ' {
1167			indent++
1168		}
1169
1170		chunk := data[line+indent : i]
1171
1172		// evaluate how this line fits in
1173		switch {
1174		// is this a nested list item?
1175		case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) ||
1176			p.oliPrefix(chunk) > 0 ||
1177			p.dliPrefix(chunk) > 0:
1178
1179			if containsBlankLine {
1180				// end the list if the type changed after a blank line
1181				if indent <= itemIndent &&
1182					((*flags&LIST_TYPE_ORDERED != 0 && p.uliPrefix(chunk) > 0) ||
1183						(*flags&LIST_TYPE_ORDERED == 0 && p.oliPrefix(chunk) > 0)) {
1184
1185					*flags |= LIST_ITEM_END_OF_LIST
1186					break gatherlines
1187				}
1188				*flags |= LIST_ITEM_CONTAINS_BLOCK
1189			}
1190
1191			// to be a nested list, it must be indented more
1192			// if not, it is the next item in the same list
1193			if indent <= itemIndent {
1194				break gatherlines
1195			}
1196
1197			// is this the first item in the nested list?
1198			if sublist == 0 {
1199				sublist = raw.Len()
1200			}
1201
1202		// is this a nested prefix header?
1203		case p.isPrefixHeader(chunk):
1204			// if the header is not indented, it is not nested in the list
1205			// and thus ends the list
1206			if containsBlankLine && indent < 4 {
1207				*flags |= LIST_ITEM_END_OF_LIST
1208				break gatherlines
1209			}
1210			*flags |= LIST_ITEM_CONTAINS_BLOCK
1211
1212		// anything following an empty line is only part
1213		// of this item if it is indented 4 spaces
1214		// (regardless of the indentation of the beginning of the item)
1215		case containsBlankLine && indent < 4:
1216			if *flags&LIST_TYPE_DEFINITION != 0 && i < len(data)-1 {
1217				// is the next item still a part of this list?
1218				next := i
1219				for data[next] != '\n' {
1220					next++
1221				}
1222				for next < len(data)-1 && data[next] == '\n' {
1223					next++
1224				}
1225				if i < len(data)-1 && data[i] != ':' && data[next] != ':' {
1226					*flags |= LIST_ITEM_END_OF_LIST
1227				}
1228			} else {
1229				*flags |= LIST_ITEM_END_OF_LIST
1230			}
1231			break gatherlines
1232
1233		// a blank line means this should be parsed as a block
1234		case containsBlankLine:
1235			*flags |= LIST_ITEM_CONTAINS_BLOCK
1236		}
1237
1238		containsBlankLine = false
1239
1240		// add the line into the working buffer without prefix
1241		raw.Write(data[line+indent : i])
1242
1243		line = i
1244	}
1245
1246	// If reached end of data, the Renderer.ListItem call we're going to make below
1247	// is definitely the last in the list.
1248	if line >= len(data) {
1249		*flags |= LIST_ITEM_END_OF_LIST
1250	}
1251
1252	rawBytes := raw.Bytes()
1253
1254	// render the contents of the list item
1255	var cooked bytes.Buffer
1256	if *flags&LIST_ITEM_CONTAINS_BLOCK != 0 && *flags&LIST_TYPE_TERM == 0 {
1257		// intermediate render of block item, except for definition term
1258		if sublist > 0 {
1259			p.block(&cooked, rawBytes[:sublist])
1260			p.block(&cooked, rawBytes[sublist:])
1261		} else {
1262			p.block(&cooked, rawBytes)
1263		}
1264	} else {
1265		// intermediate render of inline item
1266		if sublist > 0 {
1267			p.inline(&cooked, rawBytes[:sublist])
1268			p.block(&cooked, rawBytes[sublist:])
1269		} else {
1270			p.inline(&cooked, rawBytes)
1271		}
1272	}
1273
1274	// render the actual list item
1275	cookedBytes := cooked.Bytes()
1276	parsedEnd := len(cookedBytes)
1277
1278	// strip trailing newlines
1279	for parsedEnd > 0 && cookedBytes[parsedEnd-1] == '\n' {
1280		parsedEnd--
1281	}
1282	p.r.ListItem(out, cookedBytes[:parsedEnd], *flags)
1283
1284	return line
1285}
1286
1287// render a single paragraph that has already been parsed out
1288func (p *parser) renderParagraph(out *bytes.Buffer, data []byte) {
1289	if len(data) == 0 {
1290		return
1291	}
1292
1293	// trim leading spaces
1294	beg := 0
1295	for data[beg] == ' ' {
1296		beg++
1297	}
1298
1299	// trim trailing newline
1300	end := len(data) - 1
1301
1302	// trim trailing spaces
1303	for end > beg && data[end-1] == ' ' {
1304		end--
1305	}
1306
1307	work := func() bool {
1308		p.inline(out, data[beg:end])
1309		return true
1310	}
1311	p.r.Paragraph(out, work)
1312}
1313
1314func (p *parser) paragraph(out *bytes.Buffer, data []byte) int {
1315	// prev: index of 1st char of previous line
1316	// line: index of 1st char of current line
1317	// i: index of cursor/end of current line
1318	var prev, line, i int
1319
1320	// keep going until we find something to mark the end of the paragraph
1321	for i < len(data) {
1322		// mark the beginning of the current line
1323		prev = line
1324		current := data[i:]
1325		line = i
1326
1327		// did we find a blank line marking the end of the paragraph?
1328		if n := p.isEmpty(current); n > 0 {
1329			// did this blank line followed by a definition list item?
1330			if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
1331				if i < len(data)-1 && data[i+1] == ':' {
1332					return p.list(out, data[prev:], LIST_TYPE_DEFINITION)
1333				}
1334			}
1335
1336			p.renderParagraph(out, data[:i])
1337			return i + n
1338		}
1339
1340		// an underline under some text marks a header, so our paragraph ended on prev line
1341		if i > 0 {
1342			if level := p.isUnderlinedHeader(current); level > 0 {
1343				// render the paragraph
1344				p.renderParagraph(out, data[:prev])
1345
1346				// ignore leading and trailing whitespace
1347				eol := i - 1
1348				for prev < eol && data[prev] == ' ' {
1349					prev++
1350				}
1351				for eol > prev && data[eol-1] == ' ' {
1352					eol--
1353				}
1354
1355				// render the header
1356				// this ugly double closure avoids forcing variables onto the heap
1357				work := func(o *bytes.Buffer, pp *parser, d []byte) func() bool {
1358					return func() bool {
1359						pp.inline(o, d)
1360						return true
1361					}
1362				}(out, p, data[prev:eol])
1363
1364				id := ""
1365				if p.flags&EXTENSION_AUTO_HEADER_IDS != 0 {
1366					id = SanitizedAnchorName(string(data[prev:eol]))
1367				}
1368
1369				p.r.Header(out, work, level, id)
1370
1371				// find the end of the underline
1372				for data[i] != '\n' {
1373					i++
1374				}
1375				return i
1376			}
1377		}
1378
1379		// if the next line starts a block of HTML, then the paragraph ends here
1380		if p.flags&EXTENSION_LAX_HTML_BLOCKS != 0 {
1381			if data[i] == '<' && p.html(out, current, false) > 0 {
1382				// rewind to before the HTML block
1383				p.renderParagraph(out, data[:i])
1384				return i
1385			}
1386		}
1387
1388		// if there's a prefixed header or a horizontal rule after this, paragraph is over
1389		if p.isPrefixHeader(current) || p.isHRule(current) {
1390			p.renderParagraph(out, data[:i])
1391			return i
1392		}
1393
1394		// if there's a fenced code block, paragraph is over
1395		if p.flags&EXTENSION_FENCED_CODE != 0 {
1396			if p.fencedCodeBlock(out, current, false) > 0 {
1397				p.renderParagraph(out, data[:i])
1398				return i
1399			}
1400		}
1401
1402		// if there's a definition list item, prev line is a definition term
1403		if p.flags&EXTENSION_DEFINITION_LISTS != 0 {
1404			if p.dliPrefix(current) != 0 {
1405				return p.list(out, data[prev:], LIST_TYPE_DEFINITION)
1406			}
1407		}
1408
1409		// if there's a list after this, paragraph is over
1410		if p.flags&EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK != 0 {
1411			if p.uliPrefix(current) != 0 ||
1412				p.oliPrefix(current) != 0 ||
1413				p.quotePrefix(current) != 0 ||
1414				p.codePrefix(current) != 0 {
1415				p.renderParagraph(out, data[:i])
1416				return i
1417			}
1418		}
1419
1420		// otherwise, scan to the beginning of the next line
1421		for data[i] != '\n' {
1422			i++
1423		}
1424		i++
1425	}
1426
1427	p.renderParagraph(out, data[:i])
1428	return i
1429}
1430
// SanitizedAnchorName returns a sanitized anchor name for the given text.
//
// It implements the algorithm specified in the package comment: letters and
// numbers are kept (lower-cased), every run of other characters between two
// kept characters becomes a single '-', and such runs at the very start or
// end of text produce nothing.
func SanitizedAnchorName(text string) string {
	var name []rune
	dashPending := false
	for _, r := range text {
		if !unicode.IsLetter(r) && !unicode.IsNumber(r) {
			// separator: remember it, the dash is only written once another
			// letter or number actually follows
			dashPending = true
			continue
		}

		// len(name) > 0 suppresses a dash in front of the first kept rune
		if dashPending && len(name) > 0 {
			name = append(name, '-')
		}
		dashPending = false
		name = append(name, unicode.ToLower(r))
	}
	return string(name)
}