inline.go

   1package parser
   2
   3import (
   4	"bytes"
   5	"regexp"
   6	"strconv"
   7
   8	"github.com/gomarkdown/markdown/ast"
   9)
  10
  11// Parsing of inline elements
  12
var (
	// urlRe is a regexp fragment (not compiled on its own) matching either an
	// http/https/ftp URL or a '/'-rooted path, followed by one or more of the
	// listed URL characters.
	urlRe    = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`
	// anchorRe matches, at the start of the input, an HTML anchor of the form
	// <a href="URL" title="...">URL</a>, where the title attribute is optional.
	anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`)

	// htmlEntityRe matches '&', two to five lowercase ASCII letters, then ';'.
	// TODO: improve this regexp to catch all possible entities:
	htmlEntityRe = regexp.MustCompile(`&[a-z]{2,5};`)
)
  20
  21// Inline parses text within a block.
  22// Each function returns the number of consumed chars.
  23func (p *Parser) Inline(currBlock ast.Node, data []byte) {
  24	// handlers might call us recursively: enforce a maximum depth
  25	if p.nesting >= p.maxNesting || len(data) == 0 {
  26		return
  27	}
  28	p.nesting++
  29	beg, end := 0, 0
  30
  31	n := len(data)
  32	for end < n {
  33		handler := p.inlineCallback[data[end]]
  34		if handler == nil {
  35			end++
  36			continue
  37		}
  38		consumed, node := handler(p, data, end)
  39		if consumed == 0 {
  40			// no action from the callback
  41			end++
  42			continue
  43		}
  44		// copy inactive chars into the output
  45		ast.AppendChild(currBlock, newTextNode(data[beg:end]))
  46		if node != nil {
  47			ast.AppendChild(currBlock, node)
  48		}
  49		beg = end + consumed
  50		end = beg
  51	}
  52
  53	if beg < n {
  54		if data[end-1] == '\n' {
  55			end--
  56		}
  57		ast.AppendChild(currBlock, newTextNode(data[beg:end]))
  58	}
  59	p.nesting--
  60}
  61
  62// single and double emphasis parsing
  63func emphasis(p *Parser, data []byte, offset int) (int, ast.Node) {
  64	data = data[offset:]
  65	c := data[0]
  66
  67	n := len(data)
  68	if n > 2 && data[1] != c {
  69		// whitespace cannot follow an opening emphasis;
  70		// strikethrough only takes two characters '~~'
  71		if isSpace(data[1]) {
  72			return 0, nil
  73		}
  74		if p.extensions&SuperSubscript != 0 && c == '~' {
  75			// potential subscript, no spaces, except when escaped, helperEmphasis does
  76			// not check that for us, so walk the bytes and check.
  77			ret := skipUntilChar(data[1:], 0, c)
  78			if ret == 0 {
  79				return 0, nil
  80			}
  81			ret++ // we started with data[1:] above.
  82			for i := 1; i < ret; i++ {
  83				if isSpace(data[i]) && !isEscape(data, i) {
  84					return 0, nil
  85				}
  86			}
  87			sub := &ast.Subscript{}
  88			sub.Literal = data[1:ret]
  89			return ret + 1, sub
  90		}
  91		ret, node := helperEmphasis(p, data[1:], c)
  92		if ret == 0 {
  93			return 0, nil
  94		}
  95
  96		return ret + 1, node
  97	}
  98
  99	if n > 3 && data[1] == c && data[2] != c {
 100		if isSpace(data[2]) {
 101			return 0, nil
 102		}
 103		ret, node := helperDoubleEmphasis(p, data[2:], c)
 104		if ret == 0 {
 105			return 0, nil
 106		}
 107
 108		return ret + 2, node
 109	}
 110
 111	if n > 4 && data[1] == c && data[2] == c && data[3] != c {
 112		if c == '~' || isSpace(data[3]) {
 113			return 0, nil
 114		}
 115		ret, node := helperTripleEmphasis(p, data, 3, c)
 116		if ret == 0 {
 117			return 0, nil
 118		}
 119
 120		return ret + 3, node
 121	}
 122
 123	return 0, nil
 124}
 125
 126func codeSpan(p *Parser, data []byte, offset int) (int, ast.Node) {
 127	data = data[offset:]
 128
 129	// count the number of backticks in the delimiter
 130	nb := skipChar(data, 0, '`')
 131
 132	// find the next delimiter
 133	i, end := 0, 0
 134	for end = nb; end < len(data) && i < nb; end++ {
 135		if data[end] == '`' {
 136			i++
 137		} else {
 138			i = 0
 139		}
 140	}
 141
 142	// no matching delimiter?
 143	if i < nb && end >= len(data) {
 144		return 0, nil
 145	}
 146
 147	// trim outside whitespace
 148	fBegin := nb
 149	for fBegin < end && data[fBegin] == ' ' {
 150		fBegin++
 151	}
 152
 153	fEnd := end - nb
 154	for fEnd > fBegin && data[fEnd-1] == ' ' {
 155		fEnd--
 156	}
 157
 158	// render the code span
 159	if fBegin != fEnd {
 160		code := &ast.Code{}
 161		code.Literal = data[fBegin:fEnd]
 162		return end, code
 163	}
 164
 165	return end, nil
 166}
 167
 168// newline preceded by two spaces becomes <br>
 169func maybeLineBreak(p *Parser, data []byte, offset int) (int, ast.Node) {
 170	origOffset := offset
 171	offset = skipChar(data, offset, ' ')
 172
 173	if offset < len(data) && data[offset] == '\n' {
 174		if offset-origOffset >= 2 {
 175			return offset - origOffset + 1, &ast.Hardbreak{}
 176		}
 177		return offset - origOffset, nil
 178	}
 179	return 0, nil
 180}
 181
 182// newline without two spaces works when HardLineBreak is enabled
 183func lineBreak(p *Parser, data []byte, offset int) (int, ast.Node) {
 184	if p.extensions&HardLineBreak != 0 {
 185		return 1, &ast.Hardbreak{}
 186	}
 187	return 0, nil
 188}
 189
// linkType identifies which bracketed construct link() is parsing.
type linkType int

const (
	// linkNormal is a regular link: [text]
	linkNormal linkType = iota
	// linkImg is an image: ![alt]
	linkImg
	// linkDeferredFootnote is a footnote reference: [^refId]
	linkDeferredFootnote
	// linkInlineFootnote is an inline footnote: ^[text]
	linkInlineFootnote
	// linkCitation is a citation: [@citation]
	linkCitation
)
 199
 200func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
 201	if t == linkDeferredFootnote {
 202		return false
 203	}
 204	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
 205}
 206
 207func maybeImage(p *Parser, data []byte, offset int) (int, ast.Node) {
 208	if offset < len(data)-1 && data[offset+1] == '[' {
 209		return link(p, data, offset)
 210	}
 211	return 0, nil
 212}
 213
 214func maybeInlineFootnoteOrSuper(p *Parser, data []byte, offset int) (int, ast.Node) {
 215	if offset < len(data)-1 && data[offset+1] == '[' {
 216		return link(p, data, offset)
 217	}
 218
 219	if p.extensions&SuperSubscript != 0 {
 220		ret := skipUntilChar(data[offset:], 1, '^')
 221		if ret == 0 {
 222			return 0, nil
 223		}
 224		for i := offset; i < offset+ret; i++ {
 225			if isSpace(data[i]) && !isEscape(data, i) {
 226				return 0, nil
 227			}
 228		}
 229		sup := &ast.Superscript{}
 230		sup.Literal = data[offset+1 : offset+ret]
 231		return offset + ret, sup
 232	}
 233
 234	return 0, nil
 235}
 236
// link parses the bracketed construct starting at data[offset]: a regular
// link, an image, an inline or deferred footnote, or a citation.
//
// It is reached from the '[' handler and also from maybeImage ('!') and
// maybeInlineFootnoteOrSuper ('^'), in which case data[offset] is the prefix
// character. Three syntaxes are recognised after the closing ']':
//   - inline:    [text](destination "title")
//   - reference: [text][id]  (an empty id reuses the text)
//   - shortcut:  [text], [^footnote] or ^[inline footnote]
//
// It returns the number of bytes consumed counted from offset and the
// resulting node, or (0, nil) when the input is not a valid construct.
func link(p *Parser, data []byte, offset int) (int, ast.Node) {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0, nil
	}

	// classify the construct from its first one or two bytes
	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.extensions&Footnotes != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset >= 0 && data[offset] == '!':
		t = linkImg
		offset++
	// [@citation], [@-citation], [@?citation], [@!citation]
	case p.extensions&Mmark != 0 && len(data)-1 > offset && data[offset+1] == '@':
		t = linkCitation
	// [text] == regular link
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.extensions&Footnotes != 0:
		if offset >= 0 && data[offset] == '^' {
			t = linkInlineFootnote
			offset++
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	default:
		t = linkNormal
	}

	// from here on all indexes are relative to the opening '['
	data = data[offset:]

	if t == linkCitation {
		return citation(p, data, 0)
	}

	var (
		i                               = 1
		noteID                          int
		title, link, linkID, altContent []byte
		textHasNl                       = false
	)

	// skip the '^' of a deferred footnote
	if t == linkDeferredFootnote {
		i++
	}

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		// the byte after a backslash never opens or closes a bracket
		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	if i >= len(data) {
		return 0, nil
	}

	// txtE is the index of the closing ']'
	txtE := i
	i++
	var footnoteNode ast.Node

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	i = skipSpace(data, i)

	// inline style link
	switch {
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		i = skipSpace(data, i)

		linkB := i

		// look for link end: ' " )
	findlinkend:
		for i < len(data) {
			switch {
			case data[i] == '\\':
				i += 2

			case data[i] == ')' || data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0, nil
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isSpace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: everything up to ')' is the destination
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isSpace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step over the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		i = skipUntilChar(data, i, ']')

		if i >= len(data) {
			return 0, nil
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty id ("[text][]"): the link text itself is the id
			if textHasNl {
				var b bytes.Buffer

				// a newline becomes a single space, unless a space precedes it
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0, nil
		}

		// keep link and title from reference
		linkID = id
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step over the closing ']' of the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// a newline becomes a single space, unless a space precedes it
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		footnoteNode = &ast.ListItem{}
		if t == linkInlineFootnote {
			// create a new reference
			noteID = len(p.notes) + 1

			// the fragment is the slugified id cut to at most 16 bytes, or
			// "footnote-<noteID>" when the id is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteID))...)
			}

			ref := &reference{
				noteID:   noteID,
				hasBlock: false,
				link:     fragment,
				title:    id,
				footnote: footnoteNode,
			}

			p.notes = append(p.notes, ref)
			p.refsRecord[string(ref.link)] = struct{}{}

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0, nil
			}

			// first use of this deferred footnote: register it as a note
			if t == linkDeferredFootnote && !p.isFootnote(lr) {
				lr.noteID = len(p.notes) + 1
				lr.footnote = footnoteNode
				p.notes = append(p.notes, lr)
				p.refsRecord[string(lr.link)] = struct{}{}
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteID = lr.noteID
		}

		// rewind the whitespace
		i = txtE + 1
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && txtE <= 1) {
			return 0, nil
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		link := &ast.Link{
			Destination: normalizeURI(uLink),
			Title:       title,
			DeferredID:  linkID,
		}
		if len(altContent) > 0 {
			ast.AppendChild(link, newTextNode(altContent))
		} else {
			// links cannot contain other links, so turn off link parsing
			// temporarily and recurse
			insideLink := p.insideLink
			p.insideLink = true
			p.Inline(link, data[1:txtE])
			p.insideLink = insideLink
		}
		return i, link

	case linkImg:
		image := &ast.Image{
			Destination: uLink,
			Title:       title,
		}
		ast.AppendChild(image, newTextNode(data[1:txtE]))
		// +1 accounts for the leading '!' skipped via offset++
		return i + 1, image

	case linkInlineFootnote, linkDeferredFootnote:
		link := &ast.Link{
			Destination: link,
			Title:       title,
			NoteID:      noteID,
			Footnote:    footnoteNode,
		}
		if t == linkDeferredFootnote {
			link.DeferredID = data[2:txtE]
		}
		if t == linkInlineFootnote {
			// account for the leading '^' skipped via offset++
			i++
		}
		return i, link

	default:
		return 0, nil
	}
}
 606
 607func (p *Parser) inlineHTMLComment(data []byte) int {
 608	if len(data) < 5 {
 609		return 0
 610	}
 611	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
 612		return 0
 613	}
 614	i := 5
 615	// scan for an end-of-comment marker, across lines if necessary
 616	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
 617		i++
 618	}
 619	// no end-of-comment marker
 620	if i >= len(data) {
 621		return 0
 622	}
 623	return i + 1
 624}
 625
 626func stripMailto(link []byte) []byte {
 627	if bytes.HasPrefix(link, []byte("mailto://")) {
 628		return link[9:]
 629	} else if bytes.HasPrefix(link, []byte("mailto:")) {
 630		return link[7:]
 631	} else {
 632		return link
 633	}
 634}
 635
// autolinkType specifies a kind of autolink that gets detected.
type autolinkType int

// These are the possible flag values for the autolink renderer.
const (
	// notAutolink: the angle-bracketed text is not an autolink.
	notAutolink autolinkType = iota
	// normalAutolink: the text has a "scheme:" prefix, e.g. <http://...>.
	normalAutolink
	// emailAutolink: the text is an e-mail address, e.g. <user@host>.
	emailAutolink
)
 645
 646// '<' when tags or autolinks are allowed
 647func leftAngle(p *Parser, data []byte, offset int) (int, ast.Node) {
 648	data = data[offset:]
 649
 650	if p.extensions&Mmark != 0 {
 651		id, consumed := IsCallout(data)
 652		if consumed > 0 {
 653			node := &ast.Callout{}
 654			node.ID = id
 655			return consumed, node
 656		}
 657	}
 658
 659	altype, end := tagLength(data)
 660	if size := p.inlineHTMLComment(data); size > 0 {
 661		end = size
 662	}
 663	if end <= 2 {
 664		return end, nil
 665	}
 666	if altype == notAutolink {
 667		htmlTag := &ast.HTMLSpan{}
 668		htmlTag.Literal = data[:end]
 669		return end, htmlTag
 670	}
 671
 672	var uLink bytes.Buffer
 673	unescapeText(&uLink, data[1:end+1-2])
 674	if uLink.Len() <= 0 {
 675		return end, nil
 676	}
 677	link := uLink.Bytes()
 678	node := &ast.Link{
 679		Destination: link,
 680	}
 681	if altype == emailAutolink {
 682		node.Destination = append([]byte("mailto:"), link...)
 683	}
 684	ast.AppendChild(node, newTextNode(stripMailto(link)))
 685	return end, node
 686}
 687
 688// '\\' backslash escape
 689var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
 690
 691func escape(p *Parser, data []byte, offset int) (int, ast.Node) {
 692	data = data[offset:]
 693
 694	if len(data) <= 1 {
 695		return 2, nil
 696	}
 697
 698	if p.extensions&NonBlockingSpace != 0 && data[1] == ' ' {
 699		return 2, &ast.NonBlockingSpace{}
 700	}
 701
 702	if p.extensions&BackslashLineBreak != 0 && data[1] == '\n' {
 703		return 2, &ast.Hardbreak{}
 704	}
 705
 706	if bytes.IndexByte(escapeChars, data[1]) < 0 {
 707		return 0, nil
 708	}
 709
 710	return 2, newTextNode(data[1:2])
 711}
 712
 713func unescapeText(ob *bytes.Buffer, src []byte) {
 714	i := 0
 715	for i < len(src) {
 716		org := i
 717		for i < len(src) && src[i] != '\\' {
 718			i++
 719		}
 720
 721		if i > org {
 722			ob.Write(src[org:i])
 723		}
 724
 725		if i+1 >= len(src) {
 726			break
 727		}
 728
 729		ob.WriteByte(src[i+1])
 730		i += 2
 731	}
 732}
 733
 734// '&' escaped when it doesn't belong to an entity
 735// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
 736func entity(p *Parser, data []byte, offset int) (int, ast.Node) {
 737	data = data[offset:]
 738
 739	end := skipCharN(data, 1, '#', 1)
 740	end = skipAlnum(data, end)
 741
 742	if end < len(data) && data[end] == ';' {
 743		end++ // real entity
 744	} else {
 745		return 0, nil // lone '&'
 746	}
 747
 748	ent := data[:end]
 749	// undo &amp; escaping or it will be converted to &amp;amp; by another
 750	// escaper in the renderer
 751	if bytes.Equal(ent, []byte("&amp;")) {
 752		ent = []byte{'&'}
 753	}
 754
 755	return end, newTextNode(ent)
 756}
 757
 758func linkEndsWithEntity(data []byte, linkEnd int) bool {
 759	entityRanges := htmlEntityRe.FindAllIndex(data[:linkEnd], -1)
 760	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
 761}
 762
 763// hasPrefixCaseInsensitive is a custom implementation of
 764//     strings.HasPrefix(strings.ToLower(s), prefix)
 765// we rolled our own because ToLower pulls in a huge machinery of lowercasing
 766// anything from Unicode and that's very slow. Since this func will only be
 767// used on ASCII protocol prefixes, we can take shortcuts.
 768func hasPrefixCaseInsensitive(s, prefix []byte) bool {
 769	if len(s) < len(prefix) {
 770		return false
 771	}
 772	delta := byte('a' - 'A')
 773	for i, b := range prefix {
 774		if b != s[i] && b != s[i]+delta {
 775			return false
 776		}
 777	}
 778	return true
 779}
 780
// protocolPrefixes lists the lowercase scheme prefixes that make
// maybeAutoLink hand a position over to autoLink.
var protocolPrefixes = [][]byte{
	[]byte("http://"),
	[]byte("https://"),
	[]byte("ftp://"),
	[]byte("file://"),
	[]byte("mailto:"),
}

// shortestPrefix is used by maybeAutoLink to reject input that is too short
// to hold any prefix.
const shortestPrefix = 6 // len("ftp://"), the shortest of the above
 790
 791func maybeAutoLink(p *Parser, data []byte, offset int) (int, ast.Node) {
 792	// quick check to rule out most false hits
 793	if p.insideLink || len(data) < offset+shortestPrefix {
 794		return 0, nil
 795	}
 796	for _, prefix := range protocolPrefixes {
 797		endOfHead := offset + 8 // 8 is the len() of the longest prefix
 798		if endOfHead > len(data) {
 799			endOfHead = len(data)
 800		}
 801		if hasPrefixCaseInsensitive(data[offset:endOfHead], prefix) {
 802			return autoLink(p, data, offset)
 803		}
 804	}
 805	return 0, nil
 806}
 807
// autoLink tries to turn the bare URL starting at data[offset] into a link.
//
// If the URL sits inside an HTML anchor matched by anchorRe, the rest of that
// anchor (from offset on) is returned as a raw HTML span instead. Otherwise
// the candidate must pass isSafeLink; it extends up to the first
// isEndOfLink byte, and trailing punctuation that does not belong to the URL
// is trimmed. It returns the number of bytes consumed and the node, or
// (0, nil) if no link was recognised.
func autoLink(p *Parser, data []byte, offset int) (int, ast.Node) {
	// Now a more expensive check to see if we're not inside an anchor element
	anchorStart := offset
	offsetFromAnchor := 0
	// walk back to the nearest '<' (or the start of data)
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		// emit the part of the anchor that starts at offset as raw HTML
		anchorClose := &ast.HTMLSpan{}
		anchorClose.Literal = anchorStr[offsetFromAnchor:]
		return len(anchorStr) - offsetFromAnchor, anchorClose
	}

	// scan backward for a word boundary
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isLetter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0, nil
	}

	// keep the full input for the backward delimiter scan below
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0, nil
	}

	// the link runs up to the first whitespace or '<'
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing delimiter
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backwards on the current line, balancing delimiters
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// the loop stopped with the delimiters balanced: drop the final one
		if openDelim == 0 {
			linkEnd--
		}
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		// the unescaped URL is both the destination and the visible text
		node := &ast.Link{
			Destination: uLink.Bytes(),
		}
		ast.AppendChild(node, newTextNode(uLink.Bytes()))
		return linkEnd, node
	}

	return linkEnd, nil
}
 927
 928func isEndOfLink(char byte) bool {
 929	return isSpace(char) || char == '<'
 930}
 931
 932var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")}
 933var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")}
 934
 935func isSafeLink(link []byte) bool {
 936	nLink := len(link)
 937	for _, path := range validPaths {
 938		nPath := len(path)
 939		linkPrefix := link[:nPath]
 940		if nLink >= nPath && bytes.Equal(linkPrefix, path) {
 941			if nLink == nPath {
 942				return true
 943			} else if isAlnum(link[nPath]) {
 944				return true
 945			}
 946		}
 947	}
 948
 949	for _, prefix := range validUris {
 950		// TODO: handle unicode here
 951		// case-insensitive prefix test
 952		nPrefix := len(prefix)
 953		if nLink > nPrefix {
 954			linkPrefix := bytes.ToLower(link[:nPrefix])
 955			if bytes.Equal(linkPrefix, prefix) && isAlnum(link[nPrefix]) {
 956				return true
 957			}
 958		}
 959	}
 960
 961	return false
 962}
 963
 964// return the length of the given tag, or 0 is it's not valid
 965func tagLength(data []byte) (autolink autolinkType, end int) {
 966	var i, j int
 967
 968	// a valid tag can't be shorter than 3 chars
 969	if len(data) < 3 {
 970		return notAutolink, 0
 971	}
 972
 973	// begins with a '<' optionally followed by '/', followed by letter or number
 974	if data[0] != '<' {
 975		return notAutolink, 0
 976	}
 977	if data[1] == '/' {
 978		i = 2
 979	} else {
 980		i = 1
 981	}
 982
 983	if !isAlnum(data[i]) {
 984		return notAutolink, 0
 985	}
 986
 987	// scheme test
 988	autolink = notAutolink
 989
 990	// try to find the beginning of an URI
 991	for i < len(data) && (isAlnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
 992		i++
 993	}
 994
 995	if i > 1 && i < len(data) && data[i] == '@' {
 996		if j = isMailtoAutoLink(data[i:]); j != 0 {
 997			return emailAutolink, i + j
 998		}
 999	}
1000
1001	if i > 2 && i < len(data) && data[i] == ':' {
1002		autolink = normalAutolink
1003		i++
1004	}
1005
1006	// complete autolink test: no whitespace or ' or "
1007	switch {
1008	case i >= len(data):
1009		autolink = notAutolink
1010	case autolink != notAutolink:
1011		j = i
1012
1013		for i < len(data) {
1014			if data[i] == '\\' {
1015				i += 2
1016			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isSpace(data[i]) {
1017				break
1018			} else {
1019				i++
1020			}
1021
1022		}
1023
1024		if i >= len(data) {
1025			return autolink, 0
1026		}
1027		if i > j && data[i] == '>' {
1028			return autolink, i + 1
1029		}
1030
1031		// one of the forbidden chars has been found
1032		autolink = notAutolink
1033	}
1034	i += bytes.IndexByte(data[i:], '>')
1035	if i < 0 {
1036		return autolink, 0
1037	}
1038	return autolink, i + 1
1039}
1040
1041// look for the address part of a mail autolink and '>'
1042// this is less strict than the original markdown e-mail address matching
1043func isMailtoAutoLink(data []byte) int {
1044	nb := 0
1045
1046	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
1047	for i, c := range data {
1048		if isAlnum(c) {
1049			continue
1050		}
1051
1052		switch c {
1053		case '@':
1054			nb++
1055
1056		case '-', '.', '_':
1057			break
1058
1059		case '>':
1060			if nb == 1 {
1061				return i + 1
1062			}
1063			return 0
1064		default:
1065			return 0
1066		}
1067	}
1068
1069	return 0
1070}
1071
// helperFindEmphChar returns the index of the next emphasis character c in
// data, skipping other constructs: escaped characters, code spans and
// bracketed link text followed by a '[' or '(' part. When such a construct is
// left unterminated, the first c seen inside it is returned instead. The
// result is 0 when nothing usable is found (including a match at index 0).
func helperFindEmphChar(data []byte, c byte) int {
	size := len(data)
	pos := 0

	for pos < size {
		// advance to the next byte of interest
		for pos < size {
			if b := data[pos]; b == c || b == '`' || b == '[' {
				break
			}
			pos++
		}
		if pos >= size {
			return 0
		}
		// do not count escaped chars
		if pos > 0 && data[pos-1] == '\\' {
			pos++
			continue
		}
		if data[pos] == c {
			return pos
		}

		// first c seen inside the construct being skipped (0 = none)
		fallback := 0
		switch data[pos] {
		case '`':
			// skip a code span
			for pos++; pos < size && data[pos] != '`'; pos++ {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
			}
			if pos >= size {
				return fallback
			}
			pos++

		case '[':
			// skip a link: first the bracketed text ...
			for pos++; pos < size && data[pos] != ']'; pos++ {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
			}
			pos++
			// ... then any spaces or newlines after it
			for pos < size && (data[pos] == ' ' || data[pos] == '\n') {
				pos++
			}
			if pos >= size {
				return fallback
			}

			opener := data[pos]
			if opener != '[' && opener != '(' { // not a link
				if fallback > 0 {
					return fallback
				}
				continue
			}

			// scan the second part up to the next occurrence of opener
			for pos++; pos < size && data[pos] != opener; pos++ {
				if fallback == 0 && data[pos] == c {
					return pos
				}
			}
			if pos >= size {
				return fallback
			}
			pos++
		}
	}
	return 0
}
1145
1146func helperEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
1147	i := 0
1148
1149	// skip one symbol if coming from emph3
1150	if len(data) > 1 && data[0] == c && data[1] == c {
1151		i = 1
1152	}
1153
1154	for i < len(data) {
1155		length := helperFindEmphChar(data[i:], c)
1156		if length == 0 {
1157			return 0, nil
1158		}
1159		i += length
1160		if i >= len(data) {
1161			return 0, nil
1162		}
1163
1164		if i+1 < len(data) && data[i+1] == c {
1165			i++
1166			continue
1167		}
1168
1169		if data[i] == c && !isSpace(data[i-1]) {
1170
1171			if p.extensions&NoIntraEmphasis != 0 {
1172				if !(i+1 == len(data) || isSpace(data[i+1]) || isPunctuation(data[i+1])) {
1173					continue
1174				}
1175			}
1176
1177			emph := &ast.Emph{}
1178			p.Inline(emph, data[:i])
1179			return i + 1, emph
1180		}
1181	}
1182
1183	return 0, nil
1184}
1185
1186func helperDoubleEmphasis(p *Parser, data []byte, c byte) (int, ast.Node) {
1187	i := 0
1188
1189	for i < len(data) {
1190		length := helperFindEmphChar(data[i:], c)
1191		if length == 0 {
1192			return 0, nil
1193		}
1194		i += length
1195
1196		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isSpace(data[i-1]) {
1197			var node ast.Node = &ast.Strong{}
1198			if c == '~' {
1199				node = &ast.Del{}
1200			}
1201			p.Inline(node, data[:i])
1202			return i + 2, node
1203		}
1204		i++
1205	}
1206	return 0, nil
1207}
1208
1209func helperTripleEmphasis(p *Parser, data []byte, offset int, c byte) (int, ast.Node) {
1210	i := 0
1211	origData := data
1212	data = data[offset:]
1213
1214	for i < len(data) {
1215		length := helperFindEmphChar(data[i:], c)
1216		if length == 0 {
1217			return 0, nil
1218		}
1219		i += length
1220
1221		// skip whitespace preceded symbols
1222		if data[i] != c || isSpace(data[i-1]) {
1223			continue
1224		}
1225
1226		switch {
1227		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1228			// triple symbol found
1229			strong := &ast.Strong{}
1230			em := &ast.Emph{}
1231			ast.AppendChild(strong, em)
1232			p.Inline(em, data[:i])
1233			return i + 3, strong
1234		case i+1 < len(data) && data[i+1] == c:
1235			// double symbol found, hand over to emph1
1236			length, node := helperEmphasis(p, origData[offset-2:], c)
1237			if length == 0 {
1238				return 0, nil
1239			}
1240			return length - 2, node
1241		default:
1242			// single symbol found, hand over to emph2
1243			length, node := helperDoubleEmphasis(p, origData[offset-1:], c)
1244			if length == 0 {
1245				return 0, nil
1246			}
1247			return length - 1, node
1248		}
1249	}
1250	return 0, nil
1251}
1252
1253// math handle inline math wrapped with '$'
1254func math(p *Parser, data []byte, offset int) (int, ast.Node) {
1255	data = data[offset:]
1256
1257	// too short, or block math
1258	if len(data) <= 2 || data[1] == '$' {
1259		return 0, nil
1260	}
1261
1262	// find next '$'
1263	var end int
1264	for end = 1; end < len(data) && data[end] != '$'; end++ {
1265	}
1266
1267	// $ not match
1268	if end == len(data) {
1269		return 0, nil
1270	}
1271
1272	// create inline math node
1273	math := &ast.Math{}
1274	math.Literal = data[1:end]
1275	return end + 1, math
1276}
1277
1278func newTextNode(d []byte) *ast.Text {
1279	return &ast.Text{ast.Leaf{Literal: d}}
1280}
1281
// normalizeURI is a placeholder for URI normalisation; for now it returns its
// argument unchanged.
func normalizeURI(s []byte) []byte {
	// TODO: implement
	return s
}