parse.go

   1// Copyright 2010 The Go Authors. All rights reserved.
   2// Use of this source code is governed by a BSD-style
   3// license that can be found in the LICENSE file.
   4
   5package html
   6
   7import (
   8	"errors"
   9	"fmt"
  10	"io"
  11	"strings"
  12
  13	a "golang.org/x/net/html/atom"
  14)
  15
  16// A parser implements the HTML5 parsing algorithm:
  17// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
  18type parser struct {
  19	// tokenizer provides the tokens for the parser.
  20	tokenizer *Tokenizer
  21	// tok is the most recently read token.
  22	tok Token
  23	// Self-closing tags like <hr/> are treated as start tags, except that
  24	// hasSelfClosingToken is set while they are being processed.
  25	hasSelfClosingToken bool
  26	// doc is the document root element.
  27	doc *Node
  28	// The stack of open elements (section 12.2.4.2) and active formatting
  29	// elements (section 12.2.4.3).
  30	oe, afe nodeStack
  31	// Element pointers (section 12.2.4.4).
  32	head, form *Node
  33	// Other parsing state flags (section 12.2.4.5).
  34	scripting, framesetOK bool
  35	// The stack of template insertion modes
  36	templateStack insertionModeStack
  37	// im is the current insertion mode.
  38	im insertionMode
  39	// originalIM is the insertion mode to go back to after completing a text
  40	// or inTableText insertion mode.
  41	originalIM insertionMode
  42	// fosterParenting is whether new elements should be inserted according to
  43	// the foster parenting rules (section 12.2.6.1).
  44	fosterParenting bool
  45	// quirks is whether the parser is operating in "quirks mode."
  46	quirks bool
  47	// fragment is whether the parser is parsing an HTML fragment.
  48	fragment bool
  49	// context is the context element when parsing an HTML fragment
  50	// (section 12.4).
  51	context *Node
  52}
  53
  54func (p *parser) top() *Node {
  55	if n := p.oe.top(); n != nil {
  56		return n
  57	}
  58	return p.doc
  59}
  60
  61// Stop tags for use in popUntil. These come from section 12.2.4.2.
  62var (
  63	defaultScopeStopTags = map[string][]a.Atom{
  64		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
  65		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
  66		"svg":  {a.Desc, a.ForeignObject, a.Title},
  67	}
  68)
  69
// scope selects which set of stop tags is used when searching or clearing the
// stack of open elements.
type scope int

const (
	// defaultScope stops only at the tags in defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope is defaultScope plus "ol" and "ul".
	listItemScope
	// buttonScope is defaultScope plus "button".
	buttonScope
	// tableScope stops at "html", "table" and "template".
	tableScope
	// tableRowScope is accepted by clearStackToContext only, where it stops
	// at "html", "tr" and "template".
	tableRowScope
	// tableBodyScope is accepted by clearStackToContext only, where it stops
	// at "html", "tbody", "tfoot", "thead" and "template".
	tableBodyScope
	// selectScope stops at every tag except "optgroup" and "option".
	selectScope
)
  81
  82// popUntil pops the stack of open elements at the highest element whose tag
  83// is in matchTags, provided there is no higher element in the scope's stop
  84// tags (as defined in section 12.2.4.2). It returns whether or not there was
  85// such an element. If there was not, popUntil leaves the stack unchanged.
  86//
  87// For example, the set of stop tags for table scope is: "html", "table". If
  88// the stack was:
  89// ["html", "body", "font", "table", "b", "i", "u"]
  90// then popUntil(tableScope, "font") would return false, but
  91// popUntil(tableScope, "i") would return true and the stack would become:
  92// ["html", "body", "font", "table", "b"]
  93//
  94// If an element's tag is in both the stop tags and matchTags, then the stack
  95// will be popped and the function returns true (provided, of course, there was
  96// no higher element in the stack that was also in the stop tags). For example,
  97// popUntil(tableScope, "table") returns true and leaves:
  98// ["html", "body", "font"]
  99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
 100	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
 101		p.oe = p.oe[:i]
 102		return true
 103	}
 104	return false
 105}
 106
 107// indexOfElementInScope returns the index in p.oe of the highest element whose
 108// tag is in matchTags that is in scope. If no matching element is in scope, it
 109// returns -1.
 110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
 111	for i := len(p.oe) - 1; i >= 0; i-- {
 112		tagAtom := p.oe[i].DataAtom
 113		if p.oe[i].Namespace == "" {
 114			for _, t := range matchTags {
 115				if t == tagAtom {
 116					return i
 117				}
 118			}
 119			switch s {
 120			case defaultScope:
 121				// No-op.
 122			case listItemScope:
 123				if tagAtom == a.Ol || tagAtom == a.Ul {
 124					return -1
 125				}
 126			case buttonScope:
 127				if tagAtom == a.Button {
 128					return -1
 129				}
 130			case tableScope:
 131				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
 132					return -1
 133				}
 134			case selectScope:
 135				if tagAtom != a.Optgroup && tagAtom != a.Option {
 136					return -1
 137				}
 138			default:
 139				panic("unreachable")
 140			}
 141		}
 142		switch s {
 143		case defaultScope, listItemScope, buttonScope:
 144			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
 145				if t == tagAtom {
 146					return -1
 147				}
 148			}
 149		}
 150	}
 151	return -1
 152}
 153
 154// elementInScope is like popUntil, except that it doesn't modify the stack of
 155// open elements.
 156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
 157	return p.indexOfElementInScope(s, matchTags...) != -1
 158}
 159
 160// clearStackToContext pops elements off the stack of open elements until a
 161// scope-defined element is found.
 162func (p *parser) clearStackToContext(s scope) {
 163	for i := len(p.oe) - 1; i >= 0; i-- {
 164		tagAtom := p.oe[i].DataAtom
 165		switch s {
 166		case tableScope:
 167			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
 168				p.oe = p.oe[:i+1]
 169				return
 170			}
 171		case tableRowScope:
 172			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
 173				p.oe = p.oe[:i+1]
 174				return
 175			}
 176		case tableBodyScope:
 177			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
 178				p.oe = p.oe[:i+1]
 179				return
 180			}
 181		default:
 182			panic("unreachable")
 183		}
 184	}
 185}
 186
 187// generateImpliedEndTags pops nodes off the stack of open elements as long as
 188// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
 189// If exceptions are specified, nodes with that name will not be popped off.
 190func (p *parser) generateImpliedEndTags(exceptions ...string) {
 191	var i int
 192loop:
 193	for i = len(p.oe) - 1; i >= 0; i-- {
 194		n := p.oe[i]
 195		if n.Type == ElementNode {
 196			switch n.DataAtom {
 197			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
 198				for _, except := range exceptions {
 199					if n.Data == except {
 200						break loop
 201					}
 202				}
 203				continue
 204			}
 205		}
 206		break
 207	}
 208
 209	p.oe = p.oe[:i+1]
 210}
 211
 212// addChild adds a child node n to the top element, and pushes n onto the stack
 213// of open elements if it is an element node.
 214func (p *parser) addChild(n *Node) {
 215	if p.shouldFosterParent() {
 216		p.fosterParent(n)
 217	} else {
 218		p.top().AppendChild(n)
 219	}
 220
 221	if n.Type == ElementNode {
 222		p.oe = append(p.oe, n)
 223	}
 224}
 225
 226// shouldFosterParent returns whether the next node to be added should be
 227// foster parented.
 228func (p *parser) shouldFosterParent() bool {
 229	if p.fosterParenting {
 230		switch p.top().DataAtom {
 231		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
 232			return true
 233		}
 234	}
 235	return false
 236}
 237
 238// fosterParent adds a child node according to the foster parenting rules.
 239// Section 12.2.6.1, "foster parenting".
 240func (p *parser) fosterParent(n *Node) {
 241	var table, parent, prev, template *Node
 242	var i int
 243	for i = len(p.oe) - 1; i >= 0; i-- {
 244		if p.oe[i].DataAtom == a.Table {
 245			table = p.oe[i]
 246			break
 247		}
 248	}
 249
 250	var j int
 251	for j = len(p.oe) - 1; j >= 0; j-- {
 252		if p.oe[j].DataAtom == a.Template {
 253			template = p.oe[j]
 254			break
 255		}
 256	}
 257
 258	if template != nil && (table == nil || j > i) {
 259		template.AppendChild(n)
 260		return
 261	}
 262
 263	if table == nil {
 264		// The foster parent is the html element.
 265		parent = p.oe[0]
 266	} else {
 267		parent = table.Parent
 268	}
 269	if parent == nil {
 270		parent = p.oe[i-1]
 271	}
 272
 273	if table != nil {
 274		prev = table.PrevSibling
 275	} else {
 276		prev = parent.LastChild
 277	}
 278	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
 279		prev.Data += n.Data
 280		return
 281	}
 282
 283	parent.InsertBefore(n, table)
 284}
 285
 286// addText adds text to the preceding node if it is a text node, or else it
 287// calls addChild with a new text node.
 288func (p *parser) addText(text string) {
 289	if text == "" {
 290		return
 291	}
 292
 293	if p.shouldFosterParent() {
 294		p.fosterParent(&Node{
 295			Type: TextNode,
 296			Data: text,
 297		})
 298		return
 299	}
 300
 301	t := p.top()
 302	if n := t.LastChild; n != nil && n.Type == TextNode {
 303		n.Data += text
 304		return
 305	}
 306	p.addChild(&Node{
 307		Type: TextNode,
 308		Data: text,
 309	})
 310}
 311
 312// addElement adds a child element based on the current token.
 313func (p *parser) addElement() {
 314	p.addChild(&Node{
 315		Type:     ElementNode,
 316		DataAtom: p.tok.DataAtom,
 317		Data:     p.tok.Data,
 318		Attr:     p.tok.Attr,
 319	})
 320}
 321
 322// Section 12.2.4.3.
 323func (p *parser) addFormattingElement() {
 324	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
 325	p.addElement()
 326
 327	// Implement the Noah's Ark clause, but with three per family instead of two.
 328	identicalElements := 0
 329findIdenticalElements:
 330	for i := len(p.afe) - 1; i >= 0; i-- {
 331		n := p.afe[i]
 332		if n.Type == scopeMarkerNode {
 333			break
 334		}
 335		if n.Type != ElementNode {
 336			continue
 337		}
 338		if n.Namespace != "" {
 339			continue
 340		}
 341		if n.DataAtom != tagAtom {
 342			continue
 343		}
 344		if len(n.Attr) != len(attr) {
 345			continue
 346		}
 347	compareAttributes:
 348		for _, t0 := range n.Attr {
 349			for _, t1 := range attr {
 350				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
 351					// Found a match for this attribute, continue with the next attribute.
 352					continue compareAttributes
 353				}
 354			}
 355			// If we get here, there is no attribute that matches a.
 356			// Therefore the element is not identical to the new one.
 357			continue findIdenticalElements
 358		}
 359
 360		identicalElements++
 361		if identicalElements >= 3 {
 362			p.afe.remove(n)
 363		}
 364	}
 365
 366	p.afe = append(p.afe, p.top())
 367}
 368
 369// Section 12.2.4.3.
 370func (p *parser) clearActiveFormattingElements() {
 371	for {
 372		n := p.afe.pop()
 373		if len(p.afe) == 0 || n.Type == scopeMarkerNode {
 374			return
 375		}
 376	}
 377}
 378
 379// Section 12.2.4.3.
 380func (p *parser) reconstructActiveFormattingElements() {
 381	n := p.afe.top()
 382	if n == nil {
 383		return
 384	}
 385	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
 386		return
 387	}
 388	i := len(p.afe) - 1
 389	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
 390		if i == 0 {
 391			i = -1
 392			break
 393		}
 394		i--
 395		n = p.afe[i]
 396	}
 397	for {
 398		i++
 399		clone := p.afe[i].clone()
 400		p.addChild(clone)
 401		p.afe[i] = clone
 402		if i == len(p.afe)-1 {
 403			break
 404		}
 405	}
 406}
 407
// acknowledgeSelfClosingTag clears hasSelfClosingToken, marking the
// self-closing flag of the token being processed as handled.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
 412
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed. The modes in this file that
// return false have first switched p.im, rewritten p.tok or processed an
// implied token, so that the unconsumed token can be handled again.
type insertionMode func(*parser) bool
 418
 419// setOriginalIM sets the insertion mode to return to after completing a text or
 420// inTableText insertion mode.
 421// Section 12.2.4.1, "using the rules for".
 422func (p *parser) setOriginalIM() {
 423	if p.originalIM != nil {
 424		panic("html: bad parser state: originalIM was set twice")
 425	}
 426	p.originalIM = p.im
 427}
 428
 429// Section 12.2.4.1, "reset the insertion mode".
 430func (p *parser) resetInsertionMode() {
 431	for i := len(p.oe) - 1; i >= 0; i-- {
 432		n := p.oe[i]
 433		last := i == 0
 434		if last && p.context != nil {
 435			n = p.context
 436		}
 437
 438		switch n.DataAtom {
 439		case a.Select:
 440			if !last {
 441				for ancestor, first := n, p.oe[0]; ancestor != first; {
 442					if ancestor == first {
 443						break
 444					}
 445					ancestor = p.oe[p.oe.index(ancestor)-1]
 446					switch ancestor.DataAtom {
 447					case a.Template:
 448						p.im = inSelectIM
 449						return
 450					case a.Table:
 451						p.im = inSelectInTableIM
 452						return
 453					}
 454				}
 455			}
 456			p.im = inSelectIM
 457		case a.Td, a.Th:
 458			// TODO: remove this divergence from the HTML5 spec.
 459			//
 460			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 461			p.im = inCellIM
 462		case a.Tr:
 463			p.im = inRowIM
 464		case a.Tbody, a.Thead, a.Tfoot:
 465			p.im = inTableBodyIM
 466		case a.Caption:
 467			p.im = inCaptionIM
 468		case a.Colgroup:
 469			p.im = inColumnGroupIM
 470		case a.Table:
 471			p.im = inTableIM
 472		case a.Template:
 473			p.im = p.templateStack.top()
 474		case a.Head:
 475			// TODO: remove this divergence from the HTML5 spec.
 476			//
 477			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 478			p.im = inHeadIM
 479		case a.Body:
 480			p.im = inBodyIM
 481		case a.Frameset:
 482			p.im = inFramesetIM
 483		case a.Html:
 484			if p.head == nil {
 485				p.im = beforeHeadIM
 486			} else {
 487				p.im = afterHeadIM
 488			}
 489		default:
 490			if last {
 491				p.im = inBodyIM
 492				return
 493			}
 494			continue
 495		}
 496		return
 497	}
 498}
 499
// whitespace is the cutset (space, tab, carriage return, line feed, form feed)
// that the insertion modes pass to strings.TrimLeft to strip leading
// whitespace from text tokens.
const whitespace = " \t\r\n\f"
 501
 502// Section 12.2.6.4.1.
 503func initialIM(p *parser) bool {
 504	switch p.tok.Type {
 505	case TextToken:
 506		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 507		if len(p.tok.Data) == 0 {
 508			// It was all whitespace, so ignore it.
 509			return true
 510		}
 511	case CommentToken:
 512		p.doc.AppendChild(&Node{
 513			Type: CommentNode,
 514			Data: p.tok.Data,
 515		})
 516		return true
 517	case DoctypeToken:
 518		n, quirks := parseDoctype(p.tok.Data)
 519		p.doc.AppendChild(n)
 520		p.quirks = quirks
 521		p.im = beforeHTMLIM
 522		return true
 523	}
 524	p.quirks = true
 525	p.im = beforeHTMLIM
 526	return false
 527}
 528
 529// Section 12.2.6.4.2.
 530func beforeHTMLIM(p *parser) bool {
 531	switch p.tok.Type {
 532	case DoctypeToken:
 533		// Ignore the token.
 534		return true
 535	case TextToken:
 536		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 537		if len(p.tok.Data) == 0 {
 538			// It was all whitespace, so ignore it.
 539			return true
 540		}
 541	case StartTagToken:
 542		if p.tok.DataAtom == a.Html {
 543			p.addElement()
 544			p.im = beforeHeadIM
 545			return true
 546		}
 547	case EndTagToken:
 548		switch p.tok.DataAtom {
 549		case a.Head, a.Body, a.Html, a.Br:
 550			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 551			return false
 552		default:
 553			// Ignore the token.
 554			return true
 555		}
 556	case CommentToken:
 557		p.doc.AppendChild(&Node{
 558			Type: CommentNode,
 559			Data: p.tok.Data,
 560		})
 561		return true
 562	}
 563	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 564	return false
 565}
 566
 567// Section 12.2.6.4.3.
 568func beforeHeadIM(p *parser) bool {
 569	switch p.tok.Type {
 570	case TextToken:
 571		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 572		if len(p.tok.Data) == 0 {
 573			// It was all whitespace, so ignore it.
 574			return true
 575		}
 576	case StartTagToken:
 577		switch p.tok.DataAtom {
 578		case a.Head:
 579			p.addElement()
 580			p.head = p.top()
 581			p.im = inHeadIM
 582			return true
 583		case a.Html:
 584			return inBodyIM(p)
 585		}
 586	case EndTagToken:
 587		switch p.tok.DataAtom {
 588		case a.Head, a.Body, a.Html, a.Br:
 589			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 590			return false
 591		default:
 592			// Ignore the token.
 593			return true
 594		}
 595	case CommentToken:
 596		p.addChild(&Node{
 597			Type: CommentNode,
 598			Data: p.tok.Data,
 599		})
 600		return true
 601	case DoctypeToken:
 602		// Ignore the token.
 603		return true
 604	}
 605
 606	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 607	return false
 608}
 609
 610// Section 12.2.6.4.4.
 611func inHeadIM(p *parser) bool {
 612	switch p.tok.Type {
 613	case TextToken:
 614		s := strings.TrimLeft(p.tok.Data, whitespace)
 615		if len(s) < len(p.tok.Data) {
 616			// Add the initial whitespace to the current node.
 617			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 618			if s == "" {
 619				return true
 620			}
 621			p.tok.Data = s
 622		}
 623	case StartTagToken:
 624		switch p.tok.DataAtom {
 625		case a.Html:
 626			return inBodyIM(p)
 627		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
 628			p.addElement()
 629			p.oe.pop()
 630			p.acknowledgeSelfClosingTag()
 631			return true
 632		case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
 633			p.addElement()
 634			p.setOriginalIM()
 635			p.im = textIM
 636			return true
 637		case a.Head:
 638			// Ignore the token.
 639			return true
 640		case a.Template:
 641			p.addElement()
 642			p.afe = append(p.afe, &scopeMarker)
 643			p.framesetOK = false
 644			p.im = inTemplateIM
 645			p.templateStack = append(p.templateStack, inTemplateIM)
 646			return true
 647		}
 648	case EndTagToken:
 649		switch p.tok.DataAtom {
 650		case a.Head:
 651			p.oe.pop()
 652			p.im = afterHeadIM
 653			return true
 654		case a.Body, a.Html, a.Br:
 655			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
 656			return false
 657		case a.Template:
 658			if !p.oe.contains(a.Template) {
 659				return true
 660			}
 661			// TODO: remove this divergence from the HTML5 spec.
 662			//
 663			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 664			p.generateImpliedEndTags()
 665			for i := len(p.oe) - 1; i >= 0; i-- {
 666				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
 667					p.oe = p.oe[:i]
 668					break
 669				}
 670			}
 671			p.clearActiveFormattingElements()
 672			p.templateStack.pop()
 673			p.resetInsertionMode()
 674			return true
 675		default:
 676			// Ignore the token.
 677			return true
 678		}
 679	case CommentToken:
 680		p.addChild(&Node{
 681			Type: CommentNode,
 682			Data: p.tok.Data,
 683		})
 684		return true
 685	case DoctypeToken:
 686		// Ignore the token.
 687		return true
 688	}
 689
 690	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
 691	return false
 692}
 693
 694// Section 12.2.6.4.6.
 695func afterHeadIM(p *parser) bool {
 696	switch p.tok.Type {
 697	case TextToken:
 698		s := strings.TrimLeft(p.tok.Data, whitespace)
 699		if len(s) < len(p.tok.Data) {
 700			// Add the initial whitespace to the current node.
 701			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 702			if s == "" {
 703				return true
 704			}
 705			p.tok.Data = s
 706		}
 707	case StartTagToken:
 708		switch p.tok.DataAtom {
 709		case a.Html:
 710			return inBodyIM(p)
 711		case a.Body:
 712			p.addElement()
 713			p.framesetOK = false
 714			p.im = inBodyIM
 715			return true
 716		case a.Frameset:
 717			p.addElement()
 718			p.im = inFramesetIM
 719			return true
 720		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 721			p.oe = append(p.oe, p.head)
 722			defer p.oe.remove(p.head)
 723			return inHeadIM(p)
 724		case a.Head:
 725			// Ignore the token.
 726			return true
 727		}
 728	case EndTagToken:
 729		switch p.tok.DataAtom {
 730		case a.Body, a.Html, a.Br:
 731			// Drop down to creating an implied <body> tag.
 732		case a.Template:
 733			return inHeadIM(p)
 734		default:
 735			// Ignore the token.
 736			return true
 737		}
 738	case CommentToken:
 739		p.addChild(&Node{
 740			Type: CommentNode,
 741			Data: p.tok.Data,
 742		})
 743		return true
 744	case DoctypeToken:
 745		// Ignore the token.
 746		return true
 747	}
 748
 749	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
 750	p.framesetOK = true
 751	return false
 752}
 753
 754// copyAttributes copies attributes of src not found on dst to dst.
 755func copyAttributes(dst *Node, src Token) {
 756	if len(src.Attr) == 0 {
 757		return
 758	}
 759	attr := map[string]string{}
 760	for _, t := range dst.Attr {
 761		attr[t.Key] = t.Val
 762	}
 763	for _, t := range src.Attr {
 764		if _, ok := attr[t.Key]; !ok {
 765			dst.Attr = append(dst.Attr, t)
 766			attr[t.Key] = t.Val
 767		}
 768	}
 769}
 770
 771// Section 12.2.6.4.7.
 772func inBodyIM(p *parser) bool {
 773	switch p.tok.Type {
 774	case TextToken:
 775		d := p.tok.Data
 776		switch n := p.oe.top(); n.DataAtom {
 777		case a.Pre, a.Listing:
 778			if n.FirstChild == nil {
 779				// Ignore a newline at the start of a <pre> block.
 780				if d != "" && d[0] == '\r' {
 781					d = d[1:]
 782				}
 783				if d != "" && d[0] == '\n' {
 784					d = d[1:]
 785				}
 786			}
 787		}
 788		d = strings.Replace(d, "\x00", "", -1)
 789		if d == "" {
 790			return true
 791		}
 792		p.reconstructActiveFormattingElements()
 793		p.addText(d)
 794		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
 795			// There were non-whitespace characters inserted.
 796			p.framesetOK = false
 797		}
 798	case StartTagToken:
 799		switch p.tok.DataAtom {
 800		case a.Html:
 801			if p.oe.contains(a.Template) {
 802				return true
 803			}
 804			copyAttributes(p.oe[0], p.tok)
 805		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 806			return inHeadIM(p)
 807		case a.Body:
 808			if p.oe.contains(a.Template) {
 809				return true
 810			}
 811			if len(p.oe) >= 2 {
 812				body := p.oe[1]
 813				if body.Type == ElementNode && body.DataAtom == a.Body {
 814					p.framesetOK = false
 815					copyAttributes(body, p.tok)
 816				}
 817			}
 818		case a.Frameset:
 819			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
 820				// Ignore the token.
 821				return true
 822			}
 823			body := p.oe[1]
 824			if body.Parent != nil {
 825				body.Parent.RemoveChild(body)
 826			}
 827			p.oe = p.oe[:1]
 828			p.addElement()
 829			p.im = inFramesetIM
 830			return true
 831		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
 832			p.popUntil(buttonScope, a.P)
 833			p.addElement()
 834		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 835			p.popUntil(buttonScope, a.P)
 836			switch n := p.top(); n.DataAtom {
 837			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 838				p.oe.pop()
 839			}
 840			p.addElement()
 841		case a.Pre, a.Listing:
 842			p.popUntil(buttonScope, a.P)
 843			p.addElement()
 844			// The newline, if any, will be dealt with by the TextToken case.
 845			p.framesetOK = false
 846		case a.Form:
 847			if p.form != nil && !p.oe.contains(a.Template) {
 848				// Ignore the token
 849				return true
 850			}
 851			p.popUntil(buttonScope, a.P)
 852			p.addElement()
 853			if !p.oe.contains(a.Template) {
 854				p.form = p.top()
 855			}
 856		case a.Li:
 857			p.framesetOK = false
 858			for i := len(p.oe) - 1; i >= 0; i-- {
 859				node := p.oe[i]
 860				switch node.DataAtom {
 861				case a.Li:
 862					p.oe = p.oe[:i]
 863				case a.Address, a.Div, a.P:
 864					continue
 865				default:
 866					if !isSpecialElement(node) {
 867						continue
 868					}
 869				}
 870				break
 871			}
 872			p.popUntil(buttonScope, a.P)
 873			p.addElement()
 874		case a.Dd, a.Dt:
 875			p.framesetOK = false
 876			for i := len(p.oe) - 1; i >= 0; i-- {
 877				node := p.oe[i]
 878				switch node.DataAtom {
 879				case a.Dd, a.Dt:
 880					p.oe = p.oe[:i]
 881				case a.Address, a.Div, a.P:
 882					continue
 883				default:
 884					if !isSpecialElement(node) {
 885						continue
 886					}
 887				}
 888				break
 889			}
 890			p.popUntil(buttonScope, a.P)
 891			p.addElement()
 892		case a.Plaintext:
 893			p.popUntil(buttonScope, a.P)
 894			p.addElement()
 895		case a.Button:
 896			p.popUntil(defaultScope, a.Button)
 897			p.reconstructActiveFormattingElements()
 898			p.addElement()
 899			p.framesetOK = false
 900		case a.A:
 901			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
 902				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
 903					p.inBodyEndTagFormatting(a.A)
 904					p.oe.remove(n)
 905					p.afe.remove(n)
 906					break
 907				}
 908			}
 909			p.reconstructActiveFormattingElements()
 910			p.addFormattingElement()
 911		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
 912			p.reconstructActiveFormattingElements()
 913			p.addFormattingElement()
 914		case a.Nobr:
 915			p.reconstructActiveFormattingElements()
 916			if p.elementInScope(defaultScope, a.Nobr) {
 917				p.inBodyEndTagFormatting(a.Nobr)
 918				p.reconstructActiveFormattingElements()
 919			}
 920			p.addFormattingElement()
 921		case a.Applet, a.Marquee, a.Object:
 922			p.reconstructActiveFormattingElements()
 923			p.addElement()
 924			p.afe = append(p.afe, &scopeMarker)
 925			p.framesetOK = false
 926		case a.Table:
 927			if !p.quirks {
 928				p.popUntil(buttonScope, a.P)
 929			}
 930			p.addElement()
 931			p.framesetOK = false
 932			p.im = inTableIM
 933			return true
 934		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
 935			p.reconstructActiveFormattingElements()
 936			p.addElement()
 937			p.oe.pop()
 938			p.acknowledgeSelfClosingTag()
 939			if p.tok.DataAtom == a.Input {
 940				for _, t := range p.tok.Attr {
 941					if t.Key == "type" {
 942						if strings.ToLower(t.Val) == "hidden" {
 943							// Skip setting framesetOK = false
 944							return true
 945						}
 946					}
 947				}
 948			}
 949			p.framesetOK = false
 950		case a.Param, a.Source, a.Track:
 951			p.addElement()
 952			p.oe.pop()
 953			p.acknowledgeSelfClosingTag()
 954		case a.Hr:
 955			p.popUntil(buttonScope, a.P)
 956			p.addElement()
 957			p.oe.pop()
 958			p.acknowledgeSelfClosingTag()
 959			p.framesetOK = false
 960		case a.Image:
 961			p.tok.DataAtom = a.Img
 962			p.tok.Data = a.Img.String()
 963			return false
 964		case a.Isindex:
 965			if p.form != nil {
 966				// Ignore the token.
 967				return true
 968			}
 969			action := ""
 970			prompt := "This is a searchable index. Enter search keywords: "
 971			attr := []Attribute{{Key: "name", Val: "isindex"}}
 972			for _, t := range p.tok.Attr {
 973				switch t.Key {
 974				case "action":
 975					action = t.Val
 976				case "name":
 977					// Ignore the attribute.
 978				case "prompt":
 979					prompt = t.Val
 980				default:
 981					attr = append(attr, t)
 982				}
 983			}
 984			p.acknowledgeSelfClosingTag()
 985			p.popUntil(buttonScope, a.P)
 986			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
 987			if p.form == nil {
 988				// NOTE: The 'isindex' element has been removed,
 989				// and the 'template' element has not been designed to be
 990				// collaborative with the index element.
 991				//
 992				// Ignore the token.
 993				return true
 994			}
 995			if action != "" {
 996				p.form.Attr = []Attribute{{Key: "action", Val: action}}
 997			}
 998			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
 999			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
1000			p.addText(prompt)
1001			p.addChild(&Node{
1002				Type:     ElementNode,
1003				DataAtom: a.Input,
1004				Data:     a.Input.String(),
1005				Attr:     attr,
1006			})
1007			p.oe.pop()
1008			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
1009			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
1010			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
1011		case a.Textarea:
1012			p.addElement()
1013			p.setOriginalIM()
1014			p.framesetOK = false
1015			p.im = textIM
1016		case a.Xmp:
1017			p.popUntil(buttonScope, a.P)
1018			p.reconstructActiveFormattingElements()
1019			p.framesetOK = false
1020			p.addElement()
1021			p.setOriginalIM()
1022			p.im = textIM
1023		case a.Iframe:
1024			p.framesetOK = false
1025			p.addElement()
1026			p.setOriginalIM()
1027			p.im = textIM
1028		case a.Noembed, a.Noscript:
1029			p.addElement()
1030			p.setOriginalIM()
1031			p.im = textIM
1032		case a.Select:
1033			p.reconstructActiveFormattingElements()
1034			p.addElement()
1035			p.framesetOK = false
1036			p.im = inSelectIM
1037			return true
1038		case a.Optgroup, a.Option:
1039			if p.top().DataAtom == a.Option {
1040				p.oe.pop()
1041			}
1042			p.reconstructActiveFormattingElements()
1043			p.addElement()
1044		case a.Rb, a.Rtc:
1045			if p.elementInScope(defaultScope, a.Ruby) {
1046				p.generateImpliedEndTags()
1047			}
1048			p.addElement()
1049		case a.Rp, a.Rt:
1050			if p.elementInScope(defaultScope, a.Ruby) {
1051				p.generateImpliedEndTags("rtc")
1052			}
1053			p.addElement()
1054		case a.Math, a.Svg:
1055			p.reconstructActiveFormattingElements()
1056			if p.tok.DataAtom == a.Math {
1057				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1058			} else {
1059				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1060			}
1061			adjustForeignAttributes(p.tok.Attr)
1062			p.addElement()
1063			p.top().Namespace = p.tok.Data
1064			if p.hasSelfClosingToken {
1065				p.oe.pop()
1066				p.acknowledgeSelfClosingTag()
1067			}
1068			return true
1069		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1070			// Ignore the token.
1071		default:
1072			p.reconstructActiveFormattingElements()
1073			p.addElement()
1074		}
1075	case EndTagToken:
1076		switch p.tok.DataAtom {
1077		case a.Body:
1078			if p.elementInScope(defaultScope, a.Body) {
1079				p.im = afterBodyIM
1080			}
1081		case a.Html:
1082			if p.elementInScope(defaultScope, a.Body) {
1083				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1084				return false
1085			}
1086			return true
1087		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
1088			p.popUntil(defaultScope, p.tok.DataAtom)
1089		case a.Form:
1090			if p.oe.contains(a.Template) {
1091				i := p.indexOfElementInScope(defaultScope, a.Form)
1092				if i == -1 {
1093					// Ignore the token.
1094					return true
1095				}
1096				p.generateImpliedEndTags()
1097				if p.oe[i].DataAtom != a.Form {
1098					// Ignore the token.
1099					return true
1100				}
1101				p.popUntil(defaultScope, a.Form)
1102			} else {
1103				node := p.form
1104				p.form = nil
1105				i := p.indexOfElementInScope(defaultScope, a.Form)
1106				if node == nil || i == -1 || p.oe[i] != node {
1107					// Ignore the token.
1108					return true
1109				}
1110				p.generateImpliedEndTags()
1111				p.oe.remove(node)
1112			}
1113		case a.P:
1114			if !p.elementInScope(buttonScope, a.P) {
1115				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1116			}
1117			p.popUntil(buttonScope, a.P)
1118		case a.Li:
1119			p.popUntil(listItemScope, a.Li)
1120		case a.Dd, a.Dt:
1121			p.popUntil(defaultScope, p.tok.DataAtom)
1122		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1123			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1124		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1125			p.inBodyEndTagFormatting(p.tok.DataAtom)
1126		case a.Applet, a.Marquee, a.Object:
1127			if p.popUntil(defaultScope, p.tok.DataAtom) {
1128				p.clearActiveFormattingElements()
1129			}
1130		case a.Br:
1131			p.tok.Type = StartTagToken
1132			return false
1133		case a.Template:
1134			return inHeadIM(p)
1135		default:
1136			p.inBodyEndTagOther(p.tok.DataAtom)
1137		}
1138	case CommentToken:
1139		p.addChild(&Node{
1140			Type: CommentNode,
1141			Data: p.tok.Data,
1142		})
1143	case ErrorToken:
1144		// TODO: remove this divergence from the HTML5 spec.
1145		if len(p.templateStack) > 0 {
1146			p.im = inTemplateIM
1147			return false
1148		} else {
1149			for _, e := range p.oe {
1150				switch e.DataAtom {
1151				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1152					a.Thead, a.Tr, a.Body, a.Html:
1153				default:
1154					return true
1155				}
1156			}
1157		}
1158	}
1159
1160	return true
1161}
1162
// inBodyEndTagFormatting handles an end tag for a formatting element (the
// caller passes the atom of that end tag as tagAtom) while in the "in body"
// insertion mode.
//
// It may rewrite both the stack of open elements (p.oe) and the list of
// active formatting elements (p.afe), and may re-parent nodes that are
// already in the tree.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop. It runs at most eight times; every early
	// exit below returns from the function.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element: the last entry in the list of
		// active formatting elements with a matching atom, searching
		// backwards and stopping at the nearest scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No candidate: treat the token like any other end tag.
			p.inBodyEndTagOther(tagAtom)
			return
		}
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			// The element is no longer open; just drop it from the list of
			// active formatting elements.
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block: the first special element
		// found when scanning the stack of open elements from the
		// formatting element towards the top of the stack.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop the stack up to and including the
			// formatting element and remove it from the active list.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// NOTE(review): p.oe[feIndex-1] assumes the formatting element is
		// never the bottom-most entry of the stack (feIndex > 0) - confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2
		for j := 0; j < 3; j++ {
			// Step 13.3. Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5. Nodes that are not active formatting
			// elements are simply dropped from the stack.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7. Replace node with a clone in both the active list
			// and the stack of open elements.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8. On the first reparenting, move the bookmark to
			// just after the cloned node in the active list.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9. Detach lastNode and append it to the clone.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		case a.Template:
			// TODO: remove namespace checking
			if commonAncestor.Namespace == "html" {
				commonAncestor = commonAncestor.LastChild
			}
			fallthrough
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements: the clone takes a
		// position immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1292
1293// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1294// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1295// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1296func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
1297	for i := len(p.oe) - 1; i >= 0; i-- {
1298		if p.oe[i].DataAtom == tagAtom {
1299			p.oe = p.oe[:i]
1300			break
1301		}
1302		if isSpecialElement(p.oe[i]) {
1303			break
1304		}
1305	}
1306}
1307
1308// Section 12.2.6.4.8.
1309func textIM(p *parser) bool {
1310	switch p.tok.Type {
1311	case ErrorToken:
1312		p.oe.pop()
1313	case TextToken:
1314		d := p.tok.Data
1315		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1316			// Ignore a newline at the start of a <textarea> block.
1317			if d != "" && d[0] == '\r' {
1318				d = d[1:]
1319			}
1320			if d != "" && d[0] == '\n' {
1321				d = d[1:]
1322			}
1323		}
1324		if d == "" {
1325			return true
1326		}
1327		p.addText(d)
1328		return true
1329	case EndTagToken:
1330		p.oe.pop()
1331	}
1332	p.im = p.originalIM
1333	p.originalIM = nil
1334	return p.tok.Type == EndTagToken
1335}
1336
1337// Section 12.2.6.4.9.
1338func inTableIM(p *parser) bool {
1339	switch p.tok.Type {
1340	case TextToken:
1341		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1342		switch p.oe.top().DataAtom {
1343		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1344			if strings.Trim(p.tok.Data, whitespace) == "" {
1345				p.addText(p.tok.Data)
1346				return true
1347			}
1348		}
1349	case StartTagToken:
1350		switch p.tok.DataAtom {
1351		case a.Caption:
1352			p.clearStackToContext(tableScope)
1353			p.afe = append(p.afe, &scopeMarker)
1354			p.addElement()
1355			p.im = inCaptionIM
1356			return true
1357		case a.Colgroup:
1358			p.clearStackToContext(tableScope)
1359			p.addElement()
1360			p.im = inColumnGroupIM
1361			return true
1362		case a.Col:
1363			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1364			return false
1365		case a.Tbody, a.Tfoot, a.Thead:
1366			p.clearStackToContext(tableScope)
1367			p.addElement()
1368			p.im = inTableBodyIM
1369			return true
1370		case a.Td, a.Th, a.Tr:
1371			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1372			return false
1373		case a.Table:
1374			if p.popUntil(tableScope, a.Table) {
1375				p.resetInsertionMode()
1376				return false
1377			}
1378			// Ignore the token.
1379			return true
1380		case a.Style, a.Script, a.Template:
1381			return inHeadIM(p)
1382		case a.Input:
1383			for _, t := range p.tok.Attr {
1384				if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1385					p.addElement()
1386					p.oe.pop()
1387					return true
1388				}
1389			}
1390			// Otherwise drop down to the default action.
1391		case a.Form:
1392			if p.oe.contains(a.Template) || p.form != nil {
1393				// Ignore the token.
1394				return true
1395			}
1396			p.addElement()
1397			p.form = p.oe.pop()
1398		case a.Select:
1399			p.reconstructActiveFormattingElements()
1400			switch p.top().DataAtom {
1401			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1402				p.fosterParenting = true
1403			}
1404			p.addElement()
1405			p.fosterParenting = false
1406			p.framesetOK = false
1407			p.im = inSelectInTableIM
1408			return true
1409		}
1410	case EndTagToken:
1411		switch p.tok.DataAtom {
1412		case a.Table:
1413			if p.popUntil(tableScope, a.Table) {
1414				p.resetInsertionMode()
1415				return true
1416			}
1417			// Ignore the token.
1418			return true
1419		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1420			// Ignore the token.
1421			return true
1422		case a.Template:
1423			return inHeadIM(p)
1424		}
1425	case CommentToken:
1426		p.addChild(&Node{
1427			Type: CommentNode,
1428			Data: p.tok.Data,
1429		})
1430		return true
1431	case DoctypeToken:
1432		// Ignore the token.
1433		return true
1434	case ErrorToken:
1435		return inBodyIM(p)
1436	}
1437
1438	p.fosterParenting = true
1439	defer func() { p.fosterParenting = false }()
1440
1441	return inBodyIM(p)
1442}
1443
1444// Section 12.2.6.4.11.
1445func inCaptionIM(p *parser) bool {
1446	switch p.tok.Type {
1447	case StartTagToken:
1448		switch p.tok.DataAtom {
1449		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1450			if p.popUntil(tableScope, a.Caption) {
1451				p.clearActiveFormattingElements()
1452				p.im = inTableIM
1453				return false
1454			} else {
1455				// Ignore the token.
1456				return true
1457			}
1458		case a.Select:
1459			p.reconstructActiveFormattingElements()
1460			p.addElement()
1461			p.framesetOK = false
1462			p.im = inSelectInTableIM
1463			return true
1464		}
1465	case EndTagToken:
1466		switch p.tok.DataAtom {
1467		case a.Caption:
1468			if p.popUntil(tableScope, a.Caption) {
1469				p.clearActiveFormattingElements()
1470				p.im = inTableIM
1471			}
1472			return true
1473		case a.Table:
1474			if p.popUntil(tableScope, a.Caption) {
1475				p.clearActiveFormattingElements()
1476				p.im = inTableIM
1477				return false
1478			} else {
1479				// Ignore the token.
1480				return true
1481			}
1482		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1483			// Ignore the token.
1484			return true
1485		}
1486	}
1487	return inBodyIM(p)
1488}
1489
1490// Section 12.2.6.4.12.
1491func inColumnGroupIM(p *parser) bool {
1492	switch p.tok.Type {
1493	case TextToken:
1494		s := strings.TrimLeft(p.tok.Data, whitespace)
1495		if len(s) < len(p.tok.Data) {
1496			// Add the initial whitespace to the current node.
1497			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1498			if s == "" {
1499				return true
1500			}
1501			p.tok.Data = s
1502		}
1503	case CommentToken:
1504		p.addChild(&Node{
1505			Type: CommentNode,
1506			Data: p.tok.Data,
1507		})
1508		return true
1509	case DoctypeToken:
1510		// Ignore the token.
1511		return true
1512	case StartTagToken:
1513		switch p.tok.DataAtom {
1514		case a.Html:
1515			return inBodyIM(p)
1516		case a.Col:
1517			p.addElement()
1518			p.oe.pop()
1519			p.acknowledgeSelfClosingTag()
1520			return true
1521		case a.Template:
1522			return inHeadIM(p)
1523		}
1524	case EndTagToken:
1525		switch p.tok.DataAtom {
1526		case a.Colgroup:
1527			if p.oe.top().DataAtom == a.Colgroup {
1528				p.oe.pop()
1529				p.im = inTableIM
1530			}
1531			return true
1532		case a.Col:
1533			// Ignore the token.
1534			return true
1535		case a.Template:
1536			return inHeadIM(p)
1537		}
1538	case ErrorToken:
1539		return inBodyIM(p)
1540	}
1541	if p.oe.top().DataAtom != a.Colgroup {
1542		return true
1543	}
1544	p.oe.pop()
1545	p.im = inTableIM
1546	return false
1547}
1548
1549// Section 12.2.6.4.13.
1550func inTableBodyIM(p *parser) bool {
1551	switch p.tok.Type {
1552	case StartTagToken:
1553		switch p.tok.DataAtom {
1554		case a.Tr:
1555			p.clearStackToContext(tableBodyScope)
1556			p.addElement()
1557			p.im = inRowIM
1558			return true
1559		case a.Td, a.Th:
1560			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1561			return false
1562		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1563			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1564				p.im = inTableIM
1565				return false
1566			}
1567			// Ignore the token.
1568			return true
1569		}
1570	case EndTagToken:
1571		switch p.tok.DataAtom {
1572		case a.Tbody, a.Tfoot, a.Thead:
1573			if p.elementInScope(tableScope, p.tok.DataAtom) {
1574				p.clearStackToContext(tableBodyScope)
1575				p.oe.pop()
1576				p.im = inTableIM
1577			}
1578			return true
1579		case a.Table:
1580			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1581				p.im = inTableIM
1582				return false
1583			}
1584			// Ignore the token.
1585			return true
1586		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1587			// Ignore the token.
1588			return true
1589		}
1590	case CommentToken:
1591		p.addChild(&Node{
1592			Type: CommentNode,
1593			Data: p.tok.Data,
1594		})
1595		return true
1596	}
1597
1598	return inTableIM(p)
1599}
1600
1601// Section 12.2.6.4.14.
1602func inRowIM(p *parser) bool {
1603	switch p.tok.Type {
1604	case StartTagToken:
1605		switch p.tok.DataAtom {
1606		case a.Td, a.Th:
1607			p.clearStackToContext(tableRowScope)
1608			p.addElement()
1609			p.afe = append(p.afe, &scopeMarker)
1610			p.im = inCellIM
1611			return true
1612		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1613			if p.popUntil(tableScope, a.Tr) {
1614				p.im = inTableBodyIM
1615				return false
1616			}
1617			// Ignore the token.
1618			return true
1619		}
1620	case EndTagToken:
1621		switch p.tok.DataAtom {
1622		case a.Tr:
1623			if p.popUntil(tableScope, a.Tr) {
1624				p.im = inTableBodyIM
1625				return true
1626			}
1627			// Ignore the token.
1628			return true
1629		case a.Table:
1630			if p.popUntil(tableScope, a.Tr) {
1631				p.im = inTableBodyIM
1632				return false
1633			}
1634			// Ignore the token.
1635			return true
1636		case a.Tbody, a.Tfoot, a.Thead:
1637			if p.elementInScope(tableScope, p.tok.DataAtom) {
1638				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1639				return false
1640			}
1641			// Ignore the token.
1642			return true
1643		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1644			// Ignore the token.
1645			return true
1646		}
1647	}
1648
1649	return inTableIM(p)
1650}
1651
1652// Section 12.2.6.4.15.
1653func inCellIM(p *parser) bool {
1654	switch p.tok.Type {
1655	case StartTagToken:
1656		switch p.tok.DataAtom {
1657		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1658			if p.popUntil(tableScope, a.Td, a.Th) {
1659				// Close the cell and reprocess.
1660				p.clearActiveFormattingElements()
1661				p.im = inRowIM
1662				return false
1663			}
1664			// Ignore the token.
1665			return true
1666		case a.Select:
1667			p.reconstructActiveFormattingElements()
1668			p.addElement()
1669			p.framesetOK = false
1670			p.im = inSelectInTableIM
1671			return true
1672		}
1673	case EndTagToken:
1674		switch p.tok.DataAtom {
1675		case a.Td, a.Th:
1676			if !p.popUntil(tableScope, p.tok.DataAtom) {
1677				// Ignore the token.
1678				return true
1679			}
1680			p.clearActiveFormattingElements()
1681			p.im = inRowIM
1682			return true
1683		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1684			// Ignore the token.
1685			return true
1686		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1687			if !p.elementInScope(tableScope, p.tok.DataAtom) {
1688				// Ignore the token.
1689				return true
1690			}
1691			// Close the cell and reprocess.
1692			p.popUntil(tableScope, a.Td, a.Th)
1693			p.clearActiveFormattingElements()
1694			p.im = inRowIM
1695			return false
1696		}
1697	}
1698	return inBodyIM(p)
1699}
1700
1701// Section 12.2.6.4.16.
1702func inSelectIM(p *parser) bool {
1703	switch p.tok.Type {
1704	case TextToken:
1705		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1706	case StartTagToken:
1707		switch p.tok.DataAtom {
1708		case a.Html:
1709			return inBodyIM(p)
1710		case a.Option:
1711			if p.top().DataAtom == a.Option {
1712				p.oe.pop()
1713			}
1714			p.addElement()
1715		case a.Optgroup:
1716			if p.top().DataAtom == a.Option {
1717				p.oe.pop()
1718			}
1719			if p.top().DataAtom == a.Optgroup {
1720				p.oe.pop()
1721			}
1722			p.addElement()
1723		case a.Select:
1724			p.tok.Type = EndTagToken
1725			return false
1726		case a.Input, a.Keygen, a.Textarea:
1727			if p.elementInScope(selectScope, a.Select) {
1728				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1729				return false
1730			}
1731			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
1732			p.tokenizer.NextIsNotRawText()
1733			// Ignore the token.
1734			return true
1735		case a.Script, a.Template:
1736			return inHeadIM(p)
1737		}
1738	case EndTagToken:
1739		switch p.tok.DataAtom {
1740		case a.Option:
1741			if p.top().DataAtom == a.Option {
1742				p.oe.pop()
1743			}
1744		case a.Optgroup:
1745			i := len(p.oe) - 1
1746			if p.oe[i].DataAtom == a.Option {
1747				i--
1748			}
1749			if p.oe[i].DataAtom == a.Optgroup {
1750				p.oe = p.oe[:i]
1751			}
1752		case a.Select:
1753			if p.popUntil(selectScope, a.Select) {
1754				p.resetInsertionMode()
1755			}
1756		case a.Template:
1757			return inHeadIM(p)
1758		}
1759	case CommentToken:
1760		p.addChild(&Node{
1761			Type: CommentNode,
1762			Data: p.tok.Data,
1763		})
1764	case DoctypeToken:
1765		// Ignore the token.
1766		return true
1767	case ErrorToken:
1768		return inBodyIM(p)
1769	}
1770
1771	return true
1772}
1773
1774// Section 12.2.6.4.17.
1775func inSelectInTableIM(p *parser) bool {
1776	switch p.tok.Type {
1777	case StartTagToken, EndTagToken:
1778		switch p.tok.DataAtom {
1779		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1780			if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
1781				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1782				return false
1783			} else {
1784				// Ignore the token.
1785				return true
1786			}
1787		}
1788	}
1789	return inSelectIM(p)
1790}
1791
1792// Section 12.2.6.4.18.
1793func inTemplateIM(p *parser) bool {
1794	switch p.tok.Type {
1795	case TextToken, CommentToken, DoctypeToken:
1796		return inBodyIM(p)
1797	case StartTagToken:
1798		switch p.tok.DataAtom {
1799		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1800			return inHeadIM(p)
1801		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1802			p.templateStack.pop()
1803			p.templateStack = append(p.templateStack, inTableIM)
1804			p.im = inTableIM
1805			return false
1806		case a.Col:
1807			p.templateStack.pop()
1808			p.templateStack = append(p.templateStack, inColumnGroupIM)
1809			p.im = inColumnGroupIM
1810			return false
1811		case a.Tr:
1812			p.templateStack.pop()
1813			p.templateStack = append(p.templateStack, inTableBodyIM)
1814			p.im = inTableBodyIM
1815			return false
1816		case a.Td, a.Th:
1817			p.templateStack.pop()
1818			p.templateStack = append(p.templateStack, inRowIM)
1819			p.im = inRowIM
1820			return false
1821		default:
1822			p.templateStack.pop()
1823			p.templateStack = append(p.templateStack, inBodyIM)
1824			p.im = inBodyIM
1825			return false
1826		}
1827	case EndTagToken:
1828		switch p.tok.DataAtom {
1829		case a.Template:
1830			return inHeadIM(p)
1831		default:
1832			// Ignore the token.
1833			return true
1834		}
1835	case ErrorToken:
1836		if !p.oe.contains(a.Template) {
1837			// Ignore the token.
1838			return true
1839		}
1840		// TODO: remove this divergence from the HTML5 spec.
1841		//
1842		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1843		p.generateImpliedEndTags()
1844		for i := len(p.oe) - 1; i >= 0; i-- {
1845			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1846				p.oe = p.oe[:i]
1847				break
1848			}
1849		}
1850		p.clearActiveFormattingElements()
1851		p.templateStack.pop()
1852		p.resetInsertionMode()
1853		return false
1854	}
1855	return false
1856}
1857
1858// Section 12.2.6.4.19.
1859func afterBodyIM(p *parser) bool {
1860	switch p.tok.Type {
1861	case ErrorToken:
1862		// Stop parsing.
1863		return true
1864	case TextToken:
1865		s := strings.TrimLeft(p.tok.Data, whitespace)
1866		if len(s) == 0 {
1867			// It was all whitespace.
1868			return inBodyIM(p)
1869		}
1870	case StartTagToken:
1871		if p.tok.DataAtom == a.Html {
1872			return inBodyIM(p)
1873		}
1874	case EndTagToken:
1875		if p.tok.DataAtom == a.Html {
1876			if !p.fragment {
1877				p.im = afterAfterBodyIM
1878			}
1879			return true
1880		}
1881	case CommentToken:
1882		// The comment is attached to the <html> element.
1883		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1884			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1885		}
1886		p.oe[0].AppendChild(&Node{
1887			Type: CommentNode,
1888			Data: p.tok.Data,
1889		})
1890		return true
1891	}
1892	p.im = inBodyIM
1893	return false
1894}
1895
1896// Section 12.2.6.4.20.
1897func inFramesetIM(p *parser) bool {
1898	switch p.tok.Type {
1899	case CommentToken:
1900		p.addChild(&Node{
1901			Type: CommentNode,
1902			Data: p.tok.Data,
1903		})
1904	case TextToken:
1905		// Ignore all text but whitespace.
1906		s := strings.Map(func(c rune) rune {
1907			switch c {
1908			case ' ', '\t', '\n', '\f', '\r':
1909				return c
1910			}
1911			return -1
1912		}, p.tok.Data)
1913		if s != "" {
1914			p.addText(s)
1915		}
1916	case StartTagToken:
1917		switch p.tok.DataAtom {
1918		case a.Html:
1919			return inBodyIM(p)
1920		case a.Frameset:
1921			p.addElement()
1922		case a.Frame:
1923			p.addElement()
1924			p.oe.pop()
1925			p.acknowledgeSelfClosingTag()
1926		case a.Noframes:
1927			return inHeadIM(p)
1928		}
1929	case EndTagToken:
1930		switch p.tok.DataAtom {
1931		case a.Frameset:
1932			if p.oe.top().DataAtom != a.Html {
1933				p.oe.pop()
1934				if p.oe.top().DataAtom != a.Frameset {
1935					p.im = afterFramesetIM
1936					return true
1937				}
1938			}
1939		}
1940	default:
1941		// Ignore the token.
1942	}
1943	return true
1944}
1945
1946// Section 12.2.6.4.21.
1947func afterFramesetIM(p *parser) bool {
1948	switch p.tok.Type {
1949	case CommentToken:
1950		p.addChild(&Node{
1951			Type: CommentNode,
1952			Data: p.tok.Data,
1953		})
1954	case TextToken:
1955		// Ignore all text but whitespace.
1956		s := strings.Map(func(c rune) rune {
1957			switch c {
1958			case ' ', '\t', '\n', '\f', '\r':
1959				return c
1960			}
1961			return -1
1962		}, p.tok.Data)
1963		if s != "" {
1964			p.addText(s)
1965		}
1966	case StartTagToken:
1967		switch p.tok.DataAtom {
1968		case a.Html:
1969			return inBodyIM(p)
1970		case a.Noframes:
1971			return inHeadIM(p)
1972		}
1973	case EndTagToken:
1974		switch p.tok.DataAtom {
1975		case a.Html:
1976			p.im = afterAfterFramesetIM
1977			return true
1978		}
1979	default:
1980		// Ignore the token.
1981	}
1982	return true
1983}
1984
1985// Section 12.2.6.4.22.
1986func afterAfterBodyIM(p *parser) bool {
1987	switch p.tok.Type {
1988	case ErrorToken:
1989		// Stop parsing.
1990		return true
1991	case TextToken:
1992		s := strings.TrimLeft(p.tok.Data, whitespace)
1993		if len(s) == 0 {
1994			// It was all whitespace.
1995			return inBodyIM(p)
1996		}
1997	case StartTagToken:
1998		if p.tok.DataAtom == a.Html {
1999			return inBodyIM(p)
2000		}
2001	case CommentToken:
2002		p.doc.AppendChild(&Node{
2003			Type: CommentNode,
2004			Data: p.tok.Data,
2005		})
2006		return true
2007	case DoctypeToken:
2008		return inBodyIM(p)
2009	}
2010	p.im = inBodyIM
2011	return false
2012}
2013
2014// Section 12.2.6.4.23.
2015func afterAfterFramesetIM(p *parser) bool {
2016	switch p.tok.Type {
2017	case CommentToken:
2018		p.doc.AppendChild(&Node{
2019			Type: CommentNode,
2020			Data: p.tok.Data,
2021		})
2022	case TextToken:
2023		// Ignore all text but whitespace.
2024		s := strings.Map(func(c rune) rune {
2025			switch c {
2026			case ' ', '\t', '\n', '\f', '\r':
2027				return c
2028			}
2029			return -1
2030		}, p.tok.Data)
2031		if s != "" {
2032			p.tok.Data = s
2033			return inBodyIM(p)
2034		}
2035	case StartTagToken:
2036		switch p.tok.DataAtom {
2037		case a.Html:
2038			return inBodyIM(p)
2039		case a.Noframes:
2040			return inHeadIM(p)
2041		}
2042	case DoctypeToken:
2043		return inBodyIM(p)
2044	default:
2045		// Ignore the token.
2046	}
2047	return true
2048}
2049
// whitespaceOrNUL is the whitespace character set extended with the NUL
// byte. parseForeignContent trims these characters from text to decide
// whether the frameset-ok flag may stay set.
const whitespaceOrNUL = whitespace + "\x00"
2051
// parseForeignContent applies the rules for parsing tokens in foreign
// content (section 12.2.6.5).
//
// It reports whether the current token was consumed. A false result means
// the token must be reprocessed, which happens when a start tag breaks out
// of foreign content.
func parseForeignContent(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// framesetOK survives only if the text is nothing but whitespace
		// and NUL characters.
		if p.framesetOK {
			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
		}
		// NUL characters become U+FFFD REPLACEMENT CHARACTER.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
		p.addText(p.tok.Data)
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case StartTagToken:
		// b records whether this start tag breaks out of foreign content:
		// either its name is in the breakout table, or it is a <font> tag
		// carrying a color, face or size attribute.
		b := breakout[p.tok.Data]
		if p.tok.DataAtom == a.Font {
		loop:
			for _, attr := range p.tok.Attr {
				switch attr.Key {
				case "color", "face", "size":
					b = true
					break loop
				}
			}
		}
		if b {
			// Cut the stack of open elements back to the topmost element
			// that is in the HTML namespace or is an integration point,
			// then have the token reprocessed.
			for i := len(p.oe) - 1; i >= 0; i-- {
				n := p.oe[i]
				if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
					p.oe = p.oe[:i+1]
					break
				}
			}
			return false
		}
		switch p.top().Namespace {
		case "math":
			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
		case "svg":
			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
			// SVG wants e.g. "foreignObject" with a capital second "O".
			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
				p.tok.DataAtom = a.Lookup([]byte(x))
				p.tok.Data = x
			}
			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
		default:
			panic("html: bad parser state: unexpected namespace")
		}
		adjustForeignAttributes(p.tok.Attr)
		// The new element takes the namespace of the element it is inserted
		// under, so read that namespace before addElement changes the top
		// of the stack.
		namespace := p.top().Namespace
		p.addElement()
		p.top().Namespace = namespace
		if namespace != "" {
			// Don't let the tokenizer go into raw text mode in foreign content
			// (e.g. in an SVG <title> tag).
			p.tokenizer.NextIsNotRawText()
		}
		if p.hasSelfClosingToken {
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		}
	case EndTagToken:
		// Walk the stack of open elements from the top. Reaching an
		// HTML-namespace element hands the token to the current insertion
		// mode. An element whose name matches the end tag, ignoring case,
		// is popped together with everything above it.
		for i := len(p.oe) - 1; i >= 0; i-- {
			if p.oe[i].Namespace == "" {
				return p.im(p)
			}
			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
				p.oe = p.oe[:i]
				break
			}
		}
		return true
	default:
		// Ignore the token.
	}
	return true
}
2131
2132// Section 12.2.6.
2133func (p *parser) inForeignContent() bool {
2134	if len(p.oe) == 0 {
2135		return false
2136	}
2137	n := p.oe[len(p.oe)-1]
2138	if n.Namespace == "" {
2139		return false
2140	}
2141	if mathMLTextIntegrationPoint(n) {
2142		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2143			return false
2144		}
2145		if p.tok.Type == TextToken {
2146			return false
2147		}
2148	}
2149	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2150		return false
2151	}
2152	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2153		return false
2154	}
2155	if p.tok.Type == ErrorToken {
2156		return false
2157	}
2158	return true
2159}
2160
2161// parseImpliedToken parses a token as though it had appeared in the parser's
2162// input.
2163func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2164	realToken, selfClosing := p.tok, p.hasSelfClosingToken
2165	p.tok = Token{
2166		Type:     t,
2167		DataAtom: dataAtom,
2168		Data:     data,
2169	}
2170	p.hasSelfClosingToken = false
2171	p.parseCurrentToken()
2172	p.tok, p.hasSelfClosingToken = realToken, selfClosing
2173}
2174
2175// parseCurrentToken runs the current token through the parsing routines
2176// until it is consumed.
2177func (p *parser) parseCurrentToken() {
2178	if p.tok.Type == SelfClosingTagToken {
2179		p.hasSelfClosingToken = true
2180		p.tok.Type = StartTagToken
2181	}
2182
2183	consumed := false
2184	for !consumed {
2185		if p.inForeignContent() {
2186			consumed = parseForeignContent(p)
2187		} else {
2188			consumed = p.im(p)
2189		}
2190	}
2191
2192	if p.hasSelfClosingToken {
2193		// This is a parse error, but ignore it.
2194		p.hasSelfClosingToken = false
2195	}
2196}
2197
2198func (p *parser) parse() error {
2199	// Iterate until EOF. Any other error will cause an early return.
2200	var err error
2201	for err != io.EOF {
2202		// CDATA sections are allowed only in foreign content.
2203		n := p.oe.top()
2204		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2205		// Read and parse the next token.
2206		p.tokenizer.Next()
2207		p.tok = p.tokenizer.Token()
2208		if p.tok.Type == ErrorToken {
2209			err = p.tokenizer.Err()
2210			if err != nil && err != io.EOF {
2211				return err
2212			}
2213		}
2214		p.parseCurrentToken()
2215	}
2216	return nil
2217}
2218
2219// Parse returns the parse tree for the HTML from the given Reader.
2220//
2221// It implements the HTML5 parsing algorithm
2222// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2223// which is very complicated. The resultant tree can contain implicitly created
2224// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2225// differ from the nesting implied by a naive processing of start and end
2226// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2227// with no corresponding node in the resulting tree.
2228//
2229// The input is assumed to be UTF-8 encoded.
2230func Parse(r io.Reader) (*Node, error) {
2231	p := &parser{
2232		tokenizer: NewTokenizer(r),
2233		doc: &Node{
2234			Type: DocumentNode,
2235		},
2236		scripting:  true,
2237		framesetOK: true,
2238		im:         initialIM,
2239	}
2240	err := p.parse()
2241	if err != nil {
2242		return nil, err
2243	}
2244	return p.doc, nil
2245}
2246
2247// ParseFragment parses a fragment of HTML and returns the nodes that were
2248// found. If the fragment is the InnerHTML for an existing element, pass that
2249// element in context.
2250//
2251// It has the same intricacies as Parse.
2252func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2253	contextTag := ""
2254	if context != nil {
2255		if context.Type != ElementNode {
2256			return nil, errors.New("html: ParseFragment of non-element Node")
2257		}
2258		// The next check isn't just context.DataAtom.String() == context.Data because
2259		// it is valid to pass an element whose tag isn't a known atom. For example,
2260		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2261		if context.DataAtom != a.Lookup([]byte(context.Data)) {
2262			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2263		}
2264		contextTag = context.DataAtom.String()
2265	}
2266	p := &parser{
2267		tokenizer: NewTokenizerFragment(r, contextTag),
2268		doc: &Node{
2269			Type: DocumentNode,
2270		},
2271		scripting: true,
2272		fragment:  true,
2273		context:   context,
2274	}
2275
2276	root := &Node{
2277		Type:     ElementNode,
2278		DataAtom: a.Html,
2279		Data:     a.Html.String(),
2280	}
2281	p.doc.AppendChild(root)
2282	p.oe = nodeStack{root}
2283	if context != nil && context.DataAtom == a.Template {
2284		p.templateStack = append(p.templateStack, inTemplateIM)
2285	}
2286	p.resetInsertionMode()
2287
2288	for n := context; n != nil; n = n.Parent {
2289		if n.Type == ElementNode && n.DataAtom == a.Form {
2290			p.form = n
2291			break
2292		}
2293	}
2294
2295	err := p.parse()
2296	if err != nil {
2297		return nil, err
2298	}
2299
2300	parent := p.doc
2301	if context != nil {
2302		parent = root
2303	}
2304
2305	var result []*Node
2306	for c := parent.FirstChild; c != nil; {
2307		next := c.NextSibling
2308		parent.RemoveChild(c)
2309		result = append(result, c)
2310		c = next
2311	}
2312	return result, nil
2313}