parse.go

   1// Copyright 2010 The Go Authors. All rights reserved.
   2// Use of this source code is governed by a BSD-style
   3// license that can be found in the LICENSE file.
   4
   5package html
   6
   7import (
   8	"errors"
   9	"fmt"
  10	"io"
  11	"strings"
  12
  13	a "golang.org/x/net/html/atom"
  14)
  15
  16// A parser implements the HTML5 parsing algorithm:
  17// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
  18type parser struct {
  19	// tokenizer provides the tokens for the parser.
  20	tokenizer *Tokenizer
  21	// tok is the most recently read token.
  22	tok Token
  23	// Self-closing tags like <hr/> are treated as start tags, except that
  24	// hasSelfClosingToken is set while they are being processed.
  25	hasSelfClosingToken bool
  26	// doc is the document root element.
  27	doc *Node
  28	// The stack of open elements (section 12.2.4.2) and active formatting
  29	// elements (section 12.2.4.3).
  30	oe, afe nodeStack
  31	// Element pointers (section 12.2.4.4).
  32	head, form *Node
  33	// Other parsing state flags (section 12.2.4.5).
  34	scripting, framesetOK bool
  35	// The stack of template insertion modes
  36	templateStack insertionModeStack
  37	// im is the current insertion mode.
  38	im insertionMode
  39	// originalIM is the insertion mode to go back to after completing a text
  40	// or inTableText insertion mode.
  41	originalIM insertionMode
  42	// fosterParenting is whether new elements should be inserted according to
  43	// the foster parenting rules (section 12.2.6.1).
  44	fosterParenting bool
  45	// quirks is whether the parser is operating in "quirks mode."
  46	quirks bool
  47	// fragment is whether the parser is parsing an HTML fragment.
  48	fragment bool
  49	// context is the context element when parsing an HTML fragment
  50	// (section 12.4).
  51	context *Node
  52}
  53
  54func (p *parser) top() *Node {
  55	if n := p.oe.top(); n != nil {
  56		return n
  57	}
  58	return p.doc
  59}
  60
  61// Stop tags for use in popUntil. These come from section 12.2.4.2.
  62var (
  63	defaultScopeStopTags = map[string][]a.Atom{
  64		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
  65		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
  66		"svg":  {a.Desc, a.ForeignObject, a.Title},
  67	}
  68)
  69
// scope selects the set of stop tags that bounds a search of, or a clear of,
// the stack of open elements.
type scope int

const (
	// defaultScope stops only at the tags listed in defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope stops at the default stop tags and also at ol and ul.
	listItemScope
	// buttonScope stops at the default stop tags and also at button.
	buttonScope
	// tableScope stops at html, table and template.
	tableScope
	// tableRowScope stops at html, tr and template. It is handled by
	// clearStackToContext; indexOfElementInScope does not support it.
	tableRowScope
	// tableBodyScope stops at html, tbody, tfoot, thead and template. It is
	// handled by clearStackToContext; indexOfElementInScope does not support
	// it.
	tableBodyScope
	// selectScope stops at every HTML element other than optgroup and option.
	selectScope
)
  81
  82// popUntil pops the stack of open elements at the highest element whose tag
  83// is in matchTags, provided there is no higher element in the scope's stop
  84// tags (as defined in section 12.2.4.2). It returns whether or not there was
  85// such an element. If there was not, popUntil leaves the stack unchanged.
  86//
  87// For example, the set of stop tags for table scope is: "html", "table". If
  88// the stack was:
  89// ["html", "body", "font", "table", "b", "i", "u"]
  90// then popUntil(tableScope, "font") would return false, but
  91// popUntil(tableScope, "i") would return true and the stack would become:
  92// ["html", "body", "font", "table", "b"]
  93//
  94// If an element's tag is in both the stop tags and matchTags, then the stack
  95// will be popped and the function returns true (provided, of course, there was
  96// no higher element in the stack that was also in the stop tags). For example,
  97// popUntil(tableScope, "table") returns true and leaves:
  98// ["html", "body", "font"]
  99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
 100	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
 101		p.oe = p.oe[:i]
 102		return true
 103	}
 104	return false
 105}
 106
 107// indexOfElementInScope returns the index in p.oe of the highest element whose
 108// tag is in matchTags that is in scope. If no matching element is in scope, it
 109// returns -1.
 110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
 111	for i := len(p.oe) - 1; i >= 0; i-- {
 112		tagAtom := p.oe[i].DataAtom
 113		if p.oe[i].Namespace == "" {
 114			for _, t := range matchTags {
 115				if t == tagAtom {
 116					return i
 117				}
 118			}
 119			switch s {
 120			case defaultScope:
 121				// No-op.
 122			case listItemScope:
 123				if tagAtom == a.Ol || tagAtom == a.Ul {
 124					return -1
 125				}
 126			case buttonScope:
 127				if tagAtom == a.Button {
 128					return -1
 129				}
 130			case tableScope:
 131				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
 132					return -1
 133				}
 134			case selectScope:
 135				if tagAtom != a.Optgroup && tagAtom != a.Option {
 136					return -1
 137				}
 138			default:
 139				panic("unreachable")
 140			}
 141		}
 142		switch s {
 143		case defaultScope, listItemScope, buttonScope:
 144			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
 145				if t == tagAtom {
 146					return -1
 147				}
 148			}
 149		}
 150	}
 151	return -1
 152}
 153
 154// elementInScope is like popUntil, except that it doesn't modify the stack of
 155// open elements.
 156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
 157	return p.indexOfElementInScope(s, matchTags...) != -1
 158}
 159
 160// clearStackToContext pops elements off the stack of open elements until a
 161// scope-defined element is found.
 162func (p *parser) clearStackToContext(s scope) {
 163	for i := len(p.oe) - 1; i >= 0; i-- {
 164		tagAtom := p.oe[i].DataAtom
 165		switch s {
 166		case tableScope:
 167			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
 168				p.oe = p.oe[:i+1]
 169				return
 170			}
 171		case tableRowScope:
 172			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
 173				p.oe = p.oe[:i+1]
 174				return
 175			}
 176		case tableBodyScope:
 177			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
 178				p.oe = p.oe[:i+1]
 179				return
 180			}
 181		default:
 182			panic("unreachable")
 183		}
 184	}
 185}
 186
 187// parseGenericRawTextElement implements the generic raw text element parsing
 188// algorithm defined in 12.2.6.2.
 189// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
 190// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
 191// officially, need to make tokenizer consider both states.
 192func (p *parser) parseGenericRawTextElement() {
 193	p.addElement()
 194	p.originalIM = p.im
 195	p.im = textIM
 196}
 197
 198// generateImpliedEndTags pops nodes off the stack of open elements as long as
 199// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
 200// If exceptions are specified, nodes with that name will not be popped off.
 201func (p *parser) generateImpliedEndTags(exceptions ...string) {
 202	var i int
 203loop:
 204	for i = len(p.oe) - 1; i >= 0; i-- {
 205		n := p.oe[i]
 206		if n.Type != ElementNode {
 207			break
 208		}
 209		switch n.DataAtom {
 210		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
 211			for _, except := range exceptions {
 212				if n.Data == except {
 213					break loop
 214				}
 215			}
 216			continue
 217		}
 218		break
 219	}
 220
 221	p.oe = p.oe[:i+1]
 222}
 223
 224// addChild adds a child node n to the top element, and pushes n onto the stack
 225// of open elements if it is an element node.
 226func (p *parser) addChild(n *Node) {
 227	if p.shouldFosterParent() {
 228		p.fosterParent(n)
 229	} else {
 230		p.top().AppendChild(n)
 231	}
 232
 233	if n.Type == ElementNode {
 234		p.oe = append(p.oe, n)
 235	}
 236}
 237
 238// shouldFosterParent returns whether the next node to be added should be
 239// foster parented.
 240func (p *parser) shouldFosterParent() bool {
 241	if p.fosterParenting {
 242		switch p.top().DataAtom {
 243		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
 244			return true
 245		}
 246	}
 247	return false
 248}
 249
 250// fosterParent adds a child node according to the foster parenting rules.
 251// Section 12.2.6.1, "foster parenting".
 252func (p *parser) fosterParent(n *Node) {
 253	var table, parent, prev, template *Node
 254	var i int
 255	for i = len(p.oe) - 1; i >= 0; i-- {
 256		if p.oe[i].DataAtom == a.Table {
 257			table = p.oe[i]
 258			break
 259		}
 260	}
 261
 262	var j int
 263	for j = len(p.oe) - 1; j >= 0; j-- {
 264		if p.oe[j].DataAtom == a.Template {
 265			template = p.oe[j]
 266			break
 267		}
 268	}
 269
 270	if template != nil && (table == nil || j > i) {
 271		template.AppendChild(n)
 272		return
 273	}
 274
 275	if table == nil {
 276		// The foster parent is the html element.
 277		parent = p.oe[0]
 278	} else {
 279		parent = table.Parent
 280	}
 281	if parent == nil {
 282		parent = p.oe[i-1]
 283	}
 284
 285	if table != nil {
 286		prev = table.PrevSibling
 287	} else {
 288		prev = parent.LastChild
 289	}
 290	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
 291		prev.Data += n.Data
 292		return
 293	}
 294
 295	parent.InsertBefore(n, table)
 296}
 297
 298// addText adds text to the preceding node if it is a text node, or else it
 299// calls addChild with a new text node.
 300func (p *parser) addText(text string) {
 301	if text == "" {
 302		return
 303	}
 304
 305	if p.shouldFosterParent() {
 306		p.fosterParent(&Node{
 307			Type: TextNode,
 308			Data: text,
 309		})
 310		return
 311	}
 312
 313	t := p.top()
 314	if n := t.LastChild; n != nil && n.Type == TextNode {
 315		n.Data += text
 316		return
 317	}
 318	p.addChild(&Node{
 319		Type: TextNode,
 320		Data: text,
 321	})
 322}
 323
 324// addElement adds a child element based on the current token.
 325func (p *parser) addElement() {
 326	p.addChild(&Node{
 327		Type:     ElementNode,
 328		DataAtom: p.tok.DataAtom,
 329		Data:     p.tok.Data,
 330		Attr:     p.tok.Attr,
 331	})
 332}
 333
 334// Section 12.2.4.3.
 335func (p *parser) addFormattingElement() {
 336	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
 337	p.addElement()
 338
 339	// Implement the Noah's Ark clause, but with three per family instead of two.
 340	identicalElements := 0
 341findIdenticalElements:
 342	for i := len(p.afe) - 1; i >= 0; i-- {
 343		n := p.afe[i]
 344		if n.Type == scopeMarkerNode {
 345			break
 346		}
 347		if n.Type != ElementNode {
 348			continue
 349		}
 350		if n.Namespace != "" {
 351			continue
 352		}
 353		if n.DataAtom != tagAtom {
 354			continue
 355		}
 356		if len(n.Attr) != len(attr) {
 357			continue
 358		}
 359	compareAttributes:
 360		for _, t0 := range n.Attr {
 361			for _, t1 := range attr {
 362				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
 363					// Found a match for this attribute, continue with the next attribute.
 364					continue compareAttributes
 365				}
 366			}
 367			// If we get here, there is no attribute that matches a.
 368			// Therefore the element is not identical to the new one.
 369			continue findIdenticalElements
 370		}
 371
 372		identicalElements++
 373		if identicalElements >= 3 {
 374			p.afe.remove(n)
 375		}
 376	}
 377
 378	p.afe = append(p.afe, p.top())
 379}
 380
 381// Section 12.2.4.3.
 382func (p *parser) clearActiveFormattingElements() {
 383	for {
 384		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
 385			return
 386		}
 387	}
 388}
 389
 390// Section 12.2.4.3.
 391func (p *parser) reconstructActiveFormattingElements() {
 392	n := p.afe.top()
 393	if n == nil {
 394		return
 395	}
 396	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
 397		return
 398	}
 399	i := len(p.afe) - 1
 400	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
 401		if i == 0 {
 402			i = -1
 403			break
 404		}
 405		i--
 406		n = p.afe[i]
 407	}
 408	for {
 409		i++
 410		clone := p.afe[i].clone()
 411		p.addChild(clone)
 412		p.afe[i] = clone
 413		if i == len(p.afe)-1 {
 414			break
 415		}
 416	}
 417}
 418
// acknowledgeSelfClosingTag clears hasSelfClosingToken, recording that the
// self-closing flag of the token being processed has been dealt with.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
 423
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed. The insertion modes in this file
// return false after changing p.im or processing an implied token, leaving
// the current token to be handled again (the driving loop is not part of this
// section; presumably it re-dispatches the token).
type insertionMode func(*parser) bool
 429
 430// setOriginalIM sets the insertion mode to return to after completing a text or
 431// inTableText insertion mode.
 432// Section 12.2.4.1, "using the rules for".
 433func (p *parser) setOriginalIM() {
 434	if p.originalIM != nil {
 435		panic("html: bad parser state: originalIM was set twice")
 436	}
 437	p.originalIM = p.im
 438}
 439
 440// Section 12.2.4.1, "reset the insertion mode".
 441func (p *parser) resetInsertionMode() {
 442	for i := len(p.oe) - 1; i >= 0; i-- {
 443		n := p.oe[i]
 444		last := i == 0
 445		if last && p.context != nil {
 446			n = p.context
 447		}
 448
 449		switch n.DataAtom {
 450		case a.Select:
 451			if !last {
 452				for ancestor, first := n, p.oe[0]; ancestor != first; {
 453					ancestor = p.oe[p.oe.index(ancestor)-1]
 454					switch ancestor.DataAtom {
 455					case a.Template:
 456						p.im = inSelectIM
 457						return
 458					case a.Table:
 459						p.im = inSelectInTableIM
 460						return
 461					}
 462				}
 463			}
 464			p.im = inSelectIM
 465		case a.Td, a.Th:
 466			// TODO: remove this divergence from the HTML5 spec.
 467			//
 468			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 469			p.im = inCellIM
 470		case a.Tr:
 471			p.im = inRowIM
 472		case a.Tbody, a.Thead, a.Tfoot:
 473			p.im = inTableBodyIM
 474		case a.Caption:
 475			p.im = inCaptionIM
 476		case a.Colgroup:
 477			p.im = inColumnGroupIM
 478		case a.Table:
 479			p.im = inTableIM
 480		case a.Template:
 481			// TODO: remove this divergence from the HTML5 spec.
 482			if n.Namespace != "" {
 483				continue
 484			}
 485			p.im = p.templateStack.top()
 486		case a.Head:
 487			// TODO: remove this divergence from the HTML5 spec.
 488			//
 489			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 490			p.im = inHeadIM
 491		case a.Body:
 492			p.im = inBodyIM
 493		case a.Frameset:
 494			p.im = inFramesetIM
 495		case a.Html:
 496			if p.head == nil {
 497				p.im = beforeHeadIM
 498			} else {
 499				p.im = afterHeadIM
 500			}
 501		default:
 502			if last {
 503				p.im = inBodyIM
 504				return
 505			}
 506			continue
 507		}
 508		return
 509	}
 510}
 511
// whitespace is the cutset passed to strings.TrimLeft by the insertion modes
// to strip leading space, tab, carriage return, line feed and form feed
// characters from text tokens.
const whitespace = " \t\r\n\f"
 513
 514// Section 12.2.6.4.1.
 515func initialIM(p *parser) bool {
 516	switch p.tok.Type {
 517	case TextToken:
 518		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 519		if len(p.tok.Data) == 0 {
 520			// It was all whitespace, so ignore it.
 521			return true
 522		}
 523	case CommentToken:
 524		p.doc.AppendChild(&Node{
 525			Type: CommentNode,
 526			Data: p.tok.Data,
 527		})
 528		return true
 529	case DoctypeToken:
 530		n, quirks := parseDoctype(p.tok.Data)
 531		p.doc.AppendChild(n)
 532		p.quirks = quirks
 533		p.im = beforeHTMLIM
 534		return true
 535	}
 536	p.quirks = true
 537	p.im = beforeHTMLIM
 538	return false
 539}
 540
 541// Section 12.2.6.4.2.
 542func beforeHTMLIM(p *parser) bool {
 543	switch p.tok.Type {
 544	case DoctypeToken:
 545		// Ignore the token.
 546		return true
 547	case TextToken:
 548		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 549		if len(p.tok.Data) == 0 {
 550			// It was all whitespace, so ignore it.
 551			return true
 552		}
 553	case StartTagToken:
 554		if p.tok.DataAtom == a.Html {
 555			p.addElement()
 556			p.im = beforeHeadIM
 557			return true
 558		}
 559	case EndTagToken:
 560		switch p.tok.DataAtom {
 561		case a.Head, a.Body, a.Html, a.Br:
 562			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 563			return false
 564		default:
 565			// Ignore the token.
 566			return true
 567		}
 568	case CommentToken:
 569		p.doc.AppendChild(&Node{
 570			Type: CommentNode,
 571			Data: p.tok.Data,
 572		})
 573		return true
 574	}
 575	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 576	return false
 577}
 578
 579// Section 12.2.6.4.3.
 580func beforeHeadIM(p *parser) bool {
 581	switch p.tok.Type {
 582	case TextToken:
 583		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 584		if len(p.tok.Data) == 0 {
 585			// It was all whitespace, so ignore it.
 586			return true
 587		}
 588	case StartTagToken:
 589		switch p.tok.DataAtom {
 590		case a.Head:
 591			p.addElement()
 592			p.head = p.top()
 593			p.im = inHeadIM
 594			return true
 595		case a.Html:
 596			return inBodyIM(p)
 597		}
 598	case EndTagToken:
 599		switch p.tok.DataAtom {
 600		case a.Head, a.Body, a.Html, a.Br:
 601			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 602			return false
 603		default:
 604			// Ignore the token.
 605			return true
 606		}
 607	case CommentToken:
 608		p.addChild(&Node{
 609			Type: CommentNode,
 610			Data: p.tok.Data,
 611		})
 612		return true
 613	case DoctypeToken:
 614		// Ignore the token.
 615		return true
 616	}
 617
 618	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 619	return false
 620}
 621
 622// Section 12.2.6.4.4.
 623func inHeadIM(p *parser) bool {
 624	switch p.tok.Type {
 625	case TextToken:
 626		s := strings.TrimLeft(p.tok.Data, whitespace)
 627		if len(s) < len(p.tok.Data) {
 628			// Add the initial whitespace to the current node.
 629			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 630			if s == "" {
 631				return true
 632			}
 633			p.tok.Data = s
 634		}
 635	case StartTagToken:
 636		switch p.tok.DataAtom {
 637		case a.Html:
 638			return inBodyIM(p)
 639		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
 640			p.addElement()
 641			p.oe.pop()
 642			p.acknowledgeSelfClosingTag()
 643			return true
 644		case a.Noscript:
 645			if p.scripting {
 646				p.parseGenericRawTextElement()
 647				return true
 648			}
 649			p.addElement()
 650			p.im = inHeadNoscriptIM
 651			// Don't let the tokenizer go into raw text mode when scripting is disabled.
 652			p.tokenizer.NextIsNotRawText()
 653			return true
 654		case a.Script, a.Title:
 655			p.addElement()
 656			p.setOriginalIM()
 657			p.im = textIM
 658			return true
 659		case a.Noframes, a.Style:
 660			p.parseGenericRawTextElement()
 661			return true
 662		case a.Head:
 663			// Ignore the token.
 664			return true
 665		case a.Template:
 666			// TODO: remove this divergence from the HTML5 spec.
 667			//
 668			// We don't handle all of the corner cases when mixing foreign
 669			// content (i.e. <math> or <svg>) with <template>. Without this
 670			// early return, we can get into an infinite loop, possibly because
 671			// of the "TODO... further divergence" a little below.
 672			//
 673			// As a workaround, if we are mixing foreign content and templates,
 674			// just ignore the rest of the HTML. Foreign content is rare and a
 675			// relatively old HTML feature. Templates are also rare and a
 676			// relatively new HTML feature. Their combination is very rare.
 677			for _, e := range p.oe {
 678				if e.Namespace != "" {
 679					p.im = ignoreTheRemainingTokens
 680					return true
 681				}
 682			}
 683
 684			p.addElement()
 685			p.afe = append(p.afe, &scopeMarker)
 686			p.framesetOK = false
 687			p.im = inTemplateIM
 688			p.templateStack = append(p.templateStack, inTemplateIM)
 689			return true
 690		}
 691	case EndTagToken:
 692		switch p.tok.DataAtom {
 693		case a.Head:
 694			p.oe.pop()
 695			p.im = afterHeadIM
 696			return true
 697		case a.Body, a.Html, a.Br:
 698			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
 699			return false
 700		case a.Template:
 701			if !p.oe.contains(a.Template) {
 702				return true
 703			}
 704			// TODO: remove this further divergence from the HTML5 spec.
 705			//
 706			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 707			p.generateImpliedEndTags()
 708			for i := len(p.oe) - 1; i >= 0; i-- {
 709				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
 710					p.oe = p.oe[:i]
 711					break
 712				}
 713			}
 714			p.clearActiveFormattingElements()
 715			p.templateStack.pop()
 716			p.resetInsertionMode()
 717			return true
 718		default:
 719			// Ignore the token.
 720			return true
 721		}
 722	case CommentToken:
 723		p.addChild(&Node{
 724			Type: CommentNode,
 725			Data: p.tok.Data,
 726		})
 727		return true
 728	case DoctypeToken:
 729		// Ignore the token.
 730		return true
 731	}
 732
 733	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
 734	return false
 735}
 736
 737// Section 12.2.6.4.5.
 738func inHeadNoscriptIM(p *parser) bool {
 739	switch p.tok.Type {
 740	case DoctypeToken:
 741		// Ignore the token.
 742		return true
 743	case StartTagToken:
 744		switch p.tok.DataAtom {
 745		case a.Html:
 746			return inBodyIM(p)
 747		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
 748			return inHeadIM(p)
 749		case a.Head:
 750			// Ignore the token.
 751			return true
 752		case a.Noscript:
 753			// Don't let the tokenizer go into raw text mode even when a <noscript>
 754			// tag is in "in head noscript" insertion mode.
 755			p.tokenizer.NextIsNotRawText()
 756			// Ignore the token.
 757			return true
 758		}
 759	case EndTagToken:
 760		switch p.tok.DataAtom {
 761		case a.Noscript, a.Br:
 762		default:
 763			// Ignore the token.
 764			return true
 765		}
 766	case TextToken:
 767		s := strings.TrimLeft(p.tok.Data, whitespace)
 768		if len(s) == 0 {
 769			// It was all whitespace.
 770			return inHeadIM(p)
 771		}
 772	case CommentToken:
 773		return inHeadIM(p)
 774	}
 775	p.oe.pop()
 776	if p.top().DataAtom != a.Head {
 777		panic("html: the new current node will be a head element.")
 778	}
 779	p.im = inHeadIM
 780	if p.tok.DataAtom == a.Noscript {
 781		return true
 782	}
 783	return false
 784}
 785
 786// Section 12.2.6.4.6.
 787func afterHeadIM(p *parser) bool {
 788	switch p.tok.Type {
 789	case TextToken:
 790		s := strings.TrimLeft(p.tok.Data, whitespace)
 791		if len(s) < len(p.tok.Data) {
 792			// Add the initial whitespace to the current node.
 793			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 794			if s == "" {
 795				return true
 796			}
 797			p.tok.Data = s
 798		}
 799	case StartTagToken:
 800		switch p.tok.DataAtom {
 801		case a.Html:
 802			return inBodyIM(p)
 803		case a.Body:
 804			p.addElement()
 805			p.framesetOK = false
 806			p.im = inBodyIM
 807			return true
 808		case a.Frameset:
 809			p.addElement()
 810			p.im = inFramesetIM
 811			return true
 812		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 813			p.oe = append(p.oe, p.head)
 814			defer p.oe.remove(p.head)
 815			return inHeadIM(p)
 816		case a.Head:
 817			// Ignore the token.
 818			return true
 819		}
 820	case EndTagToken:
 821		switch p.tok.DataAtom {
 822		case a.Body, a.Html, a.Br:
 823			// Drop down to creating an implied <body> tag.
 824		case a.Template:
 825			return inHeadIM(p)
 826		default:
 827			// Ignore the token.
 828			return true
 829		}
 830	case CommentToken:
 831		p.addChild(&Node{
 832			Type: CommentNode,
 833			Data: p.tok.Data,
 834		})
 835		return true
 836	case DoctypeToken:
 837		// Ignore the token.
 838		return true
 839	}
 840
 841	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
 842	p.framesetOK = true
 843	if p.tok.Type == ErrorToken {
 844		// Stop parsing.
 845		return true
 846	}
 847	return false
 848}
 849
 850// copyAttributes copies attributes of src not found on dst to dst.
 851func copyAttributes(dst *Node, src Token) {
 852	if len(src.Attr) == 0 {
 853		return
 854	}
 855	attr := map[string]string{}
 856	for _, t := range dst.Attr {
 857		attr[t.Key] = t.Val
 858	}
 859	for _, t := range src.Attr {
 860		if _, ok := attr[t.Key]; !ok {
 861			dst.Attr = append(dst.Attr, t)
 862			attr[t.Key] = t.Val
 863		}
 864	}
 865}
 866
 867// Section 12.2.6.4.7.
 868func inBodyIM(p *parser) bool {
 869	switch p.tok.Type {
 870	case TextToken:
 871		d := p.tok.Data
 872		switch n := p.oe.top(); n.DataAtom {
 873		case a.Pre, a.Listing:
 874			if n.FirstChild == nil {
 875				// Ignore a newline at the start of a <pre> block.
 876				if d != "" && d[0] == '\r' {
 877					d = d[1:]
 878				}
 879				if d != "" && d[0] == '\n' {
 880					d = d[1:]
 881				}
 882			}
 883		}
 884		d = strings.Replace(d, "\x00", "", -1)
 885		if d == "" {
 886			return true
 887		}
 888		p.reconstructActiveFormattingElements()
 889		p.addText(d)
 890		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
 891			// There were non-whitespace characters inserted.
 892			p.framesetOK = false
 893		}
 894	case StartTagToken:
 895		switch p.tok.DataAtom {
 896		case a.Html:
 897			if p.oe.contains(a.Template) {
 898				return true
 899			}
 900			copyAttributes(p.oe[0], p.tok)
 901		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 902			return inHeadIM(p)
 903		case a.Body:
 904			if p.oe.contains(a.Template) {
 905				return true
 906			}
 907			if len(p.oe) >= 2 {
 908				body := p.oe[1]
 909				if body.Type == ElementNode && body.DataAtom == a.Body {
 910					p.framesetOK = false
 911					copyAttributes(body, p.tok)
 912				}
 913			}
 914		case a.Frameset:
 915			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
 916				// Ignore the token.
 917				return true
 918			}
 919			body := p.oe[1]
 920			if body.Parent != nil {
 921				body.Parent.RemoveChild(body)
 922			}
 923			p.oe = p.oe[:1]
 924			p.addElement()
 925			p.im = inFramesetIM
 926			return true
 927		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Search, a.Section, a.Summary, a.Ul:
 928			p.popUntil(buttonScope, a.P)
 929			p.addElement()
 930		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 931			p.popUntil(buttonScope, a.P)
 932			switch n := p.top(); n.DataAtom {
 933			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 934				p.oe.pop()
 935			}
 936			p.addElement()
 937		case a.Pre, a.Listing:
 938			p.popUntil(buttonScope, a.P)
 939			p.addElement()
 940			// The newline, if any, will be dealt with by the TextToken case.
 941			p.framesetOK = false
 942		case a.Form:
 943			if p.form != nil && !p.oe.contains(a.Template) {
 944				// Ignore the token
 945				return true
 946			}
 947			p.popUntil(buttonScope, a.P)
 948			p.addElement()
 949			if !p.oe.contains(a.Template) {
 950				p.form = p.top()
 951			}
 952		case a.Li:
 953			p.framesetOK = false
 954			for i := len(p.oe) - 1; i >= 0; i-- {
 955				node := p.oe[i]
 956				switch node.DataAtom {
 957				case a.Li:
 958					p.oe = p.oe[:i]
 959				case a.Address, a.Div, a.P:
 960					continue
 961				default:
 962					if !isSpecialElement(node) {
 963						continue
 964					}
 965				}
 966				break
 967			}
 968			p.popUntil(buttonScope, a.P)
 969			p.addElement()
 970		case a.Dd, a.Dt:
 971			p.framesetOK = false
 972			for i := len(p.oe) - 1; i >= 0; i-- {
 973				node := p.oe[i]
 974				switch node.DataAtom {
 975				case a.Dd, a.Dt:
 976					p.oe = p.oe[:i]
 977				case a.Address, a.Div, a.P:
 978					continue
 979				default:
 980					if !isSpecialElement(node) {
 981						continue
 982					}
 983				}
 984				break
 985			}
 986			p.popUntil(buttonScope, a.P)
 987			p.addElement()
 988		case a.Plaintext:
 989			p.popUntil(buttonScope, a.P)
 990			p.addElement()
 991		case a.Button:
 992			p.popUntil(defaultScope, a.Button)
 993			p.reconstructActiveFormattingElements()
 994			p.addElement()
 995			p.framesetOK = false
 996		case a.A:
 997			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
 998				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
 999					p.inBodyEndTagFormatting(a.A, "a")
1000					p.oe.remove(n)
1001					p.afe.remove(n)
1002					break
1003				}
1004			}
1005			p.reconstructActiveFormattingElements()
1006			p.addFormattingElement()
1007		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1008			p.reconstructActiveFormattingElements()
1009			p.addFormattingElement()
1010		case a.Nobr:
1011			p.reconstructActiveFormattingElements()
1012			if p.elementInScope(defaultScope, a.Nobr) {
1013				p.inBodyEndTagFormatting(a.Nobr, "nobr")
1014				p.reconstructActiveFormattingElements()
1015			}
1016			p.addFormattingElement()
1017		case a.Applet, a.Marquee, a.Object:
1018			p.reconstructActiveFormattingElements()
1019			p.addElement()
1020			p.afe = append(p.afe, &scopeMarker)
1021			p.framesetOK = false
1022		case a.Table:
1023			if !p.quirks {
1024				p.popUntil(buttonScope, a.P)
1025			}
1026			p.addElement()
1027			p.framesetOK = false
1028			p.im = inTableIM
1029			return true
1030		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
1031			p.reconstructActiveFormattingElements()
1032			p.addElement()
1033			p.oe.pop()
1034			p.acknowledgeSelfClosingTag()
1035			if p.tok.DataAtom == a.Input {
1036				for _, t := range p.tok.Attr {
1037					if t.Key == "type" {
1038						if strings.EqualFold(t.Val, "hidden") {
1039							// Skip setting framesetOK = false
1040							return true
1041						}
1042					}
1043				}
1044			}
1045			p.framesetOK = false
1046		case a.Param, a.Source, a.Track:
1047			p.addElement()
1048			p.oe.pop()
1049			p.acknowledgeSelfClosingTag()
1050		case a.Hr:
1051			p.popUntil(buttonScope, a.P)
1052			p.addElement()
1053			p.oe.pop()
1054			p.acknowledgeSelfClosingTag()
1055			p.framesetOK = false
1056		case a.Image:
1057			p.tok.DataAtom = a.Img
1058			p.tok.Data = a.Img.String()
1059			return false
1060		case a.Textarea:
1061			p.addElement()
1062			p.setOriginalIM()
1063			p.framesetOK = false
1064			p.im = textIM
1065		case a.Xmp:
1066			p.popUntil(buttonScope, a.P)
1067			p.reconstructActiveFormattingElements()
1068			p.framesetOK = false
1069			p.parseGenericRawTextElement()
1070		case a.Iframe:
1071			p.framesetOK = false
1072			p.parseGenericRawTextElement()
1073		case a.Noembed:
1074			p.parseGenericRawTextElement()
1075		case a.Noscript:
1076			if p.scripting {
1077				p.parseGenericRawTextElement()
1078				return true
1079			}
1080			p.reconstructActiveFormattingElements()
1081			p.addElement()
1082			// Don't let the tokenizer go into raw text mode when scripting is disabled.
1083			p.tokenizer.NextIsNotRawText()
1084		case a.Select:
1085			p.reconstructActiveFormattingElements()
1086			p.addElement()
1087			p.framesetOK = false
1088			p.im = inSelectIM
1089			return true
1090		case a.Optgroup, a.Option:
1091			if p.top().DataAtom == a.Option {
1092				p.oe.pop()
1093			}
1094			p.reconstructActiveFormattingElements()
1095			p.addElement()
1096		case a.Rb, a.Rtc:
1097			if p.elementInScope(defaultScope, a.Ruby) {
1098				p.generateImpliedEndTags()
1099			}
1100			p.addElement()
1101		case a.Rp, a.Rt:
1102			if p.elementInScope(defaultScope, a.Ruby) {
1103				p.generateImpliedEndTags("rtc")
1104			}
1105			p.addElement()
1106		case a.Math, a.Svg:
1107			p.reconstructActiveFormattingElements()
1108			if p.tok.DataAtom == a.Math {
1109				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1110			} else {
1111				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1112			}
1113			adjustForeignAttributes(p.tok.Attr)
1114			p.addElement()
1115			p.top().Namespace = p.tok.Data
1116			if p.hasSelfClosingToken {
1117				p.oe.pop()
1118				p.acknowledgeSelfClosingTag()
1119			}
1120			return true
1121		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1122			// Ignore the token.
1123		default:
1124			p.reconstructActiveFormattingElements()
1125			p.addElement()
1126		}
1127	case EndTagToken:
1128		switch p.tok.DataAtom {
1129		case a.Body:
1130			if p.elementInScope(defaultScope, a.Body) {
1131				p.im = afterBodyIM
1132			}
1133		case a.Html:
1134			if p.elementInScope(defaultScope, a.Body) {
1135				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1136				return false
1137			}
1138			return true
1139		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Search, a.Section, a.Summary, a.Ul:
1140			p.popUntil(defaultScope, p.tok.DataAtom)
1141		case a.Form:
1142			if p.oe.contains(a.Template) {
1143				i := p.indexOfElementInScope(defaultScope, a.Form)
1144				if i == -1 {
1145					// Ignore the token.
1146					return true
1147				}
1148				p.generateImpliedEndTags()
1149				if p.oe[i].DataAtom != a.Form {
1150					// Ignore the token.
1151					return true
1152				}
1153				p.popUntil(defaultScope, a.Form)
1154			} else {
1155				node := p.form
1156				p.form = nil
1157				i := p.indexOfElementInScope(defaultScope, a.Form)
1158				if node == nil || i == -1 || p.oe[i] != node {
1159					// Ignore the token.
1160					return true
1161				}
1162				p.generateImpliedEndTags()
1163				p.oe.remove(node)
1164			}
1165		case a.P:
1166			if !p.elementInScope(buttonScope, a.P) {
1167				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1168			}
1169			p.popUntil(buttonScope, a.P)
1170		case a.Li:
1171			p.popUntil(listItemScope, a.Li)
1172		case a.Dd, a.Dt:
1173			p.popUntil(defaultScope, p.tok.DataAtom)
1174		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1175			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1176		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1177			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
1178		case a.Applet, a.Marquee, a.Object:
1179			if p.popUntil(defaultScope, p.tok.DataAtom) {
1180				p.clearActiveFormattingElements()
1181			}
1182		case a.Br:
1183			p.tok.Type = StartTagToken
1184			return false
1185		case a.Template:
1186			return inHeadIM(p)
1187		default:
1188			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
1189		}
1190	case CommentToken:
1191		p.addChild(&Node{
1192			Type: CommentNode,
1193			Data: p.tok.Data,
1194		})
1195	case ErrorToken:
1196		// TODO: remove this divergence from the HTML5 spec.
1197		if len(p.templateStack) > 0 {
1198			p.im = inTemplateIM
1199			return false
1200		}
1201		for _, e := range p.oe {
1202			switch e.DataAtom {
1203			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1204				a.Thead, a.Tr, a.Body, a.Html:
1205			default:
1206				return true
1207			}
1208		}
1209	}
1210
1211	return true
1212}
1213
// inBodyEndTagFormatting handles an end tag for a formatting element (inBodyIM
// calls it for tags such as </a>, </b>, </i> and </font>). tagAtom and tagName
// identify the end tag being processed.
//
// Depending on the parser state it pops the stack of open elements (p.oe),
// removes or replaces entries in the list of active formatting elements
// (p.afe), and re-parents nodes in the tree. It returns nothing; all effects
// are on the parser and the document tree.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2. Fast path: the current node has the end tag's name and is not
	// in the list of active formatting elements, so it is simply popped.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop. It runs at most eight times.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element: the last entry in the list of
		// active formatting elements with this tag, searching no further back
		// than the most recent scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such element: treat the tag like any other end tag.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block: the first special element at
		// or above the formatting element's position in the stack.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop the stack up to and including the
			// formatting element, then drop it from the active formatting list.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// The common ancestor is the stack entry directly below the formatting
		// element; bookmark is an index into p.afe.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step 14.3. Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7. Replace node with a clone in both lists.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8. On the first replacement, move the bookmark to just
			// after the new node.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9. Make lastNode a child of node.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements: the clone takes the
		// position directly above the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1366
1367// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1368// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1369// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1370func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1371	for i := len(p.oe) - 1; i >= 0; i-- {
1372		// Two element nodes have the same tag if they have the same Data (a
1373		// string-typed field). As an optimization, for common HTML tags, each
1374		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
1375		// field), since integer comparison is faster than string comparison.
1376		// Uncommon (custom) tags get a zero DataAtom.
1377		//
1378		// The if condition here is equivalent to (p.oe[i].Data == tagName).
1379		if (p.oe[i].DataAtom == tagAtom) &&
1380			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
1381			p.oe = p.oe[:i]
1382			break
1383		}
1384		if isSpecialElement(p.oe[i]) {
1385			break
1386		}
1387	}
1388}
1389
1390// Section 12.2.6.4.8.
1391func textIM(p *parser) bool {
1392	switch p.tok.Type {
1393	case ErrorToken:
1394		p.oe.pop()
1395	case TextToken:
1396		d := p.tok.Data
1397		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1398			// Ignore a newline at the start of a <textarea> block.
1399			if d != "" && d[0] == '\r' {
1400				d = d[1:]
1401			}
1402			if d != "" && d[0] == '\n' {
1403				d = d[1:]
1404			}
1405		}
1406		if d == "" {
1407			return true
1408		}
1409		p.addText(d)
1410		return true
1411	case EndTagToken:
1412		p.oe.pop()
1413	}
1414	p.im = p.originalIM
1415	p.originalIM = nil
1416	return p.tok.Type == EndTagToken
1417}
1418
// Section 12.2.6.4.9.
//
// inTableIM is the "in table" insertion mode. It reports whether the current
// token was consumed; a false result makes parseCurrentToken run the token
// again through whatever insertion mode is current at that point.
//
// Tokens that no case below fully handles reach the end of the function, where
// they are processed by inBodyIM with foster parenting switched on.
func inTableIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// NUL characters are dropped from the text.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
		switch p.oe.top().DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			// Whitespace-only text directly inside a table-structure element
			// is kept in place; anything else falls to the foster-parenting
			// path at the bottom.
			if strings.Trim(p.tok.Data, whitespace) == "" {
				p.addText(p.tok.Data)
				return true
			}
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Caption:
			p.clearStackToContext(tableScope)
			// A scope marker is pushed onto the active formatting elements
			// before the caption is opened.
			p.afe = append(p.afe, &scopeMarker)
			p.addElement()
			p.im = inCaptionIM
			return true
		case a.Colgroup:
			p.clearStackToContext(tableScope)
			p.addElement()
			p.im = inColumnGroupIM
			return true
		case a.Col:
			// Act as if a <colgroup> start tag had been seen, then reprocess
			// the <col>.
			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
			return false
		case a.Tbody, a.Tfoot, a.Thead:
			p.clearStackToContext(tableScope)
			p.addElement()
			p.im = inTableBodyIM
			return true
		case a.Td, a.Th, a.Tr:
			// Act as if a <tbody> start tag had been seen, then reprocess.
			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
			return false
		case a.Table:
			// A nested <table> start tag closes the open table (if one is in
			// table scope) and is then reprocessed.
			if p.popUntil(tableScope, a.Table) {
				p.resetInsertionMode()
				return false
			}
			// Ignore the token.
			return true
		case a.Style, a.Script, a.Template:
			return inHeadIM(p)
		case a.Input:
			// <input type=hidden> (value compared case-insensitively) is
			// inserted and immediately popped.
			for _, t := range p.tok.Attr {
				if t.Key == "type" && strings.EqualFold(t.Val, "hidden") {
					p.addElement()
					p.oe.pop()
					return true
				}
			}
			// Otherwise drop down to the default action.
		case a.Form:
			if p.oe.contains(a.Template) || p.form != nil {
				// Ignore the token.
				return true
			}
			p.addElement()
			p.form = p.oe.pop()
			// No return here: control leaves the switch and continues with the
			// inBodyIM call at the bottom of the function.
		case a.Select:
			p.reconstructActiveFormattingElements()
			// The <select> itself is foster parented when the current node is
			// a table-structure element.
			switch p.top().DataAtom {
			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
				p.fosterParenting = true
			}
			p.addElement()
			p.fosterParenting = false
			p.framesetOK = false
			p.im = inSelectInTableIM
			return true
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Table:
			if p.popUntil(tableScope, a.Table) {
				p.resetInsertionMode()
				return true
			}
			// Ignore the token.
			return true
		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
			return true
		case a.Template:
			return inHeadIM(p)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
		return true
	case DoctypeToken:
		// Ignore the token.
		return true
	case ErrorToken:
		return inBodyIM(p)
	}

	// Anything else: process the token with the "in body" rules, with foster
	// parenting enabled for the duration of that call only.
	p.fosterParenting = true
	defer func() { p.fosterParenting = false }()

	return inBodyIM(p)
}
1525
1526// Section 12.2.6.4.11.
1527func inCaptionIM(p *parser) bool {
1528	switch p.tok.Type {
1529	case StartTagToken:
1530		switch p.tok.DataAtom {
1531		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1532			if !p.popUntil(tableScope, a.Caption) {
1533				// Ignore the token.
1534				return true
1535			}
1536			p.clearActiveFormattingElements()
1537			p.im = inTableIM
1538			return false
1539		case a.Select:
1540			p.reconstructActiveFormattingElements()
1541			p.addElement()
1542			p.framesetOK = false
1543			p.im = inSelectInTableIM
1544			return true
1545		}
1546	case EndTagToken:
1547		switch p.tok.DataAtom {
1548		case a.Caption:
1549			if p.popUntil(tableScope, a.Caption) {
1550				p.clearActiveFormattingElements()
1551				p.im = inTableIM
1552			}
1553			return true
1554		case a.Table:
1555			if !p.popUntil(tableScope, a.Caption) {
1556				// Ignore the token.
1557				return true
1558			}
1559			p.clearActiveFormattingElements()
1560			p.im = inTableIM
1561			return false
1562		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1563			// Ignore the token.
1564			return true
1565		}
1566	}
1567	return inBodyIM(p)
1568}
1569
1570// Section 12.2.6.4.12.
1571func inColumnGroupIM(p *parser) bool {
1572	switch p.tok.Type {
1573	case TextToken:
1574		s := strings.TrimLeft(p.tok.Data, whitespace)
1575		if len(s) < len(p.tok.Data) {
1576			// Add the initial whitespace to the current node.
1577			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1578			if s == "" {
1579				return true
1580			}
1581			p.tok.Data = s
1582		}
1583	case CommentToken:
1584		p.addChild(&Node{
1585			Type: CommentNode,
1586			Data: p.tok.Data,
1587		})
1588		return true
1589	case DoctypeToken:
1590		// Ignore the token.
1591		return true
1592	case StartTagToken:
1593		switch p.tok.DataAtom {
1594		case a.Html:
1595			return inBodyIM(p)
1596		case a.Col:
1597			p.addElement()
1598			p.oe.pop()
1599			p.acknowledgeSelfClosingTag()
1600			return true
1601		case a.Template:
1602			return inHeadIM(p)
1603		}
1604	case EndTagToken:
1605		switch p.tok.DataAtom {
1606		case a.Colgroup:
1607			if p.oe.top().DataAtom == a.Colgroup {
1608				p.oe.pop()
1609				p.im = inTableIM
1610			}
1611			return true
1612		case a.Col:
1613			// Ignore the token.
1614			return true
1615		case a.Template:
1616			return inHeadIM(p)
1617		}
1618	case ErrorToken:
1619		return inBodyIM(p)
1620	}
1621	if p.oe.top().DataAtom != a.Colgroup {
1622		return true
1623	}
1624	p.oe.pop()
1625	p.im = inTableIM
1626	return false
1627}
1628
1629// Section 12.2.6.4.13.
1630func inTableBodyIM(p *parser) bool {
1631	switch p.tok.Type {
1632	case StartTagToken:
1633		switch p.tok.DataAtom {
1634		case a.Tr:
1635			p.clearStackToContext(tableBodyScope)
1636			p.addElement()
1637			p.im = inRowIM
1638			return true
1639		case a.Td, a.Th:
1640			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1641			return false
1642		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1643			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1644				p.im = inTableIM
1645				return false
1646			}
1647			// Ignore the token.
1648			return true
1649		}
1650	case EndTagToken:
1651		switch p.tok.DataAtom {
1652		case a.Tbody, a.Tfoot, a.Thead:
1653			if p.elementInScope(tableScope, p.tok.DataAtom) {
1654				p.clearStackToContext(tableBodyScope)
1655				p.oe.pop()
1656				p.im = inTableIM
1657			}
1658			return true
1659		case a.Table:
1660			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1661				p.im = inTableIM
1662				return false
1663			}
1664			// Ignore the token.
1665			return true
1666		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1667			// Ignore the token.
1668			return true
1669		}
1670	case CommentToken:
1671		p.addChild(&Node{
1672			Type: CommentNode,
1673			Data: p.tok.Data,
1674		})
1675		return true
1676	}
1677
1678	return inTableIM(p)
1679}
1680
1681// Section 12.2.6.4.14.
1682func inRowIM(p *parser) bool {
1683	switch p.tok.Type {
1684	case StartTagToken:
1685		switch p.tok.DataAtom {
1686		case a.Td, a.Th:
1687			p.clearStackToContext(tableRowScope)
1688			p.addElement()
1689			p.afe = append(p.afe, &scopeMarker)
1690			p.im = inCellIM
1691			return true
1692		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1693			if p.popUntil(tableScope, a.Tr) {
1694				p.im = inTableBodyIM
1695				return false
1696			}
1697			// Ignore the token.
1698			return true
1699		}
1700	case EndTagToken:
1701		switch p.tok.DataAtom {
1702		case a.Tr:
1703			if p.popUntil(tableScope, a.Tr) {
1704				p.im = inTableBodyIM
1705				return true
1706			}
1707			// Ignore the token.
1708			return true
1709		case a.Table:
1710			if p.popUntil(tableScope, a.Tr) {
1711				p.im = inTableBodyIM
1712				return false
1713			}
1714			// Ignore the token.
1715			return true
1716		case a.Tbody, a.Tfoot, a.Thead:
1717			if p.elementInScope(tableScope, p.tok.DataAtom) {
1718				p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1719				return false
1720			}
1721			// Ignore the token.
1722			return true
1723		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1724			// Ignore the token.
1725			return true
1726		}
1727	}
1728
1729	return inTableIM(p)
1730}
1731
1732// Section 12.2.6.4.15.
1733func inCellIM(p *parser) bool {
1734	switch p.tok.Type {
1735	case StartTagToken:
1736		switch p.tok.DataAtom {
1737		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1738			if p.popUntil(tableScope, a.Td, a.Th) {
1739				// Close the cell and reprocess.
1740				p.clearActiveFormattingElements()
1741				p.im = inRowIM
1742				return false
1743			}
1744			// Ignore the token.
1745			return true
1746		case a.Select:
1747			p.reconstructActiveFormattingElements()
1748			p.addElement()
1749			p.framesetOK = false
1750			p.im = inSelectInTableIM
1751			return true
1752		}
1753	case EndTagToken:
1754		switch p.tok.DataAtom {
1755		case a.Td, a.Th:
1756			if !p.popUntil(tableScope, p.tok.DataAtom) {
1757				// Ignore the token.
1758				return true
1759			}
1760			p.clearActiveFormattingElements()
1761			p.im = inRowIM
1762			return true
1763		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1764			// Ignore the token.
1765			return true
1766		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1767			if !p.elementInScope(tableScope, p.tok.DataAtom) {
1768				// Ignore the token.
1769				return true
1770			}
1771			// Close the cell and reprocess.
1772			if p.popUntil(tableScope, a.Td, a.Th) {
1773				p.clearActiveFormattingElements()
1774			}
1775			p.im = inRowIM
1776			return false
1777		}
1778	}
1779	return inBodyIM(p)
1780}
1781
1782// Section 12.2.6.4.16.
1783func inSelectIM(p *parser) bool {
1784	switch p.tok.Type {
1785	case TextToken:
1786		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1787	case StartTagToken:
1788		switch p.tok.DataAtom {
1789		case a.Html:
1790			return inBodyIM(p)
1791		case a.Option:
1792			if p.top().DataAtom == a.Option {
1793				p.oe.pop()
1794			}
1795			p.addElement()
1796		case a.Optgroup:
1797			if p.top().DataAtom == a.Option {
1798				p.oe.pop()
1799			}
1800			if p.top().DataAtom == a.Optgroup {
1801				p.oe.pop()
1802			}
1803			p.addElement()
1804		case a.Select:
1805			if !p.popUntil(selectScope, a.Select) {
1806				// Ignore the token.
1807				return true
1808			}
1809			p.resetInsertionMode()
1810		case a.Input, a.Keygen, a.Textarea:
1811			if p.elementInScope(selectScope, a.Select) {
1812				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1813				return false
1814			}
1815			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
1816			p.tokenizer.NextIsNotRawText()
1817			// Ignore the token.
1818			return true
1819		case a.Script, a.Template:
1820			return inHeadIM(p)
1821		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
1822			// Don't let the tokenizer go into raw text mode when there are raw tags
1823			// to be ignored. These tags should be ignored from the tokenizer
1824			// properly.
1825			p.tokenizer.NextIsNotRawText()
1826			// Ignore the token.
1827			return true
1828		}
1829	case EndTagToken:
1830		switch p.tok.DataAtom {
1831		case a.Option:
1832			if p.top().DataAtom == a.Option {
1833				p.oe.pop()
1834			}
1835		case a.Optgroup:
1836			i := len(p.oe) - 1
1837			if p.oe[i].DataAtom == a.Option {
1838				i--
1839			}
1840			if p.oe[i].DataAtom == a.Optgroup {
1841				p.oe = p.oe[:i]
1842			}
1843		case a.Select:
1844			if !p.popUntil(selectScope, a.Select) {
1845				// Ignore the token.
1846				return true
1847			}
1848			p.resetInsertionMode()
1849		case a.Template:
1850			return inHeadIM(p)
1851		}
1852	case CommentToken:
1853		p.addChild(&Node{
1854			Type: CommentNode,
1855			Data: p.tok.Data,
1856		})
1857	case DoctypeToken:
1858		// Ignore the token.
1859		return true
1860	case ErrorToken:
1861		return inBodyIM(p)
1862	}
1863
1864	return true
1865}
1866
1867// Section 12.2.6.4.17.
1868func inSelectInTableIM(p *parser) bool {
1869	switch p.tok.Type {
1870	case StartTagToken, EndTagToken:
1871		switch p.tok.DataAtom {
1872		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1873			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1874				// Ignore the token.
1875				return true
1876			}
1877			// This is like p.popUntil(selectScope, a.Select), but it also
1878			// matches <math select>, not just <select>. Matching the MathML
1879			// tag is arguably incorrect (conceptually), but it mimics what
1880			// Chromium does.
1881			for i := len(p.oe) - 1; i >= 0; i-- {
1882				if n := p.oe[i]; n.DataAtom == a.Select {
1883					p.oe = p.oe[:i]
1884					break
1885				}
1886			}
1887			p.resetInsertionMode()
1888			return false
1889		}
1890	}
1891	return inSelectIM(p)
1892}
1893
1894// Section 12.2.6.4.18.
1895func inTemplateIM(p *parser) bool {
1896	switch p.tok.Type {
1897	case TextToken, CommentToken, DoctypeToken:
1898		return inBodyIM(p)
1899	case StartTagToken:
1900		switch p.tok.DataAtom {
1901		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1902			return inHeadIM(p)
1903		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1904			p.templateStack.pop()
1905			p.templateStack = append(p.templateStack, inTableIM)
1906			p.im = inTableIM
1907			return false
1908		case a.Col:
1909			p.templateStack.pop()
1910			p.templateStack = append(p.templateStack, inColumnGroupIM)
1911			p.im = inColumnGroupIM
1912			return false
1913		case a.Tr:
1914			p.templateStack.pop()
1915			p.templateStack = append(p.templateStack, inTableBodyIM)
1916			p.im = inTableBodyIM
1917			return false
1918		case a.Td, a.Th:
1919			p.templateStack.pop()
1920			p.templateStack = append(p.templateStack, inRowIM)
1921			p.im = inRowIM
1922			return false
1923		default:
1924			p.templateStack.pop()
1925			p.templateStack = append(p.templateStack, inBodyIM)
1926			p.im = inBodyIM
1927			return false
1928		}
1929	case EndTagToken:
1930		switch p.tok.DataAtom {
1931		case a.Template:
1932			return inHeadIM(p)
1933		default:
1934			// Ignore the token.
1935			return true
1936		}
1937	case ErrorToken:
1938		if !p.oe.contains(a.Template) {
1939			// Ignore the token.
1940			return true
1941		}
1942		// TODO: remove this divergence from the HTML5 spec.
1943		//
1944		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1945		p.generateImpliedEndTags()
1946		for i := len(p.oe) - 1; i >= 0; i-- {
1947			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1948				p.oe = p.oe[:i]
1949				break
1950			}
1951		}
1952		p.clearActiveFormattingElements()
1953		p.templateStack.pop()
1954		p.resetInsertionMode()
1955		return false
1956	}
1957	return false
1958}
1959
1960// Section 12.2.6.4.19.
1961func afterBodyIM(p *parser) bool {
1962	switch p.tok.Type {
1963	case ErrorToken:
1964		// Stop parsing.
1965		return true
1966	case TextToken:
1967		s := strings.TrimLeft(p.tok.Data, whitespace)
1968		if len(s) == 0 {
1969			// It was all whitespace.
1970			return inBodyIM(p)
1971		}
1972	case StartTagToken:
1973		if p.tok.DataAtom == a.Html {
1974			return inBodyIM(p)
1975		}
1976	case EndTagToken:
1977		if p.tok.DataAtom == a.Html {
1978			if !p.fragment {
1979				p.im = afterAfterBodyIM
1980			}
1981			return true
1982		}
1983	case CommentToken:
1984		// The comment is attached to the <html> element.
1985		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1986			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1987		}
1988		p.oe[0].AppendChild(&Node{
1989			Type: CommentNode,
1990			Data: p.tok.Data,
1991		})
1992		return true
1993	}
1994	p.im = inBodyIM
1995	return false
1996}
1997
1998// Section 12.2.6.4.20.
1999func inFramesetIM(p *parser) bool {
2000	switch p.tok.Type {
2001	case CommentToken:
2002		p.addChild(&Node{
2003			Type: CommentNode,
2004			Data: p.tok.Data,
2005		})
2006	case TextToken:
2007		// Ignore all text but whitespace.
2008		s := strings.Map(func(c rune) rune {
2009			switch c {
2010			case ' ', '\t', '\n', '\f', '\r':
2011				return c
2012			}
2013			return -1
2014		}, p.tok.Data)
2015		if s != "" {
2016			p.addText(s)
2017		}
2018	case StartTagToken:
2019		switch p.tok.DataAtom {
2020		case a.Html:
2021			return inBodyIM(p)
2022		case a.Frameset:
2023			p.addElement()
2024		case a.Frame:
2025			p.addElement()
2026			p.oe.pop()
2027			p.acknowledgeSelfClosingTag()
2028		case a.Noframes:
2029			return inHeadIM(p)
2030		}
2031	case EndTagToken:
2032		switch p.tok.DataAtom {
2033		case a.Frameset:
2034			if p.oe.top().DataAtom != a.Html {
2035				p.oe.pop()
2036				if p.oe.top().DataAtom != a.Frameset {
2037					p.im = afterFramesetIM
2038					return true
2039				}
2040			}
2041		}
2042	default:
2043		// Ignore the token.
2044	}
2045	return true
2046}
2047
2048// Section 12.2.6.4.21.
2049func afterFramesetIM(p *parser) bool {
2050	switch p.tok.Type {
2051	case CommentToken:
2052		p.addChild(&Node{
2053			Type: CommentNode,
2054			Data: p.tok.Data,
2055		})
2056	case TextToken:
2057		// Ignore all text but whitespace.
2058		s := strings.Map(func(c rune) rune {
2059			switch c {
2060			case ' ', '\t', '\n', '\f', '\r':
2061				return c
2062			}
2063			return -1
2064		}, p.tok.Data)
2065		if s != "" {
2066			p.addText(s)
2067		}
2068	case StartTagToken:
2069		switch p.tok.DataAtom {
2070		case a.Html:
2071			return inBodyIM(p)
2072		case a.Noframes:
2073			return inHeadIM(p)
2074		}
2075	case EndTagToken:
2076		switch p.tok.DataAtom {
2077		case a.Html:
2078			p.im = afterAfterFramesetIM
2079			return true
2080		}
2081	default:
2082		// Ignore the token.
2083	}
2084	return true
2085}
2086
2087// Section 12.2.6.4.22.
2088func afterAfterBodyIM(p *parser) bool {
2089	switch p.tok.Type {
2090	case ErrorToken:
2091		// Stop parsing.
2092		return true
2093	case TextToken:
2094		s := strings.TrimLeft(p.tok.Data, whitespace)
2095		if len(s) == 0 {
2096			// It was all whitespace.
2097			return inBodyIM(p)
2098		}
2099	case StartTagToken:
2100		if p.tok.DataAtom == a.Html {
2101			return inBodyIM(p)
2102		}
2103	case CommentToken:
2104		p.doc.AppendChild(&Node{
2105			Type: CommentNode,
2106			Data: p.tok.Data,
2107		})
2108		return true
2109	case DoctypeToken:
2110		return inBodyIM(p)
2111	}
2112	p.im = inBodyIM
2113	return false
2114}
2115
2116// Section 12.2.6.4.23.
2117func afterAfterFramesetIM(p *parser) bool {
2118	switch p.tok.Type {
2119	case CommentToken:
2120		p.doc.AppendChild(&Node{
2121			Type: CommentNode,
2122			Data: p.tok.Data,
2123		})
2124	case TextToken:
2125		// Ignore all text but whitespace.
2126		s := strings.Map(func(c rune) rune {
2127			switch c {
2128			case ' ', '\t', '\n', '\f', '\r':
2129				return c
2130			}
2131			return -1
2132		}, p.tok.Data)
2133		if s != "" {
2134			p.tok.Data = s
2135			return inBodyIM(p)
2136		}
2137	case StartTagToken:
2138		switch p.tok.DataAtom {
2139		case a.Html:
2140			return inBodyIM(p)
2141		case a.Noframes:
2142			return inHeadIM(p)
2143		}
2144	case DoctypeToken:
2145		return inBodyIM(p)
2146	default:
2147		// Ignore the token.
2148	}
2149	return true
2150}
2151
// ignoreTheRemainingTokens has the same signature as the insertion mode
// functions (it takes the parser and returns a bool). It does nothing with the
// current token and always reports it as consumed.
func ignoreTheRemainingTokens(p *parser) bool {
	return true
}
2155
// whitespaceOrNUL is the whitespace cutset extended with the NUL character.
// parseForeignContent uses it to decide whether a text token keeps framesetOK
// set.
const whitespaceOrNUL = whitespace + "\x00"
2157
// Section 12.2.6.5
//
// parseForeignContent processes the current token while the parser is in
// foreign content; parseCurrentToken calls it whenever inForeignContent
// reports true. It reports whether the token was consumed; a false result
// makes the caller process the token again.
func parseForeignContent(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		// framesetOK stays set only while the text seen consists solely of
		// whitespace and NUL characters.
		if p.framesetOK {
			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
		}
		// NUL characters are replaced with U+FFFD before the text is added.
		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
		p.addText(p.tok.Data)
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case StartTagToken:
		// The breakout check is skipped when parsing a fragment.
		if !p.fragment {
			// b reports whether this start tag breaks out of foreign content:
			// either its name is in the breakout table, or it is a <font> tag
			// carrying a color, face or size attribute.
			b := breakout[p.tok.Data]
			if p.tok.DataAtom == a.Font {
			loop:
				for _, attr := range p.tok.Attr {
					switch attr.Key {
					case "color", "face", "size":
						b = true
						break loop
					}
				}
			}
			if b {
				// Pop foreign elements until the top of the stack is an
				// element with no namespace or an integration point, then
				// reprocess the token.
				for i := len(p.oe) - 1; i >= 0; i-- {
					n := p.oe[i]
					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
						p.oe = p.oe[:i+1]
						break
					}
				}
				return false
			}
		}
		current := p.adjustedCurrentNode()
		switch current.Namespace {
		case "math":
			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
		case "svg":
			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
			// SVG wants e.g. "foreignObject" with a capital second "O".
			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
				p.tok.DataAtom = a.Lookup([]byte(x))
				p.tok.Data = x
			}
			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
		default:
			// Only the "math" and "svg" namespaces are expected here.
			panic("html: bad parser state: unexpected namespace")
		}
		adjustForeignAttributes(p.tok.Attr)
		// The new element inherits the namespace of the adjusted current node.
		namespace := current.Namespace
		p.addElement()
		p.top().Namespace = namespace
		if namespace != "" {
			// Don't let the tokenizer go into raw text mode in foreign content
			// (e.g. in an SVG <title> tag).
			p.tokenizer.NextIsNotRawText()
		}
		if p.hasSelfClosingToken {
			// A self-closing foreign element is popped immediately.
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		}
	case EndTagToken:
		// Walk down the stack of open elements. Reaching an element with no
		// namespace first hands the token to the current insertion mode;
		// otherwise the first element whose name matches case-insensitively is
		// popped along with everything above it.
		for i := len(p.oe) - 1; i >= 0; i-- {
			if p.oe[i].Namespace == "" {
				return p.im(p)
			}
			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
				p.oe = p.oe[:i]
				break
			}
		}
		return true
	default:
		// Ignore the token.
	}
	return true
}
2240
2241// Section 12.2.4.2.
2242func (p *parser) adjustedCurrentNode() *Node {
2243	if len(p.oe) == 1 && p.fragment && p.context != nil {
2244		return p.context
2245	}
2246	return p.oe.top()
2247}
2248
2249// Section 12.2.6.
2250func (p *parser) inForeignContent() bool {
2251	if len(p.oe) == 0 {
2252		return false
2253	}
2254	n := p.adjustedCurrentNode()
2255	if n.Namespace == "" {
2256		return false
2257	}
2258	if mathMLTextIntegrationPoint(n) {
2259		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2260			return false
2261		}
2262		if p.tok.Type == TextToken {
2263			return false
2264		}
2265	}
2266	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2267		return false
2268	}
2269	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2270		return false
2271	}
2272	if p.tok.Type == ErrorToken {
2273		return false
2274	}
2275	return true
2276}
2277
2278// parseImpliedToken parses a token as though it had appeared in the parser's
2279// input.
2280func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2281	realToken, selfClosing := p.tok, p.hasSelfClosingToken
2282	p.tok = Token{
2283		Type:     t,
2284		DataAtom: dataAtom,
2285		Data:     data,
2286	}
2287	p.hasSelfClosingToken = false
2288	p.parseCurrentToken()
2289	p.tok, p.hasSelfClosingToken = realToken, selfClosing
2290}
2291
2292// parseCurrentToken runs the current token through the parsing routines
2293// until it is consumed.
2294func (p *parser) parseCurrentToken() {
2295	if p.tok.Type == SelfClosingTagToken {
2296		p.hasSelfClosingToken = true
2297		p.tok.Type = StartTagToken
2298	}
2299
2300	consumed := false
2301	for !consumed {
2302		if p.inForeignContent() {
2303			consumed = parseForeignContent(p)
2304		} else {
2305			consumed = p.im(p)
2306		}
2307	}
2308
2309	if p.hasSelfClosingToken {
2310		// This is a parse error, but ignore it.
2311		p.hasSelfClosingToken = false
2312	}
2313}
2314
2315func (p *parser) parse() error {
2316	// Iterate until EOF. Any other error will cause an early return.
2317	var err error
2318	for err != io.EOF {
2319		// CDATA sections are allowed only in foreign content.
2320		n := p.oe.top()
2321		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2322		// Read and parse the next token.
2323		p.tokenizer.Next()
2324		p.tok = p.tokenizer.Token()
2325		if p.tok.Type == ErrorToken {
2326			err = p.tokenizer.Err()
2327			if err != nil && err != io.EOF {
2328				return err
2329			}
2330		}
2331		p.parseCurrentToken()
2332	}
2333	return nil
2334}
2335
2336// Parse returns the parse tree for the HTML from the given Reader.
2337//
2338// It implements the HTML5 parsing algorithm
2339// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2340// which is very complicated. The resultant tree can contain implicitly created
2341// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2342// differ from the nesting implied by a naive processing of start and end
2343// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2344// with no corresponding node in the resulting tree.
2345//
2346// The input is assumed to be UTF-8 encoded.
2347func Parse(r io.Reader) (*Node, error) {
2348	return ParseWithOptions(r)
2349}
2350
2351// ParseFragment parses a fragment of HTML and returns the nodes that were
2352// found. If the fragment is the InnerHTML for an existing element, pass that
2353// element in context.
2354//
2355// It has the same intricacies as Parse.
2356func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2357	return ParseFragmentWithOptions(r, context)
2358}
2359
2360// ParseOption configures a parser.
2361type ParseOption func(p *parser)
2362
2363// ParseOptionEnableScripting configures the scripting flag.
2364// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
2365//
2366// By default, scripting is enabled.
2367func ParseOptionEnableScripting(enable bool) ParseOption {
2368	return func(p *parser) {
2369		p.scripting = enable
2370	}
2371}
2372
2373// ParseWithOptions is like Parse, with options.
2374func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2375	p := &parser{
2376		tokenizer: NewTokenizer(r),
2377		doc: &Node{
2378			Type: DocumentNode,
2379		},
2380		scripting:  true,
2381		framesetOK: true,
2382		im:         initialIM,
2383	}
2384
2385	for _, f := range opts {
2386		f(p)
2387	}
2388
2389	if err := p.parse(); err != nil {
2390		return nil, err
2391	}
2392	return p.doc, nil
2393}
2394
2395// ParseFragmentWithOptions is like ParseFragment, with options.
2396func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
2397	contextTag := ""
2398	if context != nil {
2399		if context.Type != ElementNode {
2400			return nil, errors.New("html: ParseFragment of non-element Node")
2401		}
2402		// The next check isn't just context.DataAtom.String() == context.Data because
2403		// it is valid to pass an element whose tag isn't a known atom. For example,
2404		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2405		if context.DataAtom != a.Lookup([]byte(context.Data)) {
2406			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2407		}
2408		contextTag = context.DataAtom.String()
2409	}
2410	p := &parser{
2411		doc: &Node{
2412			Type: DocumentNode,
2413		},
2414		scripting: true,
2415		fragment:  true,
2416		context:   context,
2417	}
2418	if context != nil && context.Namespace != "" {
2419		p.tokenizer = NewTokenizer(r)
2420	} else {
2421		p.tokenizer = NewTokenizerFragment(r, contextTag)
2422	}
2423
2424	for _, f := range opts {
2425		f(p)
2426	}
2427
2428	root := &Node{
2429		Type:     ElementNode,
2430		DataAtom: a.Html,
2431		Data:     a.Html.String(),
2432	}
2433	p.doc.AppendChild(root)
2434	p.oe = nodeStack{root}
2435	if context != nil && context.DataAtom == a.Template {
2436		p.templateStack = append(p.templateStack, inTemplateIM)
2437	}
2438	p.resetInsertionMode()
2439
2440	for n := context; n != nil; n = n.Parent {
2441		if n.Type == ElementNode && n.DataAtom == a.Form {
2442			p.form = n
2443			break
2444		}
2445	}
2446
2447	if err := p.parse(); err != nil {
2448		return nil, err
2449	}
2450
2451	parent := p.doc
2452	if context != nil {
2453		parent = root
2454	}
2455
2456	var result []*Node
2457	for c := parent.FirstChild; c != nil; {
2458		next := c.NextSibling
2459		parent.RemoveChild(c)
2460		result = append(result, c)
2461		c = next
2462	}
2463	return result, nil
2464}