1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "errors"
9 "fmt"
10 "io"
11 "strings"
12
13 a "golang.org/x/net/html/atom"
14)
15
16// A parser implements the HTML5 parsing algorithm:
17// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
18type parser struct {
19 // tokenizer provides the tokens for the parser.
20 tokenizer *Tokenizer
21 // tok is the most recently read token.
22 tok Token
23 // Self-closing tags like <hr/> are treated as start tags, except that
24 // hasSelfClosingToken is set while they are being processed.
25 hasSelfClosingToken bool
26 // doc is the document root element.
27 doc *Node
28 // The stack of open elements (section 12.2.4.2) and active formatting
29 // elements (section 12.2.4.3).
30 oe, afe nodeStack
31 // Element pointers (section 12.2.4.4).
32 head, form *Node
33 // Other parsing state flags (section 12.2.4.5).
34 scripting, framesetOK bool
35 // The stack of template insertion modes
36 templateStack insertionModeStack
37 // im is the current insertion mode.
38 im insertionMode
39 // originalIM is the insertion mode to go back to after completing a text
40 // or inTableText insertion mode.
41 originalIM insertionMode
42 // fosterParenting is whether new elements should be inserted according to
43 // the foster parenting rules (section 12.2.6.1).
44 fosterParenting bool
45 // quirks is whether the parser is operating in "quirks mode."
46 quirks bool
47 // fragment is whether the parser is parsing an HTML fragment.
48 fragment bool
49 // context is the context element when parsing an HTML fragment
50 // (section 12.4).
51 context *Node
52}
53
54func (p *parser) top() *Node {
55 if n := p.oe.top(); n != nil {
56 return n
57 }
58 return p.doc
59}
60
61// Stop tags for use in popUntil. These come from section 12.2.4.2.
62var (
63 defaultScopeStopTags = map[string][]a.Atom{
64 "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
65 "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
66 "svg": {a.Desc, a.ForeignObject, a.Title},
67 }
68)
69
// scope names the set of stop tags that bounds a search of the stack of open
// elements.
type scope int

const (
	// defaultScope stops only at the tags in defaultScopeStopTags.
	defaultScope = scope(iota)
	// listItemScope is defaultScope plus <ol> and <ul>.
	listItemScope
	// buttonScope is defaultScope plus <button>.
	buttonScope
	// tableScope stops at <html>, <table> and <template>.
	tableScope
	// tableRowScope stops at <html>, <tr> and <template>; it is only
	// accepted by clearStackToContext.
	tableRowScope
	// tableBodyScope stops at <html>, <tbody>, <tfoot>, <thead> and
	// <template>; it is only accepted by clearStackToContext.
	tableBodyScope
	// selectScope stops at every element other than <optgroup> and <option>.
	selectScope
)
81
82// popUntil pops the stack of open elements at the highest element whose tag
83// is in matchTags, provided there is no higher element in the scope's stop
84// tags (as defined in section 12.2.4.2). It returns whether or not there was
85// such an element. If there was not, popUntil leaves the stack unchanged.
86//
87// For example, the set of stop tags for table scope is: "html", "table". If
88// the stack was:
89// ["html", "body", "font", "table", "b", "i", "u"]
90// then popUntil(tableScope, "font") would return false, but
91// popUntil(tableScope, "i") would return true and the stack would become:
92// ["html", "body", "font", "table", "b"]
93//
94// If an element's tag is in both the stop tags and matchTags, then the stack
95// will be popped and the function returns true (provided, of course, there was
96// no higher element in the stack that was also in the stop tags). For example,
97// popUntil(tableScope, "table") returns true and leaves:
98// ["html", "body", "font"]
99func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
100 if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
101 p.oe = p.oe[:i]
102 return true
103 }
104 return false
105}
106
107// indexOfElementInScope returns the index in p.oe of the highest element whose
108// tag is in matchTags that is in scope. If no matching element is in scope, it
109// returns -1.
110func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
111 for i := len(p.oe) - 1; i >= 0; i-- {
112 tagAtom := p.oe[i].DataAtom
113 if p.oe[i].Namespace == "" {
114 for _, t := range matchTags {
115 if t == tagAtom {
116 return i
117 }
118 }
119 switch s {
120 case defaultScope:
121 // No-op.
122 case listItemScope:
123 if tagAtom == a.Ol || tagAtom == a.Ul {
124 return -1
125 }
126 case buttonScope:
127 if tagAtom == a.Button {
128 return -1
129 }
130 case tableScope:
131 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
132 return -1
133 }
134 case selectScope:
135 if tagAtom != a.Optgroup && tagAtom != a.Option {
136 return -1
137 }
138 default:
139 panic("unreachable")
140 }
141 }
142 switch s {
143 case defaultScope, listItemScope, buttonScope:
144 for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
145 if t == tagAtom {
146 return -1
147 }
148 }
149 }
150 }
151 return -1
152}
153
154// elementInScope is like popUntil, except that it doesn't modify the stack of
155// open elements.
156func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
157 return p.indexOfElementInScope(s, matchTags...) != -1
158}
159
160// clearStackToContext pops elements off the stack of open elements until a
161// scope-defined element is found.
162func (p *parser) clearStackToContext(s scope) {
163 for i := len(p.oe) - 1; i >= 0; i-- {
164 tagAtom := p.oe[i].DataAtom
165 switch s {
166 case tableScope:
167 if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
168 p.oe = p.oe[:i+1]
169 return
170 }
171 case tableRowScope:
172 if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
173 p.oe = p.oe[:i+1]
174 return
175 }
176 case tableBodyScope:
177 if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
178 p.oe = p.oe[:i+1]
179 return
180 }
181 default:
182 panic("unreachable")
183 }
184 }
185}
186
187// generateImpliedEndTags pops nodes off the stack of open elements as long as
188// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
189// If exceptions are specified, nodes with that name will not be popped off.
190func (p *parser) generateImpliedEndTags(exceptions ...string) {
191 var i int
192loop:
193 for i = len(p.oe) - 1; i >= 0; i-- {
194 n := p.oe[i]
195 if n.Type == ElementNode {
196 switch n.DataAtom {
197 case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
198 for _, except := range exceptions {
199 if n.Data == except {
200 break loop
201 }
202 }
203 continue
204 }
205 }
206 break
207 }
208
209 p.oe = p.oe[:i+1]
210}
211
212// addChild adds a child node n to the top element, and pushes n onto the stack
213// of open elements if it is an element node.
214func (p *parser) addChild(n *Node) {
215 if p.shouldFosterParent() {
216 p.fosterParent(n)
217 } else {
218 p.top().AppendChild(n)
219 }
220
221 if n.Type == ElementNode {
222 p.oe = append(p.oe, n)
223 }
224}
225
226// shouldFosterParent returns whether the next node to be added should be
227// foster parented.
228func (p *parser) shouldFosterParent() bool {
229 if p.fosterParenting {
230 switch p.top().DataAtom {
231 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
232 return true
233 }
234 }
235 return false
236}
237
238// fosterParent adds a child node according to the foster parenting rules.
239// Section 12.2.6.1, "foster parenting".
240func (p *parser) fosterParent(n *Node) {
241 var table, parent, prev, template *Node
242 var i int
243 for i = len(p.oe) - 1; i >= 0; i-- {
244 if p.oe[i].DataAtom == a.Table {
245 table = p.oe[i]
246 break
247 }
248 }
249
250 var j int
251 for j = len(p.oe) - 1; j >= 0; j-- {
252 if p.oe[j].DataAtom == a.Template {
253 template = p.oe[j]
254 break
255 }
256 }
257
258 if template != nil && (table == nil || j > i) {
259 template.AppendChild(n)
260 return
261 }
262
263 if table == nil {
264 // The foster parent is the html element.
265 parent = p.oe[0]
266 } else {
267 parent = table.Parent
268 }
269 if parent == nil {
270 parent = p.oe[i-1]
271 }
272
273 if table != nil {
274 prev = table.PrevSibling
275 } else {
276 prev = parent.LastChild
277 }
278 if prev != nil && prev.Type == TextNode && n.Type == TextNode {
279 prev.Data += n.Data
280 return
281 }
282
283 parent.InsertBefore(n, table)
284}
285
286// addText adds text to the preceding node if it is a text node, or else it
287// calls addChild with a new text node.
288func (p *parser) addText(text string) {
289 if text == "" {
290 return
291 }
292
293 if p.shouldFosterParent() {
294 p.fosterParent(&Node{
295 Type: TextNode,
296 Data: text,
297 })
298 return
299 }
300
301 t := p.top()
302 if n := t.LastChild; n != nil && n.Type == TextNode {
303 n.Data += text
304 return
305 }
306 p.addChild(&Node{
307 Type: TextNode,
308 Data: text,
309 })
310}
311
312// addElement adds a child element based on the current token.
313func (p *parser) addElement() {
314 p.addChild(&Node{
315 Type: ElementNode,
316 DataAtom: p.tok.DataAtom,
317 Data: p.tok.Data,
318 Attr: p.tok.Attr,
319 })
320}
321
322// Section 12.2.4.3.
323func (p *parser) addFormattingElement() {
324 tagAtom, attr := p.tok.DataAtom, p.tok.Attr
325 p.addElement()
326
327 // Implement the Noah's Ark clause, but with three per family instead of two.
328 identicalElements := 0
329findIdenticalElements:
330 for i := len(p.afe) - 1; i >= 0; i-- {
331 n := p.afe[i]
332 if n.Type == scopeMarkerNode {
333 break
334 }
335 if n.Type != ElementNode {
336 continue
337 }
338 if n.Namespace != "" {
339 continue
340 }
341 if n.DataAtom != tagAtom {
342 continue
343 }
344 if len(n.Attr) != len(attr) {
345 continue
346 }
347 compareAttributes:
348 for _, t0 := range n.Attr {
349 for _, t1 := range attr {
350 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
351 // Found a match for this attribute, continue with the next attribute.
352 continue compareAttributes
353 }
354 }
355 // If we get here, there is no attribute that matches a.
356 // Therefore the element is not identical to the new one.
357 continue findIdenticalElements
358 }
359
360 identicalElements++
361 if identicalElements >= 3 {
362 p.afe.remove(n)
363 }
364 }
365
366 p.afe = append(p.afe, p.top())
367}
368
369// Section 12.2.4.3.
370func (p *parser) clearActiveFormattingElements() {
371 for {
372 n := p.afe.pop()
373 if len(p.afe) == 0 || n.Type == scopeMarkerNode {
374 return
375 }
376 }
377}
378
379// Section 12.2.4.3.
380func (p *parser) reconstructActiveFormattingElements() {
381 n := p.afe.top()
382 if n == nil {
383 return
384 }
385 if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
386 return
387 }
388 i := len(p.afe) - 1
389 for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
390 if i == 0 {
391 i = -1
392 break
393 }
394 i--
395 n = p.afe[i]
396 }
397 for {
398 i++
399 clone := p.afe[i].clone()
400 p.addChild(clone)
401 p.afe[i] = clone
402 if i == len(p.afe)-1 {
403 break
404 }
405 }
406}
407
// acknowledgeSelfClosingTag clears hasSelfClosingToken, recording that the
// self-closing flag of the current token has been dealt with.
// Section 12.2.5.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
412
// An insertion mode (section 12.2.4.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
// It returns whether the token was consumed.
//
// The insertion modes in this file (initialIM, beforeHTMLIM, inBodyIM, ...)
// all have this signature; the current one is stored in parser.im.
type insertionMode func(*parser) bool
418
419// setOriginalIM sets the insertion mode to return to after completing a text or
420// inTableText insertion mode.
421// Section 12.2.4.1, "using the rules for".
422func (p *parser) setOriginalIM() {
423 if p.originalIM != nil {
424 panic("html: bad parser state: originalIM was set twice")
425 }
426 p.originalIM = p.im
427}
428
429// Section 12.2.4.1, "reset the insertion mode".
430func (p *parser) resetInsertionMode() {
431 for i := len(p.oe) - 1; i >= 0; i-- {
432 n := p.oe[i]
433 last := i == 0
434 if last && p.context != nil {
435 n = p.context
436 }
437
438 switch n.DataAtom {
439 case a.Select:
440 if !last {
441 for ancestor, first := n, p.oe[0]; ancestor != first; {
442 if ancestor == first {
443 break
444 }
445 ancestor = p.oe[p.oe.index(ancestor)-1]
446 switch ancestor.DataAtom {
447 case a.Template:
448 p.im = inSelectIM
449 return
450 case a.Table:
451 p.im = inSelectInTableIM
452 return
453 }
454 }
455 }
456 p.im = inSelectIM
457 case a.Td, a.Th:
458 // TODO: remove this divergence from the HTML5 spec.
459 //
460 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
461 p.im = inCellIM
462 case a.Tr:
463 p.im = inRowIM
464 case a.Tbody, a.Thead, a.Tfoot:
465 p.im = inTableBodyIM
466 case a.Caption:
467 p.im = inCaptionIM
468 case a.Colgroup:
469 p.im = inColumnGroupIM
470 case a.Table:
471 p.im = inTableIM
472 case a.Template:
473 p.im = p.templateStack.top()
474 case a.Head:
475 // TODO: remove this divergence from the HTML5 spec.
476 //
477 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
478 p.im = inHeadIM
479 case a.Body:
480 p.im = inBodyIM
481 case a.Frameset:
482 p.im = inFramesetIM
483 case a.Html:
484 if p.head == nil {
485 p.im = beforeHeadIM
486 } else {
487 p.im = afterHeadIM
488 }
489 default:
490 if last {
491 p.im = inBodyIM
492 return
493 }
494 continue
495 }
496 return
497 }
498}
499
// whitespace is the cutset (space, tab, CR, LF, form feed) that the insertion
// modes pass to strings.TrimLeft to strip leading whitespace from text
// tokens.
const whitespace = " \t\r\n\f"
501
502// Section 12.2.6.4.1.
503func initialIM(p *parser) bool {
504 switch p.tok.Type {
505 case TextToken:
506 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
507 if len(p.tok.Data) == 0 {
508 // It was all whitespace, so ignore it.
509 return true
510 }
511 case CommentToken:
512 p.doc.AppendChild(&Node{
513 Type: CommentNode,
514 Data: p.tok.Data,
515 })
516 return true
517 case DoctypeToken:
518 n, quirks := parseDoctype(p.tok.Data)
519 p.doc.AppendChild(n)
520 p.quirks = quirks
521 p.im = beforeHTMLIM
522 return true
523 }
524 p.quirks = true
525 p.im = beforeHTMLIM
526 return false
527}
528
529// Section 12.2.6.4.2.
530func beforeHTMLIM(p *parser) bool {
531 switch p.tok.Type {
532 case DoctypeToken:
533 // Ignore the token.
534 return true
535 case TextToken:
536 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
537 if len(p.tok.Data) == 0 {
538 // It was all whitespace, so ignore it.
539 return true
540 }
541 case StartTagToken:
542 if p.tok.DataAtom == a.Html {
543 p.addElement()
544 p.im = beforeHeadIM
545 return true
546 }
547 case EndTagToken:
548 switch p.tok.DataAtom {
549 case a.Head, a.Body, a.Html, a.Br:
550 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
551 return false
552 default:
553 // Ignore the token.
554 return true
555 }
556 case CommentToken:
557 p.doc.AppendChild(&Node{
558 Type: CommentNode,
559 Data: p.tok.Data,
560 })
561 return true
562 }
563 p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
564 return false
565}
566
567// Section 12.2.6.4.3.
568func beforeHeadIM(p *parser) bool {
569 switch p.tok.Type {
570 case TextToken:
571 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
572 if len(p.tok.Data) == 0 {
573 // It was all whitespace, so ignore it.
574 return true
575 }
576 case StartTagToken:
577 switch p.tok.DataAtom {
578 case a.Head:
579 p.addElement()
580 p.head = p.top()
581 p.im = inHeadIM
582 return true
583 case a.Html:
584 return inBodyIM(p)
585 }
586 case EndTagToken:
587 switch p.tok.DataAtom {
588 case a.Head, a.Body, a.Html, a.Br:
589 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
590 return false
591 default:
592 // Ignore the token.
593 return true
594 }
595 case CommentToken:
596 p.addChild(&Node{
597 Type: CommentNode,
598 Data: p.tok.Data,
599 })
600 return true
601 case DoctypeToken:
602 // Ignore the token.
603 return true
604 }
605
606 p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
607 return false
608}
609
610// Section 12.2.6.4.4.
611func inHeadIM(p *parser) bool {
612 switch p.tok.Type {
613 case TextToken:
614 s := strings.TrimLeft(p.tok.Data, whitespace)
615 if len(s) < len(p.tok.Data) {
616 // Add the initial whitespace to the current node.
617 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
618 if s == "" {
619 return true
620 }
621 p.tok.Data = s
622 }
623 case StartTagToken:
624 switch p.tok.DataAtom {
625 case a.Html:
626 return inBodyIM(p)
627 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
628 p.addElement()
629 p.oe.pop()
630 p.acknowledgeSelfClosingTag()
631 return true
632 case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
633 p.addElement()
634 p.setOriginalIM()
635 p.im = textIM
636 return true
637 case a.Head:
638 // Ignore the token.
639 return true
640 case a.Template:
641 p.addElement()
642 p.afe = append(p.afe, &scopeMarker)
643 p.framesetOK = false
644 p.im = inTemplateIM
645 p.templateStack = append(p.templateStack, inTemplateIM)
646 return true
647 }
648 case EndTagToken:
649 switch p.tok.DataAtom {
650 case a.Head:
651 p.oe.pop()
652 p.im = afterHeadIM
653 return true
654 case a.Body, a.Html, a.Br:
655 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
656 return false
657 case a.Template:
658 if !p.oe.contains(a.Template) {
659 return true
660 }
661 // TODO: remove this divergence from the HTML5 spec.
662 //
663 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
664 p.generateImpliedEndTags()
665 for i := len(p.oe) - 1; i >= 0; i-- {
666 if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
667 p.oe = p.oe[:i]
668 break
669 }
670 }
671 p.clearActiveFormattingElements()
672 p.templateStack.pop()
673 p.resetInsertionMode()
674 return true
675 default:
676 // Ignore the token.
677 return true
678 }
679 case CommentToken:
680 p.addChild(&Node{
681 Type: CommentNode,
682 Data: p.tok.Data,
683 })
684 return true
685 case DoctypeToken:
686 // Ignore the token.
687 return true
688 }
689
690 p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
691 return false
692}
693
694// Section 12.2.6.4.6.
695func afterHeadIM(p *parser) bool {
696 switch p.tok.Type {
697 case TextToken:
698 s := strings.TrimLeft(p.tok.Data, whitespace)
699 if len(s) < len(p.tok.Data) {
700 // Add the initial whitespace to the current node.
701 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
702 if s == "" {
703 return true
704 }
705 p.tok.Data = s
706 }
707 case StartTagToken:
708 switch p.tok.DataAtom {
709 case a.Html:
710 return inBodyIM(p)
711 case a.Body:
712 p.addElement()
713 p.framesetOK = false
714 p.im = inBodyIM
715 return true
716 case a.Frameset:
717 p.addElement()
718 p.im = inFramesetIM
719 return true
720 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
721 p.oe = append(p.oe, p.head)
722 defer p.oe.remove(p.head)
723 return inHeadIM(p)
724 case a.Head:
725 // Ignore the token.
726 return true
727 }
728 case EndTagToken:
729 switch p.tok.DataAtom {
730 case a.Body, a.Html, a.Br:
731 // Drop down to creating an implied <body> tag.
732 case a.Template:
733 return inHeadIM(p)
734 default:
735 // Ignore the token.
736 return true
737 }
738 case CommentToken:
739 p.addChild(&Node{
740 Type: CommentNode,
741 Data: p.tok.Data,
742 })
743 return true
744 case DoctypeToken:
745 // Ignore the token.
746 return true
747 }
748
749 p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
750 p.framesetOK = true
751 return false
752}
753
754// copyAttributes copies attributes of src not found on dst to dst.
755func copyAttributes(dst *Node, src Token) {
756 if len(src.Attr) == 0 {
757 return
758 }
759 attr := map[string]string{}
760 for _, t := range dst.Attr {
761 attr[t.Key] = t.Val
762 }
763 for _, t := range src.Attr {
764 if _, ok := attr[t.Key]; !ok {
765 dst.Attr = append(dst.Attr, t)
766 attr[t.Key] = t.Val
767 }
768 }
769}
770
// inBodyIM is the "in body" insertion mode. Section 12.2.6.4.7.
//
// It dispatches on the type of the current token and, for start and end
// tags, on the tag's atom. Like every insertion mode it returns whether the
// token was consumed; the cases that return false have first changed the
// token or the insertion mode (or implied another token).
func inBodyIM(p *parser) bool {
	switch p.tok.Type {
	case TextToken:
		d := p.tok.Data
		switch n := p.oe.top(); n.DataAtom {
		case a.Pre, a.Listing:
			if n.FirstChild == nil {
				// Ignore a newline at the start of a <pre> block.
				if d != "" && d[0] == '\r' {
					d = d[1:]
				}
				if d != "" && d[0] == '\n' {
					d = d[1:]
				}
			}
		}
		// NUL characters are dropped from the text.
		d = strings.Replace(d, "\x00", "", -1)
		if d == "" {
			return true
		}
		p.reconstructActiveFormattingElements()
		p.addText(d)
		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
			// There were non-whitespace characters inserted.
			p.framesetOK = false
		}
	case StartTagToken:
		switch p.tok.DataAtom {
		case a.Html:
			if p.oe.contains(a.Template) {
				return true
			}
			// Merge the attributes into the bottom element of the stack.
			copyAttributes(p.oe[0], p.tok)
		case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
			return inHeadIM(p)
		case a.Body:
			if p.oe.contains(a.Template) {
				return true
			}
			// A second <body> only contributes its attributes to the
			// existing body element.
			if len(p.oe) >= 2 {
				body := p.oe[1]
				if body.Type == ElementNode && body.DataAtom == a.Body {
					p.framesetOK = false
					copyAttributes(body, p.tok)
				}
			}
		case a.Frameset:
			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
				// Ignore the token.
				return true
			}
			// Replace the body element by the frameset.
			body := p.oe[1]
			if body.Parent != nil {
				body.Parent.RemoveChild(body)
			}
			p.oe = p.oe[:1]
			p.addElement()
			p.im = inFramesetIM
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
			// Close an open <p> in button scope before opening the element.
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			p.popUntil(buttonScope, a.P)
			// A heading directly inside another heading closes the outer one.
			switch n := p.top(); n.DataAtom {
			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
				p.oe.pop()
			}
			p.addElement()
		case a.Pre, a.Listing:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			// The newline, if any, will be dealt with by the TextToken case.
			p.framesetOK = false
		case a.Form:
			if p.form != nil && !p.oe.contains(a.Template) {
				// Ignore the token
				return true
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
			// The form element pointer is only set outside templates.
			if !p.oe.contains(a.Template) {
				p.form = p.top()
			}
		case a.Li:
			p.framesetOK = false
			// Walk down the stack: an open <li> is popped together with
			// everything above it; address, div, p and non-special elements
			// are skipped; any other special element ends the walk.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Li:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Dd, a.Dt:
			p.framesetOK = false
			// Same walk as for <li>, looking for an open <dd> or <dt>.
			for i := len(p.oe) - 1; i >= 0; i-- {
				node := p.oe[i]
				switch node.DataAtom {
				case a.Dd, a.Dt:
					p.oe = p.oe[:i]
				case a.Address, a.Div, a.P:
					continue
				default:
					if !isSpecialElement(node) {
						continue
					}
				}
				break
			}
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Plaintext:
			p.popUntil(buttonScope, a.P)
			p.addElement()
		case a.Button:
			// An already open <button> in scope is closed first.
			p.popUntil(defaultScope, a.Button)
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
		case a.A:
			// An <a> that is still an active formatting element (after the
			// last scope marker) is closed before the new one is opened.
			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
					p.inBodyEndTagFormatting(a.A)
					p.oe.remove(n)
					p.afe.remove(n)
					break
				}
			}
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.reconstructActiveFormattingElements()
			p.addFormattingElement()
		case a.Nobr:
			p.reconstructActiveFormattingElements()
			if p.elementInScope(defaultScope, a.Nobr) {
				p.inBodyEndTagFormatting(a.Nobr)
				p.reconstructActiveFormattingElements()
			}
			p.addFormattingElement()
		case a.Applet, a.Marquee, a.Object:
			p.reconstructActiveFormattingElements()
			p.addElement()
			// Push a scope marker onto the list of active formatting elements.
			p.afe = append(p.afe, &scopeMarker)
			p.framesetOK = false
		case a.Table:
			// Outside quirks mode a <table> closes an open <p>.
			if !p.quirks {
				p.popUntil(buttonScope, a.P)
			}
			p.addElement()
			p.framesetOK = false
			p.im = inTableIM
			return true
		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
			p.reconstructActiveFormattingElements()
			// Add the element and pop it again right away.
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			if p.tok.DataAtom == a.Input {
				for _, t := range p.tok.Attr {
					if t.Key == "type" {
						if strings.ToLower(t.Val) == "hidden" {
							// Skip setting framesetOK = false
							return true
						}
					}
				}
			}
			p.framesetOK = false
		case a.Param, a.Source, a.Track:
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
		case a.Hr:
			p.popUntil(buttonScope, a.P)
			p.addElement()
			p.oe.pop()
			p.acknowledgeSelfClosingTag()
			p.framesetOK = false
		case a.Image:
			// Rewrite the token as <img> and report it as not consumed.
			p.tok.DataAtom = a.Img
			p.tok.Data = a.Img.String()
			return false
		case a.Isindex:
			if p.form != nil {
				// Ignore the token.
				return true
			}
			// <isindex> is expanded into a form holding hr, label (prompt
			// text plus an input named "isindex") and hr. The "action"
			// attribute goes to the form, "prompt" replaces the default
			// prompt text, "name" is dropped, and all other attributes go to
			// the input.
			action := ""
			prompt := "This is a searchable index. Enter search keywords: "
			attr := []Attribute{{Key: "name", Val: "isindex"}}
			for _, t := range p.tok.Attr {
				switch t.Key {
				case "action":
					action = t.Val
				case "name":
					// Ignore the attribute.
				case "prompt":
					prompt = t.Val
				default:
					attr = append(attr, t)
				}
			}
			p.acknowledgeSelfClosingTag()
			p.popUntil(buttonScope, a.P)
			p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
			if p.form == nil {
				// NOTE: The 'isindex' element has been removed,
				// and the 'template' element has not been designed to be
				// collaborative with the index element.
				//
				// Ignore the token.
				return true
			}
			if action != "" {
				p.form.Attr = []Attribute{{Key: "action", Val: action}}
			}
			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
			p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
			p.addText(prompt)
			p.addChild(&Node{
				Type:     ElementNode,
				DataAtom: a.Input,
				Data:     a.Input.String(),
				Attr:     attr,
			})
			p.oe.pop()
			p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
			p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
			p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
		case a.Textarea:
			// The element's content is handled by the text insertion mode.
			p.addElement()
			p.setOriginalIM()
			p.framesetOK = false
			p.im = textIM
		case a.Xmp:
			p.popUntil(buttonScope, a.P)
			p.reconstructActiveFormattingElements()
			p.framesetOK = false
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Iframe:
			p.framesetOK = false
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Noembed, a.Noscript:
			p.addElement()
			p.setOriginalIM()
			p.im = textIM
		case a.Select:
			p.reconstructActiveFormattingElements()
			p.addElement()
			p.framesetOK = false
			p.im = inSelectIM
			return true
		case a.Optgroup, a.Option:
			// An open <option> on top of the stack is closed first.
			if p.top().DataAtom == a.Option {
				p.oe.pop()
			}
			p.reconstructActiveFormattingElements()
			p.addElement()
		case a.Rb, a.Rtc:
			if p.elementInScope(defaultScope, a.Ruby) {
				p.generateImpliedEndTags()
			}
			p.addElement()
		case a.Rp, a.Rt:
			// Unlike <rb> and <rtc>, these do not close an open <rtc>.
			if p.elementInScope(defaultScope, a.Ruby) {
				p.generateImpliedEndTags("rtc")
			}
			p.addElement()
		case a.Math, a.Svg:
			p.reconstructActiveFormattingElements()
			if p.tok.DataAtom == a.Math {
				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
			} else {
				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
			}
			adjustForeignAttributes(p.tok.Attr)
			p.addElement()
			// The tag name ("math" or "svg") doubles as the namespace.
			p.top().Namespace = p.tok.Data
			if p.hasSelfClosingToken {
				p.oe.pop()
				p.acknowledgeSelfClosingTag()
			}
			return true
		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
			// Ignore the token.
		default:
			// Any other start tag opens an ordinary element.
			p.reconstructActiveFormattingElements()
			p.addElement()
		}
	case EndTagToken:
		switch p.tok.DataAtom {
		case a.Body:
			if p.elementInScope(defaultScope, a.Body) {
				p.im = afterBodyIM
			}
		case a.Html:
			// </html> implies </body> when a body is in scope; the token is
			// then reported as not consumed.
			if p.elementInScope(defaultScope, a.Body) {
				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
				return false
			}
			return true
		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.Form:
			if p.oe.contains(a.Template) {
				// Inside a template the form element pointer is not used.
				i := p.indexOfElementInScope(defaultScope, a.Form)
				if i == -1 {
					// Ignore the token.
					return true
				}
				p.generateImpliedEndTags()
				if p.oe[i].DataAtom != a.Form {
					// Ignore the token.
					return true
				}
				p.popUntil(defaultScope, a.Form)
			} else {
				// Clear the form element pointer; the tag only takes effect
				// when the form in scope is the element it pointed to.
				node := p.form
				p.form = nil
				i := p.indexOfElementInScope(defaultScope, a.Form)
				if node == nil || i == -1 || p.oe[i] != node {
					// Ignore the token.
					return true
				}
				p.generateImpliedEndTags()
				p.oe.remove(node)
			}
		case a.P:
			// A </p> without an open <p> in button scope implies a <p>
			// start tag first.
			if !p.elementInScope(buttonScope, a.P) {
				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
			}
			p.popUntil(buttonScope, a.P)
		case a.Li:
			p.popUntil(listItemScope, a.Li)
		case a.Dd, a.Dt:
			p.popUntil(defaultScope, p.tok.DataAtom)
		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
			// Any heading end tag closes whichever heading is in scope.
			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
			p.inBodyEndTagFormatting(p.tok.DataAtom)
		case a.Applet, a.Marquee, a.Object:
			if p.popUntil(defaultScope, p.tok.DataAtom) {
				p.clearActiveFormattingElements()
			}
		case a.Br:
			// Turn </br> into a start tag and report it as not consumed.
			p.tok.Type = StartTagToken
			return false
		case a.Template:
			return inHeadIM(p)
		default:
			p.inBodyEndTagOther(p.tok.DataAtom)
		}
	case CommentToken:
		p.addChild(&Node{
			Type: CommentNode,
			Data: p.tok.Data,
		})
	case ErrorToken:
		// TODO: remove this divergence from the HTML5 spec.
		if len(p.templateStack) > 0 {
			// Hand the token over to inTemplateIM.
			p.im = inTemplateIM
			return false
		} else {
			// Either outcome of this scan makes inBodyIM return true.
			for _, e := range p.oe {
				switch e.DataAtom {
				case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
					a.Thead, a.Tr, a.Body, a.Html:
				default:
					return true
				}
			}
		}
	}

	return true
}
1162
// inBodyEndTagFormatting runs the "adoption agency" algorithm for an end tag
// whose atom is tagAtom. It rewrites the stack of open elements (p.oe), the
// list of active formatting elements (p.afe) and the node tree so that a
// formatting element closed across a "special" element is split and re-nested.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop.
	// The outer loop runs at most eight times.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element.
		// Search p.afe from the most recent entry backwards, stopping at the
		// first scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No matching formatting element: treat it as any other end tag.
			p.inBodyEndTagOther(tagAtom)
			return
		}
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			// The formatting element is no longer open; just forget it.
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block.
		// This is the first special element at or above feIndex in p.oe.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No special element above the formatting element: pop the stack
			// down to and including it, then drop it from p.afe.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// NOTE(review): this indexes p.oe[feIndex-1], so it assumes the
		// formatting element is never the bottom entry of p.oe — confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2
		// NOTE(review): the loop is bounded to three iterations and can end
		// before reaching formattingElement; verify this against the current
		// text of the specification.
		for j := 0; j < 3; j++ {
			// Step 13.3.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5.
			// Nodes that are not active formatting elements are simply
			// removed from the stack of open elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7.
			// Replace node with a clone in both p.afe and p.oe.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9.
			// Detach lastNode from its current parent before re-attaching it.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		case a.Template:
			// TODO: remove namespace checking
			if commonAncestor.Namespace == "html" {
				commonAncestor = commonAncestor.LastChild
			}
			fallthrough
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements.
		// The clone goes immediately after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1292
1293// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1294// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1295// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1296func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
1297 for i := len(p.oe) - 1; i >= 0; i-- {
1298 if p.oe[i].DataAtom == tagAtom {
1299 p.oe = p.oe[:i]
1300 break
1301 }
1302 if isSpecialElement(p.oe[i]) {
1303 break
1304 }
1305 }
1306}
1307
1308// Section 12.2.6.4.8.
1309func textIM(p *parser) bool {
1310 switch p.tok.Type {
1311 case ErrorToken:
1312 p.oe.pop()
1313 case TextToken:
1314 d := p.tok.Data
1315 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1316 // Ignore a newline at the start of a <textarea> block.
1317 if d != "" && d[0] == '\r' {
1318 d = d[1:]
1319 }
1320 if d != "" && d[0] == '\n' {
1321 d = d[1:]
1322 }
1323 }
1324 if d == "" {
1325 return true
1326 }
1327 p.addText(d)
1328 return true
1329 case EndTagToken:
1330 p.oe.pop()
1331 }
1332 p.im = p.originalIM
1333 p.originalIM = nil
1334 return p.tok.Type == EndTagToken
1335}
1336
1337// Section 12.2.6.4.9.
1338func inTableIM(p *parser) bool {
1339 switch p.tok.Type {
1340 case TextToken:
1341 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1342 switch p.oe.top().DataAtom {
1343 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1344 if strings.Trim(p.tok.Data, whitespace) == "" {
1345 p.addText(p.tok.Data)
1346 return true
1347 }
1348 }
1349 case StartTagToken:
1350 switch p.tok.DataAtom {
1351 case a.Caption:
1352 p.clearStackToContext(tableScope)
1353 p.afe = append(p.afe, &scopeMarker)
1354 p.addElement()
1355 p.im = inCaptionIM
1356 return true
1357 case a.Colgroup:
1358 p.clearStackToContext(tableScope)
1359 p.addElement()
1360 p.im = inColumnGroupIM
1361 return true
1362 case a.Col:
1363 p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1364 return false
1365 case a.Tbody, a.Tfoot, a.Thead:
1366 p.clearStackToContext(tableScope)
1367 p.addElement()
1368 p.im = inTableBodyIM
1369 return true
1370 case a.Td, a.Th, a.Tr:
1371 p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1372 return false
1373 case a.Table:
1374 if p.popUntil(tableScope, a.Table) {
1375 p.resetInsertionMode()
1376 return false
1377 }
1378 // Ignore the token.
1379 return true
1380 case a.Style, a.Script, a.Template:
1381 return inHeadIM(p)
1382 case a.Input:
1383 for _, t := range p.tok.Attr {
1384 if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1385 p.addElement()
1386 p.oe.pop()
1387 return true
1388 }
1389 }
1390 // Otherwise drop down to the default action.
1391 case a.Form:
1392 if p.oe.contains(a.Template) || p.form != nil {
1393 // Ignore the token.
1394 return true
1395 }
1396 p.addElement()
1397 p.form = p.oe.pop()
1398 case a.Select:
1399 p.reconstructActiveFormattingElements()
1400 switch p.top().DataAtom {
1401 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1402 p.fosterParenting = true
1403 }
1404 p.addElement()
1405 p.fosterParenting = false
1406 p.framesetOK = false
1407 p.im = inSelectInTableIM
1408 return true
1409 }
1410 case EndTagToken:
1411 switch p.tok.DataAtom {
1412 case a.Table:
1413 if p.popUntil(tableScope, a.Table) {
1414 p.resetInsertionMode()
1415 return true
1416 }
1417 // Ignore the token.
1418 return true
1419 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1420 // Ignore the token.
1421 return true
1422 case a.Template:
1423 return inHeadIM(p)
1424 }
1425 case CommentToken:
1426 p.addChild(&Node{
1427 Type: CommentNode,
1428 Data: p.tok.Data,
1429 })
1430 return true
1431 case DoctypeToken:
1432 // Ignore the token.
1433 return true
1434 case ErrorToken:
1435 return inBodyIM(p)
1436 }
1437
1438 p.fosterParenting = true
1439 defer func() { p.fosterParenting = false }()
1440
1441 return inBodyIM(p)
1442}
1443
1444// Section 12.2.6.4.11.
1445func inCaptionIM(p *parser) bool {
1446 switch p.tok.Type {
1447 case StartTagToken:
1448 switch p.tok.DataAtom {
1449 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1450 if p.popUntil(tableScope, a.Caption) {
1451 p.clearActiveFormattingElements()
1452 p.im = inTableIM
1453 return false
1454 } else {
1455 // Ignore the token.
1456 return true
1457 }
1458 case a.Select:
1459 p.reconstructActiveFormattingElements()
1460 p.addElement()
1461 p.framesetOK = false
1462 p.im = inSelectInTableIM
1463 return true
1464 }
1465 case EndTagToken:
1466 switch p.tok.DataAtom {
1467 case a.Caption:
1468 if p.popUntil(tableScope, a.Caption) {
1469 p.clearActiveFormattingElements()
1470 p.im = inTableIM
1471 }
1472 return true
1473 case a.Table:
1474 if p.popUntil(tableScope, a.Caption) {
1475 p.clearActiveFormattingElements()
1476 p.im = inTableIM
1477 return false
1478 } else {
1479 // Ignore the token.
1480 return true
1481 }
1482 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1483 // Ignore the token.
1484 return true
1485 }
1486 }
1487 return inBodyIM(p)
1488}
1489
1490// Section 12.2.6.4.12.
1491func inColumnGroupIM(p *parser) bool {
1492 switch p.tok.Type {
1493 case TextToken:
1494 s := strings.TrimLeft(p.tok.Data, whitespace)
1495 if len(s) < len(p.tok.Data) {
1496 // Add the initial whitespace to the current node.
1497 p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1498 if s == "" {
1499 return true
1500 }
1501 p.tok.Data = s
1502 }
1503 case CommentToken:
1504 p.addChild(&Node{
1505 Type: CommentNode,
1506 Data: p.tok.Data,
1507 })
1508 return true
1509 case DoctypeToken:
1510 // Ignore the token.
1511 return true
1512 case StartTagToken:
1513 switch p.tok.DataAtom {
1514 case a.Html:
1515 return inBodyIM(p)
1516 case a.Col:
1517 p.addElement()
1518 p.oe.pop()
1519 p.acknowledgeSelfClosingTag()
1520 return true
1521 case a.Template:
1522 return inHeadIM(p)
1523 }
1524 case EndTagToken:
1525 switch p.tok.DataAtom {
1526 case a.Colgroup:
1527 if p.oe.top().DataAtom == a.Colgroup {
1528 p.oe.pop()
1529 p.im = inTableIM
1530 }
1531 return true
1532 case a.Col:
1533 // Ignore the token.
1534 return true
1535 case a.Template:
1536 return inHeadIM(p)
1537 }
1538 case ErrorToken:
1539 return inBodyIM(p)
1540 }
1541 if p.oe.top().DataAtom != a.Colgroup {
1542 return true
1543 }
1544 p.oe.pop()
1545 p.im = inTableIM
1546 return false
1547}
1548
1549// Section 12.2.6.4.13.
1550func inTableBodyIM(p *parser) bool {
1551 switch p.tok.Type {
1552 case StartTagToken:
1553 switch p.tok.DataAtom {
1554 case a.Tr:
1555 p.clearStackToContext(tableBodyScope)
1556 p.addElement()
1557 p.im = inRowIM
1558 return true
1559 case a.Td, a.Th:
1560 p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1561 return false
1562 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1563 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1564 p.im = inTableIM
1565 return false
1566 }
1567 // Ignore the token.
1568 return true
1569 }
1570 case EndTagToken:
1571 switch p.tok.DataAtom {
1572 case a.Tbody, a.Tfoot, a.Thead:
1573 if p.elementInScope(tableScope, p.tok.DataAtom) {
1574 p.clearStackToContext(tableBodyScope)
1575 p.oe.pop()
1576 p.im = inTableIM
1577 }
1578 return true
1579 case a.Table:
1580 if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1581 p.im = inTableIM
1582 return false
1583 }
1584 // Ignore the token.
1585 return true
1586 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1587 // Ignore the token.
1588 return true
1589 }
1590 case CommentToken:
1591 p.addChild(&Node{
1592 Type: CommentNode,
1593 Data: p.tok.Data,
1594 })
1595 return true
1596 }
1597
1598 return inTableIM(p)
1599}
1600
1601// Section 12.2.6.4.14.
1602func inRowIM(p *parser) bool {
1603 switch p.tok.Type {
1604 case StartTagToken:
1605 switch p.tok.DataAtom {
1606 case a.Td, a.Th:
1607 p.clearStackToContext(tableRowScope)
1608 p.addElement()
1609 p.afe = append(p.afe, &scopeMarker)
1610 p.im = inCellIM
1611 return true
1612 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1613 if p.popUntil(tableScope, a.Tr) {
1614 p.im = inTableBodyIM
1615 return false
1616 }
1617 // Ignore the token.
1618 return true
1619 }
1620 case EndTagToken:
1621 switch p.tok.DataAtom {
1622 case a.Tr:
1623 if p.popUntil(tableScope, a.Tr) {
1624 p.im = inTableBodyIM
1625 return true
1626 }
1627 // Ignore the token.
1628 return true
1629 case a.Table:
1630 if p.popUntil(tableScope, a.Tr) {
1631 p.im = inTableBodyIM
1632 return false
1633 }
1634 // Ignore the token.
1635 return true
1636 case a.Tbody, a.Tfoot, a.Thead:
1637 if p.elementInScope(tableScope, p.tok.DataAtom) {
1638 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1639 return false
1640 }
1641 // Ignore the token.
1642 return true
1643 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1644 // Ignore the token.
1645 return true
1646 }
1647 }
1648
1649 return inTableIM(p)
1650}
1651
1652// Section 12.2.6.4.15.
1653func inCellIM(p *parser) bool {
1654 switch p.tok.Type {
1655 case StartTagToken:
1656 switch p.tok.DataAtom {
1657 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1658 if p.popUntil(tableScope, a.Td, a.Th) {
1659 // Close the cell and reprocess.
1660 p.clearActiveFormattingElements()
1661 p.im = inRowIM
1662 return false
1663 }
1664 // Ignore the token.
1665 return true
1666 case a.Select:
1667 p.reconstructActiveFormattingElements()
1668 p.addElement()
1669 p.framesetOK = false
1670 p.im = inSelectInTableIM
1671 return true
1672 }
1673 case EndTagToken:
1674 switch p.tok.DataAtom {
1675 case a.Td, a.Th:
1676 if !p.popUntil(tableScope, p.tok.DataAtom) {
1677 // Ignore the token.
1678 return true
1679 }
1680 p.clearActiveFormattingElements()
1681 p.im = inRowIM
1682 return true
1683 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1684 // Ignore the token.
1685 return true
1686 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1687 if !p.elementInScope(tableScope, p.tok.DataAtom) {
1688 // Ignore the token.
1689 return true
1690 }
1691 // Close the cell and reprocess.
1692 p.popUntil(tableScope, a.Td, a.Th)
1693 p.clearActiveFormattingElements()
1694 p.im = inRowIM
1695 return false
1696 }
1697 }
1698 return inBodyIM(p)
1699}
1700
1701// Section 12.2.6.4.16.
1702func inSelectIM(p *parser) bool {
1703 switch p.tok.Type {
1704 case TextToken:
1705 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1706 case StartTagToken:
1707 switch p.tok.DataAtom {
1708 case a.Html:
1709 return inBodyIM(p)
1710 case a.Option:
1711 if p.top().DataAtom == a.Option {
1712 p.oe.pop()
1713 }
1714 p.addElement()
1715 case a.Optgroup:
1716 if p.top().DataAtom == a.Option {
1717 p.oe.pop()
1718 }
1719 if p.top().DataAtom == a.Optgroup {
1720 p.oe.pop()
1721 }
1722 p.addElement()
1723 case a.Select:
1724 p.tok.Type = EndTagToken
1725 return false
1726 case a.Input, a.Keygen, a.Textarea:
1727 if p.elementInScope(selectScope, a.Select) {
1728 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1729 return false
1730 }
1731 // In order to properly ignore <textarea>, we need to change the tokenizer mode.
1732 p.tokenizer.NextIsNotRawText()
1733 // Ignore the token.
1734 return true
1735 case a.Script, a.Template:
1736 return inHeadIM(p)
1737 }
1738 case EndTagToken:
1739 switch p.tok.DataAtom {
1740 case a.Option:
1741 if p.top().DataAtom == a.Option {
1742 p.oe.pop()
1743 }
1744 case a.Optgroup:
1745 i := len(p.oe) - 1
1746 if p.oe[i].DataAtom == a.Option {
1747 i--
1748 }
1749 if p.oe[i].DataAtom == a.Optgroup {
1750 p.oe = p.oe[:i]
1751 }
1752 case a.Select:
1753 if p.popUntil(selectScope, a.Select) {
1754 p.resetInsertionMode()
1755 }
1756 case a.Template:
1757 return inHeadIM(p)
1758 }
1759 case CommentToken:
1760 p.addChild(&Node{
1761 Type: CommentNode,
1762 Data: p.tok.Data,
1763 })
1764 case DoctypeToken:
1765 // Ignore the token.
1766 return true
1767 case ErrorToken:
1768 return inBodyIM(p)
1769 }
1770
1771 return true
1772}
1773
1774// Section 12.2.6.4.17.
1775func inSelectInTableIM(p *parser) bool {
1776 switch p.tok.Type {
1777 case StartTagToken, EndTagToken:
1778 switch p.tok.DataAtom {
1779 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1780 if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
1781 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1782 return false
1783 } else {
1784 // Ignore the token.
1785 return true
1786 }
1787 }
1788 }
1789 return inSelectIM(p)
1790}
1791
1792// Section 12.2.6.4.18.
1793func inTemplateIM(p *parser) bool {
1794 switch p.tok.Type {
1795 case TextToken, CommentToken, DoctypeToken:
1796 return inBodyIM(p)
1797 case StartTagToken:
1798 switch p.tok.DataAtom {
1799 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1800 return inHeadIM(p)
1801 case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1802 p.templateStack.pop()
1803 p.templateStack = append(p.templateStack, inTableIM)
1804 p.im = inTableIM
1805 return false
1806 case a.Col:
1807 p.templateStack.pop()
1808 p.templateStack = append(p.templateStack, inColumnGroupIM)
1809 p.im = inColumnGroupIM
1810 return false
1811 case a.Tr:
1812 p.templateStack.pop()
1813 p.templateStack = append(p.templateStack, inTableBodyIM)
1814 p.im = inTableBodyIM
1815 return false
1816 case a.Td, a.Th:
1817 p.templateStack.pop()
1818 p.templateStack = append(p.templateStack, inRowIM)
1819 p.im = inRowIM
1820 return false
1821 default:
1822 p.templateStack.pop()
1823 p.templateStack = append(p.templateStack, inBodyIM)
1824 p.im = inBodyIM
1825 return false
1826 }
1827 case EndTagToken:
1828 switch p.tok.DataAtom {
1829 case a.Template:
1830 return inHeadIM(p)
1831 default:
1832 // Ignore the token.
1833 return true
1834 }
1835 case ErrorToken:
1836 if !p.oe.contains(a.Template) {
1837 // Ignore the token.
1838 return true
1839 }
1840 // TODO: remove this divergence from the HTML5 spec.
1841 //
1842 // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1843 p.generateImpliedEndTags()
1844 for i := len(p.oe) - 1; i >= 0; i-- {
1845 if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1846 p.oe = p.oe[:i]
1847 break
1848 }
1849 }
1850 p.clearActiveFormattingElements()
1851 p.templateStack.pop()
1852 p.resetInsertionMode()
1853 return false
1854 }
1855 return false
1856}
1857
1858// Section 12.2.6.4.19.
1859func afterBodyIM(p *parser) bool {
1860 switch p.tok.Type {
1861 case ErrorToken:
1862 // Stop parsing.
1863 return true
1864 case TextToken:
1865 s := strings.TrimLeft(p.tok.Data, whitespace)
1866 if len(s) == 0 {
1867 // It was all whitespace.
1868 return inBodyIM(p)
1869 }
1870 case StartTagToken:
1871 if p.tok.DataAtom == a.Html {
1872 return inBodyIM(p)
1873 }
1874 case EndTagToken:
1875 if p.tok.DataAtom == a.Html {
1876 if !p.fragment {
1877 p.im = afterAfterBodyIM
1878 }
1879 return true
1880 }
1881 case CommentToken:
1882 // The comment is attached to the <html> element.
1883 if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1884 panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1885 }
1886 p.oe[0].AppendChild(&Node{
1887 Type: CommentNode,
1888 Data: p.tok.Data,
1889 })
1890 return true
1891 }
1892 p.im = inBodyIM
1893 return false
1894}
1895
1896// Section 12.2.6.4.20.
1897func inFramesetIM(p *parser) bool {
1898 switch p.tok.Type {
1899 case CommentToken:
1900 p.addChild(&Node{
1901 Type: CommentNode,
1902 Data: p.tok.Data,
1903 })
1904 case TextToken:
1905 // Ignore all text but whitespace.
1906 s := strings.Map(func(c rune) rune {
1907 switch c {
1908 case ' ', '\t', '\n', '\f', '\r':
1909 return c
1910 }
1911 return -1
1912 }, p.tok.Data)
1913 if s != "" {
1914 p.addText(s)
1915 }
1916 case StartTagToken:
1917 switch p.tok.DataAtom {
1918 case a.Html:
1919 return inBodyIM(p)
1920 case a.Frameset:
1921 p.addElement()
1922 case a.Frame:
1923 p.addElement()
1924 p.oe.pop()
1925 p.acknowledgeSelfClosingTag()
1926 case a.Noframes:
1927 return inHeadIM(p)
1928 }
1929 case EndTagToken:
1930 switch p.tok.DataAtom {
1931 case a.Frameset:
1932 if p.oe.top().DataAtom != a.Html {
1933 p.oe.pop()
1934 if p.oe.top().DataAtom != a.Frameset {
1935 p.im = afterFramesetIM
1936 return true
1937 }
1938 }
1939 }
1940 default:
1941 // Ignore the token.
1942 }
1943 return true
1944}
1945
1946// Section 12.2.6.4.21.
1947func afterFramesetIM(p *parser) bool {
1948 switch p.tok.Type {
1949 case CommentToken:
1950 p.addChild(&Node{
1951 Type: CommentNode,
1952 Data: p.tok.Data,
1953 })
1954 case TextToken:
1955 // Ignore all text but whitespace.
1956 s := strings.Map(func(c rune) rune {
1957 switch c {
1958 case ' ', '\t', '\n', '\f', '\r':
1959 return c
1960 }
1961 return -1
1962 }, p.tok.Data)
1963 if s != "" {
1964 p.addText(s)
1965 }
1966 case StartTagToken:
1967 switch p.tok.DataAtom {
1968 case a.Html:
1969 return inBodyIM(p)
1970 case a.Noframes:
1971 return inHeadIM(p)
1972 }
1973 case EndTagToken:
1974 switch p.tok.DataAtom {
1975 case a.Html:
1976 p.im = afterAfterFramesetIM
1977 return true
1978 }
1979 default:
1980 // Ignore the token.
1981 }
1982 return true
1983}
1984
1985// Section 12.2.6.4.22.
1986func afterAfterBodyIM(p *parser) bool {
1987 switch p.tok.Type {
1988 case ErrorToken:
1989 // Stop parsing.
1990 return true
1991 case TextToken:
1992 s := strings.TrimLeft(p.tok.Data, whitespace)
1993 if len(s) == 0 {
1994 // It was all whitespace.
1995 return inBodyIM(p)
1996 }
1997 case StartTagToken:
1998 if p.tok.DataAtom == a.Html {
1999 return inBodyIM(p)
2000 }
2001 case CommentToken:
2002 p.doc.AppendChild(&Node{
2003 Type: CommentNode,
2004 Data: p.tok.Data,
2005 })
2006 return true
2007 case DoctypeToken:
2008 return inBodyIM(p)
2009 }
2010 p.im = inBodyIM
2011 return false
2012}
2013
2014// Section 12.2.6.4.23.
2015func afterAfterFramesetIM(p *parser) bool {
2016 switch p.tok.Type {
2017 case CommentToken:
2018 p.doc.AppendChild(&Node{
2019 Type: CommentNode,
2020 Data: p.tok.Data,
2021 })
2022 case TextToken:
2023 // Ignore all text but whitespace.
2024 s := strings.Map(func(c rune) rune {
2025 switch c {
2026 case ' ', '\t', '\n', '\f', '\r':
2027 return c
2028 }
2029 return -1
2030 }, p.tok.Data)
2031 if s != "" {
2032 p.tok.Data = s
2033 return inBodyIM(p)
2034 }
2035 case StartTagToken:
2036 switch p.tok.DataAtom {
2037 case a.Html:
2038 return inBodyIM(p)
2039 case a.Noframes:
2040 return inHeadIM(p)
2041 }
2042 case DoctypeToken:
2043 return inBodyIM(p)
2044 default:
2045 // Ignore the token.
2046 }
2047 return true
2048}
2049
// whitespaceOrNUL is the whitespace cutset extended with the NUL byte. It is
// used as a trim cutset when testing whether foreign-content text consists
// only of whitespace and NUL characters.
const whitespaceOrNUL = whitespace + "\x00"
2051
2052// Section 12.2.6.5
2053func parseForeignContent(p *parser) bool {
2054 switch p.tok.Type {
2055 case TextToken:
2056 if p.framesetOK {
2057 p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2058 }
2059 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2060 p.addText(p.tok.Data)
2061 case CommentToken:
2062 p.addChild(&Node{
2063 Type: CommentNode,
2064 Data: p.tok.Data,
2065 })
2066 case StartTagToken:
2067 b := breakout[p.tok.Data]
2068 if p.tok.DataAtom == a.Font {
2069 loop:
2070 for _, attr := range p.tok.Attr {
2071 switch attr.Key {
2072 case "color", "face", "size":
2073 b = true
2074 break loop
2075 }
2076 }
2077 }
2078 if b {
2079 for i := len(p.oe) - 1; i >= 0; i-- {
2080 n := p.oe[i]
2081 if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
2082 p.oe = p.oe[:i+1]
2083 break
2084 }
2085 }
2086 return false
2087 }
2088 switch p.top().Namespace {
2089 case "math":
2090 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2091 case "svg":
2092 // Adjust SVG tag names. The tokenizer lower-cases tag names, but
2093 // SVG wants e.g. "foreignObject" with a capital second "O".
2094 if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2095 p.tok.DataAtom = a.Lookup([]byte(x))
2096 p.tok.Data = x
2097 }
2098 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2099 default:
2100 panic("html: bad parser state: unexpected namespace")
2101 }
2102 adjustForeignAttributes(p.tok.Attr)
2103 namespace := p.top().Namespace
2104 p.addElement()
2105 p.top().Namespace = namespace
2106 if namespace != "" {
2107 // Don't let the tokenizer go into raw text mode in foreign content
2108 // (e.g. in an SVG <title> tag).
2109 p.tokenizer.NextIsNotRawText()
2110 }
2111 if p.hasSelfClosingToken {
2112 p.oe.pop()
2113 p.acknowledgeSelfClosingTag()
2114 }
2115 case EndTagToken:
2116 for i := len(p.oe) - 1; i >= 0; i-- {
2117 if p.oe[i].Namespace == "" {
2118 return p.im(p)
2119 }
2120 if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2121 p.oe = p.oe[:i]
2122 break
2123 }
2124 }
2125 return true
2126 default:
2127 // Ignore the token.
2128 }
2129 return true
2130}
2131
2132// Section 12.2.6.
2133func (p *parser) inForeignContent() bool {
2134 if len(p.oe) == 0 {
2135 return false
2136 }
2137 n := p.oe[len(p.oe)-1]
2138 if n.Namespace == "" {
2139 return false
2140 }
2141 if mathMLTextIntegrationPoint(n) {
2142 if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2143 return false
2144 }
2145 if p.tok.Type == TextToken {
2146 return false
2147 }
2148 }
2149 if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2150 return false
2151 }
2152 if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2153 return false
2154 }
2155 if p.tok.Type == ErrorToken {
2156 return false
2157 }
2158 return true
2159}
2160
2161// parseImpliedToken parses a token as though it had appeared in the parser's
2162// input.
2163func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2164 realToken, selfClosing := p.tok, p.hasSelfClosingToken
2165 p.tok = Token{
2166 Type: t,
2167 DataAtom: dataAtom,
2168 Data: data,
2169 }
2170 p.hasSelfClosingToken = false
2171 p.parseCurrentToken()
2172 p.tok, p.hasSelfClosingToken = realToken, selfClosing
2173}
2174
2175// parseCurrentToken runs the current token through the parsing routines
2176// until it is consumed.
2177func (p *parser) parseCurrentToken() {
2178 if p.tok.Type == SelfClosingTagToken {
2179 p.hasSelfClosingToken = true
2180 p.tok.Type = StartTagToken
2181 }
2182
2183 consumed := false
2184 for !consumed {
2185 if p.inForeignContent() {
2186 consumed = parseForeignContent(p)
2187 } else {
2188 consumed = p.im(p)
2189 }
2190 }
2191
2192 if p.hasSelfClosingToken {
2193 // This is a parse error, but ignore it.
2194 p.hasSelfClosingToken = false
2195 }
2196}
2197
2198func (p *parser) parse() error {
2199 // Iterate until EOF. Any other error will cause an early return.
2200 var err error
2201 for err != io.EOF {
2202 // CDATA sections are allowed only in foreign content.
2203 n := p.oe.top()
2204 p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2205 // Read and parse the next token.
2206 p.tokenizer.Next()
2207 p.tok = p.tokenizer.Token()
2208 if p.tok.Type == ErrorToken {
2209 err = p.tokenizer.Err()
2210 if err != nil && err != io.EOF {
2211 return err
2212 }
2213 }
2214 p.parseCurrentToken()
2215 }
2216 return nil
2217}
2218
2219// Parse returns the parse tree for the HTML from the given Reader.
2220//
2221// It implements the HTML5 parsing algorithm
2222// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2223// which is very complicated. The resultant tree can contain implicitly created
2224// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2225// differ from the nesting implied by a naive processing of start and end
2226// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2227// with no corresponding node in the resulting tree.
2228//
2229// The input is assumed to be UTF-8 encoded.
2230func Parse(r io.Reader) (*Node, error) {
2231 p := &parser{
2232 tokenizer: NewTokenizer(r),
2233 doc: &Node{
2234 Type: DocumentNode,
2235 },
2236 scripting: true,
2237 framesetOK: true,
2238 im: initialIM,
2239 }
2240 err := p.parse()
2241 if err != nil {
2242 return nil, err
2243 }
2244 return p.doc, nil
2245}
2246
2247// ParseFragment parses a fragment of HTML and returns the nodes that were
2248// found. If the fragment is the InnerHTML for an existing element, pass that
2249// element in context.
2250//
2251// It has the same intricacies as Parse.
2252func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2253 contextTag := ""
2254 if context != nil {
2255 if context.Type != ElementNode {
2256 return nil, errors.New("html: ParseFragment of non-element Node")
2257 }
2258 // The next check isn't just context.DataAtom.String() == context.Data because
2259 // it is valid to pass an element whose tag isn't a known atom. For example,
2260 // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2261 if context.DataAtom != a.Lookup([]byte(context.Data)) {
2262 return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2263 }
2264 contextTag = context.DataAtom.String()
2265 }
2266 p := &parser{
2267 tokenizer: NewTokenizerFragment(r, contextTag),
2268 doc: &Node{
2269 Type: DocumentNode,
2270 },
2271 scripting: true,
2272 fragment: true,
2273 context: context,
2274 }
2275
2276 root := &Node{
2277 Type: ElementNode,
2278 DataAtom: a.Html,
2279 Data: a.Html.String(),
2280 }
2281 p.doc.AppendChild(root)
2282 p.oe = nodeStack{root}
2283 if context != nil && context.DataAtom == a.Template {
2284 p.templateStack = append(p.templateStack, inTemplateIM)
2285 }
2286 p.resetInsertionMode()
2287
2288 for n := context; n != nil; n = n.Parent {
2289 if n.Type == ElementNode && n.DataAtom == a.Form {
2290 p.form = n
2291 break
2292 }
2293 }
2294
2295 err := p.parse()
2296 if err != nil {
2297 return nil, err
2298 }
2299
2300 parent := p.doc
2301 if context != nil {
2302 parent = root
2303 }
2304
2305 var result []*Node
2306 for c := parent.FirstChild; c != nil; {
2307 next := c.NextSibling
2308 parent.RemoveChild(c)
2309 result = append(result, c)
2310 c = next
2311 }
2312 return result, nil
2313}