render.go

  1// Copyright 2011 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package html
  6
  7import (
  8	"bufio"
  9	"errors"
 10	"fmt"
 11	"io"
 12	"strings"
 13)
 14
 15type writer interface {
 16	io.Writer
 17	io.ByteWriter
 18	WriteString(string) (int, error)
 19}
 20
 21// Render renders the parse tree n to the given writer.
 22//
 23// Rendering is done on a 'best effort' basis: calling Parse on the output of
 24// Render will always result in something similar to the original tree, but it
 25// is not necessarily an exact clone unless the original tree was 'well-formed'.
 26// 'Well-formed' is not easily specified; the HTML5 specification is
 27// complicated.
 28//
 29// Calling Parse on arbitrary input typically results in a 'well-formed' parse
 30// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
 31// For example, in a 'well-formed' parse tree, no <a> element is a child of
 32// another <a> element: parsing "<a><a>" results in two sibling elements.
 33// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
 34// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
 35// children; the <a> is reparented to the <table>'s parent. However, calling
 36// Parse on "<a><table><a>" does not return an error, but the result has an <a>
 37// element with an <a> child, and is therefore not 'well-formed'.
 38//
 39// Programmatically constructed trees are typically also 'well-formed', but it
 40// is possible to construct a tree that looks innocuous but, when rendered and
 41// re-parsed, results in a different tree. A simple example is that a solitary
 42// text node would become a tree containing <html>, <head> and <body> elements.
 43// Another example is that the programmatic equivalent of "a<head>b</head>c"
 44// becomes "<html><head><head/><body>abc</body></html>".
 45func Render(w io.Writer, n *Node) error {
 46	if x, ok := w.(writer); ok {
 47		return render(x, n)
 48	}
 49	buf := bufio.NewWriter(w)
 50	if err := render(buf, n); err != nil {
 51		return err
 52	}
 53	return buf.Flush()
 54}
 55
 56// plaintextAbort is returned from render1 when a <plaintext> element
 57// has been rendered. No more end tags should be rendered after that.
 58var plaintextAbort = errors.New("html: internal error (plaintext abort)")
 59
 60func render(w writer, n *Node) error {
 61	err := render1(w, n)
 62	if err == plaintextAbort {
 63		err = nil
 64	}
 65	return err
 66}
 67
 68func render1(w writer, n *Node) error {
 69	// Render non-element nodes; these are the easy cases.
 70	switch n.Type {
 71	case ErrorNode:
 72		return errors.New("html: cannot render an ErrorNode node")
 73	case TextNode:
 74		return escape(w, n.Data)
 75	case DocumentNode:
 76		for c := n.FirstChild; c != nil; c = c.NextSibling {
 77			if err := render1(w, c); err != nil {
 78				return err
 79			}
 80		}
 81		return nil
 82	case ElementNode:
 83		// No-op.
 84	case CommentNode:
 85		if _, err := w.WriteString("<!--"); err != nil {
 86			return err
 87		}
 88		if err := escapeComment(w, n.Data); err != nil {
 89			return err
 90		}
 91		if _, err := w.WriteString("-->"); err != nil {
 92			return err
 93		}
 94		return nil
 95	case DoctypeNode:
 96		if _, err := w.WriteString("<!DOCTYPE "); err != nil {
 97			return err
 98		}
 99		if err := escape(w, n.Data); err != nil {
100			return err
101		}
102		if n.Attr != nil {
103			var p, s string
104			for _, a := range n.Attr {
105				switch a.Key {
106				case "public":
107					p = a.Val
108				case "system":
109					s = a.Val
110				}
111			}
112			if p != "" {
113				if _, err := w.WriteString(" PUBLIC "); err != nil {
114					return err
115				}
116				if err := writeQuoted(w, p); err != nil {
117					return err
118				}
119				if s != "" {
120					if err := w.WriteByte(' '); err != nil {
121						return err
122					}
123					if err := writeQuoted(w, s); err != nil {
124						return err
125					}
126				}
127			} else if s != "" {
128				if _, err := w.WriteString(" SYSTEM "); err != nil {
129					return err
130				}
131				if err := writeQuoted(w, s); err != nil {
132					return err
133				}
134			}
135		}
136		return w.WriteByte('>')
137	case RawNode:
138		_, err := w.WriteString(n.Data)
139		return err
140	default:
141		return errors.New("html: unknown node type")
142	}
143
144	// Render the <xxx> opening tag.
145	if err := w.WriteByte('<'); err != nil {
146		return err
147	}
148	if _, err := w.WriteString(n.Data); err != nil {
149		return err
150	}
151	for _, a := range n.Attr {
152		if err := w.WriteByte(' '); err != nil {
153			return err
154		}
155		if a.Namespace != "" {
156			if _, err := w.WriteString(a.Namespace); err != nil {
157				return err
158			}
159			if err := w.WriteByte(':'); err != nil {
160				return err
161			}
162		}
163		if _, err := w.WriteString(a.Key); err != nil {
164			return err
165		}
166		if _, err := w.WriteString(`="`); err != nil {
167			return err
168		}
169		if err := escape(w, a.Val); err != nil {
170			return err
171		}
172		if err := w.WriteByte('"'); err != nil {
173			return err
174		}
175	}
176	if voidElements[n.Data] {
177		if n.FirstChild != nil {
178			return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
179		}
180		_, err := w.WriteString("/>")
181		return err
182	}
183	if err := w.WriteByte('>'); err != nil {
184		return err
185	}
186
187	// Add initial newline where there is danger of a newline beging ignored.
188	if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
189		switch n.Data {
190		case "pre", "listing", "textarea":
191			if err := w.WriteByte('\n'); err != nil {
192				return err
193			}
194		}
195	}
196
197	// Render any child nodes
198	if childTextNodesAreLiteral(n) {
199		for c := n.FirstChild; c != nil; c = c.NextSibling {
200			if c.Type == TextNode {
201				if _, err := w.WriteString(c.Data); err != nil {
202					return err
203				}
204			} else {
205				if err := render1(w, c); err != nil {
206					return err
207				}
208			}
209		}
210		if n.Data == "plaintext" {
211			// Don't render anything else. <plaintext> must be the
212			// last element in the file, with no closing tag.
213			return plaintextAbort
214		}
215	} else {
216		for c := n.FirstChild; c != nil; c = c.NextSibling {
217			if err := render1(w, c); err != nil {
218				return err
219			}
220		}
221	}
222
223	// Render the </xxx> closing tag.
224	if _, err := w.WriteString("</"); err != nil {
225		return err
226	}
227	if _, err := w.WriteString(n.Data); err != nil {
228		return err
229	}
230	return w.WriteByte('>')
231}
232
233func childTextNodesAreLiteral(n *Node) bool {
234	// Per WHATWG HTML 13.3, if the parent of the current node is a style,
235	// script, xmp, iframe, noembed, noframes, or plaintext element, and the
236	// current node is a text node, append the value of the node's data
237	// literally. The specification is not explicit about it, but we only
238	// enforce this if we are in the HTML namespace (i.e. when the namespace is
239	// "").
240	// NOTE: we also always include noscript elements, although the
241	// specification states that they should only be rendered as such if
242	// scripting is enabled for the node (which is not something we track).
243	if n.Namespace != "" {
244		return false
245	}
246	switch n.Data {
247	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
248		return true
249	default:
250		return false
251	}
252}
253
254// writeQuoted writes s to w surrounded by quotes. Normally it will use double
255// quotes, but if s contains a double quote, it will use single quotes.
256// It is used for writing the identifiers in a doctype declaration.
257// In valid HTML, they can't contain both types of quotes.
258func writeQuoted(w writer, s string) error {
259	var q byte = '"'
260	if strings.Contains(s, `"`) {
261		q = '\''
262	}
263	if err := w.WriteByte(q); err != nil {
264		return err
265	}
266	if _, err := w.WriteString(s); err != nil {
267		return err
268	}
269	if err := w.WriteByte(q); err != nil {
270		return err
271	}
272	return nil
273}
274
// voidElements is the set of void elements, i.e. those that can't have any
// contents, as listed in Section 12.1.2, "Elements".
var voidElements = map[string]bool{
	"area": true, "base": true, "br": true, "col": true, "embed": true,
	"hr": true, "img": true, "input": true, "link": true, "meta": true,
	"param": true, "source": true, "track": true, "wbr": true,

	// "keygen" has been removed from the spec, but is kept here for
	// backwards compatibility.
	"keygen": true,
}