// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "bufio"
9 "errors"
10 "fmt"
11 "io"
12 "strings"
13)
14
// writer is the set of write methods the renderer relies on: byte slices,
// single bytes and strings. Render uses a destination that already provides
// all three directly and wraps any other io.Writer in a bufio.Writer.
type writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
20
21// Render renders the parse tree n to the given writer.
22//
23// Rendering is done on a 'best effort' basis: calling Parse on the output of
24// Render will always result in something similar to the original tree, but it
25// is not necessarily an exact clone unless the original tree was 'well-formed'.
26// 'Well-formed' is not easily specified; the HTML5 specification is
27// complicated.
28//
29// Calling Parse on arbitrary input typically results in a 'well-formed' parse
30// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
31// For example, in a 'well-formed' parse tree, no <a> element is a child of
32// another <a> element: parsing "<a><a>" results in two sibling elements.
33// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
34// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
35// children; the <a> is reparented to the <table>'s parent. However, calling
36// Parse on "<a><table><a>" does not return an error, but the result has an <a>
37// element with an <a> child, and is therefore not 'well-formed'.
38//
39// Programmatically constructed trees are typically also 'well-formed', but it
40// is possible to construct a tree that looks innocuous but, when rendered and
41// re-parsed, results in a different tree. A simple example is that a solitary
42// text node would become a tree containing <html>, <head> and <body> elements.
43// Another example is that the programmatic equivalent of "a<head>b</head>c"
44// becomes "<html><head><head/><body>abc</body></html>".
45func Render(w io.Writer, n *Node) error {
46 if x, ok := w.(writer); ok {
47 return render(x, n)
48 }
49 buf := bufio.NewWriter(w)
50 if err := render(buf, n); err != nil {
51 return err
52 }
53 return buf.Flush()
54}
55
// plaintextAbort is returned from render1 once a <plaintext> element has
// been rendered, to signal that no more end tags should be rendered after
// it. render turns it back into a nil error.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
59
60func render(w writer, n *Node) error {
61 err := render1(w, n)
62 if err == plaintextAbort {
63 err = nil
64 }
65 return err
66}
67
68func render1(w writer, n *Node) error {
69 // Render non-element nodes; these are the easy cases.
70 switch n.Type {
71 case ErrorNode:
72 return errors.New("html: cannot render an ErrorNode node")
73 case TextNode:
74 return escape(w, n.Data)
75 case DocumentNode:
76 for c := n.FirstChild; c != nil; c = c.NextSibling {
77 if err := render1(w, c); err != nil {
78 return err
79 }
80 }
81 return nil
82 case ElementNode:
83 // No-op.
84 case CommentNode:
85 if _, err := w.WriteString("<!--"); err != nil {
86 return err
87 }
88 if err := escapeComment(w, n.Data); err != nil {
89 return err
90 }
91 if _, err := w.WriteString("-->"); err != nil {
92 return err
93 }
94 return nil
95 case DoctypeNode:
96 if _, err := w.WriteString("<!DOCTYPE "); err != nil {
97 return err
98 }
99 if err := escape(w, n.Data); err != nil {
100 return err
101 }
102 if n.Attr != nil {
103 var p, s string
104 for _, a := range n.Attr {
105 switch a.Key {
106 case "public":
107 p = a.Val
108 case "system":
109 s = a.Val
110 }
111 }
112 if p != "" {
113 if _, err := w.WriteString(" PUBLIC "); err != nil {
114 return err
115 }
116 if err := writeQuoted(w, p); err != nil {
117 return err
118 }
119 if s != "" {
120 if err := w.WriteByte(' '); err != nil {
121 return err
122 }
123 if err := writeQuoted(w, s); err != nil {
124 return err
125 }
126 }
127 } else if s != "" {
128 if _, err := w.WriteString(" SYSTEM "); err != nil {
129 return err
130 }
131 if err := writeQuoted(w, s); err != nil {
132 return err
133 }
134 }
135 }
136 return w.WriteByte('>')
137 case RawNode:
138 _, err := w.WriteString(n.Data)
139 return err
140 default:
141 return errors.New("html: unknown node type")
142 }
143
144 // Render the <xxx> opening tag.
145 if err := w.WriteByte('<'); err != nil {
146 return err
147 }
148 if _, err := w.WriteString(n.Data); err != nil {
149 return err
150 }
151 for _, a := range n.Attr {
152 if err := w.WriteByte(' '); err != nil {
153 return err
154 }
155 if a.Namespace != "" {
156 if _, err := w.WriteString(a.Namespace); err != nil {
157 return err
158 }
159 if err := w.WriteByte(':'); err != nil {
160 return err
161 }
162 }
163 if _, err := w.WriteString(a.Key); err != nil {
164 return err
165 }
166 if _, err := w.WriteString(`="`); err != nil {
167 return err
168 }
169 if err := escape(w, a.Val); err != nil {
170 return err
171 }
172 if err := w.WriteByte('"'); err != nil {
173 return err
174 }
175 }
176 if voidElements[n.Data] {
177 if n.FirstChild != nil {
178 return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
179 }
180 _, err := w.WriteString("/>")
181 return err
182 }
183 if err := w.WriteByte('>'); err != nil {
184 return err
185 }
186
187 // Add initial newline where there is danger of a newline beging ignored.
188 if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
189 switch n.Data {
190 case "pre", "listing", "textarea":
191 if err := w.WriteByte('\n'); err != nil {
192 return err
193 }
194 }
195 }
196
197 // Render any child nodes
198 if childTextNodesAreLiteral(n) {
199 for c := n.FirstChild; c != nil; c = c.NextSibling {
200 if c.Type == TextNode {
201 if _, err := w.WriteString(c.Data); err != nil {
202 return err
203 }
204 } else {
205 if err := render1(w, c); err != nil {
206 return err
207 }
208 }
209 }
210 if n.Data == "plaintext" {
211 // Don't render anything else. <plaintext> must be the
212 // last element in the file, with no closing tag.
213 return plaintextAbort
214 }
215 } else {
216 for c := n.FirstChild; c != nil; c = c.NextSibling {
217 if err := render1(w, c); err != nil {
218 return err
219 }
220 }
221 }
222
223 // Render the </xxx> closing tag.
224 if _, err := w.WriteString("</"); err != nil {
225 return err
226 }
227 if _, err := w.WriteString(n.Data); err != nil {
228 return err
229 }
230 return w.WriteByte('>')
231}
232
233func childTextNodesAreLiteral(n *Node) bool {
234 // Per WHATWG HTML 13.3, if the parent of the current node is a style,
235 // script, xmp, iframe, noembed, noframes, or plaintext element, and the
236 // current node is a text node, append the value of the node's data
237 // literally. The specification is not explicit about it, but we only
238 // enforce this if we are in the HTML namespace (i.e. when the namespace is
239 // "").
240 // NOTE: we also always include noscript elements, although the
241 // specification states that they should only be rendered as such if
242 // scripting is enabled for the node (which is not something we track).
243 if n.Namespace != "" {
244 return false
245 }
246 switch n.Data {
247 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
248 return true
249 default:
250 return false
251 }
252}
253
254// writeQuoted writes s to w surrounded by quotes. Normally it will use double
255// quotes, but if s contains a double quote, it will use single quotes.
256// It is used for writing the identifiers in a doctype declaration.
257// In valid HTML, they can't contain both types of quotes.
258func writeQuoted(w writer, s string) error {
259 var q byte = '"'
260 if strings.Contains(s, `"`) {
261 q = '\''
262 }
263 if err := w.WriteByte(q); err != nil {
264 return err
265 }
266 if _, err := w.WriteString(s); err != nil {
267 return err
268 }
269 if err := w.WriteByte(q); err != nil {
270 return err
271 }
272 return nil
273}
274
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents. render1 writes them as a single
// self-closing tag and reports an error if one has child nodes.
var voidElements = map[string]bool{
	"area":  true,
	"base":  true,
	"br":    true,
	"col":   true,
	"embed": true,
	"hr":    true,
	"img":   true,
	"input": true,
	// "keygen" has been removed from the spec, but is kept here for
	// backwards compatibility.
	"keygen": true,
	"link":   true,
	"meta":   true,
	"param":  true,
	"source": true,
	"track":  true,
	"wbr":    true,
}