1// Copyright (c) 2014, David Kitchen <david@buro9.com>
2//
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice, this
9// list of conditions and the following disclaimer.
10//
11// * Redistributions in binary form must reproduce the above copyright notice,
12// this list of conditions and the following disclaimer in the documentation
13// and/or other materials provided with the distribution.
14//
15// * Neither the name of the organisation (Microcosm) nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30package bluemonday
31
32import (
33 "regexp"
34)
35
36// StrictPolicy returns an empty policy, which will effectively strip all HTML
37// elements and their attributes from a document.
38func StrictPolicy() *Policy {
39 return NewPolicy()
40}
41
42// StripTagsPolicy is DEPRECATED. Use StrictPolicy instead.
43func StripTagsPolicy() *Policy {
44 return StrictPolicy()
45}
46
47// UGCPolicy returns a policy aimed at user generated content that is a result
48// of HTML WYSIWYG tools and Markdown conversions.
49//
50// This is expected to be a fairly rich document where as much markup as
51// possible should be retained. Markdown permits raw HTML so we are basically
52// providing a policy to sanitise HTML5 documents safely but with the
53// least intrusion on the formatting expectations of the user.
54func UGCPolicy() *Policy {
55
56 p := NewPolicy()
57
58 ///////////////////////
59 // Global attributes //
60 ///////////////////////
61
62 // "class" is not permitted as we are not allowing users to style their own
63 // content
64
65 p.AllowStandardAttributes()
66
67 //////////////////////////////
68 // Global URL format policy //
69 //////////////////////////////
70
71 p.AllowStandardURLs()
72
73 ////////////////////////////////
74 // Declarations and structure //
75 ////////////////////////////////
76
77 // "xml" "xslt" "DOCTYPE" "html" "head" are not permitted as we are
78 // expecting user generated content to be a fragment of HTML and not a full
79 // document.
80
81 //////////////////////////
82 // Sectioning root tags //
83 //////////////////////////
84
85 // "article" and "aside" are permitted and takes no attributes
86 p.AllowElements("article", "aside")
87
88 // "body" is not permitted as we are expecting user generated content to be a fragment
89 // of HTML and not a full document.
90
91 // "details" is permitted, including the "open" attribute which can either
92 // be blank or the value "open".
93 p.AllowAttrs(
94 "open",
95 ).Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
96
97 // "fieldset" is not permitted as we are not allowing forms to be created.
98
99 // "figure" is permitted and takes no attributes
100 p.AllowElements("figure")
101
102 // "nav" is not permitted as it is assumed that the site (and not the user)
103 // has defined navigation elements
104
105 // "section" is permitted and takes no attributes
106 p.AllowElements("section")
107
108 // "summary" is permitted and takes no attributes
109 p.AllowElements("summary")
110
111 //////////////////////////
112 // Headings and footers //
113 //////////////////////////
114
115 // "footer" is not permitted as we expect user content to be a fragment and
116 // not structural to this extent
117
118 // "h1" through "h6" are permitted and take no attributes
119 p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
120
121 // "header" is not permitted as we expect user content to be a fragment and
122 // not structural to this extent
123
124 // "hgroup" is permitted and takes no attributes
125 p.AllowElements("hgroup")
126
127 /////////////////////////////////////
128 // Content grouping and separating //
129 /////////////////////////////////////
130
131 // "blockquote" is permitted, including the "cite" attribute which must be
132 // a standard URL.
133 p.AllowAttrs("cite").OnElements("blockquote")
134
135 // "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
136 p.AllowElements("br", "div", "hr", "p", "span", "wbr")
137
138 ///////////
139 // Links //
140 ///////////
141
142 // "a" is permitted
143 p.AllowAttrs("href").OnElements("a")
144
145 // "area" is permitted along with the attributes that map image maps work
146 p.AllowAttrs("name").Matching(
147 regexp.MustCompile(`^([\p{L}\p{N}_-]+)$`),
148 ).OnElements("map")
149 p.AllowAttrs("alt").Matching(Paragraph).OnElements("area")
150 p.AllowAttrs("coords").Matching(
151 regexp.MustCompile(`^([0-9]+,)+[0-9]+$`),
152 ).OnElements("area")
153 p.AllowAttrs("href").OnElements("area")
154 p.AllowAttrs("rel").Matching(SpaceSeparatedTokens).OnElements("area")
155 p.AllowAttrs("shape").Matching(
156 regexp.MustCompile(`(?i)^(default|circle|rect|poly)$`),
157 ).OnElements("area")
158 p.AllowAttrs("usemap").Matching(
159 regexp.MustCompile(`(?i)^#[\p{L}\p{N}_-]+$`),
160 ).OnElements("img")
161
162 // "link" is not permitted
163
164 /////////////////////
165 // Phrase elements //
166 /////////////////////
167
168 // The following are all inline phrasing elements
169 p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
170 "figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
171
172 // "q" is permitted and "cite" is a URL and handled by URL policies
173 p.AllowAttrs("cite").OnElements("q")
174
175 // "time" is permitted
176 p.AllowAttrs("datetime").Matching(ISO8601).OnElements("time")
177
178 ////////////////////
179 // Style elements //
180 ////////////////////
181
182 // block and inline elements that impart no semantic meaning but style the
183 // document
184 p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
185
186 // "style" is not permitted as we are not yet sanitising CSS and it is an
187 // XSS attack vector
188
189 //////////////////////
190 // HTML5 Formatting //
191 //////////////////////
192
193 // "bdi" "bdo" are permitted
194 p.AllowAttrs("dir").Matching(Direction).OnElements("bdi", "bdo")
195
196 // "rp" "rt" "ruby" are permitted
197 p.AllowElements("rp", "rt", "ruby")
198
199 ///////////////////////////
200 // HTML5 Change tracking //
201 ///////////////////////////
202
203 // "del" "ins" are permitted
204 p.AllowAttrs("cite").Matching(Paragraph).OnElements("del", "ins")
205 p.AllowAttrs("datetime").Matching(ISO8601).OnElements("del", "ins")
206
207 ///////////
208 // Lists //
209 ///////////
210
211 p.AllowLists()
212
213 ////////////
214 // Tables //
215 ////////////
216
217 p.AllowTables()
218
219 ///////////
220 // Forms //
221 ///////////
222
223 // By and large, forms are not permitted. However there are some form
224 // elements that can be used to present data, and we do permit those
225 //
226 // "button" "fieldset" "input" "keygen" "label" "output" "select" "datalist"
227 // "textarea" "optgroup" "option" are all not permitted
228
229 // "meter" is permitted
230 p.AllowAttrs(
231 "value",
232 "min",
233 "max",
234 "low",
235 "high",
236 "optimum",
237 ).Matching(Number).OnElements("meter")
238
239 // "progress" is permitted
240 p.AllowAttrs("value", "max").Matching(Number).OnElements("progress")
241
242 //////////////////////
243 // Embedded content //
244 //////////////////////
245
246 // Vast majority not permitted
247 // "audio" "canvas" "embed" "iframe" "object" "param" "source" "svg" "track"
248 // "video" are all not permitted
249
250 p.AllowImages()
251
252 return p
253}