helpers.go

  1// Copyright (c) 2014, David Kitchen <david@buro9.com>
  2//
  3// All rights reserved.
  4//
  5// Redistribution and use in source and binary forms, with or without
  6// modification, are permitted provided that the following conditions are met:
  7//
  8// * Redistributions of source code must retain the above copyright notice, this
  9//   list of conditions and the following disclaimer.
 10//
 11// * Redistributions in binary form must reproduce the above copyright notice,
 12//   this list of conditions and the following disclaimer in the documentation
 13//   and/or other materials provided with the distribution.
 14//
 15// * Neither the name of the organisation (Microcosm) nor the names of its
 16//   contributors may be used to endorse or promote products derived from
 17//   this software without specific prior written permission.
 18//
 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 20// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 21// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 22// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 23// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 24// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 25// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 26// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 27// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29
 30package bluemonday
 31
 32import (
 33	"encoding/base64"
 34	"net/url"
 35	"regexp"
 36)
 37
 38// A selection of regular expressions that can be used as .Matching() rules on
 39// HTML attributes.
 40var (
 41	// CellAlign handles the `align` attribute
 42	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align
 43	CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`)
 44
 45	// CellVerticalAlign handles the `valign` attribute
 46	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign
 47	CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`)
 48
 49	// Direction handles the `dir` attribute
 50	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir
 51	Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`)
 52
 53	// ImageAlign handles the `align` attribute on the `image` tag
 54	// http://www.w3.org/MarkUp/Test/Img/imgtest.html
 55	ImageAlign = regexp.MustCompile(
 56		`(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`,
 57	)
 58
 59	// Integer describes whole positive integers (including 0) used in places
 60	// like td.colspan
 61	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan
 62	Integer = regexp.MustCompile(`^[0-9]+$`)
 63
 64	// ISO8601 according to the W3 group is only a subset of the ISO8601
 65	// standard: http://www.w3.org/TR/NOTE-datetime
 66	//
 67	// Used in places like time.datetime
 68	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime
 69	//
 70	// Matches patterns:
 71	//  Year:
 72	//     YYYY (eg 1997)
 73	//  Year and month:
 74	//     YYYY-MM (eg 1997-07)
 75	//  Complete date:
 76	//     YYYY-MM-DD (eg 1997-07-16)
 77	//  Complete date plus hours and minutes:
 78	//     YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00)
 79	//  Complete date plus hours, minutes and seconds:
 80	//     YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00)
 81	//  Complete date plus hours, minutes, seconds and a decimal fraction of a
 82	//  second
 83	//      YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00)
 84	ISO8601 = regexp.MustCompile(
 85		`^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` +
 86			`?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`,
 87	)
 88
 89	// ListType encapsulates the common value as well as the latest spec
 90	// values for lists
 91	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type
 92	ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`)
 93
 94	// SpaceSeparatedTokens is used in places like `a.rel` and the common attribute
 95	// `class` which both contain space delimited lists of data tokens
 96	// http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def
 97	// Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers
 98	SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`)
 99
100	// Number is a double value used on HTML5 meter and progress elements
101	// http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element
102	Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`)
103
104	// NumberOrPercent is used predominantly as units of measurement in width
105	// and height attributes
106	// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height
107	NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`)
108
109	// Paragraph of text in an attribute such as *.'title', img.alt, etc
110	// https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title
111	// Note that we are not allowing chars that could close tags like '>'
112	Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`)
113
114	// dataURIImagePrefix is used by AllowDataURIImages to define the acceptable
115	// prefix of data URIs that contain common web image formats.
116	//
117	// This is not exported as it's not useful by itself, and only has value
118	// within the AllowDataURIImages func
119	dataURIImagePrefix = regexp.MustCompile(
120		`^image/(gif|jpeg|png|svg\+xml|webp);base64,`,
121	)
122)
123
124// AllowStandardURLs is a convenience function that will enable rel="nofollow"
125// on "a", "area" and "link" (if you have allowed those elements) and will
126// ensure that the URL values are parseable and either relative or belong to the
127// "mailto", "http", or "https" schemes
128func (p *Policy) AllowStandardURLs() {
129	// URLs must be parseable by net/url.Parse()
130	p.RequireParseableURLs(true)
131
132	// !url.IsAbs() is permitted
133	p.AllowRelativeURLs(true)
134
135	// Most common URL schemes only
136	p.AllowURLSchemes("mailto", "http", "https")
137
138	// For linking elements we will add rel="nofollow" if it does not already exist
139	// This applies to "a" "area" "link"
140	p.RequireNoFollowOnLinks(true)
141}
142
143// AllowStandardAttributes will enable "id", "title" and the language specific
144// attributes "dir" and "lang" on all elements that are allowed
145func (p *Policy) AllowStandardAttributes() {
146	// "dir" "lang" are permitted as both language attributes affect charsets
147	// and direction of text.
148	p.AllowAttrs("dir").Matching(Direction).Globally()
149	p.AllowAttrs(
150		"lang",
151	).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally()
152
153	// "id" is permitted. This is pretty much as some HTML elements require this
154	// to work well ("dfn" is an example of a "id" being value)
155	// This does create a risk that JavaScript and CSS within your web page
156	// might identify the wrong elements. Ensure that you select things
157	// accurately
158	p.AllowAttrs("id").Matching(
159		regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`),
160	).Globally()
161
162	// "title" is permitted as it improves accessibility.
163	p.AllowAttrs("title").Matching(Paragraph).Globally()
164}
165
166// AllowStyling presently enables the class attribute globally.
167//
168// Note: When bluemonday ships a CSS parser and we can safely sanitise that,
169// this will also allow sanitized styling of elements via the style attribute.
170func (p *Policy) AllowStyling() {
171
172	// "class" is permitted globally
173	p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally()
174}
175
176// AllowImages enables the img element and some popular attributes. It will also
177// ensure that URL values are parseable. This helper does not enable data URI
178// images, for that you should also use the AllowDataURIImages() helper.
179func (p *Policy) AllowImages() {
180
181	// "img" is permitted
182	p.AllowAttrs("align").Matching(ImageAlign).OnElements("img")
183	p.AllowAttrs("alt").Matching(Paragraph).OnElements("img")
184	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img")
185
186	// Standard URLs enabled
187	p.AllowStandardURLs()
188	p.AllowAttrs("src").OnElements("img")
189}
190
191// AllowDataURIImages permits the use of inline images defined in RFC2397
192// http://tools.ietf.org/html/rfc2397
193// http://en.wikipedia.org/wiki/Data_URI_scheme
194//
195// Images must have a mimetype matching:
196//
197//	image/gif
198//	image/jpeg
199//	image/png
200//	image/webp
201//
202// NOTE: There is a potential security risk to allowing data URIs and you should
203// only permit them on content you already trust.
204// http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/
205// https://capec.mitre.org/data/definitions/244.html
206func (p *Policy) AllowDataURIImages() {
207
208	// URLs must be parseable by net/url.Parse()
209	p.RequireParseableURLs(true)
210
211	// Supply a function to validate images contained within data URI
212	p.AllowURLSchemeWithCustomPolicy(
213		"data",
214		func(url *url.URL) (allowUrl bool) {
215			if url.RawQuery != "" || url.Fragment != "" {
216				return false
217			}
218
219			matched := dataURIImagePrefix.FindString(url.Opaque)
220			if matched == "" {
221				return false
222			}
223
224			_, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):])
225			return err == nil
226		},
227	)
228}
229
230// AllowLists will enabled ordered and unordered lists, as well as definition
231// lists
232func (p *Policy) AllowLists() {
233	// "ol" "ul" are permitted
234	p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul")
235
236	// "li" is permitted
237	p.AllowAttrs("type").Matching(ListType).OnElements("li")
238	p.AllowAttrs("value").Matching(Integer).OnElements("li")
239
240	// "dl" "dt" "dd" are permitted
241	p.AllowElements("dl", "dt", "dd")
242}
243
244// AllowTables will enable a rich set of elements and attributes to describe
245// HTML tables
246func (p *Policy) AllowTables() {
247
248	// "table" is permitted
249	p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table")
250	p.AllowAttrs("summary").Matching(Paragraph).OnElements("table")
251
252	// "caption" is permitted
253	p.AllowElements("caption")
254
255	// "col" "colgroup" are permitted
256	p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup")
257	p.AllowAttrs("height", "width").Matching(
258		NumberOrPercent,
259	).OnElements("col", "colgroup")
260	p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col")
261	p.AllowAttrs("valign").Matching(
262		CellVerticalAlign,
263	).OnElements("col", "colgroup")
264
265	// "thead" "tr" are permitted
266	p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr")
267	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr")
268
269	// "td" "th" are permitted
270	p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th")
271	p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th")
272	p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th")
273	p.AllowAttrs("headers").Matching(
274		SpaceSeparatedTokens,
275	).OnElements("td", "th")
276	p.AllowAttrs("height", "width").Matching(
277		NumberOrPercent,
278	).OnElements("td", "th")
279	p.AllowAttrs(
280		"scope",
281	).Matching(
282		regexp.MustCompile(`(?i)(?:row|col)(?:group)?`),
283	).OnElements("td", "th")
284	p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th")
285	p.AllowAttrs("nowrap").Matching(
286		regexp.MustCompile(`(?i)|nowrap`),
287	).OnElements("td", "th")
288
289	// "tbody" "tfoot"
290	p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot")
291	p.AllowAttrs("valign").Matching(
292		CellVerticalAlign,
293	).OnElements("tbody", "tfoot")
294}
295
296func (p *Policy) AllowIFrames(vals ...SandboxValue) {
297	p.AllowAttrs("sandbox").OnElements("iframe")
298
299	p.RequireSandboxOnIFrame(vals...)
300}