policy.go

  1// Copyright (c) 2014, David Kitchen <david@buro9.com>
  2//
  3// All rights reserved.
  4//
  5// Redistribution and use in source and binary forms, with or without
  6// modification, are permitted provided that the following conditions are met:
  7//
  8// * Redistributions of source code must retain the above copyright notice, this
  9//   list of conditions and the following disclaimer.
 10//
 11// * Redistributions in binary form must reproduce the above copyright notice,
 12//   this list of conditions and the following disclaimer in the documentation
 13//   and/or other materials provided with the distribution.
 14//
 15// * Neither the name of the organisation (Microcosm) nor the names of its
 16//   contributors may be used to endorse or promote products derived from
 17//   this software without specific prior written permission.
 18//
 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 20// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 21// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 22// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 23// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 24// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 25// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 26// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 27// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29
 30package bluemonday
 31
 32//TODO sgutzwiller create map of styles to default handlers
 33//TODO sgutzwiller create handlers for various attributes
 34import (
 35	"net/url"
 36	"regexp"
 37	"strings"
 38
 39	"github.com/microcosm-cc/bluemonday/css"
 40)
 41
 42// Policy encapsulates the allowlist of HTML elements and attributes that will
 43// be applied to the sanitised HTML.
 44//
 45// You should use bluemonday.NewPolicy() to create a blank policy as the
 46// unexported fields contain maps that need to be initialized.
 47type Policy struct {
 48
 49	// Declares whether the maps have been initialized, used as a cheap check to
 50	// ensure that those using Policy{} directly won't cause nil pointer
 51	// exceptions
 52	initialized bool
 53
 54	// If true then we add spaces when stripping tags, specifically the closing
 55	// tag is replaced by a space character.
 56	addSpaces bool
 57
 58	// When true, add rel="nofollow" to HTML a, area, and link tags
 59	requireNoFollow bool
 60
 61	// When true, add rel="nofollow" to HTML a, area, and link tags
 62	// Will add for href="http://foo"
 63	// Will skip for href="/foo" or href="foo"
 64	requireNoFollowFullyQualifiedLinks bool
 65
 66	// When true, add rel="noreferrer" to HTML a, area, and link tags
 67	requireNoReferrer bool
 68
 69	// When true, add rel="noreferrer" to HTML a, area, and link tags
 70	// Will add for href="http://foo"
 71	// Will skip for href="/foo" or href="foo"
 72	requireNoReferrerFullyQualifiedLinks bool
 73
 74	// When true, add crossorigin="anonymous" to HTML audio, img, link, script, and video tags
 75	requireCrossOriginAnonymous bool
 76
 77	// When true, add and filter sandbox attribute on iframe tags
 78	requireSandboxOnIFrame map[string]bool
 79
 80	// When true add target="_blank" to fully qualified links
 81	// Will add for href="http://foo"
 82	// Will skip for href="/foo" or href="foo"
 83	addTargetBlankToFullyQualifiedLinks bool
 84
 85	// When true, URLs must be parseable by "net/url" url.Parse()
 86	requireParseableURLs bool
 87
 88	// When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
 89	allowRelativeURLs bool
 90
 91	// When true, allow data attributes.
 92	allowDataAttributes bool
 93
 94	// When true, allow comments.
 95	allowComments bool
 96
 97	// map[htmlElementName]map[htmlAttributeName][]attrPolicy
 98	elsAndAttrs map[string]map[string][]attrPolicy
 99
100	// elsMatchingAndAttrs stores regex based element matches along with attributes
101	elsMatchingAndAttrs map[*regexp.Regexp]map[string][]attrPolicy
102
103	// map[htmlAttributeName][]attrPolicy
104	globalAttrs map[string][]attrPolicy
105
106	// map[htmlElementName]map[cssPropertyName][]stylePolicy
107	elsAndStyles map[string]map[string][]stylePolicy
108
109	// map[regex]map[cssPropertyName][]stylePolicy
110	elsMatchingAndStyles map[*regexp.Regexp]map[string][]stylePolicy
111
112	// map[cssPropertyName][]stylePolicy
113	globalStyles map[string][]stylePolicy
114
115	// If urlPolicy is nil, all URLs with matching schema are allowed.
116	// Otherwise, only the URLs with matching schema and urlPolicy(url)
117	// returning true are allowed.
118	allowURLSchemes map[string][]urlPolicy
119
120	// These regexps are used to match allowed URL schemes, for example
121	// if one would want to allow all URL schemes, they would add `.+`.
122	// However pay attention as this can lead to XSS being rendered thus
123	// defeating the purpose of using a HTML sanitizer.
124	// The regexps are only considered if a schema was not explicitly
125	// handled by `AllowURLSchemes` or `AllowURLSchemeWithCustomPolicy`.
126	allowURLSchemeRegexps []*regexp.Regexp
127
128	// If srcRewriter is not nil, it is used to rewrite the src attribute
129	// of tags that download resources, such as <img> and <script>.
130	// It requires that the URL is parsable by "net/url" url.Parse().
131	srcRewriter urlRewriter
132
133	// If an element has had all attributes removed as a result of a policy
134	// being applied, then the element would be removed from the output.
135	//
136	// However some elements are valid and have strong layout meaning without
137	// any attributes, i.e. <table>. To prevent those being removed we maintain
138	// a list of elements that are allowed to have no attributes and that will
139	// be maintained in the output HTML.
140	setOfElementsAllowedWithoutAttrs map[string]struct{}
141
142	// If an element has had all attributes removed as a result of a policy
143	// being applied, then the element would be removed from the output.
144	//
145	// However some elements are valid and have strong layout meaning without
146	// any attributes, i.e. <table>.
147	//
148	// In this case, any element matching a regular expression will be accepted without
149	// attributes added.
150	setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp
151
152	setOfElementsToSkipContent map[string]struct{}
153
154	// Permits fundamentally unsafe elements.
155	//
156	// If false (default) then elements such as `style` and `script` will not be
157	// permitted even if declared in a policy. These elements when combined with
158	// untrusted input cannot be safely handled by bluemonday at this point in
159	// time.
160	//
161	// If true then `style` and `script` would be permitted by bluemonday if a
162	// policy declares them. However this is not recommended under any circumstance
163	// and can lead to XSS being rendered thus defeating the purpose of using a
164	// HTML sanitizer.
165	allowUnsafe bool
166}
167
// attrPolicy is a single rule attached to an allowed HTML attribute.
type attrPolicy struct {
	// regexp is an optional pattern. When it is not nil the attribute value
	// has to match it, otherwise the attribute is removed.
	regexp *regexp.Regexp
}
174
// stylePolicy is a single rule attached to an allowed CSS property. The
// style policy builders populate exactly one of its fields per rule.
type stylePolicy struct {
	// handler is a function used to validate the property value.
	handler func(string) bool

	// regexp is an optional pattern. When it is not nil the value has to
	// match it, otherwise the property is removed.
	regexp *regexp.Regexp

	// enum is an optional list of allowed values, for properties that have
	// a defined list of allowed values. The property is removed if its
	// value is not in the list.
	enum []string
}
188
189type attrPolicyBuilder struct {
190	p *Policy
191
192	attrNames  []string
193	regexp     *regexp.Regexp
194	allowEmpty bool
195}
196
197type stylePolicyBuilder struct {
198	p *Policy
199
200	propertyNames []string
201	regexp        *regexp.Regexp
202	enum          []string
203	handler       func(string) bool
204}
205
// urlPolicy decides whether a parsed URL is allowed; it reports true to
// permit the URL.
type urlPolicy func(target *url.URL) bool
207
// urlRewriter receives a parsed URL by pointer and returns nothing, so any
// rewriting is done by modifying the URL in place.
type urlRewriter func(target *url.URL)
209
// SandboxValue identifies one token of the iframe sandbox attribute. Values
// are passed to Policy.RequireSandboxOnIFrame to select the permitted tokens.
type SandboxValue int64

// Sandbox tokens accepted by Policy.RequireSandboxOnIFrame. Each constant
// corresponds to the sandbox token named in its comment.
const (
	// SandboxAllowDownloads selects "allow-downloads".
	SandboxAllowDownloads SandboxValue = iota
	// SandboxAllowDownloadsWithoutUserActivation selects
	// "allow-downloads-without-user-activation".
	SandboxAllowDownloadsWithoutUserActivation
	// SandboxAllowForms selects "allow-forms".
	SandboxAllowForms
	// SandboxAllowModals selects "allow-modals".
	SandboxAllowModals
	// SandboxAllowOrientationLock selects "allow-orientation-lock".
	SandboxAllowOrientationLock
	// SandboxAllowPointerLock selects "allow-pointer-lock".
	SandboxAllowPointerLock
	// SandboxAllowPopups selects "allow-popups".
	SandboxAllowPopups
	// SandboxAllowPopupsToEscapeSandbox selects
	// "allow-popups-to-escape-sandbox".
	SandboxAllowPopupsToEscapeSandbox
	// SandboxAllowPresentation selects "allow-presentation".
	SandboxAllowPresentation
	// SandboxAllowSameOrigin selects "allow-same-origin".
	SandboxAllowSameOrigin
	// SandboxAllowScripts selects "allow-scripts".
	SandboxAllowScripts
	// SandboxAllowStorageAccessByUserActivation selects
	// "allow-storage-access-by-user-activation".
	SandboxAllowStorageAccessByUserActivation
	// SandboxAllowTopNavigation selects "allow-top-navigation".
	SandboxAllowTopNavigation
	// SandboxAllowTopNavigationByUserActivation selects
	// "allow-top-navigation-by-user-activation".
	SandboxAllowTopNavigationByUserActivation
)
228
229// init initializes the maps if this has not been done already
230func (p *Policy) init() {
231	if !p.initialized {
232		p.elsAndAttrs = make(map[string]map[string][]attrPolicy)
233		p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string][]attrPolicy)
234		p.globalAttrs = make(map[string][]attrPolicy)
235		p.elsAndStyles = make(map[string]map[string][]stylePolicy)
236		p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string][]stylePolicy)
237		p.globalStyles = make(map[string][]stylePolicy)
238		p.allowURLSchemes = make(map[string][]urlPolicy)
239		p.allowURLSchemeRegexps = make([]*regexp.Regexp, 0)
240		p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
241		p.setOfElementsToSkipContent = make(map[string]struct{})
242		p.initialized = true
243	}
244}
245
246// NewPolicy returns a blank policy with nothing allowed or permitted. This
247// is the recommended way to start building a policy and you should now use
248// AllowAttrs() and/or AllowElements() to construct the allowlist of HTML
249// elements and attributes.
250func NewPolicy() *Policy {
251
252	p := Policy{}
253
254	p.addDefaultElementsWithoutAttrs()
255	p.addDefaultSkipElementContent()
256
257	return &p
258}
259
260// AllowAttrs takes a range of HTML attribute names and returns an
261// attribute policy builder that allows you to specify the pattern and scope of
262// the allowed attribute.
263//
264// The attribute policy is only added to the core policy when either Globally()
265// or OnElements(...) are called.
266func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
267
268	p.init()
269
270	abp := attrPolicyBuilder{
271		p:          p,
272		allowEmpty: false,
273	}
274
275	for _, attrName := range attrNames {
276		abp.attrNames = append(abp.attrNames, strings.ToLower(attrName))
277	}
278
279	return &abp
280}
281
282// AllowDataAttributes permits all data attributes. We can't specify the name
283// of each attribute exactly as they are customized.
284//
285// NOTE: These values are not sanitized and applications that evaluate or process
286// them without checking and verification of the input may be at risk if this option
287// is enabled. This is a 'caveat emptor' option and the person enabling this option
288// needs to fully understand the potential impact with regards to whatever application
289// will be consuming the sanitized HTML afterwards, i.e. if you know you put a link in a
290// data attribute and use that to automatically load some new window then you're giving
291// the author of a HTML fragment the means to open a malicious destination automatically.
292// Use with care!
293func (p *Policy) AllowDataAttributes() {
294	p.allowDataAttributes = true
295}
296
297// AllowComments allows comments.
298//
299// Please note that only one type of comment will be allowed by this, this is the
300// the standard HTML comment <!-- --> which includes the use of that to permit
301// conditionals as per https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN
302//
303// What is not permitted are CDATA XML comments, as the x/net/html package we depend
304// on does not handle this fully and we are not choosing to take on that work:
305// https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . If the x/net/html
306// package changes this then these will be considered, otherwise if you AllowComments
307// but provide a CDATA comment, then as per the documentation in x/net/html this will
308// be treated as a plain HTML comment.
309func (p *Policy) AllowComments() {
310	p.allowComments = true
311}
312
313// AllowNoAttrs says that attributes on element are optional.
314//
315// The attribute policy is only added to the core policy when OnElements(...)
316// are called.
317func (p *Policy) AllowNoAttrs() *attrPolicyBuilder {
318
319	p.init()
320
321	abp := attrPolicyBuilder{
322		p:          p,
323		allowEmpty: true,
324	}
325	return &abp
326}
327
328// AllowNoAttrs says that attributes on element are optional.
329//
330// The attribute policy is only added to the core policy when OnElements(...)
331// are called.
332func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {
333
334	abp.allowEmpty = true
335
336	return abp
337}
338
339// Matching allows a regular expression to be applied to a nascent attribute
340// policy, and returns the attribute policy.
341func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {
342
343	abp.regexp = regex
344
345	return abp
346}
347
348// OnElements will bind an attribute policy to a given range of HTML elements
349// and return the updated policy
350func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
351
352	for _, element := range elements {
353		element = strings.ToLower(element)
354
355		for _, attr := range abp.attrNames {
356
357			if _, ok := abp.p.elsAndAttrs[element]; !ok {
358				abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
359			}
360
361			ap := attrPolicy{}
362			if abp.regexp != nil {
363				ap.regexp = abp.regexp
364			}
365
366			abp.p.elsAndAttrs[element][attr] = append(abp.p.elsAndAttrs[element][attr], ap)
367		}
368
369		if abp.allowEmpty {
370			abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{}
371
372			if _, ok := abp.p.elsAndAttrs[element]; !ok {
373				abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
374			}
375		}
376	}
377
378	return abp.p
379}
380
381// OnElementsMatching will bind an attribute policy to all elements matching a given regex
382// and return the updated policy
383func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
384	for _, attr := range abp.attrNames {
385		if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
386			abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
387		}
388		ap := attrPolicy{}
389		if abp.regexp != nil {
390			ap.regexp = abp.regexp
391		}
392		abp.p.elsMatchingAndAttrs[regex][attr] = append(abp.p.elsMatchingAndAttrs[regex][attr], ap)
393	}
394
395	if abp.allowEmpty {
396		abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
397		if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
398			abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
399		}
400	}
401
402	return abp.p
403}
404
405// Globally will bind an attribute policy to all HTML elements and return the
406// updated policy
407func (abp *attrPolicyBuilder) Globally() *Policy {
408
409	for _, attr := range abp.attrNames {
410		if _, ok := abp.p.globalAttrs[attr]; !ok {
411			abp.p.globalAttrs[attr] = []attrPolicy{}
412		}
413
414		ap := attrPolicy{}
415		if abp.regexp != nil {
416			ap.regexp = abp.regexp
417		}
418
419		abp.p.globalAttrs[attr] = append(abp.p.globalAttrs[attr], ap)
420	}
421
422	return abp.p
423}
424
425// AllowStyles takes a range of CSS property names and returns a
426// style policy builder that allows you to specify the pattern and scope of
427// the allowed property.
428//
429// The style policy is only added to the core policy when either Globally()
430// or OnElements(...) are called.
431func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {
432
433	p.init()
434
435	abp := stylePolicyBuilder{
436		p: p,
437	}
438
439	for _, propertyName := range propertyNames {
440		abp.propertyNames = append(abp.propertyNames, strings.ToLower(propertyName))
441	}
442
443	return &abp
444}
445
446// Matching allows a regular expression to be applied to a nascent style
447// policy, and returns the style policy.
448func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {
449
450	spb.regexp = regex
451
452	return spb
453}
454
455// MatchingEnum allows a list of allowed values to be applied to a nascent style
456// policy, and returns the style policy.
457func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {
458
459	spb.enum = enum
460
461	return spb
462}
463
464// MatchingHandler allows a handler to be applied to a nascent style
465// policy, and returns the style policy.
466func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {
467
468	spb.handler = handler
469
470	return spb
471}
472
473// OnElements will bind a style policy to a given range of HTML elements
474// and return the updated policy
475func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
476
477	for _, element := range elements {
478		element = strings.ToLower(element)
479
480		for _, attr := range spb.propertyNames {
481
482			if _, ok := spb.p.elsAndStyles[element]; !ok {
483				spb.p.elsAndStyles[element] = make(map[string][]stylePolicy)
484			}
485
486			sp := stylePolicy{}
487			if spb.handler != nil {
488				sp.handler = spb.handler
489			} else if len(spb.enum) > 0 {
490				sp.enum = spb.enum
491			} else if spb.regexp != nil {
492				sp.regexp = spb.regexp
493			} else {
494				sp.handler = css.GetDefaultHandler(attr)
495			}
496			spb.p.elsAndStyles[element][attr] = append(spb.p.elsAndStyles[element][attr], sp)
497		}
498	}
499
500	return spb.p
501}
502
503// OnElementsMatching will bind a style policy to any HTML elements matching the pattern
504// and return the updated policy
505func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
506
507	for _, attr := range spb.propertyNames {
508
509		if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
510			spb.p.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy)
511		}
512
513		sp := stylePolicy{}
514		if spb.handler != nil {
515			sp.handler = spb.handler
516		} else if len(spb.enum) > 0 {
517			sp.enum = spb.enum
518		} else if spb.regexp != nil {
519			sp.regexp = spb.regexp
520		} else {
521			sp.handler = css.GetDefaultHandler(attr)
522		}
523		spb.p.elsMatchingAndStyles[regex][attr] = append(spb.p.elsMatchingAndStyles[regex][attr], sp)
524	}
525
526	return spb.p
527}
528
529// Globally will bind a style policy to all HTML elements and return the
530// updated policy
531func (spb *stylePolicyBuilder) Globally() *Policy {
532
533	for _, attr := range spb.propertyNames {
534		if _, ok := spb.p.globalStyles[attr]; !ok {
535			spb.p.globalStyles[attr] = []stylePolicy{}
536		}
537
538		// Use only one strategy for validating styles, fallback to default
539		sp := stylePolicy{}
540		if spb.handler != nil {
541			sp.handler = spb.handler
542		} else if len(spb.enum) > 0 {
543			sp.enum = spb.enum
544		} else if spb.regexp != nil {
545			sp.regexp = spb.regexp
546		} else {
547			sp.handler = css.GetDefaultHandler(attr)
548		}
549		spb.p.globalStyles[attr] = append(spb.p.globalStyles[attr], sp)
550	}
551
552	return spb.p
553}
554
555// AllowElements will append HTML elements to the allowlist without applying an
556// attribute policy to those elements (the elements are permitted
557// sans-attributes)
558func (p *Policy) AllowElements(names ...string) *Policy {
559	p.init()
560
561	for _, element := range names {
562		element = strings.ToLower(element)
563
564		if _, ok := p.elsAndAttrs[element]; !ok {
565			p.elsAndAttrs[element] = make(map[string][]attrPolicy)
566		}
567	}
568
569	return p
570}
571
572// AllowElementsMatching will append HTML elements to the allowlist if they
573// match a regexp.
574func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
575	p.init()
576	if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
577		p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
578	}
579	return p
580}
581
582// AllowURLSchemesMatching will append URL schemes to the allowlist if they
583// match a regexp.
584func (p *Policy) AllowURLSchemesMatching(r *regexp.Regexp) *Policy {
585	p.allowURLSchemeRegexps = append(p.allowURLSchemeRegexps, r)
586	return p
587}
588
589// RewriteSrc will rewrite the src attribute of a resource downloading tag
590// (e.g. <img>, <script>, <iframe>) using the provided function.
591//
592// Typically the use case here is that if the content that we're sanitizing
593// is untrusted then the content that is inlined is also untrusted.
594// To prevent serving this content on the same domain as the content appears
595// on it is good practise to proxy the content through an additional domain
596// name as this will force the web client to consider the inline content as
597// third party to the main content, thus providing browser isolation around
598// the inline content.
599//
600// An example of this is a web mail provider like fastmail.com , when an
601// email (user generated content) is displayed, the email text is shown on
602// fastmail.com but the inline attachments and content are rendered from
603// fastmailusercontent.com . This proxying of the external content on a
604// domain that is different to the content domain forces the browser domain
605// security model to kick in. Note that this only applies to differences
606// below the suffix (as per the publix suffix list).
607//
608// This is a good practise to adopt as it prevents the content from being
609// able to set cookies on the main domain and thus prevents the content on
610// the main domain from being able to read those cookies.
611func (p *Policy) RewriteSrc(fn urlRewriter) *Policy {
612	p.srcRewriter = fn
613	return p
614}
615
616// RequireNoFollowOnLinks will result in all a, area, link tags having a
617// rel="nofollow"added to them if one does not already exist
618//
619// Note: This requires p.RequireParseableURLs(true) and will enable it.
620func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {
621
622	p.requireNoFollow = require
623	p.requireParseableURLs = true
624
625	return p
626}
627
628// RequireNoFollowOnFullyQualifiedLinks will result in all a, area, and link
629// tags that point to a non-local destination (i.e. starts with a protocol and
630// has a host) having a rel="nofollow" added to them if one does not already
631// exist
632//
633// Note: This requires p.RequireParseableURLs(true) and will enable it.
634func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {
635
636	p.requireNoFollowFullyQualifiedLinks = require
637	p.requireParseableURLs = true
638
639	return p
640}
641
642// RequireNoReferrerOnLinks will result in all a, area, and link tags having a
643// rel="noreferrrer" added to them if one does not already exist
644//
645// Note: This requires p.RequireParseableURLs(true) and will enable it.
646func (p *Policy) RequireNoReferrerOnLinks(require bool) *Policy {
647
648	p.requireNoReferrer = require
649	p.requireParseableURLs = true
650
651	return p
652}
653
654// RequireNoReferrerOnFullyQualifiedLinks will result in all a, area, and link
655// tags that point to a non-local destination (i.e. starts with a protocol and
656// has a host) having a rel="noreferrer" added to them if one does not already
657// exist
658//
659// Note: This requires p.RequireParseableURLs(true) and will enable it.
660func (p *Policy) RequireNoReferrerOnFullyQualifiedLinks(require bool) *Policy {
661
662	p.requireNoReferrerFullyQualifiedLinks = require
663	p.requireParseableURLs = true
664
665	return p
666}
667
668// RequireCrossOriginAnonymous will result in all audio, img, link, script, and
669// video tags having a crossorigin="anonymous" added to them if one does not
670// already exist
671func (p *Policy) RequireCrossOriginAnonymous(require bool) *Policy {
672
673	p.requireCrossOriginAnonymous = require
674
675	return p
676}
677
678// AddTargetBlankToFullyQualifiedLinks will result in all a, area and link tags
679// that point to a non-local destination (i.e. starts with a protocol and has a
680// host) having a target="_blank" added to them if one does not already exist
681//
682// Note: This requires p.RequireParseableURLs(true) and will enable it.
683func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy {
684
685	p.addTargetBlankToFullyQualifiedLinks = require
686	p.requireParseableURLs = true
687
688	return p
689}
690
691// RequireParseableURLs will result in all URLs requiring that they be parseable
692// by "net/url" url.Parse()
693// This applies to:
694// - a.href
695// - area.href
696// - blockquote.cite
697// - img.src
698// - link.href
699// - script.src
700func (p *Policy) RequireParseableURLs(require bool) *Policy {
701
702	p.requireParseableURLs = require
703
704	return p
705}
706
707// AllowRelativeURLs enables RequireParseableURLs and then permits URLs that
708// are parseable, have no schema information and url.IsAbs() returns false
709// This permits local URLs
710func (p *Policy) AllowRelativeURLs(require bool) *Policy {
711
712	p.RequireParseableURLs(true)
713	p.allowRelativeURLs = require
714
715	return p
716}
717
718// AllowURLSchemes will append URL schemes to the allowlist
719// Example: p.AllowURLSchemes("mailto", "http", "https")
720func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
721	p.init()
722
723	p.RequireParseableURLs(true)
724
725	for _, scheme := range schemes {
726		scheme = strings.ToLower(scheme)
727
728		// Allow all URLs with matching scheme.
729		p.allowURLSchemes[scheme] = nil
730	}
731
732	return p
733}
734
735// AllowURLSchemeWithCustomPolicy will append URL schemes with
736// a custom URL policy to the allowlist.
737// Only the URLs with matching schema and urlPolicy(url)
738// returning true will be allowed.
739func (p *Policy) AllowURLSchemeWithCustomPolicy(
740	scheme string,
741	urlPolicy func(url *url.URL) (allowUrl bool),
742) *Policy {
743
744	p.init()
745
746	p.RequireParseableURLs(true)
747
748	scheme = strings.ToLower(scheme)
749
750	p.allowURLSchemes[scheme] = append(p.allowURLSchemes[scheme], urlPolicy)
751
752	return p
753}
754
755// RequireSandboxOnIFrame will result in all iframe tags having a sandbox="" tag
756// Any sandbox values not specified here will be filtered from the generated HTML
757func (p *Policy) RequireSandboxOnIFrame(vals ...SandboxValue) {
758	p.requireSandboxOnIFrame = make(map[string]bool)
759
760	for _, val := range vals {
761		switch SandboxValue(val) {
762		case SandboxAllowDownloads:
763			p.requireSandboxOnIFrame["allow-downloads"] = true
764
765		case SandboxAllowDownloadsWithoutUserActivation:
766			p.requireSandboxOnIFrame["allow-downloads-without-user-activation"] = true
767
768		case SandboxAllowForms:
769			p.requireSandboxOnIFrame["allow-forms"] = true
770
771		case SandboxAllowModals:
772			p.requireSandboxOnIFrame["allow-modals"] = true
773
774		case SandboxAllowOrientationLock:
775			p.requireSandboxOnIFrame["allow-orientation-lock"] = true
776
777		case SandboxAllowPointerLock:
778			p.requireSandboxOnIFrame["allow-pointer-lock"] = true
779
780		case SandboxAllowPopups:
781			p.requireSandboxOnIFrame["allow-popups"] = true
782
783		case SandboxAllowPopupsToEscapeSandbox:
784			p.requireSandboxOnIFrame["allow-popups-to-escape-sandbox"] = true
785
786		case SandboxAllowPresentation:
787			p.requireSandboxOnIFrame["allow-presentation"] = true
788
789		case SandboxAllowSameOrigin:
790			p.requireSandboxOnIFrame["allow-same-origin"] = true
791
792		case SandboxAllowScripts:
793			p.requireSandboxOnIFrame["allow-scripts"] = true
794
795		case SandboxAllowStorageAccessByUserActivation:
796			p.requireSandboxOnIFrame["allow-storage-access-by-user-activation"] = true
797
798		case SandboxAllowTopNavigation:
799			p.requireSandboxOnIFrame["allow-top-navigation"] = true
800
801		case SandboxAllowTopNavigationByUserActivation:
802			p.requireSandboxOnIFrame["allow-top-navigation-by-user-activation"] = true
803		}
804	}
805}
806
807// AddSpaceWhenStrippingTag states whether to add a single space " " when
808// removing tags that are not allowed by the policy.
809//
810// This is useful if you expect to strip tags in dense markup and may lose the
811// value of whitespace.
812//
813// For example: "<p>Hello</p><p>World</p>"" would be sanitized to "HelloWorld"
814// with the default value of false, but you may wish to sanitize this to
815// " Hello  World " by setting AddSpaceWhenStrippingTag to true as this would
816// retain the intent of the text.
817func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy {
818
819	p.addSpaces = allow
820
821	return p
822}
823
824// SkipElementsContent adds the HTML elements whose tags is needed to be removed
825// with its content.
826func (p *Policy) SkipElementsContent(names ...string) *Policy {
827
828	p.init()
829
830	for _, element := range names {
831		element = strings.ToLower(element)
832
833		if _, ok := p.setOfElementsToSkipContent[element]; !ok {
834			p.setOfElementsToSkipContent[element] = struct{}{}
835		}
836	}
837
838	return p
839}
840
841// AllowElementsContent marks the HTML elements whose content should be
842// retained after removing the tag.
843func (p *Policy) AllowElementsContent(names ...string) *Policy {
844
845	p.init()
846
847	for _, element := range names {
848		delete(p.setOfElementsToSkipContent, strings.ToLower(element))
849	}
850
851	return p
852}
853
854// AllowUnsafe permits fundamentally unsafe elements.
855//
856// If false (default) then elements such as `style` and `script` will not be
857// permitted even if declared in a policy. These elements when combined with
858// untrusted input cannot be safely handled by bluemonday at this point in
859// time.
860//
861// If true then `style` and `script` would be permitted by bluemonday if a
862// policy declares them. However this is not recommended under any circumstance
863// and can lead to XSS being rendered thus defeating the purpose of using a
864// HTML sanitizer.
865func (p *Policy) AllowUnsafe(allowUnsafe bool) *Policy {
866	p.init()
867	p.allowUnsafe = allowUnsafe
868	return p
869}
870
871// addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid
872// without any attributes to an internal map.
873// i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr
874// is mandatory
875func (p *Policy) addDefaultElementsWithoutAttrs() {
876	p.init()
877
878	p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{}
879	p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{}
880	p.setOfElementsAllowedWithoutAttrs["address"] = struct{}{}
881	p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{}
882	p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{}
883	p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{}
884	p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{}
885	p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{}
886	p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{}
887	p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{}
888	p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{}
889	p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{}
890	p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{}
891	p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{}
892	p.setOfElementsAllowedWithoutAttrs["center"] = struct{}{}
893	p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{}
894	p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{}
895	p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{}
896	p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{}
897	p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{}
898	p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{}
899	p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{}
900	p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{}
901	p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{}
902	p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{}
903	p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{}
904	p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{}
905	p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{}
906	p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{}
907	p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{}
908	p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{}
909	p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{}
910	p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{}
911	p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{}
912	p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{}
913	p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{}
914	p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{}
915	p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{}
916	p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{}
917	p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{}
918	p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{}
919	p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{}
920	p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{}
921	p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{}
922	p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{}
923	p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{}
924	p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{}
925	p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{}
926	p.setOfElementsAllowedWithoutAttrs["marquee"] = struct{}{}
927	p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{}
928	p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{}
929	p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{}
930	p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{}
931	p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{}
932	p.setOfElementsAllowedWithoutAttrs["picture"] = struct{}{}
933	p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{}
934	p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{}
935	p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{}
936	p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{}
937	p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{}
938	p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{}
939	p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{}
940	p.setOfElementsAllowedWithoutAttrs["script"] = struct{}{}
941	p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{}
942	p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{}
943	p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{}
944	p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{}
945	p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{}
946	p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{}
947	p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{}
948	p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{}
949	p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{}
950	p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{}
951	p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{}
952	p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{}
953	p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{}
954	p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{}
955	p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{}
956	p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{}
957	p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{}
958	p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{}
959	p.setOfElementsAllowedWithoutAttrs["title"] = struct{}{}
960	p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{}
961	p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{}
962	p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{}
963	p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{}
964	p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{}
965	p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{}
966	p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{}
967	p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{}
968
969}
970
971// addDefaultSkipElementContent adds the HTML elements that we should skip
972// rendering the character content of, if the element itself is not allowed.
973// This is all character data that the end user would not normally see.
974// i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or
975// anything else until we encounter the closing </script> tag.
976func (p *Policy) addDefaultSkipElementContent() {
977	p.init()
978
979	p.setOfElementsToSkipContent["frame"] = struct{}{}
980	p.setOfElementsToSkipContent["frameset"] = struct{}{}
981	p.setOfElementsToSkipContent["iframe"] = struct{}{}
982	p.setOfElementsToSkipContent["noembed"] = struct{}{}
983	p.setOfElementsToSkipContent["noframes"] = struct{}{}
984	p.setOfElementsToSkipContent["noscript"] = struct{}{}
985	p.setOfElementsToSkipContent["nostyle"] = struct{}{}
986	p.setOfElementsToSkipContent["object"] = struct{}{}
987	p.setOfElementsToSkipContent["script"] = struct{}{}
988	p.setOfElementsToSkipContent["style"] = struct{}{}
989	p.setOfElementsToSkipContent["title"] = struct{}{}
990}