1package htmlsanitizer
2
3import (
4 "strings"
5 "testing"
6)
7
8func TestLibSanitizerRemovesUnsafeHTML(t *testing.T) {
9 sanitizer := NewLibSanitizer()
10 input := []byte(`
11 <p onclick="alert(1)">Hello</p>
12 <script>alert(1)</script>
13 <style>body { background-image: url("javascript:alert(1)") }</style>
14 <a href="javascript:alert(1)">bad link</a>
15 <a href="https://example.com">good link</a>
16 <img src="file:///tmp/bad.png" alt="bad image">
17 <img src="cid:test@example.com" alt="cid image">
18 <img src="data:text/html,<script>alert(1)</script>" alt="bad data">
19 <img src="data:image/png;base64,iVBORw0KGgo=" alt="data image">
20 `)
21
22 got := string(sanitizer.SanitizeBytes(input))
23
24 for _, forbidden := range []string{
25 "onclick",
26 "<script",
27 "<style",
28 "javascript:",
29 "file:///tmp/bad.png",
30 "data:text/html",
31 } {
32 if strings.Contains(got, forbidden) {
33 t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
34 }
35 }
36
37 for _, want := range []string{
38 `href="https://example.com"`,
39 `src="cid:test@example.com"`,
40 `src="data:image/png;base64,iVBORw0KGgo="`,
41 } {
42 if !strings.Contains(got, want) {
43 t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
44 }
45 }
46}
47
48func TestLibSanitizerDoesNotAllowDataOrCIDLinks(t *testing.T) {
49 sanitizer := NewLibSanitizer()
50 input := []byte(`
51 <a href="data:image/png;base64,iVBORw0KGgo=">data link</a>
52 <a href="cid:test@example.com">cid link</a>
53 <a href="ftp://example.com/file.txt">ftp link</a>
54 <a href="file:///tmp/bad.txt">file link</a>
55 <a href="vbscript:msgbox(1)">vbscript link</a>
56 <a href="//example.com/protocol-relative">protocol relative link</a>
57 <a href="/relative/path">relative link</a>
58 <a href=":not-a-url">broken link</a>
59 `)
60
61 got := string(sanitizer.SanitizeBytes(input))
62
63 for _, forbidden := range []string{
64 "href=\"data:image",
65 "href=\"cid:",
66 "href=\"ftp:",
67 "href=\"file:",
68 "href=\"vbscript:",
69 "href=\"//example.com",
70 "href=\"/relative",
71 "href=\":not-a-url",
72 } {
73 if strings.Contains(got, forbidden) {
74 t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
75 }
76 }
77
78 for _, wantText := range []string{
79 "data link",
80 "cid link",
81 "ftp link",
82 "file link",
83 "vbscript link",
84 "protocol relative link",
85 "relative link",
86 "broken link",
87 } {
88 if !strings.Contains(got, wantText) {
89 t.Fatalf("sanitized HTML should keep link text %q:\n%s", wantText, got)
90 }
91 }
92}
93
94func TestLibSanitizerAllowsSafeLinks(t *testing.T) {
95 sanitizer := NewLibSanitizer()
96 input := []byte(`
97 <a href="http://example.com/path?x=1">http link</a>
98 <a href="https://example.com/path?x=1">https link</a>
99 <a href="HTTPS://example.com/path?x=1">uppercase https link</a>
100 <a href="mailto:security@example.com">mailto link</a>
101 <a href="MAILTO:security@example.com">uppercase mailto link</a>
102 <a href="tel:+15551234567">tel link</a>
103 `)
104
105 got := string(sanitizer.SanitizeBytes(input))
106
107 for _, want := range []string{
108 `href="http://example.com/path?x=1"`,
109 `href="https://example.com/path?x=1"`,
110 `href="https://example.com/path?x=1"`,
111 `href="mailto:security@example.com"`,
112 `href="mailto:security@example.com"`,
113 `href="tel:+15551234567"`,
114 } {
115 if !strings.Contains(got, want) {
116 t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
117 }
118 }
119}
120
121func TestLibSanitizerFiltersImageSources(t *testing.T) {
122 sanitizer := NewLibSanitizer()
123 input := []byte(`
124 <img src="http://example.com/image.png" alt="http image">
125 <img src="https://example.com/image.png" alt="https image">
126 <img src="cid:test@example.com" alt="cid image">
127 <img src="data:image/png;base64,iVBORw0KGgo=" alt="data image">
128 <img src="javascript:alert(1)" alt="javascript image">
129 <img src="file:///tmp/bad.png" alt="file image">
130 <img src="data:text/html,<script>alert(1)</script>" alt="html data image">
131 <img src="/relative.png" alt="relative image">
132 `)
133
134 got := string(sanitizer.SanitizeBytes(input))
135
136 for _, want := range []string{
137 `src="http://example.com/image.png"`,
138 `src="https://example.com/image.png"`,
139 `src="cid:test@example.com"`,
140 `src="data:image/png;base64,iVBORw0KGgo="`,
141 } {
142 if !strings.Contains(got, want) {
143 t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
144 }
145 }
146
147 for _, forbidden := range []string{
148 "src=\"javascript:",
149 "src=\"file:",
150 "src=\"data:text/html",
151 "src=\"/relative.png",
152 } {
153 if strings.Contains(got, forbidden) {
154 t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
155 }
156 }
157}
158
159func TestLibSanitizerRemovesUnknownElementsButKeepsText(t *testing.T) {
160 sanitizer := NewLibSanitizer()
161 input := []byte(`
162 <form action="https://example.com"><input name="token" value="secret">form text</form>
163 <iframe src="https://example.com">iframe text</iframe>
164 <object data="https://example.com">object text</object>
165 <p>safe text</p>
166 `)
167
168 got := string(sanitizer.SanitizeBytes(input))
169
170 for _, forbidden := range []string{
171 "<form",
172 "<input",
173 "<iframe",
174 "<object",
175 "action=",
176 "value=\"secret\"",
177 "src=\"https://example.com\"",
178 "data=\"https://example.com\"",
179 } {
180 if strings.Contains(got, forbidden) {
181 t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
182 }
183 }
184
185 for _, wantText := range []string{
186 "form text",
187 "safe text",
188 } {
189 if !strings.Contains(got, wantText) {
190 t.Fatalf("sanitized HTML should keep text %q:\n%s", wantText, got)
191 }
192 }
193}
194
195func TestLibSanitizerRemovesUnsafeGlobalAttributes(t *testing.T) {
196 sanitizer := NewLibSanitizer()
197 input := []byte(`
198 <p style="color: red" class="promo" data-secret="token" id="message">styled text</p>
199 <blockquote cite="https://example.com" onclick="alert(1)">quote text</blockquote>
200 `)
201
202 got := string(sanitizer.SanitizeBytes(input))
203
204 for _, forbidden := range []string{
205 "style=",
206 "class=",
207 "data-secret",
208 "id=",
209 "onclick=",
210 } {
211 if strings.Contains(got, forbidden) {
212 t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
213 }
214 }
215
216 for _, want := range []string{
217 "styled text",
218 `cite="https://example.com"`,
219 "quote text",
220 } {
221 if !strings.Contains(got, want) {
222 t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
223 }
224 }
225}
226
227func TestLibSanitizerRejectsCIDWithQueryOrFragment(t *testing.T) {
228 sanitizer := NewLibSanitizer()
229 input := []byte(`
230 <img src="cid:test@example.com?x=1" alt="cid query">
231 <img src="cid:test@example.com#frag" alt="cid fragment">
232 <img src="cid:test@example.com" alt="cid ok">
233 `)
234
235 got := string(sanitizer.SanitizeBytes(input))
236
237 for _, forbidden := range []string{
238 `src="cid:test@example.com?x=1"`,
239 `src="cid:test@example.com#frag"`,
240 } {
241 if strings.Contains(got, forbidden) {
242 t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
243 }
244 }
245
246 if !strings.Contains(got, `src="cid:test@example.com"`) {
247 t.Fatalf("sanitized HTML should keep clean cid source:\n%s", got)
248 }
249}
250
251func TestLibSanitizerRejectsInvalidDataImages(t *testing.T) {
252 sanitizer := NewLibSanitizer()
253 input := []byte(`
254 <img src="data:image/png;base64,not base64!" alt="invalid base64">
255 <img src="data:image/svg+xml;base64,PHN2Zy8+" alt="svg data">
256 <img src="data:image/png;base64,iVBORw0KGgo=" alt="png data">
257 <img src="data:image/png;base64,iVBORw0KGgo" alt="raw png data">
258 `)
259
260 got := string(sanitizer.SanitizeBytes(input))
261
262 for _, forbidden := range []string{
263 "not base64",
264 "data:image/svg+xml",
265 } {
266 if strings.Contains(got, forbidden) {
267 t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
268 }
269 }
270
271 if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo="`) {
272 t.Fatalf("sanitized HTML should keep valid png data URI:\n%s", got)
273 }
274 if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo"`) {
275 t.Fatalf("sanitized HTML should keep valid unpadded png data URI:\n%s", got)
276 }
277}