1package web
2
3import (
4 "strings"
5 "testing"
6
7 "github.com/matryer/is"
8)
9
10func TestRenderMarkdownWithURLRewriting(t *testing.T) {
11 is := is.New(t)
12
13 ctx := &ReadmeContext{
14 RepoName: "test-repo",
15 CommitHash: "abc123def456",
16 ReadmePath: "README.md",
17 }
18
19 // Test relative image in root README
20 md := []byte(``)
21 html, err := renderMarkdown(md, ctx)
22 is.NoErr(err)
23 is.True(string(html) != "")
24 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/image.png?raw=1"))
25
26 // Test relative link to markdown file
27 md = []byte(`[docs](docs/README.md)`)
28 html, err = renderMarkdown(md, ctx)
29 is.NoErr(err)
30 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/README.md"))
31
32 // Test relative link to non-markdown file
33 md = []byte(`[download](file.tar.gz)`)
34 html, err = renderMarkdown(md, ctx)
35 is.NoErr(err)
36 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.tar.gz?raw=1"))
37
38 // Test relative link to directory
39 md = []byte(`[folder](docs)`)
40 html, err = renderMarkdown(md, ctx)
41 is.NoErr(err)
42 is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs"))
43
44 // Test absolute https URL unchanged
45 md = []byte(``)
46 html, err = renderMarkdown(md, ctx)
47 is.NoErr(err)
48 is.True(strings.Contains(string(html), "https://example.com/image.png"))
49
50 // Test anchor-only link unchanged
51 md = []byte(`[section](#heading)`)
52 html, err = renderMarkdown(md, ctx)
53 is.NoErr(err)
54 is.True(strings.Contains(string(html), "#heading"))
55
56 // Test absolute path unchanged (starts with /)
57 md = []byte(`[root](/other-repo/file)`)
58 html, err = renderMarkdown(md, ctx)
59 is.NoErr(err)
60 is.True(strings.Contains(string(html), "/other-repo/file"))
61}
62
63func TestRenderMarkdownNestedReadme(t *testing.T) {
64 is := is.New(t)
65
66 ctx := &ReadmeContext{
67 RepoName: "test-repo",
68 CommitHash: "abc123def456",
69 ReadmePath: "docs/README.md",
70 }
71
72 // Test relative image in nested README
73 md := []byte(``)
74 html, err := renderMarkdown(md, ctx)
75 is.NoErr(err)
76 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/image.png?raw=1"))
77
78 // Test going up with ../
79 md = []byte(``)
80 html, err = renderMarkdown(md, ctx)
81 is.NoErr(err)
82 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/root.png?raw=1"))
83
84 // Test deep nesting
85 md = []byte(`[deep](subdir/file.md)`)
86 html, err = renderMarkdown(md, ctx)
87 is.NoErr(err)
88 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/subdir/file.md"))
89}
90
91func TestRenderMarkdownTraversalAttempts(t *testing.T) {
92 is := is.New(t)
93
94 ctx := &ReadmeContext{
95 RepoName: "test-repo",
96 CommitHash: "abc123def456",
97 ReadmePath: "README.md",
98 }
99
100 // Test ../ traversal outside repo - should be blocked (empty src/href)
101 md := []byte(``)
102 html, err := renderMarkdown(md, ctx)
103 is.NoErr(err)
104 htmlStr := string(html)
105 // Should not contain the path in src attribute
106 is.True(!elementAttrContains(htmlStr, "img", "src", "etc/passwd"))
107
108 // Test absolute path traversal
109 md = []byte(``)
110 html, err = renderMarkdown(md, ctx)
111 is.NoErr(err)
112 htmlStr = string(html)
113 // Absolute paths starting with / are left unchanged by rewriter
114 is.True(elementAttrContains(htmlStr, "img", "src", "/../../../etc/passwd"))
115
116 // Test that fragments don't leak when path traversal is blocked
117 md = []byte(``)
118 html, err = renderMarkdown(md, ctx)
119 is.NoErr(err)
120 htmlStr = string(html)
121 // Should not contain the path or fragment in src attribute
122 is.True(!elementAttrContains(htmlStr, "img", "src", "secret.png") && !elementAttrContains(htmlStr, "img", "src", "#anchor"))
123}
124
125func TestRenderMarkdownURLDetails(t *testing.T) {
126 is := is.New(t)
127
128 ctx := &ReadmeContext{
129 RepoName: "test-repo",
130 CommitHash: "abc123def456",
131 ReadmePath: "README.md",
132 }
133
134 // Test fragments preserved for links
135 md := []byte(`[section](docs/guide.md#installation)`)
136 html, err := renderMarkdown(md, ctx)
137 is.NoErr(err)
138 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/guide.md#installation"))
139
140 // Test fragments preserved for images
141 md = []byte(``)
142 html, err = renderMarkdown(md, ctx)
143 is.NoErr(err)
144 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/arch.png?raw=1#diagram"))
145
146 // Test directories link to /tree without trailing slash
147 md = []byte(`[docs](docs/)`)
148 html, err = renderMarkdown(md, ctx)
149 is.NoErr(err)
150 is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs"))
151 is.True(!strings.Contains(string(html), "docs/\"")) // no trailing slash in href
152
153 // Test non-md files get ?raw=1
154 md = []byte(`[archive](file.zip)`)
155 html, err = renderMarkdown(md, ctx)
156 is.NoErr(err)
157 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.zip?raw=1"))
158
159 md = []byte(`[pdf](docs/manual.pdf)`)
160 html, err = renderMarkdown(md, ctx)
161 is.NoErr(err)
162 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/manual.pdf?raw=1"))
163
164 // Test uppercase .MD/.MARKDOWN files are treated as non-markdown
165 // (document current behavior - filepath.Ext is case-sensitive)
166 md = []byte(`[upper](README.MD)`)
167 html, err = renderMarkdown(md, ctx)
168 is.NoErr(err)
169 // .MD is not .md, so treated as regular file requiring ?raw=1
170 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MD?raw=1"))
171
172 md = []byte(`[markdown](README.MARKDOWN)`)
173 html, err = renderMarkdown(md, ctx)
174 is.NoErr(err)
175 // .MARKDOWN is not .markdown, so treated as regular file requiring ?raw=1
176 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MARKDOWN?raw=1"))
177}
178
179func TestRenderMarkdownSpecialCharacters(t *testing.T) {
180 is := is.New(t)
181
182 ctx := &ReadmeContext{
183 RepoName: "test-repo",
184 CommitHash: "abc123def456",
185 ReadmePath: "README.md",
186 }
187
188 // Test spaces in filename - bluemonday may strip invalid URLs
189 md := []byte(``)
190 html, err := renderMarkdown(md, ctx)
191 is.NoErr(err)
192 // Should contain the repo path
193 is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/"))
194
195 // Test unicode
196 md = []byte(``)
197 html, err = renderMarkdown(md, ctx)
198 is.NoErr(err)
199 is.True(string(html) != "")
200}
201
202func TestRenderMarkdownDangerousSchemes(t *testing.T) {
203 is := is.New(t)
204
205 ctx := &ReadmeContext{
206 RepoName: "test-repo",
207 CommitHash: "abc123def456",
208 ReadmePath: "README.md",
209 }
210
211 // Test javascript: scheme - should be stripped by sanitizer
212 md := []byte(`[xss](javascript:alert('xss'))`)
213 html, err := renderMarkdown(md, ctx)
214 is.NoErr(err)
215 is.True(!strings.Contains(string(html), "javascript:"))
216
217 // Test data: scheme - should be stripped
218 md = []byte(``)
219 html, err = renderMarkdown(md, ctx)
220 is.NoErr(err)
221 is.True(!strings.Contains(string(html), "data:image"))
222
223 // Test http: scheme - should be stripped (only https allowed)
224 md = []byte(`[http](http://example.com)`)
225 html, err = renderMarkdown(md, ctx)
226 is.NoErr(err)
227 // Sanitizer should strip non-https schemes
228 is.True(!strings.Contains(string(html), "http://example.com") || !strings.Contains(string(html), "href="))
229}
230
231func TestRenderMarkdownWithoutContext(t *testing.T) {
232 is := is.New(t)
233
234 // Test that rendering without context works (no rewriting)
235 md := []byte(``)
236 html, err := renderMarkdown(md, nil)
237 is.NoErr(err)
238 is.True(string(html) != "")
239 // Without context, relative URLs stay relative
240 is.True(strings.Contains(string(html), "image.png"))
241}