markdown_rewriter_test.go

  1package web
  2
  3import (
  4	"strings"
  5	"testing"
  6
  7	"github.com/matryer/is"
  8)
  9
 10func TestRenderMarkdownWithURLRewriting(t *testing.T) {
 11	is := is.New(t)
 12
 13	ctx := &ReadmeContext{
 14		RepoName:   "test-repo",
 15		CommitHash: "abc123def456",
 16		ReadmePath: "README.md",
 17	}
 18
 19	// Test relative image in root README
 20	md := []byte(`![image](image.png)`)
 21	html, err := renderMarkdown(md, ctx)
 22	is.NoErr(err)
 23	is.True(string(html) != "")
 24	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/image.png?raw=1"))
 25
 26	// Test relative link to markdown file
 27	md = []byte(`[docs](docs/README.md)`)
 28	html, err = renderMarkdown(md, ctx)
 29	is.NoErr(err)
 30	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/README.md"))
 31
 32	// Test relative link to non-markdown file
 33	md = []byte(`[download](file.tar.gz)`)
 34	html, err = renderMarkdown(md, ctx)
 35	is.NoErr(err)
 36	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.tar.gz?raw=1"))
 37
 38	// Test relative link to directory
 39	md = []byte(`[folder](docs)`)
 40	html, err = renderMarkdown(md, ctx)
 41	is.NoErr(err)
 42	is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs"))
 43
 44	// Test absolute https URL unchanged
 45	md = []byte(`![remote](https://example.com/image.png)`)
 46	html, err = renderMarkdown(md, ctx)
 47	is.NoErr(err)
 48	is.True(strings.Contains(string(html), "https://example.com/image.png"))
 49
 50	// Test anchor-only link unchanged
 51	md = []byte(`[section](#heading)`)
 52	html, err = renderMarkdown(md, ctx)
 53	is.NoErr(err)
 54	is.True(strings.Contains(string(html), "#heading"))
 55
 56	// Test absolute path unchanged (starts with /)
 57	md = []byte(`[root](/other-repo/file)`)
 58	html, err = renderMarkdown(md, ctx)
 59	is.NoErr(err)
 60	is.True(strings.Contains(string(html), "/other-repo/file"))
 61}
 62
 63func TestRenderMarkdownNestedReadme(t *testing.T) {
 64	is := is.New(t)
 65
 66	ctx := &ReadmeContext{
 67		RepoName:   "test-repo",
 68		CommitHash: "abc123def456",
 69		ReadmePath: "docs/README.md",
 70	}
 71
 72	// Test relative image in nested README
 73	md := []byte(`![image](image.png)`)
 74	html, err := renderMarkdown(md, ctx)
 75	is.NoErr(err)
 76	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/image.png?raw=1"))
 77
 78	// Test going up with ../
 79	md = []byte(`![up](../root.png)`)
 80	html, err = renderMarkdown(md, ctx)
 81	is.NoErr(err)
 82	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/root.png?raw=1"))
 83
 84	// Test deep nesting
 85	md = []byte(`[deep](subdir/file.md)`)
 86	html, err = renderMarkdown(md, ctx)
 87	is.NoErr(err)
 88	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/subdir/file.md"))
 89}
 90
 91func TestRenderMarkdownTraversalAttempts(t *testing.T) {
 92	is := is.New(t)
 93
 94	ctx := &ReadmeContext{
 95		RepoName:   "test-repo",
 96		CommitHash: "abc123def456",
 97		ReadmePath: "README.md",
 98	}
 99
100	// Test ../ traversal outside repo - should be blocked (empty src/href)
101	md := []byte(`![escape](../../etc/passwd)`)
102	html, err := renderMarkdown(md, ctx)
103	is.NoErr(err)
104	htmlStr := string(html)
105	// Should not contain the path in src attribute
106	is.True(!elementAttrContains(htmlStr, "img", "src", "etc/passwd"))
107
108	// Test absolute path traversal
109	md = []byte(`![abs](/../../../etc/passwd)`)
110	html, err = renderMarkdown(md, ctx)
111	is.NoErr(err)
112	htmlStr = string(html)
113	// Absolute paths starting with / are left unchanged by rewriter
114	is.True(elementAttrContains(htmlStr, "img", "src", "/../../../etc/passwd"))
115
116	// Test that fragments don't leak when path traversal is blocked
117	md = []byte(`![escape](../../secret.png#anchor)`)
118	html, err = renderMarkdown(md, ctx)
119	is.NoErr(err)
120	htmlStr = string(html)
121	// Should not contain the path or fragment in src attribute
122	is.True(!elementAttrContains(htmlStr, "img", "src", "secret.png") && !elementAttrContains(htmlStr, "img", "src", "#anchor"))
123}
124
125func TestRenderMarkdownURLDetails(t *testing.T) {
126	is := is.New(t)
127
128	ctx := &ReadmeContext{
129		RepoName:   "test-repo",
130		CommitHash: "abc123def456",
131		ReadmePath: "README.md",
132	}
133
134	// Test fragments preserved for links
135	md := []byte(`[section](docs/guide.md#installation)`)
136	html, err := renderMarkdown(md, ctx)
137	is.NoErr(err)
138	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/guide.md#installation"))
139
140	// Test fragments preserved for images
141	md = []byte(`![diagram](docs/arch.png#diagram)`)
142	html, err = renderMarkdown(md, ctx)
143	is.NoErr(err)
144	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/arch.png?raw=1#diagram"))
145
146	// Test directories link to /tree without trailing slash
147	md = []byte(`[docs](docs/)`)
148	html, err = renderMarkdown(md, ctx)
149	is.NoErr(err)
150	is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs"))
151	is.True(!strings.Contains(string(html), "docs/\"")) // no trailing slash in href
152
153	// Test non-md files get ?raw=1
154	md = []byte(`[archive](file.zip)`)
155	html, err = renderMarkdown(md, ctx)
156	is.NoErr(err)
157	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.zip?raw=1"))
158
159	md = []byte(`[pdf](docs/manual.pdf)`)
160	html, err = renderMarkdown(md, ctx)
161	is.NoErr(err)
162	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/manual.pdf?raw=1"))
163
164	// Test uppercase .MD/.MARKDOWN files are treated as non-markdown
165	// (document current behavior - filepath.Ext is case-sensitive)
166	md = []byte(`[upper](README.MD)`)
167	html, err = renderMarkdown(md, ctx)
168	is.NoErr(err)
169	// .MD is not .md, so treated as regular file requiring ?raw=1
170	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MD?raw=1"))
171
172	md = []byte(`[markdown](README.MARKDOWN)`)
173	html, err = renderMarkdown(md, ctx)
174	is.NoErr(err)
175	// .MARKDOWN is not .markdown, so treated as regular file requiring ?raw=1
176	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MARKDOWN?raw=1"))
177}
178
179func TestRenderMarkdownSpecialCharacters(t *testing.T) {
180	is := is.New(t)
181
182	ctx := &ReadmeContext{
183		RepoName:   "test-repo",
184		CommitHash: "abc123def456",
185		ReadmePath: "README.md",
186	}
187
188	// Test spaces in filename - bluemonday may strip invalid URLs
189	md := []byte(`![spaces](my_image.png)`)
190	html, err := renderMarkdown(md, ctx)
191	is.NoErr(err)
192	// Should contain the repo path
193	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/"))
194
195	// Test unicode
196	md = []byte(`![unicode](ๆ–‡ไปถ.png)`)
197	html, err = renderMarkdown(md, ctx)
198	is.NoErr(err)
199	is.True(string(html) != "")
200}
201
202func TestRenderMarkdownDangerousSchemes(t *testing.T) {
203	is := is.New(t)
204
205	ctx := &ReadmeContext{
206		RepoName:   "test-repo",
207		CommitHash: "abc123def456",
208		ReadmePath: "README.md",
209	}
210
211	// Test javascript: scheme - should be stripped by sanitizer
212	md := []byte(`[xss](javascript:alert('xss'))`)
213	html, err := renderMarkdown(md, ctx)
214	is.NoErr(err)
215	is.True(!strings.Contains(string(html), "javascript:"))
216
217	// Test data: scheme - should be stripped
218	md = []byte(`![data](data:image/png;base64,abc123)`)
219	html, err = renderMarkdown(md, ctx)
220	is.NoErr(err)
221	is.True(!strings.Contains(string(html), "data:image"))
222
223	// Test http: scheme - should be stripped (only https allowed)
224	md = []byte(`[http](http://example.com)`)
225	html, err = renderMarkdown(md, ctx)
226	is.NoErr(err)
227	// Sanitizer should strip non-https schemes
228	is.True(!strings.Contains(string(html), "http://example.com") || !strings.Contains(string(html), "href="))
229}
230
231func TestRenderMarkdownWithoutContext(t *testing.T) {
232	is := is.New(t)
233
234	// Test that rendering without context works (no rewriting)
235	md := []byte(`![image](image.png)`)
236	html, err := renderMarkdown(md, nil)
237	is.NoErr(err)
238	is.True(string(html) != "")
239	// Without context, relative URLs stay relative
240	is.True(strings.Contains(string(html), "image.png"))
241}