diff --git a/pkg/web/markdown_rewriter.go b/pkg/web/markdown_rewriter.go
new file mode 100644
index 0000000000000000000000000000000000000000..2ee5a9bb7353be89163b97b9c71290799c722efd
--- /dev/null
+++ b/pkg/web/markdown_rewriter.go
@@ -0,0 +1,147 @@
+package web
+
+import (
+ "net/url"
+ "path"
+ "strings"
+
+ "github.com/yuin/goldmark/ast"
+ "github.com/yuin/goldmark/parser"
+ "github.com/yuin/goldmark/text"
+)
+
+// ReadmeContext describes the README being rendered: which repository and
+// commit it belongs to and where it lives, so its relative URLs can be rewritten.
+type ReadmeContext struct {
+	RepoName, CommitHash string // repository name (e.g. "myrepo"); full commit hash URLs are pinned to
+	ReadmePath           string // path of the README file (e.g. "docs/README.md")
+}
+
+// urlRewriter is a Goldmark AST transformer that rewrites the relative link and
+// image URLs of a markdown document. ctx identifies the README being rendered
+// and supplies the repository name and commit used in the rewritten URLs.
+type urlRewriter struct{ ctx ReadmeContext }
+
+// newURLRewriter returns a urlRewriter that rewrites destinations for the
+// README described by ctx. ctx is copied into the rewriter, so later changes
+// to the caller's value are not seen by it.
+func newURLRewriter(ctx ReadmeContext) *urlRewriter { return &urlRewriter{ctx: ctx} }
+
+// Transform rewrites the destination of every link and image in the document
+// through rewriteURL, resolving relative URLs against the README's directory.
+// Its signature matches goldmark's parser.ASTTransformer; reader and pc are
+// not used.
+func (r *urlRewriter) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
+	// path.Dir yields "." for a README at the repository root; that is
+	// normalised to "" before being handed to rewriteURL.
+	baseDir := path.Dir(r.ctx.ReadmePath)
+	if baseDir == "." {
+		baseDir = ""
+	}
+
+	visit := func(n ast.Node, entering bool) (ast.WalkStatus, error) {
+		if entering {
+			if link, ok := n.(*ast.Link); ok {
+				link.Destination = r.rewriteURL(link.Destination, baseDir, false)
+			} else if img, ok := n.(*ast.Image); ok {
+				img.Destination = r.rewriteURL(img.Destination, baseDir, true)
+			}
+		}
+		return ast.WalkContinue, nil
+	}
+	_ = ast.Walk(node, visit) // visit never returns an error
+}
+
+// rewriteURL maps one link or image destination found in a README to the URL
+// that should appear in the rendered page.
+//
+// dest is the raw destination, readmeDir is the README's directory relative to
+// the repository root ("" for the root) and isImage selects image handling.
+//
+// Destinations that are empty, carry a scheme (https:, mailto:, ...), start
+// with "#" or start with "/" are returned unchanged. Destinations that cannot
+// be parsed, are protocol-relative (//host/path) or resolve to a location above
+// the repository root are replaced by an empty destination (fail closed).
+// Everything else is resolved against readmeDir and pinned to the commit:
+//
+//   - images, and links to non-markdown files: /{repo}/blob/{commit}/{path}?raw=1
+//   - links to .md and .markdown files: /{repo}/blob/{commit}/{path}
+//   - links to directories: /{repo}/tree/{commit}/{path}
+//
+// A link counts as a directory when its resolved path has no extension or when
+// the reference is written as one (see isDirReference). Extension matching is
+// case-sensitive, and nothing is checked against the repository's contents.
+// The path and fragment are re-escaped because url.Parse decodes them; a query
+// string on the original destination is dropped.
+func (r *urlRewriter) rewriteURL(dest []byte, readmeDir string, isImage bool) []byte {
+	destStr := string(dest)
+	if destStr == "" {
+		return dest
+	}
+
+	u, err := url.Parse(destStr)
+	if err != nil {
+		// Unparseable destination: fail closed with an empty one.
+		return []byte("")
+	}
+	switch {
+	case u.Scheme != "":
+		// Absolute URL (http://, https://, mailto:, ...): not ours to rewrite.
+		return dest
+	case u.Host != "":
+		// Protocol-relative URL (//example.com/path): no scheme but a host.
+		// These are blocked rather than passed through.
+		return []byte("")
+	case strings.HasPrefix(destStr, "#"), strings.HasPrefix(destStr, "/"):
+		// Anchor-only links and site-absolute paths are left alone.
+		return dest
+	}
+
+	// Resolve against the README's directory. Join drops an empty readmeDir and
+	// collapses "." and ".." segments; Clean turns an empty result into ".".
+	resolved := path.Clean(path.Join(readmeDir, u.Path))
+
+	// Security check: once cleaned, a leading ".." means the reference climbs
+	// out of the repository root.
+	if resolved == ".." || strings.HasPrefix(resolved, "../") {
+		return []byte("")
+	}
+
+	// Default to the raw blob view, then relax it for links that navigate.
+	view, query := "blob", "?raw=1"
+	if !isImage {
+		switch ext := path.Ext(resolved); {
+		case ext == "" || isDirReference(u.Path):
+			view, query = "tree", ""
+		case ext == ".md" || ext == ".markdown":
+			// Markdown is linked to the rendered blob view, not the raw file.
+			query = ""
+		}
+	}
+
+	// Assemble /{repo}/{view}/{commit}[/{path}][?raw=1][#fragment].
+	var b strings.Builder
+	b.WriteString("/" + r.ctx.RepoName + "/" + view + "/" + r.ctx.CommitHash)
+	if resolved != "." {
+		// u.Path is percent-decoded, so escape it again; otherwise a decoded
+		// "?", "#" or "%" would change the meaning of the rewritten URL.
+		b.WriteString("/" + (&url.URL{Path: resolved}).EscapedPath())
+	}
+	// NOTE(review): when resolved is "." (the repository root) no path is
+	// appended; assumes /{repo}/tree/{commit} serves the root — confirm.
+	b.WriteString(query)
+	if u.Fragment != "" {
+		b.WriteString("#" + u.EscapedFragment())
+	}
+	return []byte(b.String())
+}
+
+// isDirReference reports whether the relative path p is written as a directory
+// reference: it is empty, ends in "/", or its last segment is "." or "..".
+func isDirReference(p string) bool {
+	switch p[strings.LastIndex(p, "/")+1:] {
+	case "", ".", "..":
+		return true
+	}
+	return false
+}
diff --git a/pkg/web/markdown_rewriter_test.go b/pkg/web/markdown_rewriter_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..fbf802e7314858fbe0edc50b1c57414b1fd4aa57
--- /dev/null
+++ b/pkg/web/markdown_rewriter_test.go
@@ -0,0 +1,241 @@
+package web
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/matryer/is"
+)
+
+// TestRenderMarkdownWithURLRewriting renders small markdown snippets for a README
+// at the repository root and checks the URLs in the resulting HTML: relative
+// destinations are pinned to the commit, while absolute URLs, anchors and
+// absolute paths are still present as written.
+func TestRenderMarkdownWithURLRewriting(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{RepoName: "test-repo", CommitHash: "abc123def456", ReadmePath: "README.md"}
+
+	// Test relative image in root README
+	md := []byte(`![image](image.png)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(string(html) != "")
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/image.png?raw=1"))
+
+	// Test relative link to markdown file
+	md = []byte(`[docs](docs/README.md)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/README.md"))
+
+	// Test relative link to non-markdown file
+	md = []byte(`[download](file.tar.gz)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.tar.gz?raw=1"))
+
+	// Test relative link to directory
+	md = []byte(`[folder](docs)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs"))
+
+	// Test absolute https URL unchanged
+	md = []byte(`![image](https://example.com/image.png)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "https://example.com/image.png"))
+
+	// Test anchor-only link unchanged
+	md = []byte(`[section](#heading)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "#heading"))
+
+	// Test absolute path unchanged (starts with /)
+	md = []byte(`[root](/other-repo/file)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/other-repo/file"))
+}
+
+// TestRenderMarkdownNestedReadme renders snippets for a README stored at
+// docs/README.md and checks that relative destinations are resolved against
+// the docs directory, including a "../" reference that climbs back to the
+// repository root.
+func TestRenderMarkdownNestedReadme(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{RepoName: "test-repo", CommitHash: "abc123def456", ReadmePath: "docs/README.md"}
+
+	// Test relative image in nested README
+	md := []byte(`![image](image.png)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/image.png?raw=1"))
+
+	// Test going up with ../
+	md = []byte(`![root](../root.png)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/root.png?raw=1"))
+
+	// Test deep nesting
+	md = []byte(`[deep](subdir/file.md)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/subdir/file.md"))
+}
+
+// TestRenderMarkdownTraversalAttempts feeds image destinations that try to
+// climb out of the repository and checks the img src attribute of the output:
+// relative "../" escapes must not survive (with or without a fragment), while
+// a destination starting with "/" is expected to pass through as written.
+func TestRenderMarkdownTraversalAttempts(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{RepoName: "test-repo", CommitHash: "abc123def456", ReadmePath: "README.md"}
+
+	// Test ../ traversal outside repo - should be blocked (empty src/href)
+	md := []byte(`![bad](../../../etc/passwd)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr := string(html)
+	// Should not contain the path in src attribute
+	is.True(!elementAttrContains(htmlStr, "img", "src", "etc/passwd"))
+
+	// Test absolute path traversal
+	md = []byte(`![bad](/../../../etc/passwd)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	// Absolute paths starting with / are left unchanged by rewriter
+	is.True(elementAttrContains(htmlStr, "img", "src", "/../../../etc/passwd"))
+
+	// Test that fragments don't leak when path traversal is blocked
+	md = []byte(`![bad](../secret.png#anchor)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	// Should not contain the path or fragment in src attribute
+	is.True(!elementAttrContains(htmlStr, "img", "src", "secret.png") && !elementAttrContains(htmlStr, "img", "src", "#anchor"))
+}
+
+// TestRenderMarkdownURLDetails pins the finer points of the rewritten URLs for
+// a root README: fragments are kept for links and images, directory links go
+// to /tree without a trailing slash, non-markdown files get ?raw=1, and
+// upper-case markdown extensions are currently treated as ordinary files.
+func TestRenderMarkdownURLDetails(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{RepoName: "test-repo", CommitHash: "abc123def456", ReadmePath: "README.md"}
+
+	// Test fragments preserved for links
+	md := []byte(`[section](docs/guide.md#installation)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/guide.md#installation"))
+
+	// Test fragments preserved for images
+	md = []byte(`![arch](docs/arch.png#diagram)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/arch.png?raw=1#diagram"))
+
+	// Test directories link to /tree without trailing slash
+	md = []byte(`[docs](docs/)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs"))
+	is.True(!strings.Contains(string(html), "docs/\"")) // no trailing slash in href
+
+	// Test non-md files get ?raw=1
+	md = []byte(`[archive](file.zip)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.zip?raw=1"))
+
+	md = []byte(`[pdf](docs/manual.pdf)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/manual.pdf?raw=1"))
+
+	// Test uppercase .MD/.MARKDOWN files are treated as non-markdown
+	// (documents current behavior - the rewriter compares path.Ext output case-sensitively)
+	md = []byte(`[upper](README.MD)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	// .MD is not .md, so treated as regular file requiring ?raw=1
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MD?raw=1"))
+
+	md = []byte(`[markdown](README.MARKDOWN)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	// .MARKDOWN is not .markdown, so treated as regular file requiring ?raw=1
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MARKDOWN?raw=1"))
+}
+
+// TestRenderMarkdownSpecialCharacters renders image destinations containing an
+// escaped space and non-ASCII characters. It only checks that the output still
+// carries the commit-pinned prefix (space case) or is non-empty (unicode case);
+// the exact escaping of the file name is not asserted.
+func TestRenderMarkdownSpecialCharacters(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{RepoName: "test-repo", CommitHash: "abc123def456", ReadmePath: "README.md"}
+
+	// Test spaces in filename - bluemonday may strip invalid URLs
+	md := []byte(`![spaces](my%20image.png)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	// Should contain the repo path
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/"))
+
+	// Test unicode
+	md = []byte(`![unicode](日本語.png)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(string(html) != "")
+}
+
+// TestRenderMarkdownDangerousSchemes checks that destinations using the
+// javascript: and data: schemes do not appear in the rendered HTML, and that a
+// plain http: link does not come out as an href to the original URL. The
+// rewriter passes URLs with a scheme through, so this exercises the sanitizer.
+func TestRenderMarkdownDangerousSchemes(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{RepoName: "test-repo", CommitHash: "abc123def456", ReadmePath: "README.md"}
+
+	// Test javascript: scheme - should be stripped by sanitizer
+	md := []byte(`[xss](javascript:alert('xss'))`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "javascript:"))
+
+	// Test data: scheme - should be stripped
+	md = []byte(`![data](data:image/png;base64,iVBORw0KGgo=)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "data:image"))
+
+	// Test http: scheme - should be stripped (only https allowed)
+	md = []byte(`[http](http://example.com)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	// Sanitizer should strip non-https schemes
+	is.True(!strings.Contains(string(html), "http://example.com") || !strings.Contains(string(html), "href="))
+}
+
+// TestRenderMarkdownWithoutContext checks that rendering with a nil context succeeds and the image destination still appears in the output.
+func TestRenderMarkdownWithoutContext(t *testing.T) {
+	is := is.New(t)
+
+	md := []byte(`![image](image.png)`)
+	html, err := renderMarkdown(md, nil)
+	is.NoErr(err)
+	is.True(string(html) != "")
+	// Without context, relative URLs stay relative
+	is.True(strings.Contains(string(html), "image.png"))
+}
diff --git a/pkg/web/sanitizer_test.go b/pkg/web/sanitizer_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..2228522ec786c43f5bb5b530478982b4ec2d54ca
--- /dev/null
+++ b/pkg/web/sanitizer_test.go
@@ -0,0 +1,481 @@
+package web
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/matryer/is"
+ "golang.org/x/net/html"
+)
+
+func TestSanitizerXSSProtection(t *testing.T) {
+ is := is.New(t)
+
+ ctx := &ReadmeContext{
+ RepoName: "test-repo",
+ CommitHash: "abc123",
+ ReadmePath: "README.md",
+ }
+
+ // Test javascript: URL in link
+ md := []byte(`[click me](javascript:alert('xss'))`)
+ html, err := renderMarkdown(md, ctx)
+ is.NoErr(err)
+ is.True(!strings.Contains(string(html), "javascript:"))
+
+ // Test javascript: URL in image
+ md = []byte(`)`)
+ html, err = renderMarkdown(md, ctx)
+ is.NoErr(err)
+ is.True(!strings.Contains(string(html), "javascript:"))
+
+ // Test data: URI in image
+ md = []byte(``)
+ html, err = renderMarkdown(md, ctx)
+ is.NoErr(err)
+ is.True(!strings.Contains(string(html), "data:image"))
+
+ // Test data: URI in link
+ md = []byte(`[link](data:text/html,)`)
+ html, err = renderMarkdown(md, ctx)
+ is.NoErr(err)
+ is.True(!strings.Contains(string(html), "data:text"))
+
+ // Test onerror handler
+ md = []byte(``)
+ html, err = renderMarkdown(md, ctx)
+ is.NoErr(err)
+ is.True(!strings.Contains(string(html), "onerror"))
+
+ // Test onclick handler
+ md = []byte(`click`)
+ html, err = renderMarkdown(md, ctx)
+ is.NoErr(err)
+ is.True(!strings.Contains(string(html), "onclick"))
+
+ // Test style attribute
+ md = []byte(`
test
`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "style=")) + is.True(!strings.Contains(string(html), "javascript")) + + // Test iframe injection + md = []byte(``) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "