feat(webui): rewrite README links without proxy

Amolith created 2 weeks ago

Implement rendering of images and links in README files without
requiring an image proxy. Relative URLs are rewritten to point to
repository files pinned to specific commits.

Relative images and files are served through the blob endpoint with
?raw=1 query parameter. Links to markdown files render the formatted
view, while other files download raw. Directory links go to tree view.

Assisted-by: Claude Sonnet 4.5 via Crush

Change summary

pkg/web/markdown_rewriter.go      | 147 ++++++++++
pkg/web/markdown_rewriter_test.go | 241 ++++++++++++++++
pkg/web/sanitizer_test.go         | 481 +++++++++++++++++++++++++++++++++
pkg/web/webui.go                  |   7 
pkg/web/webui_blob.go             |  11 
pkg/web/webui_helpers.go          |  72 ++++
pkg/web/webui_overview.go         |  13 
7 files changed, 960 insertions(+), 12 deletions(-)

Detailed changes

pkg/web/markdown_rewriter.go 🔗

@@ -0,0 +1,147 @@
+package web
+
+import (
+	"net/url"
+	"path"
+	"strings"
+
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/text"
+)
+
+// ReadmeContext contains information needed to rewrite URLs in README files.
+type ReadmeContext struct {
+	RepoName   string // Repository name (e.g., "myrepo")
+	CommitHash string // Full commit hash to pin URLs to
+	ReadmePath string // Path to README file (e.g., "docs/README.md")
+}
+
+// urlRewriter is a Goldmark AST transformer that rewrites relative URLs in markdown.
+type urlRewriter struct {
+	ctx ReadmeContext
+}
+
+// newURLRewriter creates a new URL rewriter transformer.
+func newURLRewriter(ctx ReadmeContext) *urlRewriter {
+	return &urlRewriter{ctx: ctx}
+}
+
+// Transform implements ast.Transformer.
+func (r *urlRewriter) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
+	// Get the directory containing the README
+	readmeDir := path.Dir(r.ctx.ReadmePath)
+	if readmeDir == "." {
+		readmeDir = ""
+	}
+
+	// Walk the AST and rewrite links and images
+	ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
+		if !entering {
+			return ast.WalkContinue, nil
+		}
+
+		switch v := n.(type) {
+		case *ast.Link:
+			v.Destination = r.rewriteURL(v.Destination, readmeDir, false)
+		case *ast.Image:
+			v.Destination = r.rewriteURL(v.Destination, readmeDir, true)
+		}
+
+		return ast.WalkContinue, nil
+	})
+}
+
+// rewriteURL rewrites a single URL destination.
+func (r *urlRewriter) rewriteURL(dest []byte, readmeDir string, isImage bool) []byte {
+	destStr := string(dest)
+
+	// Skip empty URLs
+	if destStr == "" {
+		return dest
+	}
+
+	// Parse the URL
+	u, err := url.Parse(destStr)
+	if err != nil {
+		// Invalid URL, fail closed by returning empty
+		return []byte("")
+	}
+
+	// Skip absolute URLs (http://, https://, mailto:, etc.)
+	if u.Scheme != "" {
+		return dest
+	}
+
+	// Skip protocol-relative URLs (//example.com/path)
+	// These have no scheme but do have a host
+	if u.Host != "" {
+		return []byte("")
+	}
+
+	// Skip anchor-only links (#section)
+	if strings.HasPrefix(destStr, "#") {
+		return dest
+	}
+
+	// Skip absolute paths (starting with /)
+	if strings.HasPrefix(destStr, "/") {
+		return dest
+	}
+
+	// Now we have a relative URL - resolve it against the README directory
+	// Join README dir with the relative path
+	var resolvedPath string
+	if readmeDir != "" {
+		resolvedPath = path.Join(readmeDir, u.Path)
+	} else {
+		resolvedPath = u.Path
+	}
+
+	// Clean the path to resolve .. and .
+	resolvedPath = path.Clean(resolvedPath)
+
+	// Security check: reject any path that escapes the repository root
+	// After path.Clean, a path starting with ../ indicates traversal outside the repo
+	if strings.HasPrefix(resolvedPath, "../") || resolvedPath == ".." {
+		// Path tries to escape repo, return empty/invalid
+		return []byte("")
+	}
+
+	// For images, always rewrite to blob endpoint with ?raw=1
+	// For links, check if it's a file (has extension) or could be a directory/markdown file
+	if isImage {
+		// Build URL: /{repo}/blob/{commit}/{path}?raw=1
+		rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + "?raw=1"
+		// Preserve fragment if present
+		if u.Fragment != "" {
+			rewritten += "#" + u.Fragment
+		}
+		return []byte(rewritten)
+	}
+
+	// For links, determine if it's a file or navigation
+	ext := path.Ext(resolvedPath)
+	if ext != "" && ext != ".md" && ext != ".markdown" {
+		// It's a file (not markdown), serve raw
+		rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + "?raw=1"
+		if u.Fragment != "" {
+			rewritten += "#" + u.Fragment
+		}
+		return []byte(rewritten)
+	} else if ext == ".md" || ext == ".markdown" {
+		// It's a markdown file, link to blob view (rendered)
+		rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath
+		if u.Fragment != "" {
+			rewritten += "#" + u.Fragment
+		}
+		return []byte(rewritten)
+	} else {
+		// No extension, could be a directory - link to tree view
+		rewritten := "/" + r.ctx.RepoName + "/tree/" + r.ctx.CommitHash + "/" + resolvedPath
+		if u.Fragment != "" {
+			rewritten += "#" + u.Fragment
+		}
+		return []byte(rewritten)
+	}
+}

pkg/web/markdown_rewriter_test.go 🔗

@@ -0,0 +1,241 @@
+package web
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/matryer/is"
+)
+
+func TestRenderMarkdownWithURLRewriting(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123def456",
+		ReadmePath: "README.md",
+	}
+
+	// Test relative image in root README
+	md := []byte(`![image](image.png)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(string(html) != "")
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/image.png?raw=1"))
+
+	// Test relative link to markdown file
+	md = []byte(`[docs](docs/README.md)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/README.md"))
+
+	// Test relative link to non-markdown file
+	md = []byte(`[download](file.tar.gz)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.tar.gz?raw=1"))
+
+	// Test relative link to directory
+	md = []byte(`[folder](docs)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs"))
+
+	// Test absolute https URL unchanged
+	md = []byte(`![remote](https://example.com/image.png)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "https://example.com/image.png"))
+
+	// Test anchor-only link unchanged
+	md = []byte(`[section](#heading)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "#heading"))
+
+	// Test absolute path unchanged (starts with /)
+	md = []byte(`[root](/other-repo/file)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/other-repo/file"))
+}
+
+func TestRenderMarkdownNestedReadme(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123def456",
+		ReadmePath: "docs/README.md",
+	}
+
+	// Test relative image in nested README
+	md := []byte(`![image](image.png)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/image.png?raw=1"))
+
+	// Test going up with ../
+	md = []byte(`![up](../root.png)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/root.png?raw=1"))
+
+	// Test deep nesting
+	md = []byte(`[deep](subdir/file.md)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/subdir/file.md"))
+}
+
+func TestRenderMarkdownTraversalAttempts(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123def456",
+		ReadmePath: "README.md",
+	}
+
+	// Test ../ traversal outside repo - should be blocked (empty src/href)
+	md := []byte(`![escape](../../etc/passwd)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr := string(html)
+	// Should not contain the path in src attribute
+	is.True(!elementAttrContains(htmlStr, "img", "src", "etc/passwd"))
+
+	// Test absolute path traversal
+	md = []byte(`![abs](/../../../etc/passwd)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	// Absolute paths starting with / are left unchanged by rewriter
+	is.True(elementAttrContains(htmlStr, "img", "src", "/../../../etc/passwd"))
+
+	// Test that fragments don't leak when path traversal is blocked
+	md = []byte(`![escape](../../secret.png#anchor)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	// Should not contain the path or fragment in src attribute
+	is.True(!elementAttrContains(htmlStr, "img", "src", "secret.png") && !elementAttrContains(htmlStr, "img", "src", "#anchor"))
+}
+
+func TestRenderMarkdownURLDetails(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123def456",
+		ReadmePath: "README.md",
+	}
+
+	// Test fragments preserved for links
+	md := []byte(`[section](docs/guide.md#installation)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/guide.md#installation"))
+
+	// Test fragments preserved for images
+	md = []byte(`![diagram](docs/arch.png#diagram)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/arch.png?raw=1#diagram"))
+
+	// Test directories link to /tree without trailing slash
+	md = []byte(`[docs](docs/)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs"))
+	is.True(!strings.Contains(string(html), "docs/\"")) // no trailing slash in href
+
+	// Test non-md files get ?raw=1
+	md = []byte(`[archive](file.zip)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.zip?raw=1"))
+
+	md = []byte(`[pdf](docs/manual.pdf)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/manual.pdf?raw=1"))
+
+	// Test uppercase .MD/.MARKDOWN files are treated as non-markdown
+	// (document current behavior - filepath.Ext is case-sensitive)
+	md = []byte(`[upper](README.MD)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	// .MD is not .md, so treated as regular file requiring ?raw=1
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MD?raw=1"))
+
+	md = []byte(`[markdown](README.MARKDOWN)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	// .MARKDOWN is not .markdown, so treated as regular file requiring ?raw=1
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MARKDOWN?raw=1"))
+}
+
+func TestRenderMarkdownSpecialCharacters(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123def456",
+		ReadmePath: "README.md",
+	}
+
+	// Test spaces in filename - bluemonday may strip invalid URLs
+	md := []byte(`![spaces](my_image.png)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	// Should contain the repo path
+	is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/"))
+
+	// Test unicode
+	md = []byte(`![unicode](文件.png)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(string(html) != "")
+}
+
+func TestRenderMarkdownDangerousSchemes(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123def456",
+		ReadmePath: "README.md",
+	}
+
+	// Test javascript: scheme - should be stripped by sanitizer
+	md := []byte(`[xss](javascript:alert('xss'))`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "javascript:"))
+
+	// Test data: scheme - should be stripped
+	md = []byte(`![data](data:image/png;base64,abc123)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "data:image"))
+
+	// Test http: scheme - should be stripped (only https allowed)
+	md = []byte(`[http](http://example.com)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	// Sanitizer should strip non-https schemes
+	is.True(!strings.Contains(string(html), "http://example.com") || !strings.Contains(string(html), "href="))
+}
+
+func TestRenderMarkdownWithoutContext(t *testing.T) {
+	is := is.New(t)
+
+	// Test that rendering without context works (no rewriting)
+	md := []byte(`![image](image.png)`)
+	html, err := renderMarkdown(md, nil)
+	is.NoErr(err)
+	is.True(string(html) != "")
+	// Without context, relative URLs stay relative
+	is.True(strings.Contains(string(html), "image.png"))
+}

pkg/web/sanitizer_test.go 🔗

@@ -0,0 +1,481 @@
+package web
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/matryer/is"
+	"golang.org/x/net/html"
+)
+
+func TestSanitizerXSSProtection(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123",
+		ReadmePath: "README.md",
+	}
+
+	// Test javascript: URL in link
+	md := []byte(`[click me](javascript:alert('xss'))`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "javascript:"))
+
+	// Test javascript: URL in image
+	md = []byte(`![img](javascript:alert('xss'))`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "javascript:"))
+
+	// Test data: URI in image
+	md = []byte(`![img](data:image/png;base64,iVBORw0KG)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "data:image"))
+
+	// Test data: URI in link
+	md = []byte(`[link](data:text/html,<script>alert('xss')</script>)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "data:text"))
+
+	// Test onerror handler
+	md = []byte(`<img src="x" onerror="alert('xss')">`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "onerror"))
+
+	// Test onclick handler
+	md = []byte(`<a href="#" onclick="alert('xss')">click</a>`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "onclick"))
+
+	// Test style attribute
+	md = []byte(`<p style="background:url(javascript:alert('xss'))">test</p>`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "style="))
+	is.True(!strings.Contains(string(html), "javascript"))
+
+	// Test iframe injection
+	md = []byte(`<iframe src="https://evil.com"></iframe>`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "<iframe"))
+
+	// Test script tag
+	md = []byte(`<script>alert('xss')</script>`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "<script"))
+
+	// Test object/embed tags
+	md = []byte(`<object data="https://evil.com"></object>`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "<object"))
+
+	md = []byte(`<embed src="https://evil.com">`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(!strings.Contains(string(html), "<embed"))
+}
+
+func TestSanitizerAllowedTags(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123",
+		ReadmePath: "README.md",
+	}
+
+	// Test allowed basic formatting
+	md := []byte(`**bold** *italic* ~~strikethrough~~`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "<strong>"))
+	is.True(strings.Contains(string(html), "<em>"))
+	is.True(strings.Contains(string(html), "<del>"))
+
+	// Test allowed headings
+	md = []byte(`# H1
+## H2
+### H3`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "<h1"))
+	is.True(strings.Contains(string(html), "<h2"))
+	is.True(strings.Contains(string(html), "<h3"))
+
+	// Test allowed lists
+	md = []byte(`- item 1
+- item 2
+
+1. numbered
+2. list`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "<ul"))
+	is.True(strings.Contains(string(html), "<ol"))
+	is.True(strings.Contains(string(html), "<li"))
+
+	// Test allowed code blocks
+	md = []byte("```go\nfunc main() {}\n```")
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "<pre"))
+	is.True(strings.Contains(string(html), "<code"))
+
+	// Test allowed tables
+	md = []byte(`| Col1 | Col2 |
+|------|------|
+| A    | B    |`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "<table"))
+	is.True(strings.Contains(string(html), "<thead"))
+	is.True(strings.Contains(string(html), "<tbody"))
+	is.True(strings.Contains(string(html), "<tr"))
+	is.True(strings.Contains(string(html), "<th"))
+	is.True(strings.Contains(string(html), "<td"))
+
+	// Test allowed blockquote
+	md = []byte(`> quote`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	is.True(strings.Contains(string(html), "<blockquote"))
+}
+
+func TestSanitizerHTTPSOnly(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123",
+		ReadmePath: "README.md",
+	}
+
+	// Test http:// link is stripped
+	md := []byte(`[insecure](http://example.com)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr := string(html)
+	// Either no anchor tag or the anchor has no href attribute
+	is.True(!hasElementWithTag(htmlStr, "a") || elementAttrAbsent(htmlStr, "a", "href"))
+
+	// Test https:// link is allowed
+	md = []byte(`[secure](https://example.com)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	is.True(elementHasAttr(htmlStr, "a", "href"))
+	is.True(elementAttrContains(htmlStr, "a", "href", "https://example.com"))
+
+	// Test http:// image is stripped
+	md = []byte(`![img](http://example.com/image.png)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	is.True(!hasElementWithTag(htmlStr, "img") || elementAttrAbsent(htmlStr, "img", "src"))
+
+	// Test https:// image is allowed
+	md = []byte(`![img](https://example.com/image.png)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	is.True(elementHasAttr(htmlStr, "img", "src"))
+	is.True(elementAttrContains(htmlStr, "img", "src", "https://example.com/image.png"))
+}
+
+func TestSanitizerNoFollowNoReferrer(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123",
+		ReadmePath: "README.md",
+	}
+
+	// Test that external links get rel="nofollow noreferrer"
+	md := []byte(`[external](https://example.com)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr := string(html)
+	is.True(elementHasAttr(htmlStr, "a", "rel"))
+	relAttr := getElementAttr(htmlStr, "a", "rel")
+	is.True(strings.Contains(relAttr, "nofollow"))
+	is.True(strings.Contains(relAttr, "noreferrer"))
+
+	// Test that relative/internal links also get rel="nofollow noreferrer"
+	md = []byte(`[internal](docs/README.md)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	is.True(elementHasAttr(htmlStr, "a", "rel"))
+	relAttr = getElementAttr(htmlStr, "a", "rel")
+	is.True(strings.Contains(relAttr, "nofollow"))
+	is.True(strings.Contains(relAttr, "noreferrer"))
+}
+
+func TestSanitizerAdditionalSchemes(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123",
+		ReadmePath: "README.md",
+	}
+
+	// Test protocol-relative URLs are stripped
+	md := []byte(`[protocol-relative](//evil.com/script.js)`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr := string(html)
+	// Rewriter blocks protocol-relative URLs with empty href, then sanitizer strips empty href links
+	is.True(!hasElementWithTag(htmlStr, "a"))
+
+	// Test mailto: scheme is stripped
+	md = []byte(`[email](mailto:user@example.com)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	// mailto: is not in allowed schemes, so sanitizer strips the entire link
+	is.True(!hasElementWithTag(htmlStr, "a"))
+
+	// Test ftp: scheme is stripped
+	md = []byte(`[ftp](ftp://ftp.example.com/file.txt)`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	// ftp: is not in allowed schemes, so sanitizer strips the entire link
+	is.True(!hasElementWithTag(htmlStr, "a"))
+}
+
+func TestSanitizerDisallowedAttributes(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123",
+		ReadmePath: "README.md",
+	}
+
+	// Test target attribute is stripped from links
+	md := []byte(`<a href="https://example.com" target="_blank">link</a>`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr := string(html)
+	is.True(elementAttrAbsent(htmlStr, "a", "target"))
+
+	// Test class attribute is stripped
+	md = []byte(`<a href="https://example.com" class="dangerous">link</a>`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	is.True(elementAttrAbsent(htmlStr, "a", "class"))
+
+	// Test style attribute is stripped (already tested in XSS, but explicit here)
+	md = []byte(`<a href="https://example.com" style="color: red;">link</a>`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	is.True(elementAttrAbsent(htmlStr, "a", "style"))
+
+	// Test onclick is stripped (already tested in XSS, but explicit here)
+	md = []byte(`<a href="#" onclick="alert('xss')">click</a>`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	is.True(elementAttrAbsent(htmlStr, "a", "onclick"))
+
+	// Test onerror is stripped (already tested in XSS, but explicit here)
+	md = []byte(`<img src="x" onerror="alert('xss')">`)
+	html, err = renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr = string(html)
+	is.True(elementAttrAbsent(htmlStr, "img", "onerror"))
+}
+
+func TestSanitizerImageAttributes(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123",
+		ReadmePath: "README.md",
+	}
+
+	// Test that width and height attributes are preserved on images
+	md := []byte(`<img src="https://example.com/image.png" width="100" height="50" alt="test">`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr := string(html)
+	is.True(elementAttrEquals(htmlStr, "img", "width", "100"))
+	is.True(elementAttrEquals(htmlStr, "img", "height", "50"))
+	is.True(elementAttrEquals(htmlStr, "img", "alt", "test"))
+}
+
+func TestSanitizerHeadingIDs(t *testing.T) {
+	is := is.New(t)
+
+	ctx := &ReadmeContext{
+		RepoName:   "test-repo",
+		CommitHash: "abc123",
+		ReadmePath: "README.md",
+	}
+
+	// Test that heading IDs are preserved (generated by AutoHeadingID)
+	md := []byte(`# Installation
+
+Jump to [installation](#installation).`)
+	html, err := renderMarkdown(md, ctx)
+	is.NoErr(err)
+	htmlStr := string(html)
+	// Check that the heading has an id attribute set to "installation"
+	is.True(elementAttrEquals(htmlStr, "h1", "id", "installation"))
+	// Check that the anchor link is preserved
+	is.True(elementAttrContains(htmlStr, "a", "href", "#installation"))
+}
+
+// HTML Testing Helpers
+// These utilities help test HTML output by parsing the DOM instead of string matching.
+
+// findElement searches the HTML tree for an element matching the given tag name.
+// Returns the first matching element node, or nil if not found.
+func findElement(n *html.Node, tag string) *html.Node {
+	if n.Type == html.ElementNode && n.Data == tag {
+		return n
+	}
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		if found := findElement(c, tag); found != nil {
+			return found
+		}
+	}
+	return nil
+}
+
+// findElementWithAttr searches for an element with a specific attribute value.
+// Returns the first matching element, or nil if not found.
+func findElementWithAttr(n *html.Node, tag, attrKey, attrValue string) *html.Node {
+	if n.Type == html.ElementNode && n.Data == tag {
+		if getAttr(n, attrKey) == attrValue {
+			return n
+		}
+	}
+	for c := n.FirstChild; c != nil; c = c.NextSibling {
+		if found := findElementWithAttr(c, tag, attrKey, attrValue); found != nil {
+			return found
+		}
+	}
+	return nil
+}
+
+// getAttr returns the value of an attribute, or empty string if not present.
+func getAttr(n *html.Node, key string) string {
+	for _, attr := range n.Attr {
+		if attr.Key == key {
+			return attr.Val
+		}
+	}
+	return ""
+}
+
+// hasAttr returns true if the element has the specified attribute (regardless of value).
+func hasAttr(n *html.Node, key string) bool {
+	for _, attr := range n.Attr {
+		if attr.Key == key {
+			return true
+		}
+	}
+	return false
+}
+
+// attrContains checks if an attribute value contains a substring.
+func attrContains(n *html.Node, key, substr string) bool {
+	val := getAttr(n, key)
+	return strings.Contains(val, substr)
+}
+
+// parseHTML parses an HTML string and returns the root node.
+func parseHTML(htmlStr string) (*html.Node, error) {
+	return html.Parse(strings.NewReader(htmlStr))
+}
+
+// hasElementWithTag returns true if the HTML contains an element with the given tag.
+func hasElementWithTag(htmlStr, tag string) bool {
+	doc, err := parseHTML(htmlStr)
+	if err != nil {
+		return false
+	}
+	return findElement(doc, tag) != nil
+}
+
+// getElementAttr finds an element by tag and returns the value of the specified attribute.
+// Returns empty string if element or attribute not found.
+func getElementAttr(htmlStr, tag, attrKey string) string {
+	doc, err := parseHTML(htmlStr)
+	if err != nil {
+		return ""
+	}
+	elem := findElement(doc, tag)
+	if elem == nil {
+		return ""
+	}
+	return getAttr(elem, attrKey)
+}
+
+// elementHasAttr checks if an element has a specific attribute key (regardless of value).
+func elementHasAttr(htmlStr, tag, attrKey string) bool {
+	doc, err := parseHTML(htmlStr)
+	if err != nil {
+		return false
+	}
+	elem := findElement(doc, tag)
+	if elem == nil {
+		return false
+	}
+	return hasAttr(elem, attrKey)
+}
+
+// elementAttrEquals checks if an element's attribute equals a specific value.
+func elementAttrEquals(htmlStr, tag, attrKey, expected string) bool {
+	return getElementAttr(htmlStr, tag, attrKey) == expected
+}
+
+// elementAttrContains checks if an element's attribute contains a substring.
+func elementAttrContains(htmlStr, tag, attrKey, substr string) bool {
+	doc, err := parseHTML(htmlStr)
+	if err != nil {
+		return false
+	}
+	elem := findElement(doc, tag)
+	if elem == nil {
+		return false
+	}
+	return attrContains(elem, attrKey, substr)
+}
+
+// elementAttrAbsent checks that an element does NOT have a specific attribute.
+func elementAttrAbsent(htmlStr, tag, attrKey string) bool {
+	return !elementHasAttr(htmlStr, tag, attrKey)
+}
+
+// elementAttrEmpty checks if an element's attribute is present but empty.
+func elementAttrEmpty(htmlStr, tag, attrKey string) bool {
+	doc, err := parseHTML(htmlStr)
+	if err != nil {
+		return false
+	}
+	elem := findElement(doc, tag)
+	if elem == nil {
+		return false
+	}
+	if !hasAttr(elem, attrKey) {
+		return false
+	}
+	return getAttr(elem, attrKey) == ""
+}

pkg/web/webui.go 🔗

@@ -240,6 +240,13 @@ func renderHTML(w http.ResponseWriter, templateName string, data interface{}) {
 	}
 
 	w.Header().Set("Content-Type", "text/html; charset=utf-8")
+	
+	// Security headers
+	// Note: style-src 'unsafe-inline' is required for inline styles in templates (tree.html, overview.html)
+	w.Header().Set("Content-Security-Policy", "default-src 'self'; img-src 'self' https:; style-src 'self' 'unsafe-inline'; script-src 'self'; object-src 'none'; frame-ancestors 'self'; base-uri 'none'")
+	w.Header().Set("Referrer-Policy", "no-referrer")
+	w.Header().Set("X-Content-Type-Options", "nosniff")
+	
 	if err := tmpl.ExecuteTemplate(w, "layout", data); err != nil {
 		log.Debug("template execution failed", "template", templateName, "err", err)
 		// Already started writing response, so we can't render an error page

pkg/web/webui_blob.go 🔗

@@ -94,7 +94,16 @@ func repoBlob(w http.ResponseWriter, r *http.Request) {
 	var renderedHTML template.HTML
 
 	if isMarkdown && !isBinaryContent(content) && !showSource {
-		renderedHTML, _ = renderMarkdown(content)
+		ctx := &ReadmeContext{
+			RepoName:   repo.Name(),
+			CommitHash: refObj.Reference.ID,
+			ReadmePath: path,
+		}
+		var err error
+		renderedHTML, err = renderMarkdown(content, ctx)
+		if err != nil {
+			logger.Debug("failed to render markdown", "repo", repo.Name(), "path", path, "err", err)
+		}
 	} else if !isBinaryContent(content) {
 		renderedHTML = highlightCode(path, content)
 	}

pkg/web/webui_helpers.go 🔗

@@ -5,6 +5,7 @@ import (
 	"context"
 	"html/template"
 	"strings"
+	"sync"
 
 	"github.com/charmbracelet/soft-serve/git"
 	"github.com/charmbracelet/soft-serve/pkg/backend"
@@ -14,12 +15,58 @@ import (
 	extension "github.com/yuin/goldmark/extension"
 	"github.com/yuin/goldmark/parser"
 	goldmarkhtml "github.com/yuin/goldmark/renderer/html"
+	"github.com/yuin/goldmark/util"
 )
 
+var (
+	sanitizerPolicy     *bluemonday.Policy
+	sanitizerPolicyOnce sync.Once
+)
+
+// getSanitizerPolicy returns a cached sanitizer policy.
+// The policy is safe for concurrent use after initialization.
+// Note: Tests should not use t.Parallel() with functions that call renderMarkdown
+// until we verify bluemonday.Policy initialization is thread-safe during sync.Once.
+func getSanitizerPolicy() *bluemonday.Policy {
+	sanitizerPolicyOnce.Do(func() {
+		policy := bluemonday.NewPolicy()
+
+		// Allowed tags
+		policy.AllowElements("p", "h1", "h2", "h3", "h4", "h5", "h6",
+			"ul", "ol", "li", "pre", "code", "blockquote",
+			"strong", "em", "del", "br", "hr",
+			"table", "thead", "tbody", "tr", "th", "td",
+			"center", "a", "img")
+
+		// Allow id attributes on headings (generated by AutoHeadingID for anchor links)
+		policy.AllowAttrs("id").OnElements("h1", "h2", "h3", "h4", "h5", "h6")
+
+		// Links: only https scheme and relative URLs, add nofollow/noreferrer
+		policy.AllowAttrs("href").OnElements("a")
+		policy.RequireNoFollowOnLinks(true)
+		policy.RequireNoReferrerOnLinks(true)
+
+		// Images: only https scheme or repo-relative paths, allow descriptive attrs
+		policy.AllowAttrs("src", "alt", "title", "width", "height").OnElements("img")
+
+		// URL schemes: only HTTPS for external URLs, relative URLs for repo paths
+		policy.AllowURLSchemes("https")
+		policy.AllowRelativeURLs(true)
+
+		// Table alignment attributes
+		policy.AllowAttrs("align").Matching(bluemonday.SpaceSeparatedTokens).OnElements("th", "td")
+
+		sanitizerPolicy = policy
+	})
+	return sanitizerPolicy
+}
+
 // renderMarkdown converts markdown content to sanitized HTML.
-func renderMarkdown(content []byte) (template.HTML, error) {
+// If ctx is provided, relative URLs will be rewritten to point to repository files.
+func renderMarkdown(content []byte, ctx *ReadmeContext) (template.HTML, error) {
 	var buf bytes.Buffer
-	md := goldmark.New(
+	
+	mdOpts := []goldmark.Option{
 		goldmark.WithExtensions(extension.GFM),
 		goldmark.WithParserOptions(
 			parser.WithAutoHeadingID(),
@@ -27,17 +74,24 @@ func renderMarkdown(content []byte) (template.HTML, error) {
 		goldmark.WithRendererOptions(
 			goldmarkhtml.WithUnsafe(),
 		),
-	)
+	}
+	
+	// Add URL rewriter if context is provided
+	if ctx != nil {
+		rewriter := newURLRewriter(*ctx)
+		mdOpts = append(mdOpts, goldmark.WithParserOptions(
+			parser.WithASTTransformers(util.Prioritized(rewriter, 500)),
+		))
+	}
+	
+	md := goldmark.New(mdOpts...)
 
 	if err := md.Convert(content, &buf); err != nil {
 		return "", err
 	}
 
-	policy := bluemonday.UGCPolicy()
-	policy.AllowStyling()
-	policy.RequireNoFollowOnLinks(false)
-	policy.AllowElements("center")
-	sanitized := policy.SanitizeBytes(buf.Bytes())
+	// Use cached sanitizer policy
+	sanitized := getSanitizerPolicy().SanitizeBytes(buf.Bytes())
 
 	return template.HTML(sanitized), nil
 }
@@ -57,7 +111,7 @@ func getServerReadme(ctx context.Context, be *backend.Backend) (raw string, html
 				return "", "", err
 			}
 			if readme != "" {
-				html, err := renderMarkdown([]byte(readme))
+				html, err := renderMarkdown([]byte(readme), nil)
 				if err != nil {
 					return "", "", err
 				}

pkg/web/webui_overview.go 🔗

@@ -44,9 +44,18 @@ func repoOverview(w http.ResponseWriter, r *http.Request) {
 	var readmeHTML template.HTML
 	if !isEmpty {
 		head, _ := gr.HEAD()
-		readmeContent, _, err := backend.Readme(repo, head)
+		readmeContent, readmePath, err := backend.Readme(repo, head)
 		if err == nil && readmeContent != "" {
-			readmeHTML, _ = renderMarkdown([]byte(readmeContent))
+			ctx := &ReadmeContext{
+				RepoName:   repo.Name(),
+				CommitHash: head.Reference.ID,
+				ReadmePath: readmePath,
+			}
+			var mdErr error
+			readmeHTML, mdErr = renderMarkdown([]byte(readmeContent), ctx)
+			if mdErr != nil {
+				logger.Debug("failed to render readme markdown", "repo", repo.Name(), "path", readmePath, "err", mdErr)
+			}
 		}
 	}