From 6a6fb6dac7ba99c6f953870dd7bcdc1ba501302a Mon Sep 17 00:00:00 2001 From: Amolith Date: Sat, 8 Nov 2025 10:05:22 -0700 Subject: [PATCH] feat(webui): rewrite README links without proxy Implement rendering of images and links in README files without requiring an image proxy. Relative URLs are rewritten to point to repository files pinned to specific commits. Relative images and files are served through the blob endpoint with ?raw=1 query parameter. Links to markdown files render the formatted view, while other files download raw. Directory links go to tree view. Assisted-by: Claude Sonnet 4.5 via Crush --- pkg/web/markdown_rewriter.go | 147 +++++++++ pkg/web/markdown_rewriter_test.go | 241 +++++++++++++++ pkg/web/sanitizer_test.go | 481 ++++++++++++++++++++++++++++++ pkg/web/webui.go | 7 + pkg/web/webui_blob.go | 11 +- pkg/web/webui_helpers.go | 72 ++++- pkg/web/webui_overview.go | 13 +- 7 files changed, 960 insertions(+), 12 deletions(-) create mode 100644 pkg/web/markdown_rewriter.go create mode 100644 pkg/web/markdown_rewriter_test.go create mode 100644 pkg/web/sanitizer_test.go diff --git a/pkg/web/markdown_rewriter.go b/pkg/web/markdown_rewriter.go new file mode 100644 index 0000000000000000000000000000000000000000..2ee5a9bb7353be89163b97b9c71290799c722efd --- /dev/null +++ b/pkg/web/markdown_rewriter.go @@ -0,0 +1,147 @@ +package web + +import ( + "net/url" + "path" + "strings" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/text" +) + +// ReadmeContext contains information needed to rewrite URLs in README files. +type ReadmeContext struct { + RepoName string // Repository name (e.g., "myrepo") + CommitHash string // Full commit hash to pin URLs to + ReadmePath string // Path to README file (e.g., "docs/README.md") +} + +// urlRewriter is a Goldmark AST transformer that rewrites relative URLs in markdown. +type urlRewriter struct { + ctx ReadmeContext +} + +// newURLRewriter creates a new URL rewriter transformer. +func newURLRewriter(ctx ReadmeContext) *urlRewriter { + return &urlRewriter{ctx: ctx} +} + +// Transform implements ast.Transformer. +func (r *urlRewriter) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { + // Get the directory containing the README + readmeDir := path.Dir(r.ctx.ReadmePath) + if readmeDir == "." { + readmeDir = "" + } + + // Walk the AST and rewrite links and images + ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + switch v := n.(type) { + case *ast.Link: + v.Destination = r.rewriteURL(v.Destination, readmeDir, false) + case *ast.Image: + v.Destination = r.rewriteURL(v.Destination, readmeDir, true) + } + + return ast.WalkContinue, nil + }) +} + +// rewriteURL rewrites a single URL destination. +func (r *urlRewriter) rewriteURL(dest []byte, readmeDir string, isImage bool) []byte { + destStr := string(dest) + + // Skip empty URLs + if destStr == "" { + return dest + } + + // Parse the URL + u, err := url.Parse(destStr) + if err != nil { + // Invalid URL, fail closed by returning empty + return []byte("") + } + + // Skip absolute URLs (http://, https://, mailto:, etc.) + if u.Scheme != "" { + return dest + } + + // Skip protocol-relative URLs (//example.com/path) + // These have no scheme but do have a host + if u.Host != "" { + return []byte("") + } + + // Skip anchor-only links (#section) + if strings.HasPrefix(destStr, "#") { + return dest + } + + // Skip absolute paths (starting with /) + if strings.HasPrefix(destStr, "/") { + return dest + } + + // Now we have a relative URL - resolve it against the README directory + // Join README dir with the relative path + var resolvedPath string + if readmeDir != "" { + resolvedPath = path.Join(readmeDir, u.Path) + } else { + resolvedPath = u.Path + } + + // Clean the path to resolve .. and . + resolvedPath = path.Clean(resolvedPath) + + // Security check: reject any path that escapes the repository root + // After path.Clean, a path starting with ../ indicates traversal outside the repo + if strings.HasPrefix(resolvedPath, "../") || resolvedPath == ".." { + // Path tries to escape repo, return empty/invalid + return []byte("") + } + + // For images, always rewrite to blob endpoint with ?raw=1 + // For links, check if it's a file (has extension) or could be a directory/markdown file + if isImage { + // Build URL: /{repo}/blob/{commit}/{path}?raw=1 + rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + "?raw=1" + // Preserve fragment if present + if u.Fragment != "" { + rewritten += "#" + u.Fragment + } + return []byte(rewritten) + } + + // For links, determine if it's a file or navigation + ext := path.Ext(resolvedPath) + if ext != "" && ext != ".md" && ext != ".markdown" { + // It's a file (not markdown), serve raw + rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + "?raw=1" + if u.Fragment != "" { + rewritten += "#" + u.Fragment + } + return []byte(rewritten) + } else if ext == ".md" || ext == ".markdown" { + // It's a markdown file, link to blob view (rendered) + rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + if u.Fragment != "" { + rewritten += "#" + u.Fragment + } + return []byte(rewritten) + } else { + // No extension, could be a directory - link to tree view + rewritten := "/" + r.ctx.RepoName + "/tree/" + r.ctx.CommitHash + "/" + resolvedPath + if u.Fragment != "" { + rewritten += "#" + u.Fragment + } + return []byte(rewritten) + } +} diff --git a/pkg/web/markdown_rewriter_test.go b/pkg/web/markdown_rewriter_test.go new file mode 100644 index 0000000000000000000000000000000000000000..fbf802e7314858fbe0edc50b1c57414b1fd4aa57 --- /dev/null +++ b/pkg/web/markdown_rewriter_test.go @@ -0,0 +1,241 @@ +package web + +import ( + "strings" + "testing" + + "github.com/matryer/is" +) + +func TestRenderMarkdownWithURLRewriting(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123def456", + ReadmePath: "README.md", + } + + // Test relative image in root README + md := []byte(`![image](image.png)`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + is.True(string(html) != "") + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/image.png?raw=1")) + + // Test relative link to markdown file + md = []byte(`[docs](docs/README.md)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/README.md")) + + // Test relative link to non-markdown file + md = []byte(`[download](file.tar.gz)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.tar.gz?raw=1")) + + // Test relative link to directory + md = []byte(`[folder](docs)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs")) + + // Test absolute https URL unchanged + md = []byte(`![remote](https://example.com/image.png)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "https://example.com/image.png")) + + // Test anchor-only link unchanged + md = []byte(`[section](#heading)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "#heading")) + + // Test absolute path unchanged (starts with /) + md = []byte(`[root](/other-repo/file)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/other-repo/file")) +} + +func TestRenderMarkdownNestedReadme(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123def456", + ReadmePath: "docs/README.md", + } + + // Test relative image in nested README + md := []byte(`![image](image.png)`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/image.png?raw=1")) + + // Test going up with ../ + md = []byte(`![up](../root.png)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/root.png?raw=1")) + + // Test deep nesting + md = []byte(`[deep](subdir/file.md)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/subdir/file.md")) +} + +func TestRenderMarkdownTraversalAttempts(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123def456", + ReadmePath: "README.md", + } + + // Test ../ traversal outside repo - should be blocked (empty src/href) + md := []byte(`![escape](../../etc/passwd)`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr := string(html) + // Should not contain the path in src attribute + is.True(!elementAttrContains(htmlStr, "img", "src", "etc/passwd")) + + // Test absolute path traversal + md = []byte(`![abs](/../../../etc/passwd)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr = string(html) + // Absolute paths starting with / are left unchanged by rewriter + is.True(elementAttrContains(htmlStr, "img", "src", "/../../../etc/passwd")) + + // Test that fragments don't leak when path traversal is blocked + md = []byte(`![escape](../../secret.png#anchor)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr = string(html) + // Should not contain the path or fragment in src attribute + is.True(!elementAttrContains(htmlStr, "img", "src", "secret.png") && !elementAttrContains(htmlStr, "img", "src", "#anchor")) +} + +func TestRenderMarkdownURLDetails(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123def456", + ReadmePath: "README.md", + } + + // Test fragments preserved for links + md := []byte(`[section](docs/guide.md#installation)`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/guide.md#installation")) + + // Test fragments preserved for images + md = []byte(`![diagram](docs/arch.png#diagram)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/arch.png?raw=1#diagram")) + + // Test directories link to /tree without trailing slash + md = []byte(`[docs](docs/)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/tree/abc123def456/docs")) + is.True(!strings.Contains(string(html), "docs/\"")) // no trailing slash in href + + // Test non-md files get ?raw=1 + md = []byte(`[archive](file.zip)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/file.zip?raw=1")) + + md = []byte(`[pdf](docs/manual.pdf)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/docs/manual.pdf?raw=1")) + + // Test uppercase .MD/.MARKDOWN files are treated as non-markdown + // (document current behavior - filepath.Ext is case-sensitive) + md = []byte(`[upper](README.MD)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + // .MD is not .md, so treated as regular file requiring ?raw=1 + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MD?raw=1")) + + md = []byte(`[markdown](README.MARKDOWN)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + // .MARKDOWN is not .markdown, so treated as regular file requiring ?raw=1 + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/README.MARKDOWN?raw=1")) +} + +func TestRenderMarkdownSpecialCharacters(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123def456", + ReadmePath: "README.md", + } + + // Test spaces in filename - bluemonday may strip invalid URLs + md := []byte(`![spaces](my_image.png)`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + // Should contain the repo path + is.True(strings.Contains(string(html), "/test-repo/blob/abc123def456/")) + + // Test unicode + md = []byte(`![unicode](文件.png)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(string(html) != "") +} + +func TestRenderMarkdownDangerousSchemes(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123def456", + ReadmePath: "README.md", + } + + // Test javascript: scheme - should be stripped by sanitizer + md := []byte(`[xss](javascript:alert('xss'))`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "javascript:")) + + // Test data: scheme - should be stripped + md = []byte(`![data](data:image/png;base64,abc123)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "data:image")) + + // Test http: scheme - should be stripped (only https allowed) + md = []byte(`[http](http://example.com)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + // Sanitizer should strip non-https schemes + is.True(!strings.Contains(string(html), "http://example.com") || !strings.Contains(string(html), "href=")) +} + +func TestRenderMarkdownWithoutContext(t *testing.T) { + is := is.New(t) + + // Test that rendering without context works (no rewriting) + md := []byte(`![image](image.png)`) + html, err := renderMarkdown(md, nil) + is.NoErr(err) + is.True(string(html) != "") + // Without context, relative URLs stay relative + is.True(strings.Contains(string(html), "image.png")) +} diff --git a/pkg/web/sanitizer_test.go b/pkg/web/sanitizer_test.go new file mode 100644 index 0000000000000000000000000000000000000000..2228522ec786c43f5bb5b530478982b4ec2d54ca --- /dev/null +++ b/pkg/web/sanitizer_test.go @@ -0,0 +1,481 @@ +package web + +import ( + "strings" + "testing" + + "github.com/matryer/is" + "golang.org/x/net/html" +) + +func TestSanitizerXSSProtection(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123", + ReadmePath: "README.md", + } + + // Test javascript: URL in link + md := []byte(`[click me](javascript:alert('xss'))`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "javascript:")) + + // Test javascript: URL in image + md = []byte(`![img](javascript:alert('xss'))`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "javascript:")) + + // Test data: URI in image + md = []byte(`![img](data:image/png;base64,iVBORw0KG)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "data:image")) + + // Test data: URI in link + md = []byte(`[link](data:text/html,)`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "data:text")) + + // Test onerror handler + md = []byte(``) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "onerror")) + + // Test onclick handler + md = []byte(`click`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "onclick")) + + // Test style attribute + md = []byte(`

test

`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "style=")) + is.True(!strings.Contains(string(html), "javascript")) + + // Test iframe injection + md = []byte(``) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "alert('xss')`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(!strings.Contains(string(html), "")) + is.True(strings.Contains(string(html), "")) + is.True(strings.Contains(string(html), "")) + + // Test allowed headings + md = []byte(`# H1 +## H2 +### H3`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), " quote`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + is.True(strings.Contains(string(html), "link`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr := string(html) + is.True(elementAttrAbsent(htmlStr, "a", "target")) + + // Test class attribute is stripped + md = []byte(`link`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr = string(html) + is.True(elementAttrAbsent(htmlStr, "a", "class")) + + // Test style attribute is stripped (already tested in XSS, but explicit here) + md = []byte(`link`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr = string(html) + is.True(elementAttrAbsent(htmlStr, "a", "style")) + + // Test onclick is stripped (already tested in XSS, but explicit here) + md = []byte(`click`) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr = string(html) + is.True(elementAttrAbsent(htmlStr, "a", "onclick")) + + // Test onerror is stripped (already tested in XSS, but explicit here) + md = []byte(``) + html, err = renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr = string(html) + is.True(elementAttrAbsent(htmlStr, "img", "onerror")) +} + +func TestSanitizerImageAttributes(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123", + ReadmePath: "README.md", + } + + // Test that width and height attributes are preserved on images + md := []byte(`test`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr := string(html) + is.True(elementAttrEquals(htmlStr, "img", "width", "100")) + is.True(elementAttrEquals(htmlStr, "img", "height", "50")) + is.True(elementAttrEquals(htmlStr, "img", "alt", "test")) +} + +func TestSanitizerHeadingIDs(t *testing.T) { + is := is.New(t) + + ctx := &ReadmeContext{ + RepoName: "test-repo", + CommitHash: "abc123", + ReadmePath: "README.md", + } + + // Test that heading IDs are preserved (generated by AutoHeadingID) + md := []byte(`# Installation + +Jump to [installation](#installation).`) + html, err := renderMarkdown(md, ctx) + is.NoErr(err) + htmlStr := string(html) + // Check that the heading has an id attribute set to "installation" + is.True(elementAttrEquals(htmlStr, "h1", "id", "installation")) + // Check that the anchor link is preserved + is.True(elementAttrContains(htmlStr, "a", "href", "#installation")) +} + +// HTML Testing Helpers +// These utilities help test HTML output by parsing the DOM instead of string matching. + +// findElement searches the HTML tree for an element matching the given tag name. +// Returns the first matching element node, or nil if not found. +func findElement(n *html.Node, tag string) *html.Node { + if n.Type == html.ElementNode && n.Data == tag { + return n + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + if found := findElement(c, tag); found != nil { + return found + } + } + return nil +} + +// findElementWithAttr searches for an element with a specific attribute value. +// Returns the first matching element, or nil if not found. +func findElementWithAttr(n *html.Node, tag, attrKey, attrValue string) *html.Node { + if n.Type == html.ElementNode && n.Data == tag { + if getAttr(n, attrKey) == attrValue { + return n + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + if found := findElementWithAttr(c, tag, attrKey, attrValue); found != nil { + return found + } + } + return nil +} + +// getAttr returns the value of an attribute, or empty string if not present. +func getAttr(n *html.Node, key string) string { + for _, attr := range n.Attr { + if attr.Key == key { + return attr.Val + } + } + return "" +} + +// hasAttr returns true if the element has the specified attribute (regardless of value). +func hasAttr(n *html.Node, key string) bool { + for _, attr := range n.Attr { + if attr.Key == key { + return true + } + } + return false +} + +// attrContains checks if an attribute value contains a substring. +func attrContains(n *html.Node, key, substr string) bool { + val := getAttr(n, key) + return strings.Contains(val, substr) +} + +// parseHTML parses an HTML string and returns the root node. +func parseHTML(htmlStr string) (*html.Node, error) { + return html.Parse(strings.NewReader(htmlStr)) +} + +// hasElementWithTag returns true if the HTML contains an element with the given tag. +func hasElementWithTag(htmlStr, tag string) bool { + doc, err := parseHTML(htmlStr) + if err != nil { + return false + } + return findElement(doc, tag) != nil +} + +// getElementAttr finds an element by tag and returns the value of the specified attribute. +// Returns empty string if element or attribute not found. +func getElementAttr(htmlStr, tag, attrKey string) string { + doc, err := parseHTML(htmlStr) + if err != nil { + return "" + } + elem := findElement(doc, tag) + if elem == nil { + return "" + } + return getAttr(elem, attrKey) +} + +// elementHasAttr checks if an element has a specific attribute key (regardless of value). +func elementHasAttr(htmlStr, tag, attrKey string) bool { + doc, err := parseHTML(htmlStr) + if err != nil { + return false + } + elem := findElement(doc, tag) + if elem == nil { + return false + } + return hasAttr(elem, attrKey) +} + +// elementAttrEquals checks if an element's attribute equals a specific value. +func elementAttrEquals(htmlStr, tag, attrKey, expected string) bool { + return getElementAttr(htmlStr, tag, attrKey) == expected +} + +// elementAttrContains checks if an element's attribute contains a substring. +func elementAttrContains(htmlStr, tag, attrKey, substr string) bool { + doc, err := parseHTML(htmlStr) + if err != nil { + return false + } + elem := findElement(doc, tag) + if elem == nil { + return false + } + return attrContains(elem, attrKey, substr) +} + +// elementAttrAbsent checks that an element does NOT have a specific attribute. +func elementAttrAbsent(htmlStr, tag, attrKey string) bool { + return !elementHasAttr(htmlStr, tag, attrKey) +} + +// elementAttrEmpty checks if an element's attribute is present but empty. +func elementAttrEmpty(htmlStr, tag, attrKey string) bool { + doc, err := parseHTML(htmlStr) + if err != nil { + return false + } + elem := findElement(doc, tag) + if elem == nil { + return false + } + if !hasAttr(elem, attrKey) { + return false + } + return getAttr(elem, attrKey) == "" +} diff --git a/pkg/web/webui.go b/pkg/web/webui.go index a0d7e51918c609b53896e91545fd621f2a007283..03a48eae5f64469b8e98fe475a288d82591b65f4 100644 --- a/pkg/web/webui.go +++ b/pkg/web/webui.go @@ -240,6 +240,13 @@ func renderHTML(w http.ResponseWriter, templateName string, data interface{}) { } w.Header().Set("Content-Type", "text/html; charset=utf-8") + + // Security headers + // Note: style-src 'unsafe-inline' is required for inline styles in templates (tree.html, overview.html) + w.Header().Set("Content-Security-Policy", "default-src 'self'; img-src 'self' https:; style-src 'self' 'unsafe-inline'; script-src 'self'; object-src 'none'; frame-ancestors 'self'; base-uri 'none'") + w.Header().Set("Referrer-Policy", "no-referrer") + w.Header().Set("X-Content-Type-Options", "nosniff") + if err := tmpl.ExecuteTemplate(w, "layout", data); err != nil { log.Debug("template execution failed", "template", templateName, "err", err) // Already started writing response, so we can't render an error page diff --git a/pkg/web/webui_blob.go b/pkg/web/webui_blob.go index 878156497e637ab1f047d9c593d72f8db044632b..3af2373e4271ffa131b3e7d72378f0f07b105796 100644 --- a/pkg/web/webui_blob.go +++ b/pkg/web/webui_blob.go @@ -94,7 +94,16 @@ func repoBlob(w http.ResponseWriter, r *http.Request) { var renderedHTML template.HTML if isMarkdown && !isBinaryContent(content) && !showSource { - renderedHTML, _ = renderMarkdown(content) + ctx := &ReadmeContext{ + RepoName: repo.Name(), + CommitHash: refObj.Reference.ID, + ReadmePath: path, + } + var err error + renderedHTML, err = renderMarkdown(content, ctx) + if err != nil { + logger.Debug("failed to render markdown", "repo", repo.Name(), "path", path, "err", err) + } } else if !isBinaryContent(content) { renderedHTML = highlightCode(path, content) } diff --git a/pkg/web/webui_helpers.go b/pkg/web/webui_helpers.go index 7beae31a84e4e9368d968463ddfb016935ea4b38..94d5b821cc8ef1fab14997d84f13bc436c0a9623 100644 --- a/pkg/web/webui_helpers.go +++ b/pkg/web/webui_helpers.go @@ -5,6 +5,7 @@ import ( "context" "html/template" "strings" + "sync" "github.com/charmbracelet/soft-serve/git" "github.com/charmbracelet/soft-serve/pkg/backend" @@ -14,12 +15,58 @@ import ( extension "github.com/yuin/goldmark/extension" "github.com/yuin/goldmark/parser" goldmarkhtml "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/util" ) +var ( + sanitizerPolicy *bluemonday.Policy + sanitizerPolicyOnce sync.Once +) + +// getSanitizerPolicy returns a cached sanitizer policy. +// The policy is safe for concurrent use after initialization. +// Note: Tests should not use t.Parallel() with functions that call renderMarkdown +// until we verify bluemonday.Policy initialization is thread-safe during sync.Once. +func getSanitizerPolicy() *bluemonday.Policy { + sanitizerPolicyOnce.Do(func() { + policy := bluemonday.NewPolicy() + + // Allowed tags + policy.AllowElements("p", "h1", "h2", "h3", "h4", "h5", "h6", + "ul", "ol", "li", "pre", "code", "blockquote", + "strong", "em", "del", "br", "hr", + "table", "thead", "tbody", "tr", "th", "td", + "center", "a", "img") + + // Allow id attributes on headings (generated by AutoHeadingID for anchor links) + policy.AllowAttrs("id").OnElements("h1", "h2", "h3", "h4", "h5", "h6") + + // Links: only https scheme and relative URLs, add nofollow/noreferrer + policy.AllowAttrs("href").OnElements("a") + policy.RequireNoFollowOnLinks(true) + policy.RequireNoReferrerOnLinks(true) + + // Images: only https scheme or repo-relative paths, allow descriptive attrs + policy.AllowAttrs("src", "alt", "title", "width", "height").OnElements("img") + + // URL schemes: only HTTPS for external URLs, relative URLs for repo paths + policy.AllowURLSchemes("https") + policy.AllowRelativeURLs(true) + + // Table alignment attributes + policy.AllowAttrs("align").Matching(bluemonday.SpaceSeparatedTokens).OnElements("th", "td") + + sanitizerPolicy = policy + }) + return sanitizerPolicy +} + // renderMarkdown converts markdown content to sanitized HTML. -func renderMarkdown(content []byte) (template.HTML, error) { +// If ctx is provided, relative URLs will be rewritten to point to repository files. +func renderMarkdown(content []byte, ctx *ReadmeContext) (template.HTML, error) { var buf bytes.Buffer - md := goldmark.New( + + mdOpts := []goldmark.Option{ goldmark.WithExtensions(extension.GFM), goldmark.WithParserOptions( parser.WithAutoHeadingID(), @@ -27,17 +74,24 @@ func renderMarkdown(content []byte) (template.HTML, error) { goldmark.WithRendererOptions( goldmarkhtml.WithUnsafe(), ), - ) + } + + // Add URL rewriter if context is provided + if ctx != nil { + rewriter := newURLRewriter(*ctx) + mdOpts = append(mdOpts, goldmark.WithParserOptions( + parser.WithASTTransformers(util.Prioritized(rewriter, 500)), + )) + } + + md := goldmark.New(mdOpts...) if err := md.Convert(content, &buf); err != nil { return "", err } - policy := bluemonday.UGCPolicy() - policy.AllowStyling() - policy.RequireNoFollowOnLinks(false) - policy.AllowElements("center") - sanitized := policy.SanitizeBytes(buf.Bytes()) + // Use cached sanitizer policy + sanitized := getSanitizerPolicy().SanitizeBytes(buf.Bytes()) return template.HTML(sanitized), nil } @@ -57,7 +111,7 @@ func getServerReadme(ctx context.Context, be *backend.Backend) (raw string, html return "", "", err } if readme != "" { - html, err := renderMarkdown([]byte(readme)) + html, err := renderMarkdown([]byte(readme), nil) if err != nil { return "", "", err } diff --git a/pkg/web/webui_overview.go b/pkg/web/webui_overview.go index 7b14bc97f29038e66a916da635a4ff2029129807..be0a0d77f80a843550c8cc9e12f1bb1288d5547f 100644 --- a/pkg/web/webui_overview.go +++ b/pkg/web/webui_overview.go @@ -44,9 +44,18 @@ func repoOverview(w http.ResponseWriter, r *http.Request) { var readmeHTML template.HTML if !isEmpty { head, _ := gr.HEAD() - readmeContent, _, err := backend.Readme(repo, head) + readmeContent, readmePath, err := backend.Readme(repo, head) if err == nil && readmeContent != "" { - readmeHTML, _ = renderMarkdown([]byte(readmeContent)) + ctx := &ReadmeContext{ + RepoName: repo.Name(), + CommitHash: head.Reference.ID, + ReadmePath: readmePath, + } + var mdErr error + readmeHTML, mdErr = renderMarkdown([]byte(readmeContent), ctx) + if mdErr != nil { + logger.Debug("failed to render readme markdown", "repo", repo.Name(), "path", readmePath, "err", mdErr) + } } }