markdown_rewriter.go

  1package web
  2
  3import (
  4	"net/url"
  5	"path"
  6	"strings"
  7
  8	"github.com/yuin/goldmark/ast"
  9	"github.com/yuin/goldmark/parser"
 10	"github.com/yuin/goldmark/text"
 11)
 12
 13// ReadmeContext contains information needed to rewrite URLs in README files.
 14type ReadmeContext struct {
 15	RepoName   string // Repository name (e.g., "myrepo")
 16	CommitHash string // Full commit hash to pin URLs to
 17	ReadmePath string // Path to README file (e.g., "docs/README.md")
 18}
 19
 20// urlRewriter is a Goldmark AST transformer that rewrites relative URLs in markdown.
 21type urlRewriter struct {
 22	ctx ReadmeContext
 23}
 24
 25// newURLRewriter creates a new URL rewriter transformer.
 26func newURLRewriter(ctx ReadmeContext) *urlRewriter {
 27	return &urlRewriter{ctx: ctx}
 28}
 29
 30// Transform implements ast.Transformer.
 31func (r *urlRewriter) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
 32	// Get the directory containing the README
 33	readmeDir := path.Dir(r.ctx.ReadmePath)
 34	if readmeDir == "." {
 35		readmeDir = ""
 36	}
 37
 38	// Walk the AST and rewrite links and images
 39	ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
 40		if !entering {
 41			return ast.WalkContinue, nil
 42		}
 43
 44		switch v := n.(type) {
 45		case *ast.Link:
 46			v.Destination = r.rewriteURL(v.Destination, readmeDir, false)
 47		case *ast.Image:
 48			v.Destination = r.rewriteURL(v.Destination, readmeDir, true)
 49		}
 50
 51		return ast.WalkContinue, nil
 52	})
 53}
 54
 55// rewriteURL rewrites a single URL destination.
 56func (r *urlRewriter) rewriteURL(dest []byte, readmeDir string, isImage bool) []byte {
 57	destStr := string(dest)
 58
 59	// Skip empty URLs
 60	if destStr == "" {
 61		return dest
 62	}
 63
 64	// Parse the URL
 65	u, err := url.Parse(destStr)
 66	if err != nil {
 67		// Invalid URL, fail closed by returning empty
 68		return []byte("")
 69	}
 70
 71	// Skip absolute URLs (http://, https://, mailto:, etc.)
 72	if u.Scheme != "" {
 73		return dest
 74	}
 75
 76	// Skip protocol-relative URLs (//example.com/path)
 77	// These have no scheme but do have a host
 78	if u.Host != "" {
 79		return []byte("")
 80	}
 81
 82	// Skip anchor-only links (#section)
 83	if strings.HasPrefix(destStr, "#") {
 84		return dest
 85	}
 86
 87	// Skip absolute paths (starting with /)
 88	if strings.HasPrefix(destStr, "/") {
 89		return dest
 90	}
 91
 92	// Now we have a relative URL - resolve it against the README directory
 93	// Join README dir with the relative path
 94	var resolvedPath string
 95	if readmeDir != "" {
 96		resolvedPath = path.Join(readmeDir, u.Path)
 97	} else {
 98		resolvedPath = u.Path
 99	}
100
101	// Clean the path to resolve .. and .
102	resolvedPath = path.Clean(resolvedPath)
103
104	// Security check: reject any path that escapes the repository root
105	// After path.Clean, a path starting with ../ indicates traversal outside the repo
106	if strings.HasPrefix(resolvedPath, "../") || resolvedPath == ".." {
107		// Path tries to escape repo, return empty/invalid
108		return []byte("")
109	}
110
111	// For images, always rewrite to blob endpoint with ?raw=1
112	// For links, check if it's a file (has extension) or could be a directory/markdown file
113	if isImage {
114		// Build URL: /{repo}/blob/{commit}/{path}?raw=1
115		rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + "?raw=1"
116		// Preserve fragment if present
117		if u.Fragment != "" {
118			rewritten += "#" + u.Fragment
119		}
120		return []byte(rewritten)
121	}
122
123	// For links, determine if it's a file or navigation
124	ext := path.Ext(resolvedPath)
125	if ext != "" && ext != ".md" && ext != ".markdown" {
126		// It's a file (not markdown), serve raw
127		rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + "?raw=1"
128		if u.Fragment != "" {
129			rewritten += "#" + u.Fragment
130		}
131		return []byte(rewritten)
132	} else if ext == ".md" || ext == ".markdown" {
133		// It's a markdown file, link to blob view (rendered)
134		rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath
135		if u.Fragment != "" {
136			rewritten += "#" + u.Fragment
137		}
138		return []byte(rewritten)
139	} else {
140		// No extension, could be a directory - link to tree view
141		rewritten := "/" + r.ctx.RepoName + "/tree/" + r.ctx.CommitHash + "/" + resolvedPath
142		if u.Fragment != "" {
143			rewritten += "#" + u.Fragment
144		}
145		return []byte(rewritten)
146	}
147}