1package web
2
3import (
4 "net/url"
5 "path"
6 "strings"
7
8 "github.com/yuin/goldmark/ast"
9 "github.com/yuin/goldmark/parser"
10 "github.com/yuin/goldmark/text"
11)
12
// ReadmeContext describes the README being rendered: which repository and
// commit it belongs to and where the file sits inside the repository. The URL
// rewriter uses it to turn relative destinations into repository URLs.
type ReadmeContext struct {
	// RepoName is the repository name (e.g., "myrepo").
	RepoName string
	// CommitHash is the full commit hash that rewritten URLs are pinned to.
	CommitHash string
	// ReadmePath is the path to the README file (e.g., "docs/README.md").
	ReadmePath string
}
19
20// urlRewriter is a Goldmark AST transformer that rewrites relative URLs in markdown.
21type urlRewriter struct {
22 ctx ReadmeContext
23}
24
25// newURLRewriter creates a new URL rewriter transformer.
26func newURLRewriter(ctx ReadmeContext) *urlRewriter {
27 return &urlRewriter{ctx: ctx}
28}
29
30// Transform implements ast.Transformer.
31func (r *urlRewriter) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
32 // Get the directory containing the README
33 readmeDir := path.Dir(r.ctx.ReadmePath)
34 if readmeDir == "." {
35 readmeDir = ""
36 }
37
38 // Walk the AST and rewrite links and images
39 ast.Walk(node, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
40 if !entering {
41 return ast.WalkContinue, nil
42 }
43
44 switch v := n.(type) {
45 case *ast.Link:
46 v.Destination = r.rewriteURL(v.Destination, readmeDir, false)
47 case *ast.Image:
48 v.Destination = r.rewriteURL(v.Destination, readmeDir, true)
49 }
50
51 return ast.WalkContinue, nil
52 })
53}
54
55// rewriteURL rewrites a single URL destination.
56func (r *urlRewriter) rewriteURL(dest []byte, readmeDir string, isImage bool) []byte {
57 destStr := string(dest)
58
59 // Skip empty URLs
60 if destStr == "" {
61 return dest
62 }
63
64 // Parse the URL
65 u, err := url.Parse(destStr)
66 if err != nil {
67 // Invalid URL, fail closed by returning empty
68 return []byte("")
69 }
70
71 // Skip absolute URLs (http://, https://, mailto:, etc.)
72 if u.Scheme != "" {
73 return dest
74 }
75
76 // Skip protocol-relative URLs (//example.com/path)
77 // These have no scheme but do have a host
78 if u.Host != "" {
79 return []byte("")
80 }
81
82 // Skip anchor-only links (#section)
83 if strings.HasPrefix(destStr, "#") {
84 return dest
85 }
86
87 // Skip absolute paths (starting with /)
88 if strings.HasPrefix(destStr, "/") {
89 return dest
90 }
91
92 // Now we have a relative URL - resolve it against the README directory
93 // Join README dir with the relative path
94 var resolvedPath string
95 if readmeDir != "" {
96 resolvedPath = path.Join(readmeDir, u.Path)
97 } else {
98 resolvedPath = u.Path
99 }
100
101 // Clean the path to resolve .. and .
102 resolvedPath = path.Clean(resolvedPath)
103
104 // Security check: reject any path that escapes the repository root
105 // After path.Clean, a path starting with ../ indicates traversal outside the repo
106 if strings.HasPrefix(resolvedPath, "../") || resolvedPath == ".." {
107 // Path tries to escape repo, return empty/invalid
108 return []byte("")
109 }
110
111 // For images, always rewrite to blob endpoint with ?raw=1
112 // For links, check if it's a file (has extension) or could be a directory/markdown file
113 if isImage {
114 // Build URL: /{repo}/blob/{commit}/{path}?raw=1
115 rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + "?raw=1"
116 // Preserve fragment if present
117 if u.Fragment != "" {
118 rewritten += "#" + u.Fragment
119 }
120 return []byte(rewritten)
121 }
122
123 // For links, determine if it's a file or navigation
124 ext := path.Ext(resolvedPath)
125 if ext != "" && ext != ".md" && ext != ".markdown" {
126 // It's a file (not markdown), serve raw
127 rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath + "?raw=1"
128 if u.Fragment != "" {
129 rewritten += "#" + u.Fragment
130 }
131 return []byte(rewritten)
132 } else if ext == ".md" || ext == ".markdown" {
133 // It's a markdown file, link to blob view (rendered)
134 rewritten := "/" + r.ctx.RepoName + "/blob/" + r.ctx.CommitHash + "/" + resolvedPath
135 if u.Fragment != "" {
136 rewritten += "#" + u.Fragment
137 }
138 return []byte(rewritten)
139 } else {
140 // No extension, could be a directory - link to tree view
141 rewritten := "/" + r.ctx.RepoName + "/tree/" + r.ctx.CommitHash + "/" + resolvedPath
142 if u.Fragment != "" {
143 rewritten += "#" + u.Fragment
144 }
145 return []byte(rewritten)
146 }
147}