fix(view): sanitize HTML rendering (#1361)

FromSi created

## What?

HTML email rendering now sanitizes untrusted content before converting
it to terminal text.

Available after sanitization:
- safe text/layout tags such as paragraphs, lists, tables, `h1`/`h2`
headings, blockquotes, and inline formatting
- safe links using `http`, `https`, `mailto`, and `tel`
- image sources using `http`, `https`, `cid` (without a query or
fragment), and base64-encoded `data:image` payloads (GIF, JPEG, PNG, or
WebP)

Not available after sanitization:
- `javascript:`, `vbscript:`, `file:`, relative, and protocol-relative
links
- `data:` or `cid:` links in `<a href>`
- scripts, styles, event handlers like `onclick`, and unsafe attributes
- OSC 8 clickable image fallbacks for `cid:` and `data:image` sources

## Why?

Closes #653

HTML emails could include unsafe links or attributes such as
`javascript:` URLs and `on*` event handlers. In terminals with OSC 8
hyperlink support, these could become risky clickable terminal links.

This hardens HTML rendering for untrusted email content and addresses
#653.

Change summary

go.mod                                       |   3 
go.sum                                       |   6 
internal/htmlsanitizer/lib_sanitizer.go      |  58 ++++
internal/htmlsanitizer/lib_sanitizer_test.go | 277 ++++++++++++++++++++++
internal/htmlsanitizer/sanitizer.go          |   5 
view/html.go                                 |  47 +++
view/html_test.go                            | 157 ++++++++++++
7 files changed, 548 insertions(+), 5 deletions(-)

Detailed changes

go.mod 🔗

@@ -23,6 +23,7 @@ require (
 	github.com/hashicorp/golang-lru/v2 v2.0.7
 	github.com/knadh/go-pop3 v1.0.2
 	github.com/mattn/go-sixel v0.0.9
+	github.com/microcosm-cc/bluemonday v1.0.27
 	github.com/wagslane/go-password-validator v0.3.0
 	github.com/yuin/goldmark v1.8.2
 	github.com/yuin/gopher-lua v1.1.2
@@ -37,6 +38,7 @@ require (
 require (
 	github.com/andybalholm/cascadia v1.3.3 // indirect
 	github.com/atotto/clipboard v0.1.4 // indirect
+	github.com/aymerick/douceur v0.2.0 // indirect
 	github.com/charmbracelet/colorprofile v0.4.3 // indirect
 	github.com/charmbracelet/ultraviolet v0.0.0-20260416155717-489999b90468 // indirect
 	github.com/charmbracelet/x/term v0.2.2 // indirect
@@ -47,6 +49,7 @@ require (
 	github.com/cloudflare/circl v1.6.3 // indirect
 	github.com/danieljoos/wincred v1.2.3 // indirect
 	github.com/godbus/dbus/v5 v5.2.2 // indirect
+	github.com/gorilla/css v1.0.1 // indirect
 	github.com/lucasb-eyer/go-colorful v1.4.0 // indirect
 	github.com/mattn/go-runewidth v0.0.23 // indirect
 	github.com/muesli/cancelreader v0.2.2 // indirect

go.sum 🔗

@@ -25,6 +25,8 @@ github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z
 github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
 github.com/aymanbagabas/go-udiff v0.4.1 h1:OEIrQ8maEeDBXQDoGCbbTTXYJMYRCRO1fnodZ12Gv5o=
 github.com/aymanbagabas/go-udiff v0.4.1/go.mod h1:0L9PGwj20lrtmEMeyw4WKJ/TMyDtvAoK9bf2u/mNo3w=
+github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
+github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
 github.com/bwesterb/go-ristretto v1.2.3/go.mod h1:fUIoIZaG73pV5biE2Blr2xEzDoMj7NFEuV9ekS419A0=
 github.com/charmbracelet/colorprofile v0.4.3 h1:QPa1IWkYI+AOB+fE+mg/5/4HRMZcaXex9t5KX76i20Q=
 github.com/charmbracelet/colorprofile v0.4.3/go.mod h1:/zT4BhpD5aGFpqQQqw7a+VtHCzu+zrQtt1zhMt9mR4Q=
@@ -72,6 +74,8 @@ github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
 github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
+github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
 github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
 github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
 github.com/knadh/go-pop3 v1.0.2 h1:gbdtwzEYedLVos/vpebM2d73NTyZxEgjgRJ4S77HlzM=
@@ -84,6 +88,8 @@ github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3Ry
 github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
 github.com/mattn/go-sixel v0.0.9 h1:ncx/rVU35Ut7/6gpVk4deC4/Wp2js9fDKmFmWnzmGoY=
 github.com/mattn/go-sixel v0.0.9/go.mod h1:mfichvavqIDFW14LGU24ux/UZ/wF0/hG+4pUWOWrQgM=
+github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
+github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
 github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA=
 github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
 github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=

internal/htmlsanitizer/lib_sanitizer.go 🔗

@@ -0,0 +1,58 @@
+package htmlsanitizer
+
+import (
+	"encoding/base64"
+	"net/url"
+	"regexp"
+
+	"github.com/microcosm-cc/bluemonday"
+)
+
+type LibSanitizer struct {
+	policy *bluemonday.Policy
+}
+
+func NewLibSanitizer() LibSanitizer {
+	return LibSanitizer{policy: newPolicy()}
+}
+
+func (s LibSanitizer) SanitizeBytes(html []byte) []byte {
+	return s.policy.SanitizeBytes(html)
+}
+
+func newPolicy() *bluemonday.Policy {
+	p := bluemonday.NewPolicy()
+	linkURLPattern := regexp.MustCompile(`(?i)^(https?://|mailto:|tel:)`)
+	imageURLPattern := regexp.MustCompile(`(?i)^(https?://|cid:|data:image/)`)
+	dataImagePrefixPattern := regexp.MustCompile(`(?i)^image/(gif|jpe?g|png|webp);base64,`)
+	p.AllowElements(
+		"a", "b", "blockquote", "br", "code", "div", "em", "h1", "h2",
+		"i", "img", "li", "ol", "p", "pre", "span", "strong", "table",
+		"tbody", "td", "th", "thead", "tr", "u", "ul",
+	)
+	p.AllowAttrs("href").Matching(linkURLPattern).OnElements("a")
+	p.AllowAttrs("src").Matching(imageURLPattern).OnElements("img")
+	p.AllowAttrs("alt").OnElements("img")
+	p.AllowAttrs("cite").OnElements("blockquote")
+	p.RequireParseableURLs(true)
+	p.AllowURLSchemes("http", "https", "mailto", "tel")
+	p.AllowURLSchemeWithCustomPolicy("cid", func(u *url.URL) bool {
+		return u.Opaque != "" && u.RawQuery == "" && u.Fragment == ""
+	})
+	p.AllowURLSchemeWithCustomPolicy("data", func(u *url.URL) bool {
+		if u.RawQuery != "" || u.Fragment != "" {
+			return false
+		}
+		prefix := dataImagePrefixPattern.FindString(u.Opaque)
+		if prefix == "" {
+			return false
+		}
+		payload := u.Opaque[len(prefix):]
+		if _, err := base64.StdEncoding.DecodeString(payload); err == nil {
+			return true
+		}
+		_, err := base64.RawStdEncoding.DecodeString(payload)
+		return err == nil
+	})
+	return p
+}

internal/htmlsanitizer/lib_sanitizer_test.go 🔗

@@ -0,0 +1,277 @@
+package htmlsanitizer
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestLibSanitizerRemovesUnsafeHTML(t *testing.T) {
+	sanitizer := NewLibSanitizer()
+	input := []byte(`
+		<p onclick="alert(1)">Hello</p>
+		<script>alert(1)</script>
+		<style>body { background-image: url("javascript:alert(1)") }</style>
+		<a href="javascript:alert(1)">bad link</a>
+		<a href="https://example.com">good link</a>
+		<img src="file:///tmp/bad.png" alt="bad image">
+		<img src="cid:test@example.com" alt="cid image">
+		<img src="data:text/html,<script>alert(1)</script>" alt="bad data">
+		<img src="data:image/png;base64,iVBORw0KGgo=" alt="data image">
+	`)
+
+	got := string(sanitizer.SanitizeBytes(input))
+
+	for _, forbidden := range []string{
+		"onclick",
+		"<script",
+		"<style",
+		"javascript:",
+		"file:///tmp/bad.png",
+		"data:text/html",
+	} {
+		if strings.Contains(got, forbidden) {
+			t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+		}
+	}
+
+	for _, want := range []string{
+		`href="https://example.com"`,
+		`src="cid:test@example.com"`,
+		`src="data:image/png;base64,iVBORw0KGgo="`,
+	} {
+		if !strings.Contains(got, want) {
+			t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
+		}
+	}
+}
+
+func TestLibSanitizerDoesNotAllowDataOrCIDLinks(t *testing.T) {
+	sanitizer := NewLibSanitizer()
+	input := []byte(`
+		<a href="data:image/png;base64,iVBORw0KGgo=">data link</a>
+		<a href="cid:test@example.com">cid link</a>
+		<a href="ftp://example.com/file.txt">ftp link</a>
+		<a href="file:///tmp/bad.txt">file link</a>
+		<a href="vbscript:msgbox(1)">vbscript link</a>
+		<a href="//example.com/protocol-relative">protocol relative link</a>
+		<a href="/relative/path">relative link</a>
+		<a href=":not-a-url">broken link</a>
+	`)
+
+	got := string(sanitizer.SanitizeBytes(input))
+
+	for _, forbidden := range []string{
+		"href=\"data:image",
+		"href=\"cid:",
+		"href=\"ftp:",
+		"href=\"file:",
+		"href=\"vbscript:",
+		"href=\"//example.com",
+		"href=\"/relative",
+		"href=\":not-a-url",
+	} {
+		if strings.Contains(got, forbidden) {
+			t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+		}
+	}
+
+	for _, wantText := range []string{
+		"data link",
+		"cid link",
+		"ftp link",
+		"file link",
+		"vbscript link",
+		"protocol relative link",
+		"relative link",
+		"broken link",
+	} {
+		if !strings.Contains(got, wantText) {
+			t.Fatalf("sanitized HTML should keep link text %q:\n%s", wantText, got)
+		}
+	}
+}
+
+func TestLibSanitizerAllowsSafeLinks(t *testing.T) {
+	sanitizer := NewLibSanitizer()
+	input := []byte(`
+		<a href="http://example.com/path?x=1">http link</a>
+		<a href="https://example.com/path?x=1">https link</a>
+		<a href="HTTPS://example.com/path?x=1">uppercase https link</a>
+		<a href="mailto:security@example.com">mailto link</a>
+		<a href="MAILTO:security@example.com">uppercase mailto link</a>
+		<a href="tel:+15551234567">tel link</a>
+	`)
+
+	got := string(sanitizer.SanitizeBytes(input))
+
+	for _, want := range []string{
+		`href="http://example.com/path?x=1"`,
+		`href="https://example.com/path?x=1"`,
+		`href="https://example.com/path?x=1"`,
+		`href="mailto:security@example.com"`,
+		`href="mailto:security@example.com"`,
+		`href="tel:+15551234567"`,
+	} {
+		if !strings.Contains(got, want) {
+			t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
+		}
+	}
+}
+
+func TestLibSanitizerFiltersImageSources(t *testing.T) {
+	sanitizer := NewLibSanitizer()
+	input := []byte(`
+		<img src="http://example.com/image.png" alt="http image">
+		<img src="https://example.com/image.png" alt="https image">
+		<img src="cid:test@example.com" alt="cid image">
+		<img src="data:image/png;base64,iVBORw0KGgo=" alt="data image">
+		<img src="javascript:alert(1)" alt="javascript image">
+		<img src="file:///tmp/bad.png" alt="file image">
+		<img src="data:text/html,<script>alert(1)</script>" alt="html data image">
+		<img src="/relative.png" alt="relative image">
+	`)
+
+	got := string(sanitizer.SanitizeBytes(input))
+
+	for _, want := range []string{
+		`src="http://example.com/image.png"`,
+		`src="https://example.com/image.png"`,
+		`src="cid:test@example.com"`,
+		`src="data:image/png;base64,iVBORw0KGgo="`,
+	} {
+		if !strings.Contains(got, want) {
+			t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
+		}
+	}
+
+	for _, forbidden := range []string{
+		"src=\"javascript:",
+		"src=\"file:",
+		"src=\"data:text/html",
+		"src=\"/relative.png",
+	} {
+		if strings.Contains(got, forbidden) {
+			t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+		}
+	}
+}
+
+func TestLibSanitizerRemovesUnknownElementsButKeepsText(t *testing.T) {
+	sanitizer := NewLibSanitizer()
+	input := []byte(`
+		<form action="https://example.com"><input name="token" value="secret">form text</form>
+		<iframe src="https://example.com">iframe text</iframe>
+		<object data="https://example.com">object text</object>
+		<p>safe text</p>
+	`)
+
+	got := string(sanitizer.SanitizeBytes(input))
+
+	for _, forbidden := range []string{
+		"<form",
+		"<input",
+		"<iframe",
+		"<object",
+		"action=",
+		"value=\"secret\"",
+		"src=\"https://example.com\"",
+		"data=\"https://example.com\"",
+	} {
+		if strings.Contains(got, forbidden) {
+			t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+		}
+	}
+
+	for _, wantText := range []string{
+		"form text",
+		"safe text",
+	} {
+		if !strings.Contains(got, wantText) {
+			t.Fatalf("sanitized HTML should keep text %q:\n%s", wantText, got)
+		}
+	}
+}
+
+func TestLibSanitizerRemovesUnsafeGlobalAttributes(t *testing.T) {
+	sanitizer := NewLibSanitizer()
+	input := []byte(`
+		<p style="color: red" class="promo" data-secret="token" id="message">styled text</p>
+		<blockquote cite="https://example.com" onclick="alert(1)">quote text</blockquote>
+	`)
+
+	got := string(sanitizer.SanitizeBytes(input))
+
+	for _, forbidden := range []string{
+		"style=",
+		"class=",
+		"data-secret",
+		"id=",
+		"onclick=",
+	} {
+		if strings.Contains(got, forbidden) {
+			t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+		}
+	}
+
+	for _, want := range []string{
+		"styled text",
+		`cite="https://example.com"`,
+		"quote text",
+	} {
+		if !strings.Contains(got, want) {
+			t.Fatalf("sanitized HTML does not contain %q:\n%s", want, got)
+		}
+	}
+}
+
+func TestLibSanitizerRejectsCIDWithQueryOrFragment(t *testing.T) {
+	sanitizer := NewLibSanitizer()
+	input := []byte(`
+		<img src="cid:test@example.com?x=1" alt="cid query">
+		<img src="cid:test@example.com#frag" alt="cid fragment">
+		<img src="cid:test@example.com" alt="cid ok">
+	`)
+
+	got := string(sanitizer.SanitizeBytes(input))
+
+	for _, forbidden := range []string{
+		`src="cid:test@example.com?x=1"`,
+		`src="cid:test@example.com#frag"`,
+	} {
+		if strings.Contains(got, forbidden) {
+			t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+		}
+	}
+
+	if !strings.Contains(got, `src="cid:test@example.com"`) {
+		t.Fatalf("sanitized HTML should keep clean cid source:\n%s", got)
+	}
+}
+
+func TestLibSanitizerRejectsInvalidDataImages(t *testing.T) {
+	sanitizer := NewLibSanitizer()
+	input := []byte(`
+		<img src="data:image/png;base64,not base64!" alt="invalid base64">
+		<img src="data:image/svg+xml;base64,PHN2Zy8+" alt="svg data">
+		<img src="data:image/png;base64,iVBORw0KGgo=" alt="png data">
+		<img src="data:image/png;base64,iVBORw0KGgo" alt="raw png data">
+	`)
+
+	got := string(sanitizer.SanitizeBytes(input))
+
+	for _, forbidden := range []string{
+		"not base64",
+		"data:image/svg+xml",
+	} {
+		if strings.Contains(got, forbidden) {
+			t.Fatalf("sanitized HTML contains %q:\n%s", forbidden, got)
+		}
+	}
+
+	if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo="`) {
+		t.Fatalf("sanitized HTML should keep valid png data URI:\n%s", got)
+	}
+	if !strings.Contains(got, `src="data:image/png;base64,iVBORw0KGgo"`) {
+		t.Fatalf("sanitized HTML should keep valid unpadded png data URI:\n%s", got)
+	}
+}

view/html.go 🔗

@@ -13,12 +13,15 @@ import (
 
 	"charm.land/lipgloss/v2"
 	"github.com/floatpane/matcha/clib"
+	"github.com/floatpane/matcha/internal/htmlsanitizer"
 	"github.com/floatpane/matcha/internal/httpclient"
 	"github.com/floatpane/matcha/internal/loglevel"
 	"github.com/floatpane/matcha/theme"
 	lru "github.com/hashicorp/golang-lru/v2"
 )
 
+var htmlSanitizer htmlsanitizer.Sanitizer = htmlsanitizer.NewLibSanitizer()
+
 const termGhostty = "ghostty"
 
 func linkStyle() lipgloss.Style {
@@ -107,6 +110,8 @@ func hyperlinkSupported() bool {
 
 // hyperlink formats a string as either a terminal-clickable hyperlink or plain text with URL.
 func hyperlink(url, text string) string {
+	url = strings.TrimSpace(url)
+	text = stripTerminalControls(text)
 	if text == "" {
 		text = url
 	}
@@ -124,6 +129,24 @@ func hyperlink(url, text string) string {
 	return fmt.Sprintf("%s <%s>", linkStyle().Render(text), linkStyle().Render(url))
 }
 
+func stripTerminalControls(s string) string {
+	return strings.Map(func(r rune) rune {
+		if r == '\n' || r == '\t' {
+			return r
+		}
+		if r < 0x20 || r == 0x7f || r == 0x9c {
+			return -1
+		}
+		return r
+	}, s)
+}
+
+func hasTerminalControls(s string) bool {
+	return strings.IndexFunc(s, func(r rune) bool {
+		return r < 0x20 || r == 0x7f || r == 0x9c
+	}) != -1
+}
+
 func decodeQuotedPrintable(s string) (string, error) {
 	reader := quotedprintable.NewReader(strings.NewReader(s))
 	body, err := io.ReadAll(reader)
@@ -589,6 +612,7 @@ func processBody(rawBody, mimeType string, inline map[string]string, h1Style, h2
 	} else {
 		htmlBody = markdownToHTML([]byte(decodedBody))
 	}
+	htmlBody = htmlSanitizer.SanitizeBytes(htmlBody)
 
 	result, placements, err := renderHTMLToText(htmlBody, inline, h1Style, h2Style, disableImages)
 	if err != nil {
@@ -601,7 +625,8 @@ func processBody(rawBody, mimeType string, inline map[string]string, h1Style, h2
 	// keep these alive. Retry through the markdown pre-pass when the direct
 	// HTML path produces nothing.
 	if directHTML && strings.TrimSpace(result) == "" {
-		result, placements, err = renderHTMLToText(markdownToHTML([]byte(decodedBody)), inline, h1Style, h2Style, disableImages)
+		fallbackHTML := htmlSanitizer.SanitizeBytes(markdownToHTML([]byte(decodedBody)))
+		result, placements, err = renderHTMLToText(fallbackHTML, inline, h1Style, h2Style, disableImages)
 		if err != nil {
 			return "", nil, err
 		}
@@ -643,11 +668,18 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl
 			text.WriteString("\n\n")
 
 		case clib.HElemLink:
-			text.WriteString(hyperlink(elem.Attr1, elem.Text))
+			if hasTerminalControls(elem.Attr1) {
+				text.WriteString(stripTerminalControls(elem.Text))
+			} else {
+				text.WriteString(hyperlink(elem.Attr1, elem.Text))
+			}
 
 		case clib.HElemImage:
-			src := elem.Attr1
-			alt := elem.Attr2
+			src := strings.TrimSpace(elem.Attr1)
+			alt := stripTerminalControls(elem.Attr2)
+			if hasTerminalControls(src) {
+				continue
+			}
 
 			if !disableImages && imageProtocolSupported() {
 				var payload string
@@ -685,7 +717,7 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl
 				}
 				debugImageProtocol("no payload for src=%s", src)
 			}
-			if hyperlinkSupported() {
+			if isRemoteImageURL(src) && hyperlinkSupported() {
 				fmt.Fprintf(&text, "\n %s \n", hyperlink(src, fmt.Sprintf("[Click here to view image: %s]", alt)))
 			} else {
 				fmt.Fprintf(&text, "\n %s \n", linkStyle().Render(fmt.Sprintf("[Image: %s, %s]", alt, src)))
@@ -755,6 +787,11 @@ func renderHTMLToText(htmlBody []byte, inline map[string]string, h1Style, h2Styl
 	return result, placements, nil
 }
 
+func isRemoteImageURL(src string) bool {
+	src = strings.ToLower(src)
+	return strings.HasPrefix(src, "http://") || strings.HasPrefix(src, "https://")
+}
+
 func tableHeaderStyle() lipgloss.Style {
 	return lipgloss.NewStyle().Bold(true).Foreground(theme.ActiveTheme.Accent)
 }

view/html_test.go 🔗

@@ -670,6 +670,163 @@ func TestProcessBodyWithHyperlinkSupport(t *testing.T) {
 	}
 }
 
+func TestProcessBodySanitizesUnsafeHTMLLinks(t *testing.T) {
+	origTerm := os.Getenv("TERM")
+	origTermProgram := os.Getenv("TERM_PROGRAM")
+	origVTEVersion := os.Getenv("VTE_VERSION")
+	defer func() {
+		os.Setenv("TERM", origTerm)
+		os.Setenv("TERM_PROGRAM", origTermProgram)
+		os.Setenv("VTE_VERSION", origVTEVersion)
+	}()
+
+	os.Setenv("TERM", "xterm-kitty")
+	os.Setenv("TERM_PROGRAM", "")
+	os.Unsetenv("VTE_VERSION")
+
+	h1Style := lipgloss.NewStyle()
+	h2Style := lipgloss.NewStyle()
+	bodyStyle := lipgloss.NewStyle()
+
+	tests := []struct {
+		name              string
+		input             string
+		wantContains      string
+		forbiddenContains []string
+	}{
+		{
+			name:         "javascript link is rendered as text only",
+			input:        `<a href="javascript:alert(1)">Click here</a>`,
+			wantContains: "Click here",
+			forbiddenContains: []string{
+				"javascript:",
+				"\x1b]8;;javascript:",
+			},
+		},
+		{
+			name:         "mixed-case javascript link is rejected",
+			input:        `<a href="JaVaScRiPt:alert(1)">Click here</a>`,
+			wantContains: "Click here",
+			forbiddenContains: []string{
+				"JaVaScRiPt:",
+				"javascript:",
+			},
+		},
+		{
+			name:         "unsafe image source is not linked",
+			input:        `<img src="javascript:alert(1)" alt="bad image">After`,
+			wantContains: "After",
+			forbiddenContains: []string{
+				"javascript:",
+				"bad image",
+				"Click here to view image",
+			},
+		},
+		{
+			name:         "data image href is not rendered as a link",
+			input:        `<a href="data:image/png;base64,iVBORw0KGgo=">data link</a>`,
+			wantContains: "data link",
+			forbiddenContains: []string{
+				"data:image",
+				"\x1b]8;;data:",
+			},
+		},
+		{
+			name:         "cid href is not rendered as a link",
+			input:        `<a href="cid:test-image@example.com">cid link</a>`,
+			wantContains: "cid link",
+			forbiddenContains: []string{
+				"cid:test-image",
+				"\x1b]8;;cid:",
+			},
+		},
+		{
+			name:         "OSC control characters are stripped from safe links",
+			input:        "<a href=\"https://example.com/\x1b]8;;file:///tmp/pwn\x07\">safe</a>",
+			wantContains: "safe",
+			forbiddenContains: []string{
+				"\x1b]8;;file:",
+				"file:///tmp/pwn",
+				"\x07",
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			processed, _, err := ProcessBody(tt.input, BodyMIMETypeHTML, h1Style, h2Style, bodyStyle, false)
+			if err != nil {
+				t.Fatalf("ProcessBody() failed: %v", err)
+			}
+			if !strings.Contains(processed, tt.wantContains) {
+				t.Fatalf("processed body does not contain %q:\n%q", tt.wantContains, processed)
+			}
+			for _, forbidden := range tt.forbiddenContains {
+				if strings.Contains(processed, forbidden) {
+					t.Fatalf("processed body contains forbidden %q:\n%q", forbidden, processed)
+				}
+			}
+		})
+	}
+}
+
+func TestProcessBodyDoesNotHyperlinkNonRemoteImageFallbacks(t *testing.T) {
+	t.Setenv("TERM", "xterm")
+	t.Setenv("TERM_PROGRAM", "")
+	t.Setenv("WEZTERM_EXECUTABLE", "/usr/bin/wezterm")
+
+	h1Style := lipgloss.NewStyle()
+	h2Style := lipgloss.NewStyle()
+	bodyStyle := lipgloss.NewStyle()
+
+	input := `
+		<img src="data:image/png;base64,iVBORw0KGgo=" alt="data image">
+		<img src="cid:test-image@example.com" alt="cid image">
+	`
+
+	processed, _, err := ProcessBody(input, BodyMIMETypeHTML, h1Style, h2Style, bodyStyle, true)
+	if err != nil {
+		t.Fatalf("ProcessBody() failed: %v", err)
+	}
+
+	for _, want := range []string{
+		"[Image: data image, data:image/png;base64,iVBORw0KGgo=]",
+		"[Image: cid image, cid:test-image@example.com]",
+	} {
+		if !strings.Contains(processed, want) {
+			t.Fatalf("processed body does not contain %q:\n%q", want, processed)
+		}
+	}
+
+	for _, forbidden := range []string{
+		"Click here to view image",
+		"\x1b]8;;data:",
+		"\x1b]8;;cid:",
+	} {
+		if strings.Contains(processed, forbidden) {
+			t.Fatalf("processed body contains forbidden %q:\n%q", forbidden, processed)
+		}
+	}
+}
+
+func TestIsRemoteImageURLAllowsUppercaseHTTPSScheme(t *testing.T) {
+	tests := []struct {
+		src  string
+		want bool
+	}{
+		{src: "http://example.com/image.png", want: true},
+		{src: "HTTPS://example.com/image.png", want: true},
+		{src: "cid:test-image@example.com", want: false},
+		{src: "data:image/png;base64,iVBORw0KGgo=", want: false},
+	}
+
+	for _, tt := range tests {
+		if got := isRemoteImageURL(tt.src); got != tt.want {
+			t.Fatalf("isRemoteImageURL(%q) = %v, want %v", tt.src, got, tt.want)
+		}
+	}
+}
+
 func TestProcessBodyWithImageProtocol(t *testing.T) {
 	// Save original environment variables
 	origTerm := os.Getenv("TERM")