From 6735b1f5dcaf8f69c6f6f57724a1ee1edf516c80 Mon Sep 17 00:00:00 2001 From: Amolith Date: Sun, 21 Dec 2025 16:47:50 -0700 Subject: [PATCH] fix(body): decode HTML entities after sanitization Wraps bluemonday output with html.UnescapeString so that characters like quotes remain as literal "quotes" instead of &#34;quotes&#34;. Assisted-by: Claude Opus 4.5 via Crush --- wrapBody.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/wrapBody.go b/wrapBody.go index 37fbe72808ae44180cb328e6789ff8c3023d4f9d..7289bb1e0443c3c68a82c29de9abbe0420c04ba5 100644 --- a/wrapBody.go +++ b/wrapBody.go @@ -5,14 +5,20 @@ package main import ( + "html" "regexp" "strings" + + "github.com/microcosm-cc/bluemonday" ) var numberedListRegex = regexp.MustCompile(`^\d+\.\s`) func formatBody(body string) (string, error) { - lines := strings.Split(body, "\n") + p := bluemonday.UGCPolicy() + sanitized := html.UnescapeString(p.Sanitize(body)) + + lines := strings.Split(sanitized, "\n") var result []string var plainTextBuffer []string