fix(fetcher): honor charset in malformed Content-Type (#1214)

guyua9 created

## What?
- Fix `decodePart` so a part whose `Content-Type` header is malformed still has
its body run through the charset decoder instead of being returned as raw bytes.
- Add a best-effort fallback that recovers the `charset` parameter from the raw
header when the media type cannot be parsed, defaulting to UTF-8.
- Add regression tests for malformed `Content-Type` with ISO-8859-1
bytes and malformed `Content-Type` without a charset.

Fixes #1085.

## Why?
A malformed `Content-Type` header can still include a usable charset.
Returning raw bytes in that path makes non-UTF-8 email bodies render as
mojibake. Decoding with the best-effort charset keeps those messages
readable, and the body is still decoded as UTF-8 when no charset can be
recovered from the header.

Change summary

fetcher/fetcher.go      | 48 ++++++++++++++++++++++++++++--------------
fetcher/fetcher_test.go | 48 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+), 16 deletions(-)

Detailed changes

fetcher/fetcher.go 🔗

@@ -168,36 +168,52 @@ func deliveryHeadersMatch(data []byte, fetchEmail string, account *config.Accoun
 }
 
 func decodePart(reader io.Reader, header mail.PartHeader) (string, error) {
-	mediaType, params, err := mime.ParseMediaType(header.Get("Content-Type"))
-	if err != nil {
-		body, readErr := io.ReadAll(reader)
-		if readErr != nil {
-			return string(body), fmt.Errorf("fallback read after Content-Type parse error (%v): %w", err, readErr)
-		}
-		return string(body), nil
-	}
+	contentType := header.Get("Content-Type")
+	mediaType, params, parseErr := mime.ParseMediaType(contentType)
 
 	charset := "utf-8"
-	if params["charset"] != "" {
+	if parseErr != nil {
+		charset = bestEffortCharset(contentType)
+	} else if params["charset"] != "" {
 		charset = strings.ToLower(params["charset"])
 	}
 
+	decodedBody, err := decodeReaderWithCharset(reader, charset)
+	if err != nil {
+		return "", err
+	}
+
+	if parseErr == nil && strings.HasPrefix(mediaType, "multipart/") {
+		return "[This is a multipart message]", nil
+	}
+
+	return string(decodedBody), nil
+}
+
+func decodeReaderWithCharset(reader io.Reader, charset string) ([]byte, error) {
 	encoding, err := ianaindex.IANA.Encoding(charset)
 	if err != nil || encoding == nil {
 		encoding, _ = ianaindex.IANA.Encoding("utf-8")
 	}
 
 	transformReader := transform.NewReader(reader, encoding.NewDecoder())
-	decodedBody, err := ioutil.ReadAll(transformReader)
-	if err != nil {
-		return "", err
-	}
+	return ioutil.ReadAll(transformReader)
+}
 
-	if strings.HasPrefix(mediaType, "multipart/") {
-		return "[This is a multipart message]", nil
+func bestEffortCharset(contentType string) string {
+	for _, param := range strings.Split(contentType, ";") {
+		key, value, found := strings.Cut(param, "=")
+		if !found || !strings.EqualFold(strings.TrimSpace(key), "charset") {
+			continue
+		}
+
+		value = strings.Trim(strings.TrimSpace(value), `"`)
+		if value != "" {
+			return strings.ToLower(value)
+		}
 	}
 
-	return string(decodedBody), nil
+	return "utf-8"
 }
 
 func decodeHeader(header string) string {

fetcher/fetcher_test.go 🔗

@@ -1,11 +1,59 @@
 package fetcher
 
 import (
+	"bytes"
+	"strings"
 	"testing"
 
 	"github.com/floatpane/matcha/config"
 )
 
+type testPartHeader map[string]string
+
+func (h testPartHeader) Add(key, value string) {
+	h[key] = value
+}
+
+func (h testPartHeader) Del(key string) {
+	delete(h, key)
+}
+
+func (h testPartHeader) Get(key string) string {
+	return h[key]
+}
+
+func (h testPartHeader) Set(key, value string) {
+	h[key] = value
+}
+
+func TestDecodePartUsesCharsetWhenContentTypeIsMalformed(t *testing.T) {
+	header := testPartHeader{}
+	header.Set("Content-Type", "text/plain; charset=iso-8859-1; broken")
+
+	decoded, err := decodePart(bytes.NewReader([]byte{0x63, 0x61, 0x66, 0xe9}), header)
+	if err != nil {
+		t.Fatalf("decodePart() returned error: %v", err)
+	}
+
+	if decoded != "café" {
+		t.Fatalf("decodePart() = %q, want %q", decoded, "café")
+	}
+}
+
+func TestDecodePartFallsBackToUTF8WhenMalformedContentTypeHasNoCharset(t *testing.T) {
+	header := testPartHeader{}
+	header.Set("Content-Type", "text/plain; broken")
+
+	decoded, err := decodePart(strings.NewReader("hello"), header)
+	if err != nil {
+		t.Fatalf("decodePart() returned error: %v", err)
+	}
+
+	if decoded != "hello" {
+		t.Fatalf("decodePart() = %q, want %q", decoded, "hello")
+	}
+}
+
 // TestFetchEmails is an integration test that requires a live IMAP server and valid credentials.
 // NOTE: This test will be skipped if it cannot load a configuration file,
 // making it safe to run in a CI environment without credentials.