fix(fetcher): nil-safe charset lookup (#1276)

Sai Asish Y created

Change summary

fetcher/fetcher.go      | 28 ++++++++++++++++++++--------
fetcher/fetcher_test.go | 21 +++++++++++++++++++++
2 files changed, 41 insertions(+), 8 deletions(-)

Detailed changes

fetcher/fetcher.go 🔗

@@ -30,7 +30,9 @@ import (
 	"github.com/emersion/go-pgpmail"
 	"github.com/floatpane/matcha/config"
 	"go.mozilla.org/pkcs7"
+	"golang.org/x/text/encoding"
 	"golang.org/x/text/encoding/ianaindex"
+	"golang.org/x/text/encoding/unicode"
 	"golang.org/x/text/transform"
 )
 
@@ -228,15 +230,22 @@ func decodePart(reader io.Reader, header mail.PartHeader) (string, error) {
 }
 
 func decodeReaderWithCharset(reader io.Reader, charset string) ([]byte, error) {
-	encoding, err := ianaindex.IANA.Encoding(charset)
-	if err != nil || encoding == nil {
-		encoding, _ = ianaindex.IANA.Encoding("utf-8")
-	}
-
-	transformReader := transform.NewReader(reader, encoding.NewDecoder())
+	enc := lookupCharsetEncoding(charset)
+	transformReader := transform.NewReader(reader, enc.NewDecoder())
 	return ioutil.ReadAll(transformReader)
 }
 
+// lookupCharsetEncoding returns the IANA-indexed encoding for charset, falling back to UTF-8 when the name is unknown or has no implementation; it never returns nil.
+func lookupCharsetEncoding(charset string) encoding.Encoding {
+	if enc, err := ianaindex.IANA.Encoding(charset); err == nil && enc != nil {
+		return enc
+	}
+	if enc, err := ianaindex.IANA.Encoding("utf-8"); err == nil && enc != nil {
+		return enc
+	}
+	return unicode.UTF8
+}
+
 func bestEffortCharset(contentType string) string {
 	for _, param := range strings.Split(contentType, ";") {
 		key, value, found := strings.Cut(param, "=")
@@ -256,11 +265,14 @@ func bestEffortCharset(contentType string) string {
 func decodeHeader(header string) string {
 	dec := new(mime.WordDecoder)
 	dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
-		encoding, err := ianaindex.IANA.Encoding(charset)
+		enc, err := ianaindex.IANA.Encoding(charset)
 		if err != nil {
 			return nil, err
 		}
-		return transform.NewReader(input, encoding.NewDecoder()), nil
+		if enc == nil {
+			return nil, fmt.Errorf("fetcher: no encoding implementation for charset %q", charset)
+		}
+		return transform.NewReader(input, enc.NewDecoder()), nil
 	}
 	decoded, err := dec.DecodeHeader(header)
 	if err != nil {

fetcher/fetcher_test.go 🔗

@@ -54,6 +54,27 @@ func TestDecodePartFallsBackToUTF8WhenMalformedContentTypeHasNoCharset(t *testin
 	}
 }
 
+func TestDecodeReaderWithCharsetSurvivesUnknownCharset(t *testing.T) {
+	decoded, err := decodeReaderWithCharset(strings.NewReader("hello"), "bogus-charset-name")
+	if err != nil {
+		t.Fatalf("decodeReaderWithCharset() returned error: %v", err)
+	}
+	if string(decoded) != "hello" {
+		t.Fatalf("decodeReaderWithCharset() = %q, want %q", string(decoded), "hello")
+	}
+}
+
+func TestLookupCharsetEncodingAlwaysReturnsNonNil(t *testing.T) {
+	cases := []string{"", "utf-8", "iso-8859-1", "bogus-charset-name", "this/is/not/real"}
+	for _, name := range cases {
+		t.Run(name, func(t *testing.T) {
+			if enc := lookupCharsetEncoding(name); enc == nil {
+				t.Fatalf("lookupCharsetEncoding(%q) returned nil", name)
+			}
+		})
+	}
+}
+
 // TestFetchEmails is an integration test that requires a live IMAP server and valid credentials.
 // NOTE: This test will be skipped if it cannot load a configuration file,
 // making it safe to run in a CI environment without credentials.