@@ -30,7 +30,9 @@ import (
"github.com/emersion/go-pgpmail"
"github.com/floatpane/matcha/config"
"go.mozilla.org/pkcs7"
+ "golang.org/x/text/encoding"
"golang.org/x/text/encoding/ianaindex"
+ "golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
@@ -228,15 +230,22 @@ func decodePart(reader io.Reader, header mail.PartHeader) (string, error) {
}
func decodeReaderWithCharset(reader io.Reader, charset string) ([]byte, error) {
- encoding, err := ianaindex.IANA.Encoding(charset)
- if err != nil || encoding == nil {
- encoding, _ = ianaindex.IANA.Encoding("utf-8")
- }
-
- transformReader := transform.NewReader(reader, encoding.NewDecoder())
+ enc := lookupCharsetEncoding(charset)
+ transformReader := transform.NewReader(reader, enc.NewDecoder())
return ioutil.ReadAll(transformReader)
}
+// lookupCharsetEncoding resolves a charset name, falling back to UTF-8.
+func lookupCharsetEncoding(charset string) encoding.Encoding {
+ if enc, err := ianaindex.IANA.Encoding(charset); err == nil && enc != nil {
+ return enc
+ }
+ if enc, err := ianaindex.IANA.Encoding("utf-8"); err == nil && enc != nil {
+ return enc
+ }
+ return unicode.UTF8
+}
+
func bestEffortCharset(contentType string) string {
for _, param := range strings.Split(contentType, ";") {
key, value, found := strings.Cut(param, "=")
@@ -256,11 +265,14 @@ func bestEffortCharset(contentType string) string {
func decodeHeader(header string) string {
dec := new(mime.WordDecoder)
dec.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
- encoding, err := ianaindex.IANA.Encoding(charset)
+ enc, err := ianaindex.IANA.Encoding(charset)
if err != nil {
return nil, err
}
- return transform.NewReader(input, encoding.NewDecoder()), nil
+ if enc == nil {
+ return nil, fmt.Errorf("fetcher: no encoding implementation for charset %q", charset)
+ }
+ return transform.NewReader(input, enc.NewDecoder()), nil
}
decoded, err := dec.DecodeHeader(header)
if err != nil {
@@ -54,6 +54,27 @@ func TestDecodePartFallsBackToUTF8WhenMalformedContentTypeHasNoCharset(t *testin
}
}
+func TestDecodeReaderWithCharsetSurvivesUnknownCharset(t *testing.T) {
+ decoded, err := decodeReaderWithCharset(strings.NewReader("hello"), "bogus-charset-name")
+ if err != nil {
+ t.Fatalf("decodeReaderWithCharset() returned error: %v", err)
+ }
+ if string(decoded) != "hello" {
+ t.Fatalf("decodeReaderWithCharset() = %q, want %q", string(decoded), "hello")
+ }
+}
+
+func TestLookupCharsetEncodingAlwaysReturnsNonNil(t *testing.T) {
+ cases := []string{"", "utf-8", "iso-8859-1", "bogus-charset-name", "this/is/not/real"}
+ for _, name := range cases {
+ t.Run(name, func(t *testing.T) {
+ if enc := lookupCharsetEncoding(name); enc == nil {
+ t.Fatalf("lookupCharsetEncoding(%q) returned nil", name)
+ }
+ })
+ }
+}
+
// TestFetchEmails is an integration test that requires a live IMAP server and valid credentials.
// NOTE: This test will be skipped if it cannot load a configuration file,
// making it safe to run in a CI environment without credentials.