diff --git a/fetcher/fetcher.go b/fetcher/fetcher.go index 5cf6bac65b202a9f6ae5cf8846c8e7585387b09a..b36345a0601c723110fccc77edc862a7244367be 100644 --- a/fetcher/fetcher.go +++ b/fetcher/fetcher.go @@ -168,36 +168,52 @@ func deliveryHeadersMatch(data []byte, fetchEmail string, account *config.Accoun } func decodePart(reader io.Reader, header mail.PartHeader) (string, error) { - mediaType, params, err := mime.ParseMediaType(header.Get("Content-Type")) - if err != nil { - body, readErr := io.ReadAll(reader) - if readErr != nil { - return string(body), fmt.Errorf("fallback read after Content-Type parse error (%v): %w", err, readErr) - } - return string(body), nil - } + contentType := header.Get("Content-Type") + mediaType, params, parseErr := mime.ParseMediaType(contentType) charset := "utf-8" - if params["charset"] != "" { + if parseErr != nil { + charset = bestEffortCharset(contentType) + } else if params["charset"] != "" { charset = strings.ToLower(params["charset"]) } + decodedBody, err := decodeReaderWithCharset(reader, charset) + if err != nil { + return "", err + } + + if parseErr == nil && strings.HasPrefix(mediaType, "multipart/") { + return "[This is a multipart message]", nil + } + + return string(decodedBody), nil +} + +func decodeReaderWithCharset(reader io.Reader, charset string) ([]byte, error) { encoding, err := ianaindex.IANA.Encoding(charset) if err != nil || encoding == nil { encoding, _ = ianaindex.IANA.Encoding("utf-8") } transformReader := transform.NewReader(reader, encoding.NewDecoder()) - decodedBody, err := ioutil.ReadAll(transformReader) - if err != nil { - return "", err - } + return ioutil.ReadAll(transformReader) +} - if strings.HasPrefix(mediaType, "multipart/") { - return "[This is a multipart message]", nil +func bestEffortCharset(contentType string) string { + for _, param := range strings.Split(contentType, ";") { + key, value, found := strings.Cut(param, "=") + if !found || !strings.EqualFold(strings.TrimSpace(key), "charset") { + continue + } + + value = strings.Trim(strings.TrimSpace(value), `"`) + if value != "" { + return strings.ToLower(value) + } } - return string(decodedBody), nil + return "utf-8" } func decodeHeader(header string) string { diff --git a/fetcher/fetcher_test.go b/fetcher/fetcher_test.go index fb4119ac5aaa322cb6cb9f8c6037558e69e98bac..9937ad2dadfb95592b60adcf2b6db7bca81ec7ca 100644 --- a/fetcher/fetcher_test.go +++ b/fetcher/fetcher_test.go @@ -1,11 +1,59 @@ package fetcher import ( + "bytes" + "strings" "testing" "github.com/floatpane/matcha/config" ) +type testPartHeader map[string]string + +func (h testPartHeader) Add(key, value string) { + h[key] = value +} + +func (h testPartHeader) Del(key string) { + delete(h, key) +} + +func (h testPartHeader) Get(key string) string { + return h[key] +} + +func (h testPartHeader) Set(key, value string) { + h[key] = value +} + +func TestDecodePartUsesCharsetWhenContentTypeIsMalformed(t *testing.T) { + header := testPartHeader{} + header.Set("Content-Type", "text/plain; charset=iso-8859-1; broken") + + decoded, err := decodePart(bytes.NewReader([]byte{0x63, 0x61, 0x66, 0xe9}), header) + if err != nil { + t.Fatalf("decodePart() returned error: %v", err) + } + + if decoded != "café" { + t.Fatalf("decodePart() = %q, want %q", decoded, "café") + } +} + +func TestDecodePartFallsBackToUTF8WhenMalformedContentTypeHasNoCharset(t *testing.T) { + header := testPartHeader{} + header.Set("Content-Type", "text/plain; broken") + + decoded, err := decodePart(strings.NewReader("hello"), header) + if err != nil { + t.Fatalf("decodePart() returned error: %v", err) + } + + if decoded != "hello" { + t.Fatalf("decodePart() = %q, want %q", decoded, "hello") + } +} + // TestFetchEmails is an integration test that requires a live IMAP server and valid credentials. // NOTE: This test will be skipped if it cannot load a configuration file, // making it safe to run in a CI environment without credentials.