From b858d589d63522352977c915614fc9dd23a143cc Mon Sep 17 00:00:00 2001
From: Sean Corliss <1108.han@gmail.com>
Date: Mon, 11 May 2026 21:46:35 +0800
Subject: [PATCH] fix: UTF-8 filename truncation (#1272)

## What?

- Add UTF-8-safe truncation for sanitized attachment filenames
- Preserve filename extensions while truncating the base name on valid rune boundaries
- Add regression coverage for long CJK and emoji filenames

## Why?

The previous byte-slicing logic could cut through a multi-byte UTF-8 sequence when shortening long attachment filenames, producing invalid UTF-8 for names containing CJK characters or emoji.

Fixes #1102.
---
 main.go      | 21 +++++++++++++++++++--
 main_test.go | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 main_test.go

diff --git a/main.go b/main.go
index 498d3403fd43509cbaac9d37ebaa15770ee259df..830a77bc624c153e344fdb47eb3e095ff8c5beea 100644
--- a/main.go
+++ b/main.go
@@ -24,6 +24,7 @@ import (
 	"strings"
 	"sync"
 	"time"
+	"unicode/utf8"
 
 	tea "charm.land/bubbletea/v2"
 	"github.com/floatpane/matcha/backend"
@@ -3008,13 +3009,29 @@ func sanitizeFilename(name string) string {
 	if len(name) > maxFilenameLen {
 		ext := filepath.Ext(name)
 		if len(ext) > maxFilenameLen {
-			ext = ext[:maxFilenameLen]
+			ext = truncateUTF8(ext, maxFilenameLen)
 		}
-		name = name[:maxFilenameLen-len(ext)] + ext
+		base := strings.TrimSuffix(name, ext)
+		name = truncateUTF8(base, maxFilenameLen-len(ext)) + ext
 	}
 	return name
 }
 
+func truncateUTF8(s string, maxBytes int) string {
+	if maxBytes <= 0 {
+		return ""
+	}
+	if len(s) <= maxBytes {
+		return s
+	}
+	s = s[:maxBytes]
+	for !utf8.ValidString(s) {
+		_, size := utf8.DecodeLastRuneInString(s)
+		s = s[:len(s)-size]
+	}
+	return s
+}
+
 func downloadAttachmentCmd(account *config.Account, uid uint32, msg tui.DownloadAttachmentMsg) tea.Cmd {
 	return func() tea.Msg {
 		// Download and decode the attachment using encoding provided in msg.Encoding.
diff --git a/main_test.go b/main_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..ea0b44882a2b1b86022a5eb58f749ebc0d653ab2
--- /dev/null
+++ b/main_test.go
@@ -0,0 +1,40 @@
+package main
+
+import (
+	"path/filepath"
+	"strings"
+	"testing"
+	"unicode/utf8"
+)
+
+func TestSanitizeFilenameTruncatesCJKOnUTF8Boundary(t *testing.T) {
+	name := strings.Repeat("文", 100) + ".txt"
+
+	got := sanitizeFilename(name)
+
+	if !utf8.ValidString(got) {
+		t.Fatalf("sanitizeFilename returned invalid UTF-8: %q", got)
+	}
+	if len(got) > 255 {
+		t.Fatalf("sanitizeFilename returned %d bytes, want at most 255", len(got))
+	}
+	if filepath.Ext(got) != ".txt" {
+		t.Fatalf("sanitizeFilename lost extension: got %q", got)
+	}
+}
+
+func TestSanitizeFilenameTruncatesEmojiOnUTF8Boundary(t *testing.T) {
+	name := strings.Repeat("🚀", 80) + ".log"
+
+	got := sanitizeFilename(name)
+
+	if !utf8.ValidString(got) {
+		t.Fatalf("sanitizeFilename returned invalid UTF-8: %q", got)
+	}
+	if len(got) > 255 {
+		t.Fatalf("sanitizeFilename returned %d bytes, want at most 255", len(got))
+	}
+	if filepath.Ext(got) != ".log" {
+		t.Fatalf("sanitizeFilename lost extension: got %q", got)
+	}
+}