fix: UTF-8 filename truncation (#1272)
Sean Corliss
created 1 month ago
## What?
- Add UTF-8-safe truncation for sanitized attachment filenames
- Preserve filename extensions while truncating the base name on valid
rune boundaries
- Add regression coverage for long CJK and emoji filenames
## Why?
The previous byte-slicing logic could cut through a multi-byte UTF-8
sequence when shortening long attachment filenames, producing invalid
UTF-8 for names containing CJK characters or emoji.
Fixes #1102.
Change summary
main.go | 21 +++++++++++++++++++--
main_test.go | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 2 deletions(-)
Detailed changes
@@ -24,6 +24,7 @@ import (
"strings"
"sync"
"time"
+ "unicode/utf8"
tea "charm.land/bubbletea/v2"
"github.com/floatpane/matcha/backend"
@@ -3008,13 +3009,29 @@ func sanitizeFilename(name string) string {
if len(name) > maxFilenameLen {
ext := filepath.Ext(name)
if len(ext) > maxFilenameLen {
- ext = ext[:maxFilenameLen]
+ ext = truncateUTF8(ext, maxFilenameLen)
}
- name = name[:maxFilenameLen-len(ext)] + ext
+ base := strings.TrimSuffix(name, ext)
+ name = truncateUTF8(base, maxFilenameLen-len(ext)) + ext
}
return name
}
// truncateUTF8 returns the longest prefix of s that is at most maxBytes bytes
// long and does not end in the middle of a UTF-8 encoded rune.
//
// A non-positive maxBytes yields "". A string that already fits is returned
// unchanged (it is not validated). Otherwise the cut point is moved back, by
// at most utf8.UTFMax-1 bytes, to the start of the rune that straddles the
// limit. Only the bytes around the cut are inspected, so malformed bytes
// earlier in s do not cause the rest of the prefix to be thrown away.
func truncateUTF8(s string, maxBytes int) string {
	if maxBytes <= 0 {
		return ""
	}
	if len(s) <= maxBytes {
		return s
	}

	// len(s) > maxBytes here, so s[cut] is always in range. If s[cut] starts a
	// rune, the prefix s[:cut] ends on a rune boundary; if it is a continuation
	// byte, the rune it belongs to would be split, so step back past it.
	cut := maxBytes
	// A rune has at most utf8.UTFMax-1 continuation bytes; bounding the walk
	// keeps a run of stray continuation bytes from eating the whole prefix.
	for cut > 0 && maxBytes-cut < utf8.UTFMax-1 && !utf8.RuneStart(s[cut]) {
		cut--
	}
	return s[:cut]
}
+
func downloadAttachmentCmd(account *config.Account, uid uint32, msg tui.DownloadAttachmentMsg) tea.Cmd {
return func() tea.Msg {
// Download and decode the attachment using encoding provided in msg.Encoding.
@@ -0,0 +1,40 @@
+package main
+
+import (
+ "path/filepath"
+ "strings"
+ "testing"
+ "unicode/utf8"
+)
+
+func TestSanitizeFilenameTruncatesCJKOnUTF8Boundary(t *testing.T) {
+ name := strings.Repeat("文", 100) + ".txt"
+
+ got := sanitizeFilename(name)
+
+ if !utf8.ValidString(got) {
+ t.Fatalf("sanitizeFilename returned invalid UTF-8: %q", got)
+ }
+ if len(got) > 255 {
+ t.Fatalf("sanitizeFilename returned %d bytes, want at most 255", len(got))
+ }
+ if filepath.Ext(got) != ".txt" {
+ t.Fatalf("sanitizeFilename lost extension: got %q", got)
+ }
+}
+
+func TestSanitizeFilenameTruncatesEmojiOnUTF8Boundary(t *testing.T) {
+ name := strings.Repeat("🚀", 80) + ".log"
+
+ got := sanitizeFilename(name)
+
+ if !utf8.ValidString(got) {
+ t.Fatalf("sanitizeFilename returned invalid UTF-8: %q", got)
+ }
+ if len(got) > 255 {
+ t.Fatalf("sanitizeFilename returned %d bytes, want at most 255", len(got))
+ }
+ if filepath.Ext(got) != ".log" {
+ t.Fatalf("sanitizeFilename lost extension: got %q", got)
+ }
+}