fix: UTF-8 filename truncation (#1272)

Sean Corliss created

## What?

- Add UTF-8-safe truncation for sanitized attachment filenames
- Preserve filename extensions while truncating the base name on valid
rune boundaries
- Add regression coverage for long CJK and emoji filenames

## Why?

The previous byte-slicing logic could cut through a multi-byte UTF-8
sequence when shortening long attachment filenames, producing invalid
UTF-8 for names containing CJK characters or emoji.

Fixes #1102.

Change summary

main.go      | 21 +++++++++++++++++++--
main_test.go | 40 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 2 deletions(-)

Detailed changes

main.go 🔗

@@ -24,6 +24,7 @@ import (
 	"strings"
 	"sync"
 	"time"
+	"unicode/utf8"
 
 	tea "charm.land/bubbletea/v2"
 	"github.com/floatpane/matcha/backend"
@@ -3008,13 +3009,29 @@ func sanitizeFilename(name string) string {
 	if len(name) > maxFilenameLen {
 		ext := filepath.Ext(name)
 		if len(ext) > maxFilenameLen {
-			ext = ext[:maxFilenameLen]
+			ext = truncateUTF8(ext, maxFilenameLen)
 		}
-		name = name[:maxFilenameLen-len(ext)] + ext
+		base := strings.TrimSuffix(name, ext)
+		name = truncateUTF8(base, maxFilenameLen-len(ext)) + ext
 	}
 	return name
 }
 
+// truncateUTF8 shortens s to at most maxBytes bytes without cutting through
+// a multi-byte UTF-8 sequence.
+//
+// It returns "" when maxBytes is not positive and s itself when s already
+// fits. Otherwise the cut starts at maxBytes and moves back to the nearest
+// byte that begins a rune. Only the bytes at the cut are inspected: anything
+// earlier in s is kept as-is, so a name that already contains malformed
+// bytes is shortened rather than discarded.
+func truncateUTF8(s string, maxBytes int) string {
+	if maxBytes <= 0 {
+		return ""
+	}
+	if len(s) <= maxBytes {
+		return s
+	}
+	// s[cut] is the first byte that would be dropped. While it is a
+	// continuation byte, the rune it belongs to started earlier and would be
+	// split, so step back until the dropped part begins on a rune start.
+	cut := maxBytes
+	for cut > 0 && !utf8.RuneStart(s[cut]) {
+		cut--
+	}
+	return s[:cut]
+}
+
 func downloadAttachmentCmd(account *config.Account, uid uint32, msg tui.DownloadAttachmentMsg) tea.Cmd {
 	return func() tea.Msg {
 		// Download and decode the attachment using encoding provided in msg.Encoding.

main_test.go 🔗

@@ -0,0 +1,40 @@
+package main
+
+import (
+	"path/filepath"
+	"strings"
+	"testing"
+	"unicode/utf8"
+)
+
+// TestSanitizeFilenameTruncatesCJKOnUTF8Boundary passes sanitizeFilename a
+// name built from 100 copies of a three-byte CJK rune plus ".txt" and checks
+// that the result is valid UTF-8, is at most 255 bytes long, and still
+// carries the ".txt" extension.
+func TestSanitizeFilenameTruncatesCJKOnUTF8Boundary(t *testing.T) {
+	const ext = ".txt"
+	result := sanitizeFilename(strings.Repeat("文", 100) + ext)
+
+	// The first failing condition aborts the test, as in a chain of guards.
+	switch size := len(result); {
+	case !utf8.ValidString(result):
+		t.Fatalf("sanitizeFilename returned invalid UTF-8: %q", result)
+	case size > 255:
+		t.Fatalf("sanitizeFilename returned %d bytes, want at most 255", size)
+	case filepath.Ext(result) != ext:
+		t.Fatalf("sanitizeFilename lost extension: got %q", result)
+	}
+}
+
+// TestSanitizeFilenameTruncatesEmojiOnUTF8Boundary passes sanitizeFilename a
+// name built from 80 copies of a four-byte emoji plus ".log" and checks that
+// the result is valid UTF-8, is at most 255 bytes long, and still carries
+// the ".log" extension.
+func TestSanitizeFilenameTruncatesEmojiOnUTF8Boundary(t *testing.T) {
+	const ext = ".log"
+	result := sanitizeFilename(strings.Repeat("🚀", 80) + ext)
+
+	// The first failing condition aborts the test, as in a chain of guards.
+	switch size := len(result); {
+	case !utf8.ValidString(result):
+		t.Fatalf("sanitizeFilename returned invalid UTF-8: %q", result)
+	case size > 255:
+		t.Fatalf("sanitizeFilename returned %d bytes, want at most 255", size)
+	case filepath.Ext(result) != ext:
+		t.Fatalf("sanitizeFilename lost extension: got %q", result)
+	}
+}