From 68452a3daea53843b9b388ec53a2cfd7673baac0 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Fri, 3 Apr 2026 21:55:25 -0400 Subject: [PATCH] Fix crash on non-ASCII thread titles in archive search (#53114) The archive view's `fuzzy_match_positions` used `chars().enumerate()` which produces **character indices**, not **byte indices**. When thread titles contain multi-byte UTF-8 characters (emoji, CJK, accented characters, etc.), these character indices don't correspond to valid byte boundaries, causing a panic in `HighlightedLabel::new` which asserts that highlight indices are valid UTF-8 boundaries. The fix switches to `char_indices()` and `eq_ignore_ascii_case()` to produce correct byte positions, matching the approach already used by the sidebar's version of the same function. Release Notes: - Fixed a crash when searching archived threads whose titles contain emoji or other non-ASCII characters. --- crates/agent_ui/src/threads_archive_view.rs | 70 +++++++++++++++++++-- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/crates/agent_ui/src/threads_archive_view.rs b/crates/agent_ui/src/threads_archive_view.rs index 9aca31e1edbe729fccecfc0dd8f0530d2aed2564..f0c02eefc34a03c5c45730ac4b53645c5b15a2e1 100644 --- a/crates/agent_ui/src/threads_archive_view.rs +++ b/crates/agent_ui/src/threads_archive_view.rs @@ -91,14 +91,16 @@ impl TimeBucket { } fn fuzzy_match_positions(query: &str, text: &str) -> Option> { - let query = query.to_lowercase(); - let text_lower = text.to_lowercase(); let mut positions = Vec::new(); let mut query_chars = query.chars().peekable(); - for (i, c) in text_lower.chars().enumerate() { - if query_chars.peek() == Some(&c) { - positions.push(i); - query_chars.next(); + for (byte_idx, candidate_char) in text.char_indices() { + if let Some(&query_char) = query_chars.peek() { + if candidate_char.eq_ignore_ascii_case(&query_char) { + positions.push(byte_idx); + query_chars.next(); + } + } else { + break; } } if query_chars.peek().is_none() { @@ -1283,3 +1285,59 @@ impl PickerDelegate for ProjectPickerDelegate { ) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_fuzzy_match_positions_returns_byte_indices() { + // "🔥abc" — the fire emoji is 4 bytes, so 'a' starts at byte 4, 'b' at 5, 'c' at 6. + let text = "🔥abc"; + let positions = fuzzy_match_positions("ab", text).expect("should match"); + assert_eq!(positions, vec![4, 5]); + + // Verify positions are valid char boundaries (this is the assertion that + // panicked before the fix). + for &pos in &positions { + assert!( + text.is_char_boundary(pos), + "position {pos} is not a valid UTF-8 boundary in {text:?}" + ); + } + } + + #[test] + fn test_fuzzy_match_positions_ascii_still_works() { + let positions = fuzzy_match_positions("he", "hello").expect("should match"); + assert_eq!(positions, vec![0, 1]); + } + + #[test] + fn test_fuzzy_match_positions_case_insensitive() { + let positions = fuzzy_match_positions("HE", "hello").expect("should match"); + assert_eq!(positions, vec![0, 1]); + } + + #[test] + fn test_fuzzy_match_positions_no_match() { + assert!(fuzzy_match_positions("xyz", "hello").is_none()); + } + + #[test] + fn test_fuzzy_match_positions_multi_byte_interior() { + // "café" — 'é' is 2 bytes (0xC3 0xA9), so 'f' starts at byte 4, 'é' at byte 5. + let text = "café"; + let positions = fuzzy_match_positions("fé", text).expect("should match"); + // 'c'=0, 'a'=1, 'f'=2, 'é'=3..4 — wait, let's verify: + // Actually: c=1 byte, a=1 byte, f=1 byte, é=2 bytes + // So byte positions: c=0, a=1, f=2, é=3 + assert_eq!(positions, vec![2, 3]); + for &pos in &positions { + assert!( + text.is_char_boundary(pos), + "position {pos} is not a valid UTF-8 boundary in {text:?}" + ); + } + } +}