From 7655e22ff5941682cc58cfce8d5f121efb029e5a Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Thu, 11 Sep 2025 13:57:24 -0600 Subject: [PATCH] Fix panics from unicode slicing in license detection (#38015) Closes #37954 Release Notes: - N/A --- crates/zeta/src/license_detection.rs | 80 +++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/crates/zeta/src/license_detection.rs b/crates/zeta/src/license_detection.rs index e06e1577a66cc160efa00213b80c6ca407f7be85..6f27e00022a922241182a3d0b6ea0031333daf7c 100644 --- a/crates/zeta/src/license_detection.rs +++ b/crates/zeta/src/license_detection.rs @@ -202,22 +202,48 @@ fn check_pattern(pattern: &[PatternPart], input: &str) -> bool { match_any_chars.end += part.match_any_chars.end; continue; } - let search_range_start = input_ix.saturating_sub(match_any_chars.end + part.text.len()); - let search_range_end = input_ix.saturating_sub(match_any_chars.start); - let found_ix = &input[search_range_start..search_range_end].rfind(&part.text); + + let search_range_end = n_chars_before_offset(match_any_chars.start, input_ix, input); + let search_range_start = n_chars_before_offset( + match_any_chars.len() + part.text.len(), + search_range_end, + input, + ); + let found_ix = input[search_range_start..search_range_end].rfind(&part.text); + if let Some(found_ix) = found_ix { input_ix = search_range_start + found_ix; match_any_chars = part.match_any_chars.clone(); } else if !part.optional { log::trace!( - "Failed to match pattern `...{}` against input `...{}`", - &part.text[part.text.len().saturating_sub(128)..], - &input[input_ix.saturating_sub(128)..] 
+ "Failed to match pattern\n`...{}`\nagainst input\n`...{}`", + &part.text[n_chars_before_offset(128, part.text.len(), &part.text)..], + &input[n_chars_before_offset(128, search_range_end, input)..search_range_end], ); return false; } } - match_any_chars.contains(&input_ix) + is_char_count_within_range(&input[..input_ix], match_any_chars) +} + +fn n_chars_before_offset(char_count: usize, offset: usize, string: &str) -> usize { + if char_count == 0 { + return offset; + } + string[..offset] + .char_indices() + .nth_back(char_count.saturating_sub(1)) + .map_or(0, |(byte_ix, _)| byte_ix) +} + +fn is_char_count_within_range(string: &str, char_count_range: Range<usize>) -> bool { + if string.len() >= char_count_range.start * 4 && string.len() < char_count_range.end { + return true; + } + if string.len() < char_count_range.start || string.len() >= char_count_range.end * 4 { + return false; + } + char_count_range.contains(&string.chars().count()) } /// Canonicalizes license text by removing all non-alphanumeric characters, lowercasing, and turning @@ -360,6 +386,7 @@ impl LicenseDetectionWatcher { mod tests { use fs::FakeFs; use gpui::TestAppContext; + use rand::Rng as _; use serde_json::json; use settings::{Settings as _, SettingsStore}; use worktree::WorktreeSettings; @@ -578,6 +605,45 @@ mod tests { ); } + #[test] + fn random_strings_negative_detection() { + for _i in 0..20 { + let random_string = rand::rng() + .sample_iter::<char, _>(rand::distr::StandardUniform) + .take(512) + .collect::<String>(); + assert_eq!(detect_license(&random_string), None); + } + } + + #[test] + fn test_n_chars_before_offset() { + assert_eq!(n_chars_before_offset(2, 4, "hello"), 2); + + let input = "ㄒ乇丂ㄒ"; + assert_eq!(n_chars_before_offset(2, input.len(), input), "ㄒ乇".len()); + } + + #[test] + fn test_is_char_count_within_range() { + // TODO: make this into a proper property test.
+ for _i in 0..20 { + let mut rng = rand::rng(); + let random_char_count = rng.random_range(0..64); + let random_string = rand::rng() + .sample_iter::<char, _>(rand::distr::StandardUniform) + .take(random_char_count) + .collect::<String>(); + let min_chars = rng.random_range(0..10); + let max_chars = rng.random_range(min_chars..32); + let char_count_range = min_chars..max_chars; + assert_eq!( + is_char_count_within_range(&random_string, char_count_range.clone()), + char_count_range.contains(&random_char_count), + ); + } + } + #[test] fn test_license_file_name_regex() { // Test basic license file names