From ae5abf0b268d7ce34f1bc401f996acd6709a162c Mon Sep 17 00:00:00 2001 From: Ben Kunkle Date: Sun, 15 Feb 2026 12:24:33 -0600 Subject: [PATCH] Remove dependency on bm25. Use exact prefix match in settings UI search instead (#49231) Closes #ISSUE Before you mark this PR as ready for review, make sure that you have: - [ ] Added a solid test coverage and/or screenshots from doing manual testing - [ ] Done a self-review taking into account security and performance aspects - [ ] Aligned any UI changes with the [UI checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist) Release Notes: - N/A *or* Added/Fixed/Improved ... --------- Co-authored-by: Zed Zippy <234243425+zed-zippy[bot]@users.noreply.github.com> --- Cargo.lock | 49 ----------- crates/settings_ui/Cargo.toml | 1 - crates/settings_ui/src/settings_ui.rs | 112 ++++++++++++++------------ 3 files changed, 60 insertions(+), 102 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d8995da31339b03ac3e57d52ce457b3d75f0c615..0eae257836e8d804d74e6c60bd45780a8e4f879c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2186,20 +2186,6 @@ dependencies = [ "piper", ] -[[package]] -name = "bm25" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cbd8ffdfb7b4c2ff038726178a780a94f90525ed0ad264c0afaa75dd8c18a64" -dependencies = [ - "cached", - "deunicode", - "fxhash", - "rust-stemmers", - "stop-words", - "unicode-segmentation", -] - [[package]] name = "bon" version = "3.8.2" @@ -4831,12 +4817,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "deunicode" -version = "1.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" - [[package]] name = "dev_container" version = "0.1.0" @@ -6730,15 +6710,6 @@ dependencies = [ "thread_local", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - [[package]] name = "gemm" version = "0.17.1" @@ -14286,16 +14257,6 @@ dependencies = [ "walkdir", ] -[[package]] -name = "rust-stemmers" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" -dependencies = [ - "serde", - "serde_derive", -] - [[package]] name = "rust_decimal" version = "1.39.0" @@ -15222,7 +15183,6 @@ dependencies = [ "anyhow", "assets", "audio", - "bm25", "client", "codestral", "component", @@ -15986,15 +15946,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "stop-words" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645a3d441ccf4bf47f2e4b7681461986681a6eeea9937d4c3bc9febd61d17c71" -dependencies = [ - "serde_json", -] - [[package]] name = "story" version = "0.1.0" diff --git a/crates/settings_ui/Cargo.toml b/crates/settings_ui/Cargo.toml index b598585e15ff4be03037a0bc2c97eace91443584..399534b968dfba941d17e2f6ce76261ca4e71859 100644 --- a/crates/settings_ui/Cargo.toml +++ b/crates/settings_ui/Cargo.toml @@ -20,7 +20,6 @@ agent.workspace = true agent_settings.workspace = true anyhow.workspace = true audio.workspace = true -bm25 = "2.3.2" component.workspace = true codestral.workspace = true copilot.workspace = true diff --git a/crates/settings_ui/src/settings_ui.rs b/crates/settings_ui/src/settings_ui.rs index 71dd6ff1aa2e91382bb36b9842780cf633459287..2ec92f3fbf8ea39948547cd6595316c77c6497af 100644 --- a/crates/settings_ui/src/settings_ui.rs +++ b/crates/settings_ui/src/settings_ui.rs @@ -758,8 +758,13 @@ pub struct SettingsWindow { pub(crate) regex_validation_error: Option, } +struct SearchDocument { + id: usize, + words: Vec, +} + struct SearchIndex { - bm25_engine: bm25::SearchEngine, + documents: Vec, fuzzy_match_candidates: Vec, key_lut: Vec, } @@ -1919,11 +1924,25 @@ impl SettingsWindow { let search_index = self.search_index.as_ref().unwrap().clone(); self.search_task = Some(cx.spawn(async move |this, cx| { - let bm25_task = cx.background_spawn({ + let exact_match_task = cx.background_spawn({ let search_index = search_index.clone(); - let max_results = search_index.key_lut.len(); let query = query.clone(); - async move { search_index.bm25_engine.search(&query, max_results) } + async move { + let query_lower = query.to_lowercase(); + let query_words: Vec<&str> = query_lower.split_whitespace().collect(); + search_index + .documents + .iter() + .filter(|doc| { + query_words.iter().any(|query_word| { + doc.words + .iter() + .any(|doc_word| doc_word.starts_with(query_word)) + }) + }) + .map(|doc| doc.id) + .collect::>() + } }); let cancel_flag = std::sync::atomic::AtomicBool::new(false); let fuzzy_search_task = fuzzy::match_strings( @@ -1937,46 +1956,16 @@ impl SettingsWindow { ); let fuzzy_matches = fuzzy_search_task.await; - // PERF: - // If results are slow to appear, we should: - // - return to the structure we had previously where we wait on fuzzy matches first (they resolve quickly) with a min match score of 0.3 - // - wait on bm25 and replace fuzzy matches with bm25 matches - // - to deal with lack of fuzzyness with bm25 searches however, we should keep the fuzzy matches around, and merge fuzzy matches with high score (>0.75?) into bm25 results - let bm25_matches = bm25_task.await; + let exact_matches = exact_match_task.await; _ = this .update(cx, |this, cx| { - // For tuning the score threshold - // for fuzzy_match in &fuzzy_matches { - // let SearchItemKey { - // page_index, - // header_index, - // item_index, - // } = search_index.key_lut[fuzzy_match.candidate_id]; - // let SettingsPageItem::SectionHeader(header) = - // this.pages[page_index].items[header_index] - // else { - // continue; - // }; - // let SettingsPageItem::SettingItem(SettingItem { - // title, description, .. - // }) = this.pages[page_index].items[item_index] - // else { - // continue; - // }; - // let score = fuzzy_match.score; - // eprint!("# {header} :: QUERY = {query} :: SCORE = {score}\n{title}\n{description}\n\n"); - // } + let exact_indices = exact_matches.into_iter(); let fuzzy_indices = fuzzy_matches .into_iter() - // MAGIC NUMBER: Was found to have right balance between not too many weird matches, but also - // flexible enough to catch misspellings and <4 letter queries .take_while(|fuzzy_match| fuzzy_match.score >= 0.5) .map(|fuzzy_match| fuzzy_match.candidate_id); - let bm25_indices = bm25_matches - .into_iter() - .map(|bm25_match| bm25_match.document.id); - let merged_indices = bm25_indices.chain(fuzzy_indices); + let merged_indices = exact_indices.chain(fuzzy_indices); this.apply_match_indices(merged_indices); cx.notify(); @@ -1997,8 +1986,19 @@ impl SettingsWindow { } fn build_search_index(&mut self) { + fn split_into_words(parts: &[&str]) -> Vec { + parts + .iter() + .flat_map(|s| { + s.split(|c: char| !c.is_alphanumeric()) + .filter(|w| !w.is_empty()) + .map(|w| w.to_lowercase()) + }) + .collect() + } + let mut key_lut: Vec = vec![]; - let mut documents = Vec::default(); + let mut documents: Vec = Vec::default(); let mut fuzzy_match_candidates = Vec::default(); fn push_candidates( @@ -2029,18 +2029,22 @@ impl SettingsWindow { .field .json_path() .map(|path| path.trim_end_matches('$')); - documents.push(bm25::Document { + documents.push(SearchDocument { id: key_index, - contents: [page.title, header_str, item.title, item.description] - .join("\n"), + words: split_into_words(&[ + page.title, + header_str, + item.title, + item.description, + ]), }); push_candidates(&mut fuzzy_match_candidates, key_index, item.title); push_candidates(&mut fuzzy_match_candidates, key_index, item.description); } SettingsPageItem::SectionHeader(header) => { - documents.push(bm25::Document { + documents.push(SearchDocument { id: key_index, - contents: header.to_string(), + words: split_into_words(&[header]), }); push_candidates(&mut fuzzy_match_candidates, key_index, header); header_index = item_index; @@ -2048,10 +2052,13 @@ impl SettingsWindow { } SettingsPageItem::SubPageLink(sub_page_link) => { json_path = sub_page_link.json_path; - documents.push(bm25::Document { + documents.push(SearchDocument { id: key_index, - contents: [page.title, header_str, sub_page_link.title.as_ref()] - .join("\n"), + words: split_into_words(&[ + page.title, + header_str, + sub_page_link.title.as_ref(), + ]), }); push_candidates( &mut fuzzy_match_candidates, @@ -2060,10 +2067,13 @@ impl SettingsWindow { ); } SettingsPageItem::ActionLink(action_link) => { - documents.push(bm25::Document { + documents.push(SearchDocument { id: key_index, - contents: [page.title, header_str, action_link.title.as_ref()] - .join("\n"), + words: split_into_words(&[ + page.title, + header_str, + action_link.title.as_ref(), + ]), }); push_candidates( &mut fuzzy_match_candidates, @@ -2083,10 +2093,8 @@ impl SettingsWindow { }); } } - let engine = - bm25::SearchEngineBuilder::with_documents(bm25::Language::English, documents).build(); self.search_index = Some(Arc::new(SearchIndex { - bm25_engine: engine, + documents, key_lut, fuzzy_match_candidates, }));