diff --git a/Cargo.lock b/Cargo.lock index f8ecd53ef7ef0d24f7ca7d454591ae72abd6e797..fbbb5cc03e334559e0bd9c3af2bc82e76c27315d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2413,6 +2413,20 @@ dependencies = [ "piper", ] +[[package]] +name = "bm25" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cbd8ffdfb7b4c2ff038726178a780a94f90525ed0ad264c0afaa75dd8c18a64" +dependencies = [ + "cached", + "deunicode", + "fxhash", + "rust-stemmers", + "stop-words", + "unicode-segmentation", +] + [[package]] name = "borrow-or-share" version = "0.2.2" @@ -2619,6 +2633,39 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "cached" +version = "0.56.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "801927ee168e17809ab8901d9f01f700cd7d8d6a6527997fee44e4b0327a253c" +dependencies = [ + "ahash 0.8.11", + "cached_proc_macro", + "cached_proc_macro_types", + "hashbrown 0.15.3", + "once_cell", + "thiserror 2.0.12", + "web-time", +] + +[[package]] +name = "cached_proc_macro" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9225bdcf4e4a9a4c08bf16607908eb2fbf746828d5e0b5e019726dbf6571f201" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.101", +] + +[[package]] +name = "cached_proc_macro_types" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade8366b8bd5ba243f0a58f036cc0ca8a2f069cff1a2351ef1cac6b083e16fc0" + [[package]] name = "call" version = "0.1.0" @@ -4792,6 +4839,12 @@ dependencies = [ "syn 2.0.101", ] +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" + [[package]] name = "diagnostics" version = "0.1.0" @@ -6443,6 +6496,15 @@ dependencies = [ "thread_local", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "gemm" version = "0.17.1" @@ -13476,6 +13538,16 @@ dependencies = [ "walkdir", ] +[[package]] +name = "rust-stemmers" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +dependencies = [ + "serde", + "serde_derive", +] + [[package]] name = "rust_decimal" version = "1.38.0" @@ -14380,6 +14452,7 @@ version = "0.1.0" dependencies = [ "anyhow", "assets", + "bm25", "client", "editor", "feature_flags", @@ -15072,6 +15145,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stop-words" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645a3d441ccf4bf47f2e4b7681461986681a6eeea9937d4c3bc9febd61d17c71" +dependencies = [ + "serde_json", +] + [[package]] name = "story" version = "0.1.0" diff --git a/crates/settings_ui/Cargo.toml b/crates/settings_ui/Cargo.toml index b9bf45d61b3475c2e23a82ea3c6aa313803bcd9a..ab5e1b839510a990e17e7abea63e6412e4a10e4b 100644 --- a/crates/settings_ui/Cargo.toml +++ b/crates/settings_ui/Cargo.toml @@ -17,6 +17,7 @@ test-support = [] [dependencies] anyhow.workspace = true +bm25 = "2.3.2" heck.workspace = true editor.workspace = true feature_flags.workspace = true diff --git a/crates/settings_ui/src/settings_ui.rs b/crates/settings_ui/src/settings_ui.rs index 79c58313f7580fd6b2d80e6cfd591928ec5aef76..46bef9365b28043fe145cc1df54cd7dc49623848 100644 --- a/crates/settings_ui/src/settings_ui.rs +++ b/crates/settings_ui/src/settings_ui.rs @@ -27,7 +27,7 @@ use std::{ num::{NonZero, NonZeroU32}, ops::Range, rc::Rc, - sync::{Arc, LazyLock, RwLock, atomic::AtomicBool}, + sync::{Arc, LazyLock, RwLock}, }; use title_bar::platform_title_bar::PlatformTitleBar; use ui::{ @@ -463,6 +463,19 @@ pub struct SettingsWindow { navbar_focus_handle: Entity, content_focus_handle: Entity, files_focus_handle: FocusHandle, + search_index: Option>, +} + +struct SearchIndex { + bm25_engine: bm25::SearchEngine, + fuzzy_match_candidates: Vec, + key_lut: Vec, +} + +struct SearchItemKey { + page_index: usize, + header_index: usize, + item_index: usize, } struct SubPage { @@ -881,10 +894,12 @@ impl SettingsWindow { .focus_handle() .tab_index(HEADER_CONTAINER_TAB_INDEX) .tab_stop(false), + search_index: None, }; this.fetch_files(window, cx); this.build_ui(window, cx); + this.build_search_index(); this.search_bar.update(cx, |editor, cx| { editor.focus_handle(cx).focus(window); @@ -1023,7 +1038,7 @@ impl SettingsWindow { fn update_matches(&mut self, cx: &mut Context) { self.search_task.take(); let query = self.search_bar.read(cx).text(cx); - if query.is_empty() { + if query.is_empty() || self.search_index.is_none() { for page in &mut self.search_matches { page.fill(true); } @@ -1032,104 +1047,193 @@ impl SettingsWindow { return; } - struct ItemKey { - page_index: usize, - header_index: usize, - item_index: usize, + let search_index = self.search_index.as_ref().unwrap().clone(); + + fn update_matches_inner( + this: &mut SettingsWindow, + search_index: &SearchIndex, + match_indices: impl Iterator, + cx: &mut Context, + ) { + for page in &mut this.search_matches { + page.fill(false); + } + + for match_index in match_indices { + let SearchItemKey { + page_index, + header_index, + item_index, + } = search_index.key_lut[match_index]; + let page = &mut this.search_matches[page_index]; + page[header_index] = true; + page[item_index] = true; + } + this.filter_matches_to_file(); + this.open_first_nav_page(); + cx.notify(); } - let mut key_lut: Vec = vec![]; - let mut candidates = Vec::default(); + + self.search_task = Some(cx.spawn(async move |this, cx| { + let bm25_task = cx.background_spawn({ + let search_index = search_index.clone(); + let max_results = search_index.key_lut.len(); + let query = query.clone(); + async move { search_index.bm25_engine.search(&query, max_results) } + }); + let cancel_flag = std::sync::atomic::AtomicBool::new(false); + let fuzzy_search_task = fuzzy::match_strings( + search_index.fuzzy_match_candidates.as_slice(), + &query, + false, + true, + search_index.fuzzy_match_candidates.len(), + &cancel_flag, + cx.background_executor().clone(), + ); + + let fuzzy_matches = fuzzy_search_task.await; + + _ = this + .update(cx, |this, cx| { + // For tuning the score threshold + // for fuzzy_match in &fuzzy_matches { + // let SearchItemKey { + // page_index, + // header_index, + // item_index, + // } = search_index.key_lut[fuzzy_match.candidate_id]; + // let SettingsPageItem::SectionHeader(header) = + // this.pages[page_index].items[header_index] + // else { + // continue; + // }; + // let SettingsPageItem::SettingItem(SettingItem { + // title, description, .. + // }) = this.pages[page_index].items[item_index] + // else { + // continue; + // }; + // let score = fuzzy_match.score; + // eprint!("# {header} :: QUERY = {query} :: SCORE = {score}\n{title}\n{description}\n\n"); + // } + update_matches_inner( + this, + search_index.as_ref(), + fuzzy_matches + .into_iter() + // MAGIC NUMBER: Was found to have right balance between not too many weird matches, but also + // flexible enough to catch misspellings and <4 letter queries + // More flexible is good for us here because fuzzy matches will only be used for things that don't + // match using bm25 + .take_while(|fuzzy_match| fuzzy_match.score >= 0.3) + .map(|fuzzy_match| fuzzy_match.candidate_id), + cx, + ); + }) + .ok(); + + let bm25_matches = bm25_task.await; + + _ = this + .update(cx, |this, cx| { + if bm25_matches.is_empty() { + return; + } + update_matches_inner( + this, + search_index.as_ref(), + bm25_matches + .into_iter() + .map(|bm25_match| bm25_match.document.id), + cx, + ); + }) + .ok(); + })); + } + + fn build_search_matches(&mut self) { + self.search_matches = self + .pages + .iter() + .map(|page| vec![true; page.items.len()]) + .collect::>(); + } + + fn build_search_index(&mut self) { + let mut key_lut: Vec = vec![]; + let mut documents = Vec::default(); + let mut fuzzy_match_candidates = Vec::default(); fn push_candidates( - candidates: &mut Vec, + fuzzy_match_candidates: &mut Vec, key_index: usize, input: &str, ) { for word in input.split_ascii_whitespace() { - candidates.push(StringMatchCandidate::new(key_index, word)); + fuzzy_match_candidates.push(StringMatchCandidate::new(key_index, word)); } } // PERF: We are currently searching all items even in project files // where many settings are filtered out, using the logic in filter_matches_to_file // we could only search relevant items based on the current file - // PERF: We are reconstructing the string match candidates Vec each time we search. - // This is completely unnecessary as now that pages are filtered, the string match candidates Vec - // will be constant. for (page_index, page) in self.pages.iter().enumerate() { let mut header_index = 0; + let mut header_str = ""; for (item_index, item) in page.items.iter().enumerate() { let key_index = key_lut.len(); match item { SettingsPageItem::SettingItem(item) => { - push_candidates(&mut candidates, key_index, item.title); - push_candidates(&mut candidates, key_index, item.description); + documents.push(bm25::Document { + id: key_index, + contents: [page.title, header_str, item.title, item.description] + .join("\n"), + }); + push_candidates(&mut fuzzy_match_candidates, key_index, item.title); + push_candidates(&mut fuzzy_match_candidates, key_index, item.description); } SettingsPageItem::SectionHeader(header) => { - push_candidates(&mut candidates, key_index, header); + documents.push(bm25::Document { + id: key_index, + contents: header.to_string(), + }); + push_candidates(&mut fuzzy_match_candidates, key_index, header); header_index = item_index; + header_str = *header; } SettingsPageItem::SubPageLink(sub_page_link) => { - push_candidates(&mut candidates, key_index, sub_page_link.title); - // candidates.push(StringMatchCandidate::new(key_index, sub_page_link.title)); + documents.push(bm25::Document { + id: key_index, + contents: [page.title, header_str, sub_page_link.title].join("\n"), + }); + push_candidates( + &mut fuzzy_match_candidates, + key_index, + sub_page_link.title, + ); } } - key_lut.push(ItemKey { + push_candidates(&mut fuzzy_match_candidates, key_index, page.title); + push_candidates(&mut fuzzy_match_candidates, key_index, header_str); + + key_lut.push(SearchItemKey { page_index, header_index, item_index, }); } } - let atomic_bool = AtomicBool::new(false); - - self.search_task = Some(cx.spawn(async move |this, cx| { - let string_matches = fuzzy::match_strings( - candidates.as_slice(), - &query, - false, - true, - candidates.len(), - &atomic_bool, - cx.background_executor().clone(), - ); - let string_matches = string_matches.await; - - this.update(cx, |this, cx| { - for page in &mut this.search_matches { - page.fill(false); - } - - for string_match in string_matches { - // todo(settings_ui): process gets killed by SIGKILL (Illegal instruction) when this is uncommented? - // if string_match.score < 0.4 { - // continue; - // } - let ItemKey { - page_index, - header_index, - item_index, - } = key_lut[string_match.candidate_id]; - let page = &mut this.search_matches[page_index]; - page[header_index] = true; - page[item_index] = true; - } - this.filter_matches_to_file(); - this.open_first_nav_page(); - cx.notify(); - }) - .ok(); + let engine = + bm25::SearchEngineBuilder::with_documents(bm25::Language::English, documents).build(); + self.search_index = Some(Arc::new(SearchIndex { + bm25_engine: engine, + key_lut, + fuzzy_match_candidates, })); } - fn build_search_matches(&mut self) { - self.search_matches = self - .pages - .iter() - .map(|page| vec![true; page.items.len()]) - .collect::>(); - } - fn build_content_handles(&mut self, window: &mut Window, cx: &mut Context) { self.content_handles = self .pages @@ -2303,8 +2407,9 @@ mod test { } fn build(mut self, cx: &App) -> Self { - self.build_search_matches(); self.build_navbar(cx); + self.build_search_matches(); + self.build_search_index(); self } @@ -2488,6 +2593,7 @@ mod test { cx, ), files_focus_handle: cx.focus_handle(), + search_index: None, }; settings_window.build_search_matches();