@@ -2413,6 +2413,20 @@ dependencies = [
"piper",
]
+[[package]]
+name = "bm25"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cbd8ffdfb7b4c2ff038726178a780a94f90525ed0ad264c0afaa75dd8c18a64"
+dependencies = [
+ "cached",
+ "deunicode",
+ "fxhash",
+ "rust-stemmers",
+ "stop-words",
+ "unicode-segmentation",
+]
+
[[package]]
name = "borrow-or-share"
version = "0.2.2"
@@ -2619,6 +2633,39 @@ dependencies = [
"pkg-config",
]
+[[package]]
+name = "cached"
+version = "0.56.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "801927ee168e17809ab8901d9f01f700cd7d8d6a6527997fee44e4b0327a253c"
+dependencies = [
+ "ahash 0.8.11",
+ "cached_proc_macro",
+ "cached_proc_macro_types",
+ "hashbrown 0.15.3",
+ "once_cell",
+ "thiserror 2.0.12",
+ "web-time",
+]
+
+[[package]]
+name = "cached_proc_macro"
+version = "0.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9225bdcf4e4a9a4c08bf16607908eb2fbf746828d5e0b5e019726dbf6571f201"
+dependencies = [
+ "darling",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.101",
+]
+
+[[package]]
+name = "cached_proc_macro_types"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ade8366b8bd5ba243f0a58f036cc0ca8a2f069cff1a2351ef1cac6b083e16fc0"
+
[[package]]
name = "call"
version = "0.1.0"
@@ -4771,6 +4818,12 @@ dependencies = [
"syn 2.0.101",
]
+[[package]]
+name = "deunicode"
+version = "1.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04"
+
[[package]]
name = "diagnostics"
version = "0.1.0"
@@ -6421,6 +6474,15 @@ dependencies = [
"thread_local",
]
+[[package]]
+name = "fxhash"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c"
+dependencies = [
+ "byteorder",
+]
+
[[package]]
name = "gemm"
version = "0.17.1"
@@ -13451,6 +13513,16 @@ dependencies = [
"walkdir",
]
+[[package]]
+name = "rust-stemmers"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54"
+dependencies = [
+ "serde",
+ "serde_derive",
+]
+
[[package]]
name = "rust_decimal"
version = "1.38.0"
@@ -14355,6 +14427,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"assets",
+ "bm25",
"client",
"editor",
"feature_flags",
@@ -15047,6 +15120,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+[[package]]
+name = "stop-words"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "645a3d441ccf4bf47f2e4b7681461986681a6eeea9937d4c3bc9febd61d17c71"
+dependencies = [
+ "serde_json",
+]
+
[[package]]
name = "story"
version = "0.1.0"
@@ -27,7 +27,7 @@ use std::{
num::{NonZero, NonZeroU32},
ops::Range,
rc::Rc,
- sync::{Arc, LazyLock, RwLock, atomic::AtomicBool},
+ sync::{Arc, LazyLock, RwLock},
};
use title_bar::platform_title_bar::PlatformTitleBar;
use ui::{
@@ -463,6 +463,19 @@ pub struct SettingsWindow {
navbar_focus_handle: Entity<NonFocusableHandle>,
content_focus_handle: Entity<NonFocusableHandle>,
files_focus_handle: FocusHandle,
+ search_index: Option<Arc<SearchIndex>>,
+}
+
+struct SearchIndex { // search data produced by build_search_index; held behind an Arc so the search task can clone a handle to it
+ bm25_engine: bm25::SearchEngine<usize>, // document ids are indices into key_lut
+ fuzzy_match_candidates: Vec<StringMatchCandidate>, // one candidate per whitespace-separated word; the candidate id is an index into key_lut
+ key_lut: Vec<SearchItemKey>, // maps a document/candidate id to the page, header and item it refers to
+}
+
+struct SearchItemKey { // location of one searchable entry within the settings pages
+ page_index: usize, // index of the page in `pages`
+ header_index: usize, // item index of the latest section header seen on that page (0 if none was seen yet)
+ item_index: usize, // index of the entry in the page's `items`
}
struct SubPage {
@@ -881,10 +894,12 @@ impl SettingsWindow {
.focus_handle()
.tab_index(HEADER_CONTAINER_TAB_INDEX)
.tab_stop(false),
+ search_index: None,
};
this.fetch_files(window, cx);
this.build_ui(window, cx);
+ this.build_search_index();
this.search_bar.update(cx, |editor, cx| {
editor.focus_handle(cx).focus(window);
@@ -1023,7 +1038,7 @@ impl SettingsWindow {
fn update_matches(&mut self, cx: &mut Context<SettingsWindow>) {
self.search_task.take();
let query = self.search_bar.read(cx).text(cx);
- if query.is_empty() {
+ if query.is_empty() || self.search_index.is_none() {
for page in &mut self.search_matches {
page.fill(true);
}
@@ -1032,104 +1047,193 @@ impl SettingsWindow {
return;
}
- struct ItemKey {
- page_index: usize,
- header_index: usize,
- item_index: usize,
+ let search_index = self.search_index.as_ref().unwrap().clone();
+
+ fn update_matches_inner( // replaces the current match flags with the entries named by match_indices, then refreshes dependent state
+ this: &mut SettingsWindow,
+ search_index: &SearchIndex,
+ match_indices: impl Iterator<Item = usize>, // each value is an index into search_index.key_lut
+ cx: &mut Context<SettingsWindow>,
+ ) {
+ for page in &mut this.search_matches {
+ page.fill(false); // start from "nothing matches" on every page
+ }
+
+ for match_index in match_indices {
+ let SearchItemKey {
+ page_index,
+ header_index,
+ item_index,
+ } = search_index.key_lut[match_index];
+ let page = &mut this.search_matches[page_index];
+ page[header_index] = true; // a matching item also flags its section header as matched
+ page[item_index] = true;
+ }
+ this.filter_matches_to_file();
+ this.open_first_nav_page();
+ cx.notify();
}
- let mut key_lut: Vec<ItemKey> = vec![];
- let mut candidates = Vec::default();
+
+ self.search_task = Some(cx.spawn(async move |this, cx| {
+ let bm25_task = cx.background_spawn({
+ let search_index = search_index.clone();
+ let max_results = search_index.key_lut.len();
+ let query = query.clone();
+ async move { search_index.bm25_engine.search(&query, max_results) }
+ });
+ let cancel_flag = std::sync::atomic::AtomicBool::new(false);
+ let fuzzy_search_task = fuzzy::match_strings(
+ search_index.fuzzy_match_candidates.as_slice(),
+ &query,
+ false,
+ true,
+ search_index.fuzzy_match_candidates.len(),
+ &cancel_flag,
+ cx.background_executor().clone(),
+ );
+
+ let fuzzy_matches = fuzzy_search_task.await;
+
+ _ = this
+ .update(cx, |this, cx| {
+ // For tuning the score threshold
+ // for fuzzy_match in &fuzzy_matches {
+ // let SearchItemKey {
+ // page_index,
+ // header_index,
+ // item_index,
+ // } = search_index.key_lut[fuzzy_match.candidate_id];
+ // let SettingsPageItem::SectionHeader(header) =
+ // this.pages[page_index].items[header_index]
+ // else {
+ // continue;
+ // };
+ // let SettingsPageItem::SettingItem(SettingItem {
+ // title, description, ..
+ // }) = this.pages[page_index].items[item_index]
+ // else {
+ // continue;
+ // };
+ // let score = fuzzy_match.score;
+ // eprint!("# {header} :: QUERY = {query} :: SCORE = {score}\n{title}\n{description}\n\n");
+ // }
+ update_matches_inner(
+ this,
+ search_index.as_ref(),
+ fuzzy_matches
+ .into_iter()
+ // MAGIC NUMBER: Was found to have right balance between not too many weird matches, but also
+ // flexible enough to catch misspellings and <4 letter queries
+ // More flexible is good for us here because fuzzy matches will only be used for things that don't
+ // match using bm25
+ .take_while(|fuzzy_match| fuzzy_match.score >= 0.3)
+ .map(|fuzzy_match| fuzzy_match.candidate_id),
+ cx,
+ );
+ })
+ .ok();
+
+ let bm25_matches = bm25_task.await;
+
+ _ = this
+ .update(cx, |this, cx| {
+ if bm25_matches.is_empty() {
+ return;
+ }
+ update_matches_inner(
+ this,
+ search_index.as_ref(),
+ bm25_matches
+ .into_iter()
+ .map(|bm25_match| bm25_match.document.id),
+ cx,
+ );
+ })
+ .ok();
+ }));
+ }
+
+ fn build_search_matches(&mut self) { // resets search_matches to one flag vector per page, every flag set to true
+ self.search_matches = self
+ .pages
+ .iter()
+ .map(|page| vec![true; page.items.len()]) // one flag per item on the page
+ .collect::<Vec<_>>();
+ }
+
+ fn build_search_index(&mut self) { // builds the bm25 engine, fuzzy candidates and key lookup table from self.pages and stores them in self.search_index
+ let mut key_lut: Vec<SearchItemKey> = vec![];
+ let mut documents = Vec::default(); // one bm25 document per page item; its id is the item's index in key_lut
+ let mut fuzzy_match_candidates = Vec::default();
fn push_candidates(
- candidates: &mut Vec<StringMatchCandidate>,
+ fuzzy_match_candidates: &mut Vec<StringMatchCandidate>,
key_index: usize,
input: &str,
) {
for word in input.split_ascii_whitespace() {
- candidates.push(StringMatchCandidate::new(key_index, word));
+ fuzzy_match_candidates.push(StringMatchCandidate::new(key_index, word));
}
}
// PERF: We are currently searching all items even in project files
// where many settings are filtered out, using the logic in filter_matches_to_file
// we could only search relevant items based on the current file
- // PERF: We are reconstructing the string match candidates Vec each time we search.
- // This is completely unnecessary as now that pages are filtered, the string match candidates Vec
- // will be constant.
for (page_index, page) in self.pages.iter().enumerate() {
let mut header_index = 0;
+ let mut header_str = ""; // text of the latest section header on this page; empty until one is seen
for (item_index, item) in page.items.iter().enumerate() {
let key_index = key_lut.len();
match item {
SettingsPageItem::SettingItem(item) => {
- push_candidates(&mut candidates, key_index, item.title);
- push_candidates(&mut candidates, key_index, item.description);
+ documents.push(bm25::Document { // setting items are indexed together with their page title and section header
+ id: key_index,
+ contents: [page.title, header_str, item.title, item.description]
+ .join("\n"),
+ });
+ push_candidates(&mut fuzzy_match_candidates, key_index, item.title);
+ push_candidates(&mut fuzzy_match_candidates, key_index, item.description);
}
SettingsPageItem::SectionHeader(header) => {
- push_candidates(&mut candidates, key_index, header);
+ documents.push(bm25::Document { // a header's document contains only the header text
+ id: key_index,
+ contents: header.to_string(),
+ });
+ push_candidates(&mut fuzzy_match_candidates, key_index, header);
header_index = item_index;
+ header_str = *header;
}
SettingsPageItem::SubPageLink(sub_page_link) => {
- push_candidates(&mut candidates, key_index, sub_page_link.title);
- // candidates.push(StringMatchCandidate::new(key_index, sub_page_link.title));
+ documents.push(bm25::Document {
+ id: key_index,
+ contents: [page.title, header_str, sub_page_link.title].join("\n"),
+ });
+ push_candidates(
+ &mut fuzzy_match_candidates,
+ key_index,
+ sub_page_link.title,
+ );
}
}
- key_lut.push(ItemKey {
+ push_candidates(&mut fuzzy_match_candidates, key_index, page.title); // every item is also fuzzy-matchable by its page title
+ push_candidates(&mut fuzzy_match_candidates, key_index, header_str); // NOTE(review): for a SectionHeader item header_str was just set to that header, so its words are pushed a second time
+
+ key_lut.push(SearchItemKey {
page_index,
header_index,
item_index,
});
}
}
- let atomic_bool = AtomicBool::new(false);
-
- self.search_task = Some(cx.spawn(async move |this, cx| {
- let string_matches = fuzzy::match_strings(
- candidates.as_slice(),
- &query,
- false,
- true,
- candidates.len(),
- &atomic_bool,
- cx.background_executor().clone(),
- );
- let string_matches = string_matches.await;
-
- this.update(cx, |this, cx| {
- for page in &mut this.search_matches {
- page.fill(false);
- }
-
- for string_match in string_matches {
- // todo(settings_ui): process gets killed by SIGKILL (Illegal instruction) when this is uncommented?
- // if string_match.score < 0.4 {
- // continue;
- // }
- let ItemKey {
- page_index,
- header_index,
- item_index,
- } = key_lut[string_match.candidate_id];
- let page = &mut this.search_matches[page_index];
- page[header_index] = true;
- page[item_index] = true;
- }
- this.filter_matches_to_file();
- this.open_first_nav_page();
- cx.notify();
- })
- .ok();
+ let engine =
+ bm25::SearchEngineBuilder::with_documents(bm25::Language::English, documents).build();
+ self.search_index = Some(Arc::new(SearchIndex { // wrapped in an Arc so update_matches can clone a handle for its search task
+ bm25_engine: engine,
+ key_lut,
+ fuzzy_match_candidates,
}));
}
- fn build_search_matches(&mut self) {
- self.search_matches = self
- .pages
- .iter()
- .map(|page| vec![true; page.items.len()])
- .collect::<Vec<_>>();
- }
-
fn build_content_handles(&mut self, window: &mut Window, cx: &mut Context<SettingsWindow>) {
self.content_handles = self
.pages
@@ -2303,8 +2407,9 @@ mod test {
}
fn build(mut self, cx: &App) -> Self {
- self.build_search_matches();
self.build_navbar(cx);
+ self.build_search_matches(); // now runs after build_navbar rather than before it
+ self.build_search_index(); // sets self.search_index to Some(..) so a non-empty query is actually searched
self
}
@@ -2488,6 +2593,7 @@ mod test {
cx,
),
files_focus_handle: cx.focus_handle(),
+ search_index: None,
};
settings_window.build_search_matches();