Eager Semantic Indexing Queue (#2886)

Kyle Caverly created

Optimization to the Semantic Indexing Engine.

We've transitioned from a framework in which the entire project tree is
walked on every index command to an eager queuing method: an initial
queue of outstanding indexing work is built upon workspace creation, and
subscriptions to file change events then keep the view of outstanding
work continually up to date.

This optimization contributes to quicker user feedback when
initializing or using Semantic Search functionality. It also opens the
door to better transparency across the system on outstanding
indexing work.

Release Notes:

- Refactored index operation queue to an eager queuing framework.
- Moved semantic search initialization to workspace creation.
- Adjusted the rate-limiting strategy for API delays to reduce time spent
waiting for rate limits.

Change summary

Cargo.lock                                        | 317 ++++++++-------
crates/search/src/project_search.rs               |   7 
crates/semantic_index/Cargo.toml                  |   1 
crates/semantic_index/src/db.rs                   |  21 
crates/semantic_index/src/embedding.rs            |   4 
crates/semantic_index/src/parsing.rs              |  14 
crates/semantic_index/src/semantic_index.rs       | 337 ++++++++++++++--
crates/semantic_index/src/semantic_index_tests.rs |   7 
8 files changed, 506 insertions(+), 202 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -36,11 +36,11 @@ dependencies = [
 
 [[package]]
 name = "addr2line"
-version = "0.20.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3"
+checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
 dependencies = [
- "gimli 0.27.3",
+ "gimli 0.28.0",
 ]
 
 [[package]]
@@ -88,9 +88,9 @@ dependencies = [
 
 [[package]]
 name = "aho-corasick"
-version = "1.0.2"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
+checksum = "6748e8def348ed4d14996fa801f4122cd763fff530258cdc03f64b25f89d3a5a"
 dependencies = [
  "memchr",
 ]
@@ -140,7 +140,7 @@ source = "git+https://github.com/zed-industries/alacritty?rev=33306142195b354ef3
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -151,7 +151,7 @@ dependencies = [
  "alacritty_config",
  "alacritty_config_derive",
  "base64 0.13.1",
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "home",
  "libc",
  "log",
@@ -244,9 +244,9 @@ dependencies = [
 
 [[package]]
 name = "anstyle"
-version = "1.0.1"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd"
+checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea"
 
 [[package]]
 name = "anstyle-parse"
@@ -268,9 +268,9 @@ dependencies = [
 
 [[package]]
 name = "anstyle-wincon"
-version = "1.0.1"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188"
+checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c"
 dependencies = [
  "anstyle",
  "windows-sys",
@@ -278,9 +278,9 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.72"
+version = "1.0.75"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854"
+checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
 
 [[package]]
 name = "arrayref"
@@ -337,7 +337,7 @@ dependencies = [
  "futures-core",
  "futures-io",
  "once_cell",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "tokio",
 ]
 
@@ -351,7 +351,7 @@ dependencies = [
  "futures-core",
  "futures-io",
  "memchr",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
 ]
 
 [[package]]
@@ -411,15 +411,15 @@ dependencies = [
  "polling",
  "rustix 0.37.23",
  "slab",
- "socket2",
+ "socket2 0.4.9",
  "waker-fn",
 ]
 
 [[package]]
 name = "async-lock"
-version = "2.7.0"
+version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa24f727524730b077666307f2734b4a1a1c57acb79193127dcc8914d5242dd7"
+checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b"
 dependencies = [
  "event-listener",
 ]
@@ -482,7 +482,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -505,7 +505,7 @@ dependencies = [
  "log",
  "memchr",
  "once_cell",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "pin-utils",
  "slab",
  "wasm-bindgen-futures",
@@ -519,7 +519,7 @@ checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51"
 dependencies = [
  "async-stream-impl",
  "futures-core",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
 ]
 
 [[package]]
@@ -530,7 +530,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -567,13 +567,13 @@ dependencies = [
 
 [[package]]
 name = "async-trait"
-version = "0.1.72"
+version = "0.1.73"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09"
+checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -586,7 +586,7 @@ dependencies = [
  "futures-io",
  "futures-util",
  "log",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "tungstenite 0.16.0",
 ]
 
@@ -681,12 +681,12 @@ dependencies = [
  "http",
  "http-body",
  "hyper",
- "itoa 1.0.9",
+ "itoa",
  "matchit",
  "memchr",
  "mime",
  "percent-encoding",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "serde",
  "serde_json",
  "serde_urlencoded",
@@ -727,7 +727,7 @@ dependencies = [
  "futures-util",
  "http",
  "mime",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "serde",
  "serde_json",
  "tokio",
@@ -739,16 +739,16 @@ dependencies = [
 
 [[package]]
 name = "backtrace"
-version = "0.3.68"
+version = "0.3.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12"
+checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
 dependencies = [
- "addr2line 0.20.0",
+ "addr2line 0.21.0",
  "cc",
  "cfg-if 1.0.0",
  "libc",
  "miniz_oxide 0.7.1",
- "object 0.31.1",
+ "object 0.32.0",
  "rustc-demangle",
 ]
 
@@ -831,7 +831,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.28",
+ "syn 2.0.29",
  "which",
 ]
 
@@ -858,9 +858,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
 [[package]]
 name = "bitflags"
-version = "2.3.3"
+version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42"
+checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635"
 dependencies = [
  "serde",
 ]
@@ -996,7 +996,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
 dependencies = [
  "memchr",
- "regex-automata 0.3.4",
+ "regex-automata 0.3.6",
  "serde",
 ]
 
@@ -1157,11 +1157,12 @@ checksum = "a2698f953def977c68f935bb0dfa959375ad4638570e969e2f1e9f433cbf1af6"
 
 [[package]]
 name = "cc"
-version = "1.0.79"
+version = "1.0.83"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
+checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0"
 dependencies = [
  "jobserver",
+ "libc",
 ]
 
 [[package]]
@@ -1287,9 +1288,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.3.19"
+version = "4.3.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5fd304a20bff958a57f04c4e96a2e7594cc4490a0e809cbd48bb6437edaa452d"
+checksum = "fb690e81c7840c0d7aade59f242ea3b41b9bc27bcd5997890e7702ae4b32e487"
 dependencies = [
  "clap_builder",
  "clap_derive 4.3.12",
@@ -1298,9 +1299,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.3.19"
+version = "4.3.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01c6a3f08f1fe5662a35cfe393aec09c4df95f60ee93b7556505260f75eee9e1"
+checksum = "5ed2e96bc16d8d740f6f48d663eddf4b8a0983e79210fd55479b7bcd0a69860e"
 dependencies = [
  "anstream",
  "anstyle",
@@ -1330,7 +1331,7 @@ dependencies = [
  "heck 0.4.1",
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -1392,7 +1393,7 @@ dependencies = [
  "tempfile",
  "text",
  "thiserror",
- "time 0.3.24",
+ "time 0.3.27",
  "tiny_http",
  "url",
  "util",
@@ -1499,7 +1500,7 @@ dependencies = [
  "sqlx",
  "text",
  "theme",
- "time 0.3.24",
+ "time 0.3.27",
  "tokio",
  "tokio-tungstenite",
  "toml 0.5.11",
@@ -2041,7 +2042,7 @@ dependencies = [
  "openssl-probe",
  "openssl-sys",
  "schannel",
- "socket2",
+ "socket2 0.4.9",
  "winapi 0.3.9",
 ]
 
@@ -2062,9 +2063,9 @@ dependencies = [
 
 [[package]]
 name = "dashmap"
-version = "5.5.0"
+version = "5.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6943ae99c34386c84a470c499d3414f66502a41340aa895406e0d2e4a207b91d"
+checksum = "edd72493923899c6f10c641bdbdeddc7183d6396641d99c1a0d1597f37f92e28"
 dependencies = [
  "cfg-if 1.0.0",
  "hashbrown 0.14.0",
@@ -2122,9 +2123,9 @@ dependencies = [
 
 [[package]]
 name = "deranged"
-version = "0.3.6"
+version = "0.3.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8810e7e2cf385b1e9b50d68264908ec367ba642c96d02edfe61c39e88e2a3c01"
+checksum = "f2696e8a945f658fd14dc3b87242e6b80cd0f36ff04ea560fa39082368847946"
 dependencies = [
  "serde",
 ]
@@ -2312,9 +2313,9 @@ dependencies = [
 
 [[package]]
 name = "dyn-clone"
-version = "1.0.12"
+version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "304e6508efa593091e97a9abbc10f90aa7ca635b6d2784feff3c89d41dd12272"
+checksum = "bbfc4744c1b8f2a09adc0e55242f60b1af195d88596bd8700be74418c056c555"
 
 [[package]]
 name = "editor"
@@ -2377,9 +2378,9 @@ checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
 
 [[package]]
 name = "encoding_rs"
-version = "0.8.32"
+version = "0.8.33"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
+checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
 dependencies = [
  "cfg-if 1.0.0",
 ]
@@ -2427,9 +2428,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
 name = "erased-serde"
-version = "0.3.28"
+version = "0.3.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da96524cc884f6558f1769b6c46686af2fe8e8b4cd253bd5a3cdba8181b8e070"
+checksum = "fc978899517288e3ebbd1a3bfc1d9537dbb87eeab149e53ea490e63bcdff561a"
 dependencies = [
  "serde",
 ]
@@ -2592,13 +2593,13 @@ dependencies = [
 
 [[package]]
 name = "filetime"
-version = "0.2.21"
+version = "0.2.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153"
+checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0"
 dependencies = [
  "cfg-if 1.0.0",
  "libc",
- "redox_syscall 0.2.16",
+ "redox_syscall 0.3.5",
  "windows-sys",
 ]
 
@@ -2610,9 +2611,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
 
 [[package]]
 name = "flate2"
-version = "1.0.26"
+version = "1.0.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
+checksum = "c6c98ee8095e9d1dcbf2fcc6d95acccb90d1c81db1e44725c6a984b1dbdfb010"
 dependencies = [
  "crc32fast",
  "miniz_oxide 0.7.1",
@@ -2753,7 +2754,7 @@ dependencies = [
  "smol",
  "sum_tree",
  "tempfile",
- "time 0.3.24",
+ "time 0.3.27",
  "util",
 ]
 
@@ -2891,7 +2892,7 @@ dependencies = [
  "futures-io",
  "memchr",
  "parking",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "waker-fn",
 ]
 
@@ -2903,7 +2904,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -2932,7 +2933,7 @@ dependencies = [
  "futures-sink",
  "futures-task",
  "memchr",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "pin-utils",
  "slab",
  "tokio-io",
@@ -3010,9 +3011,9 @@ dependencies = [
 
 [[package]]
 name = "gimli"
-version = "0.27.3"
+version = "0.28.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
+checksum = "6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0"
 
 [[package]]
 name = "git"
@@ -3055,11 +3056,11 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 
 [[package]]
 name = "globset"
-version = "0.4.12"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aca8bbd8e0707c1887a8bbb7e6b40e228f251ff5d62c8220a4a7a53c73aff006"
+checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d"
 dependencies = [
- "aho-corasick 1.0.2",
+ "aho-corasick 1.0.4",
  "bstr",
  "fnv",
  "log",
@@ -3146,7 +3147,7 @@ dependencies = [
  "sqlez",
  "sum_tree",
  "taffy",
- "time 0.3.24",
+ "time 0.3.27",
  "tiny-skia",
  "usvg",
  "util",
@@ -3172,9 +3173,9 @@ checksum = "eec1c01eb1de97451ee0d60de7d81cf1e72aabefb021616027f3d1c3ec1c723c"
 
 [[package]]
 name = "h2"
-version = "0.3.20"
+version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97ec8491ebaf99c8eaa73058b045fe58073cd6be7f596ac993ced0b0a0c01049"
+checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833"
 dependencies = [
  "bytes 1.4.0",
  "fnv",
@@ -3368,7 +3369,7 @@ checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482"
 dependencies = [
  "bytes 1.4.0",
  "fnv",
- "itoa 1.0.9",
+ "itoa",
 ]
 
 [[package]]
@@ -3379,7 +3380,7 @@ checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
 dependencies = [
  "bytes 1.4.0",
  "http",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
 ]
 
 [[package]]
@@ -3396,9 +3397,9 @@ checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904"
 
 [[package]]
 name = "httpdate"
-version = "1.0.2"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
 
 [[package]]
 name = "human_bytes"
@@ -3427,9 +3428,9 @@ dependencies = [
  "http-body",
  "httparse",
  "httpdate",
- "itoa 1.0.9",
- "pin-project-lite 0.2.10",
- "socket2",
+ "itoa",
+ "pin-project-lite 0.2.12",
+ "socket2 0.4.9",
  "tokio",
  "tower-service",
  "tracing",
@@ -3443,7 +3444,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
 dependencies = [
  "hyper",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "tokio",
  "tokio-io-timeout",
 ]
@@ -3661,7 +3662,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
 dependencies = [
  "hermit-abi 0.3.2",
- "rustix 0.38.4",
+ "rustix 0.38.8",
  "windows-sys",
 ]
 
@@ -3701,12 +3702,6 @@ dependencies = [
  "either",
 ]
 
-[[package]]
-name = "itoa"
-version = "0.4.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
-
 [[package]]
 name = "itoa"
 version = "1.0.9"
@@ -4133,9 +4128,9 @@ dependencies = [
 
 [[package]]
 name = "log"
-version = "0.4.19"
+version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
+checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
 dependencies = [
  "serde",
  "value-bag",
@@ -4166,9 +4161,9 @@ dependencies = [
 
 [[package]]
 name = "lsp-types"
-version = "0.94.0"
+version = "0.94.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b63735a13a1f9cd4f4835223d828ed9c2e35c8c5e61837774399f558b6a1237"
+checksum = "c66bfd44a06ae10647fe3f8214762e9369fd4248df1350924b4ef9e770a85ea1"
 dependencies = [
  "bitflags 1.3.2",
  "serde",
@@ -4624,9 +4619,9 @@ dependencies = [
 
 [[package]]
 name = "num-bigint"
-version = "0.4.3"
+version = "0.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f"
+checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0"
 dependencies = [
  "autocfg",
  "num-integer",
@@ -4782,9 +4777,9 @@ dependencies = [
 
 [[package]]
 name = "object"
-version = "0.31.1"
+version = "0.32.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1"
+checksum = "77ac5bbd07aea88c60a577a1ce218075ffd59208b2d7ca97adf9bfc5aeb21ebe"
 dependencies = [
  "memchr",
 ]
@@ -4826,9 +4821,9 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
 
 [[package]]
 name = "openssl"
-version = "0.10.55"
+version = "0.10.56"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d"
+checksum = "729b745ad4a5575dd06a3e1af1414bd330ee561c01b3899eb584baeaa8def17e"
 dependencies = [
  "bitflags 1.3.2",
  "cfg-if 1.0.0",
@@ -4847,7 +4842,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -4858,9 +4853,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
 
 [[package]]
 name = "openssl-sys"
-version = "0.9.90"
+version = "0.9.91"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "374533b0e45f3a7ced10fcaeccca020e66656bc03dac384f852e4e5a7a8104a6"
+checksum = "866b5f16f90776b9bb8dc1e1802ac6f0513de3a7a7465867bfbc563dc737faac"
 dependencies = [
  "cc",
  "libc",
@@ -4995,7 +4990,7 @@ dependencies = [
  "libc",
  "redox_syscall 0.3.5",
  "smallvec",
- "windows-targets 0.48.1",
+ "windows-targets 0.48.5",
 ]
 
 [[package]]
@@ -5087,12 +5082,12 @@ dependencies = [
 
 [[package]]
 name = "petgraph"
-version = "0.6.3"
+version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4dd7d28ee937e54fe3080c91faa1c3a46c06de6252988a7f4592ba2310ef22a4"
+checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9"
 dependencies = [
  "fixedbitset",
- "indexmap 1.9.3",
+ "indexmap 2.0.0",
 ]
 
 [[package]]
@@ -5120,22 +5115,22 @@ checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468"
 
 [[package]]
 name = "pin-project"
-version = "1.1.2"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "030ad2bc4db10a8944cb0d837f158bdfec4d4a4873ab701a95046770d11f8842"
+checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422"
 dependencies = [
  "pin-project-internal",
 ]
 
 [[package]]
 name = "pin-project-internal"
-version = "1.1.2"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c"
+checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -5146,9 +5141,9 @@ checksum = "257b64915a082f7811703966789728173279bdebb956b143dbcd23f6f970a777"
 
 [[package]]
 name = "pin-project-lite"
-version = "0.2.10"
+version = "0.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57"
+checksum = "12cc1b0bf1727a77a54b6654e7b5f1af8604923edc8b81885f8ec92f9e3f0a05"
 
 [[package]]
 name = "pin-utils"
@@ -5200,7 +5195,7 @@ dependencies = [
  "line-wrap",
  "quick-xml",
  "serde",
- "time 0.3.24",
+ "time 0.3.27",
 ]
 
 [[package]]
@@ -5265,7 +5260,7 @@ dependencies = [
  "concurrent-queue",
  "libc",
  "log",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "windows-sys",
 ]
 
@@ -5315,7 +5310,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62"
 dependencies = [
  "proc-macro2",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -5655,9 +5650,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.32"
+version = "1.0.33"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965"
+checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
 dependencies = [
  "proc-macro2",
 ]
@@ -5890,13 +5885,13 @@ dependencies = [
 
 [[package]]
 name = "regex"
-version = "1.9.1"
+version = "1.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b2eae68fc220f7cf2532e4494aded17545fce192d59cd996e0fe7887f4ceb575"
+checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a"
 dependencies = [
- "aho-corasick 1.0.2",
+ "aho-corasick 1.0.4",
  "memchr",
- "regex-automata 0.3.4",
+ "regex-automata 0.3.6",
  "regex-syntax 0.7.4",
 ]
 
@@ -5911,11 +5906,11 @@ dependencies = [
 
 [[package]]
 name = "regex-automata"
-version = "0.3.4"
+version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b7b6d6190b7594385f61bd3911cd1be99dfddcfc365a4160cc2ab5bff4aed294"
+checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69"
 dependencies = [
- "aho-corasick 1.0.2",
+ "aho-corasick 1.0.4",
  "memchr",
  "regex-syntax 0.7.4",
 ]
@@ -5964,9 +5959,9 @@ dependencies = [
 
 [[package]]
 name = "reqwest"
-version = "0.11.18"
+version = "0.11.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55"
+checksum = "20b9b67e2ca7dd9e9f9285b759de30ff538aab981abaaf7bc9bd90b84a0126c3"
 dependencies = [
  "base64 0.21.2",
  "bytes 1.4.0",
@@ -5985,7 +5980,7 @@ dependencies = [
  "native-tls",
  "once_cell",
  "percent-encoding",
- "pin-project-lite 0.2.10",
+ "pin-project-lite 0.2.12",
  "serde",
  "serde_json",
  "serde_urlencoded",
@@ -6205,7 +6200,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rust-embed-utils",
- "syn 2.0.28",
+ "syn 2.0.29",
  "walkdir",
 ]
 
@@ -6222,13 +6217,12 @@ dependencies = [
 
 [[package]]
 name = "rust_decimal"
-version = "1.31.0"
+version = "1.32.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a2ab0025103a60ecaaf3abf24db1db240a4e1c15837090d2c32f625ac98abea"
+checksum = "a4c4216490d5a413bc6d10fa4742bd7d4955941d062c0ef873141d6b0e7b30fd"
 dependencies = [
  "arrayvec 0.7.4",
  "borsh",
- "byteorder",
  "bytes 1.4.0",
  "num-traits",
  "rand 0.8.5",
@@ -6276,7 +6270,7 @@ dependencies = [
  "bitflags 1.3.2",
  "errno 0.2.8",
  "io-lifetimes 0.5.3",
- "itoa 1.0.9",
+ "itoa",
  "libc",
  "linux-raw-sys 0.0.42",
  "once_cell",
@@ -6299,11 +6293,11 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.4"
+version = "0.38.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
+checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f"
 dependencies = [
- "bitflags 2.3.3",
+ "bitflags 2.4.0",
  "errno 0.3.2",
  "libc",
  "linux-raw-sys 0.4.5",
@@ -6505,7 +6499,7 @@ dependencies = [
  "serde_json",
  "sqlx",
  "thiserror",
- "time 0.3.24",
+ "time 0.3.27",
  "tracing",
  "url",
  "uuid 1.4.1",
@@ -6533,7 +6527,7 @@ dependencies = [
  "rust_decimal",
  "sea-query-derive",
  "serde_json",
- "time 0.3.24",
+ "time 0.3.27",
  "uuid 1.4.1",
 ]
 
@@ -6548,7 +6542,7 @@ dependencies = [
  "sea-query",
  "serde_json",
  "sqlx",
- "time 0.3.24",
+ "time 0.3.27",
  "uuid 1.4.1",
 ]
 
@@ -6676,10 +6670,11 @@ dependencies = [
  "serde",
  "serde_json",
  "settings",
+ "sha1",
  "smol",
  "tempdir",
  "theme",
- "tiktoken-rs 0.5.0",
+ "tiktoken-rs 0.5.1",
  "tree-sitter",
  "tree-sitter-cpp",
  "tree-sitter-elixir",
@@ -6727,22 +6722,22 @@ checksum = "5a9f47faea3cad316faa914d013d24f471cd90bfca1a0c70f05a3f42c6441e99"
 
 [[package]]
 name = "serde"
-version = "1.0.180"
+version = "1.0.185"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed"
+checksum = "be9b6f69f1dfd54c3b568ffa45c310d6973a5e5148fd40cf515acaf38cf5bc31"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.180"
+version = "1.0.185"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036"
+checksum = "dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -6767,24 +6762,24 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.104"
+version = "1.0.105"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
+checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360"
 dependencies = [
  "indexmap 2.0.0",
- "itoa 1.0.9",
+ "itoa",
  "ryu",
  "serde",
 ]
 
 [[package]]
 name = "serde_json_lenient"
-version = "0.1.4"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d7b9ce5b0a63c6269b9623ed828b39259545a6ec0d8a35d6135ad6af6232add"
+checksum = "29591aaa3a13f5ad0f2dd1a8a21bcddab11eaae7c3522b20ade2e85e9df52206"
 dependencies = [
- "indexmap 1.9.3",
- "itoa 0.4.8",
+ "indexmap 2.0.0",
+ "itoa",
  "ryu",
  "serde",
 ]
@@ -6797,7 +6792,7 @@ checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.29",
 ]
 
 [[package]]
@@ -6816,7 +6811,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
 dependencies = [
  "form_urlencoded",
- "itoa 1.0.9",
+ "itoa",
  "ryu",
  "serde",
 ]
@@ -7028,9 +7023,9 @@ checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
 
 [[package]]
 name = "slab"
-version = "0.4.8"
+version = "0.4.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d"
+checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
 dependencies = [
  "autocfg",
 ]
@@ -7112,6 +7107,16 @@ dependencies = [
  "winapi 0.3.9",
 ]
 
+[[package]]
+name = "socket2"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877"
+dependencies = [
+ "libc",
+ "windows-sys",
+]
+
 [[package]]
 name = "spin"
 version = "0.5.2"

crates/search/src/project_search.rs 🔗

@@ -640,6 +640,7 @@ impl ProjectSearchView {
             self.search_options = SearchOptions::none();
 
             let project = self.model.read(cx).project.clone();
+
             let index_task = semantic_index.update(cx, |semantic_index, cx| {
                 semantic_index.index_project(project, cx)
             });
@@ -1635,6 +1636,12 @@ impl ToolbarItemView for ProjectSearchBar {
         self.subscription = None;
         self.active_project_search = None;
         if let Some(search) = active_pane_item.and_then(|i| i.downcast::<ProjectSearchView>()) {
+            search.update(cx, |search, cx| {
+                if search.current_mode == SearchMode::Semantic {
+                    search.index_project(cx);
+                }
+            });
+
             self.subscription = Some(cx.observe(&search, |_, _, cx| cx.notify()));
             self.active_project_search = Some(search);
             ToolbarItemLocation::PrimaryLeft {

crates/semantic_index/Cargo.toml 🔗

@@ -38,6 +38,7 @@ parking_lot.workspace = true
 rand.workspace = true
 schemars.workspace = true
 globset.workspace = true
+sha1 = "0.10.5"
 
 [dev-dependencies]
 gpui = { path = "../gpui", features = ["test-support"] }

crates/semantic_index/src/db.rs 🔗

@@ -26,6 +26,9 @@ pub struct FileRecord {
 #[derive(Debug)]
 struct Embedding(pub Vec<f32>);
 
+#[derive(Debug)]
+struct Sha1(pub Vec<u8>);
+
 impl FromSql for Embedding {
     fn column_result(value: ValueRef) -> FromSqlResult<Self> {
         let bytes = value.as_blob()?;
@@ -37,6 +40,17 @@ impl FromSql for Embedding {
     }
 }
 
+impl FromSql for Sha1 {
+    fn column_result(value: ValueRef) -> FromSqlResult<Self> {
+        let bytes = value.as_blob()?;
+        let sha1: Result<Vec<u8>, Box<bincode::ErrorKind>> = bincode::deserialize(bytes);
+        if sha1.is_err() {
+            return Err(rusqlite::types::FromSqlError::Other(sha1.unwrap_err()));
+        }
+        return Ok(Sha1(sha1.unwrap()));
+    }
+}
+
 pub struct VectorDatabase {
     db: rusqlite::Connection,
 }
@@ -132,6 +146,7 @@ impl VectorDatabase {
                 end_byte INTEGER NOT NULL,
                 name VARCHAR NOT NULL,
                 embedding BLOB NOT NULL,
+                sha1 BLOB NOT NULL,
                 FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE
             )",
             [],
@@ -182,15 +197,17 @@ impl VectorDatabase {
         // I imagine we can speed this up with a bulk insert of some kind.
         for document in documents {
             let embedding_blob = bincode::serialize(&document.embedding)?;
+            let sha_blob = bincode::serialize(&document.sha1)?;
 
             self.db.execute(
-                "INSERT INTO documents (file_id, start_byte, end_byte, name, embedding) VALUES (?1, ?2, ?3, ?4, $5)",
+                "INSERT INTO documents (file_id, start_byte, end_byte, name, embedding, sha1) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
                 params![
                     file_id,
                     document.range.start.to_string(),
                     document.range.end.to_string(),
                     document.name,
-                    embedding_blob
+                    embedding_blob,
+                    sha_blob
                 ],
             )?;
         }

crates/semantic_index/src/embedding.rs 🔗

@@ -106,8 +106,8 @@ impl OpenAIEmbeddings {
 #[async_trait]
 impl EmbeddingProvider for OpenAIEmbeddings {
     async fn embed_batch(&self, spans: Vec<&str>) -> Result<Vec<Vec<f32>>> {
-        const BACKOFF_SECONDS: [usize; 3] = [45, 75, 125];
-        const MAX_RETRIES: usize = 3;
+        const BACKOFF_SECONDS: [usize; 4] = [3, 5, 15, 45];
+        const MAX_RETRIES: usize = 4;
 
         let api_key = OPENAI_API_KEY
             .as_ref()

crates/semantic_index/src/parsing.rs 🔗

@@ -1,5 +1,6 @@
 use anyhow::{anyhow, Ok, Result};
 use language::{Grammar, Language};
+use sha1::{Digest, Sha1};
 use std::{
     cmp::{self, Reverse},
     collections::HashSet,
@@ -15,6 +16,7 @@ pub struct Document {
     pub range: Range<usize>,
     pub content: String,
     pub embedding: Vec<f32>,
+    pub sha1: [u8; 20],
 }
 
 const CODE_CONTEXT_TEMPLATE: &str =
@@ -63,11 +65,15 @@ impl CodeContextRetriever {
             .replace("<language>", language_name.as_ref())
             .replace("<item>", &content);
 
+        let mut sha1 = Sha1::new();
+        sha1.update(&document_span);
+
         Ok(vec![Document {
             range: 0..content.len(),
             content: document_span,
             embedding: Vec::new(),
             name: language_name.to_string(),
+            sha1: sha1.finalize().into(),
         }])
     }
 
@@ -76,11 +82,15 @@ impl CodeContextRetriever {
             .replace("<path>", relative_path.to_string_lossy().as_ref())
             .replace("<item>", &content);
 
+        let mut sha1 = Sha1::new();
+        sha1.update(&document_span);
+
         Ok(vec![Document {
             range: 0..content.len(),
             content: document_span,
             embedding: Vec::new(),
             name: "Markdown".to_string(),
+            sha1: sha1.finalize().into(),
         }])
     }
 
@@ -253,11 +263,15 @@ impl CodeContextRetriever {
                 );
             }
 
+            let mut sha1 = Sha1::new();
+            sha1.update(&document_content);
+
             documents.push(Document {
                 name,
                 content: document_content,
                 range: item_range.clone(),
                 embedding: vec![],
+                sha1: sha1.finalize().into(),
             })
         }
 

crates/semantic_index/src/semantic_index.rs 🔗

@@ -16,7 +16,7 @@ use language::{Anchor, Buffer, Language, LanguageRegistry};
 use parking_lot::Mutex;
 use parsing::{CodeContextRetriever, Document, PARSEABLE_ENTIRE_FILE_TYPES};
 use postage::watch;
-use project::{search::PathMatcher, Fs, Project, WorktreeId};
+use project::{search::PathMatcher, Fs, PathChange, Project, ProjectEntryId, WorktreeId};
 use smol::channel;
 use std::{
     cmp::Ordering,
@@ -33,8 +33,9 @@ use util::{
     paths::EMBEDDINGS_DIR,
     ResultExt,
 };
+use workspace::WorkspaceCreated;
 
-const SEMANTIC_INDEX_VERSION: usize = 6;
+const SEMANTIC_INDEX_VERSION: usize = 7;
 const EMBEDDINGS_BATCH_SIZE: usize = 80;
 
 pub fn init(
@@ -54,6 +55,22 @@ pub fn init(
         return;
     }
 
+    cx.subscribe_global::<WorkspaceCreated, _>({
+        move |event, cx| {
+            let Some(semantic_index) = SemanticIndex::global(cx) else { return; };
+            let workspace = &event.0;
+            if let Some(workspace) = workspace.upgrade(cx) {
+                let project = workspace.read(cx).project().clone();
+                if project.read(cx).is_local() {
+                    semantic_index.update(cx, |index, cx| {
+                        index.initialize_project(project, cx).detach_and_log_err(cx)
+                    });
+                }
+            }
+        }
+    })
+    .detach();
+
     cx.spawn(move |mut cx| async move {
         let semantic_index = SemanticIndex::new(
             fs,
@@ -92,8 +109,11 @@ pub struct SemanticIndex {
 
 struct ProjectState { // Per-project indexing state, stored in `SemanticIndex::projects`.
     worktree_db_ids: Vec<(WorktreeId, i64)>, // (worktree id, database id) pairs searched by `db_id_for_worktree_id`.
+    _subscription: gpui::Subscription, // Project event subscription created in `initialize_project`; presumably dropping it unsubscribes — confirm.
     outstanding_job_count_rx: watch::Receiver<usize>, // Read by `get_outstanding_count`; a clone is returned from `index_project`.
     _outstanding_job_count_tx: Arc<Mutex<watch::Sender<usize>>>, // Shared sender passed to `JobHandle::new` when a file is queued.
+    job_queue_tx: channel::Sender<IndexOperation>, // Feeds operations to the background task spawned in `ProjectState::new`.
+    _queue_update_task: Task<()>, // Background task that receives from the job queue and calls `update_queue`.
 }
 
 #[derive(Clone)]
@@ -112,6 +132,72 @@ impl JobHandle {
     }
 }
 impl ProjectState {
+    /// Builds the state tracked for one project and starts its queue worker.
+    ///
+    /// An unbounded channel of [`IndexOperation`]s is created; the sending half is kept in
+    /// `job_queue_tx`, while a task spawned on the background executor receives from the
+    /// other half and applies each operation via [`Self::update_queue`]. The task is stored
+    /// in `_queue_update_task`, and its loop ends once `recv` reports an error.
+    fn new(
+        cx: &mut AppContext,
+        subscription: gpui::Subscription,
+        worktree_db_ids: Vec<(WorktreeId, i64)>,
+        outstanding_job_count_rx: watch::Receiver<usize>,
+        _outstanding_job_count_tx: Arc<Mutex<watch::Sender<usize>>>,
+    ) -> Self {
+        let (job_queue_tx, job_queue_rx) = channel::unbounded();
+
+        // The map of pending operations is owned exclusively by the worker future.
+        let drain_queue = async move {
+            let mut pending_by_path = HashMap::new();
+            loop {
+                match job_queue_rx.recv().await {
+                    Ok(operation) => Self::update_queue(&mut pending_by_path, operation),
+                    Err(_) => break,
+                }
+            }
+        };
+
+        Self {
+            worktree_db_ids,
+            _subscription: subscription,
+            outstanding_job_count_rx,
+            _outstanding_job_count_tx,
+            job_queue_tx,
+            _queue_update_task: cx.background().spawn(drain_queue),
+        }
+    }
+
+    /// Returns the outstanding job count currently held by the watch receiver.
+    pub fn get_outstanding_count(&self) -> usize {
+        // `usize` is `Copy`, so dereference the borrow guard instead of cloning through it.
+        *self.outstanding_job_count_rx.borrow()
+    }
+
+    /// Applies one [`IndexOperation`] to the map of operations awaiting a flush.
+    ///
+    /// `IndexFile` and `DeleteFile` operations are stored keyed by their absolute path, so a
+    /// newer operation for a path replaces an older one that is still waiting. `FlushQueue`
+    /// empties the map and forwards every stored payload on the sender carried by its
+    /// operation.
+    fn update_queue(queue: &mut HashMap<PathBuf, IndexOperation>, operation: IndexOperation) {
+        match operation {
+            IndexOperation::FlushQueue => {
+                for (path, op) in std::mem::take(queue) {
+                    let sent = match op {
+                        IndexOperation::IndexFile { payload, tx, .. } => {
+                            tx.try_send(payload).is_ok()
+                        }
+                        IndexOperation::DeleteFile { payload, tx, .. } => {
+                            tx.try_send(payload).is_ok()
+                        }
+                        // Only file operations are inserted below. Naming the variant rather
+                        // than using `_` turns a future new variant into a compile error here.
+                        IndexOperation::FlushQueue => true,
+                    };
+                    // Forwarding stays best-effort, but a failed send is now visible in logs.
+                    if !sent {
+                        log::debug!("Failed to forward queued index operation for {:?}", path);
+                    }
+                }
+            }
+            IndexOperation::IndexFile {
+                ref absolute_path, ..
+            }
+            | IndexOperation::DeleteFile {
+                ref absolute_path, ..
+            } => {
+                queue.insert(absolute_path.clone(), operation);
+            }
+        }
+    }
+
     fn db_id_for_worktree_id(&self, id: WorktreeId) -> Option<i64> {
         self.worktree_db_ids
             .iter()
@@ -137,6 +223,7 @@ impl ProjectState {
     }
 }
 
+#[derive(Clone)]
 pub struct PendingFile {
     worktree_db_id: i64,
     relative_path: PathBuf,
@@ -145,6 +232,19 @@ pub struct PendingFile {
     modified_time: SystemTime,
     job_handle: JobHandle,
 }
+enum IndexOperation { // Messages sent on a project's job queue and handled by `ProjectState::update_queue`.
+    IndexFile { // Queued when a file should be handed to the parsing channel.
+        absolute_path: PathBuf, // Key under which the operation is stored in the queue map.
+        payload: PendingFile, // Sent on `tx` when the queue is flushed.
+        tx: channel::Sender<PendingFile>, // Destination channel for `payload`.
+    },
+    DeleteFile { // Queued when a file's stored rows should be removed.
+        absolute_path: PathBuf, // Key under which the operation is stored in the queue map.
+        payload: DbOperation, // Sent on `tx` when the queue is flushed.
+        tx: channel::Sender<DbOperation>, // Destination channel for `payload`.
+    },
+    FlushQueue, // Empties the queue map, forwarding each stored payload on its sender.
+}
 
 pub struct SearchResult {
     pub buffer: ModelHandle<Buffer>,
@@ -576,12 +676,112 @@ impl SemanticIndex {
         })
     }
 
-    pub fn index_project(
+    /// Converts a batch of worktree entry changes into queued [`IndexOperation`]s.
+    ///
+    /// Invoked from the project subscription created in `initialize_project` whenever a
+    /// `WorktreeUpdatedEntries` event arrives. Added or updated files that are not ignored,
+    /// symlinked or external, and whose language is indexable, are queued as `IndexFile`;
+    /// removed paths are queued as `DeleteFile`. Nothing is sent to the parsing or database
+    /// channels here; that happens when the queue is flushed.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the project has no registered state, when the worktree has no
+    /// database id, or when the worktree cannot be found in the project.
+    fn project_entries_changed(
+        &self,
+        project: ModelHandle<Project>,
+        changes: Arc<[(Arc<Path>, ProjectEntryId, PathChange)]>,
+        cx: &mut ModelContext<'_, SemanticIndex>,
+        worktree_id: &WorktreeId,
+    ) -> Result<()> {
+        let (job_queue_tx, outstanding_job_tx, worktree_db_id) = {
+            let state = self
+                .projects
+                .get(&project.downgrade())
+                .ok_or_else(|| anyhow!("Project not yet initialized"))?;
+            let worktree_db_id = state
+                .db_id_for_worktree_id(*worktree_id)
+                .ok_or_else(|| anyhow!("Worktree ID in Database Not Available"))?;
+            (
+                state.job_queue_tx.clone(),
+                state._outstanding_job_count_tx.clone(),
+                worktree_db_id,
+            )
+        };
+
+        let language_registry = self.language_registry.clone();
+        let parsing_files_tx = self.parsing_files_tx.clone();
+        let db_update_tx = self.db_update_tx.clone();
+
+        let worktree = project
+            .read(cx)
+            .worktree_for_id(*worktree_id, cx)
+            .ok_or_else(|| anyhow!("Worktree not available"))?
+            .read(cx)
+            .snapshot();
+        cx.spawn(|_, _| async move {
+            for (path, entry_id, path_change) in changes.iter() {
+                let relative_path = path.to_path_buf();
+                let absolute_path = worktree.absolutize(path);
+
+                match path_change {
+                    PathChange::Removed => {
+                        // NOTE(review): the snapshot is taken after the change, so a removed
+                        // entry is presumably no longer resolvable through `entry_for_id`.
+                        // The delete is therefore queued without consulting the snapshot.
+                        log::trace!("File Event: {:?}, Path: {:?}", &path_change, &path);
+                        let new_operation = IndexOperation::DeleteFile {
+                            absolute_path,
+                            payload: DbOperation::Delete {
+                                worktree_id: worktree_db_id,
+                                path: relative_path,
+                            },
+                            tx: db_update_tx.clone(),
+                        };
+                        let _ = job_queue_tx.try_send(new_operation);
+                    }
+                    PathChange::AddedOrUpdated | PathChange::Updated | PathChange::Added => {
+                        let Some(entry) = worktree.entry_for_id(*entry_id) else {
+                            continue;
+                        };
+                        if entry.is_ignored || entry.is_symlink || entry.is_external {
+                            continue;
+                        }
+                        log::trace!("File Event: {:?}, Path: {:?}", &path_change, &path);
+
+                        let Ok(language) = language_registry
+                            .language_for_file(&relative_path, None)
+                            .await
+                        else {
+                            continue;
+                        };
+
+                        // Skip languages that can be neither embedded whole nor parsed
+                        // through an embedding config.
+                        if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
+                            && &language.name().as_ref() != &"Markdown"
+                            && language
+                                .grammar()
+                                .and_then(|grammar| grammar.embedding_config.as_ref())
+                                .is_none()
+                        {
+                            continue;
+                        }
+
+                        let job_handle = JobHandle::new(&outstanding_job_tx);
+                        let new_operation = IndexOperation::IndexFile {
+                            absolute_path: absolute_path.clone(),
+                            payload: PendingFile {
+                                worktree_db_id,
+                                relative_path,
+                                absolute_path,
+                                language,
+                                modified_time: entry.mtime,
+                                job_handle,
+                            },
+                            tx: parsing_files_tx.clone(),
+                        };
+                        let _ = job_queue_tx.try_send(new_operation);
+                    }
+                    _ => {}
+                }
+            }
+        })
+        .detach();
+
+        Ok(())
+    }
+
+    pub fn initialize_project(
         &mut self,
         project: ModelHandle<Project>,
         cx: &mut ModelContext<Self>,
-    ) -> Task<Result<(usize, watch::Receiver<usize>)>> {
-        let t0 = Instant::now();
+    ) -> Task<Result<()>> {
+        log::trace!("Initializing Project for Semantic Index");
         let worktree_scans_complete = project
             .read(cx)
             .worktrees(cx)
@@ -592,6 +792,7 @@ impl SemanticIndex {
                 }
             })
             .collect::<Vec<_>>();
+
         let worktree_db_ids = project
             .read(cx)
             .worktrees(cx)
@@ -600,15 +801,21 @@ impl SemanticIndex {
             })
             .collect::<Vec<_>>();
 
+        let _subscription = cx.subscribe(&project, |this, project, event, cx| {
+            if let project::Event::WorktreeUpdatedEntries(worktree_id, changes) = event {
+                let _ =
+                    this.project_entries_changed(project.clone(), changes.clone(), cx, worktree_id);
+            };
+        });
+
         let language_registry = self.language_registry.clone();
-        let db_update_tx = self.db_update_tx.clone();
         let parsing_files_tx = self.parsing_files_tx.clone();
+        let db_update_tx = self.db_update_tx.clone();
 
         cx.spawn(|this, mut cx| async move {
             futures::future::join_all(worktree_scans_complete).await;
 
             let worktree_db_ids = futures::future::join_all(worktree_db_ids).await;
-
             let worktrees = project.read_with(&cx, |project, cx| {
                 project
                     .worktrees(cx)
@@ -618,6 +825,7 @@ impl SemanticIndex {
 
             let mut worktree_file_mtimes = HashMap::new();
             let mut db_ids_by_worktree_id = HashMap::new();
+
             for (worktree, db_id) in worktrees.iter().zip(worktree_db_ids) {
                 let db_id = db_id?;
                 db_ids_by_worktree_id.insert(worktree.id(), db_id);
@@ -628,34 +836,34 @@ impl SemanticIndex {
                 );
             }
 
+            let worktree_db_ids = db_ids_by_worktree_id
+                .iter()
+                .map(|(a, b)| (*a, *b))
+                .collect();
+
             let (job_count_tx, job_count_rx) = watch::channel_with(0);
             let job_count_tx = Arc::new(Mutex::new(job_count_tx));
-            this.update(&mut cx, |this, _| {
-                this.projects.insert(
-                    project.downgrade(),
-                    ProjectState {
-                        worktree_db_ids: db_ids_by_worktree_id
-                            .iter()
-                            .map(|(a, b)| (*a, *b))
-                            .collect(),
-                        outstanding_job_count_rx: job_count_rx.clone(),
-                        _outstanding_job_count_tx: job_count_tx.clone(),
-                    },
-                );
-            });
+            let job_count_tx_longlived = job_count_tx.clone();
 
-            cx.background()
+            let worktree_files = cx
+                .background()
                 .spawn(async move {
-                    let mut count = 0;
+                    let mut worktree_files = Vec::new();
                     for worktree in worktrees.into_iter() {
                         let mut file_mtimes = worktree_file_mtimes.remove(&worktree.id()).unwrap();
+                        let worktree_db_id = db_ids_by_worktree_id[&worktree.id()];
                         for file in worktree.files(false, 0) {
                             let absolute_path = worktree.absolutize(&file.path);
 
+                            if file.is_external || file.is_ignored || file.is_symlink {
+                                continue;
+                            }
+
                             if let Ok(language) = language_registry
                                 .language_for_file(&absolute_path, None)
                                 .await
                             {
+                                // Test if file is valid parseable file
                                 if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
                                     && &language.name().as_ref() != &"Markdown"
                                     && language
@@ -672,39 +880,84 @@ impl SemanticIndex {
                                     .map_or(false, |existing_mtime| existing_mtime == file.mtime);
 
                                 if !already_stored {
-                                    count += 1;
-
                                     let job_handle = JobHandle::new(&job_count_tx);
-                                    parsing_files_tx
-                                        .try_send(PendingFile {
-                                            worktree_db_id: db_ids_by_worktree_id[&worktree.id()],
+                                    worktree_files.push(IndexOperation::IndexFile {
+                                        absolute_path: absolute_path.clone(),
+                                        payload: PendingFile {
+                                            worktree_db_id,
                                             relative_path: path_buf,
                                             absolute_path,
                                             language,
                                             job_handle,
                                             modified_time: file.mtime,
-                                        })
-                                        .unwrap();
+                                        },
+                                        tx: parsing_files_tx.clone(),
+                                    });
                                 }
                             }
                         }
-                        for file in file_mtimes.keys() {
-                            db_update_tx
-                                .try_send(DbOperation::Delete {
-                                    worktree_id: db_ids_by_worktree_id[&worktree.id()],
-                                    path: file.to_owned(),
-                                })
-                                .unwrap();
+                        // Clean up entries from database that are no longer in the worktree.
+                        for (path, _) in file_mtimes {
+                            worktree_files.push(IndexOperation::DeleteFile {
+                                absolute_path: worktree.absolutize(path.as_path()),
+                                payload: DbOperation::Delete {
+                                    worktree_id: worktree_db_id,
+                                    path,
+                                },
+                                tx: db_update_tx.clone(),
+                            });
                         }
                     }
 
-                    log::trace!(
-                        "walking worktree took {:?} milliseconds",
-                        t0.elapsed().as_millis()
-                    );
-                    anyhow::Ok((count, job_count_rx))
+                    anyhow::Ok(worktree_files)
                 })
-                .await
+                .await?;
+
+            this.update(&mut cx, |this, cx| {
+                let project_state = ProjectState::new(
+                    cx,
+                    _subscription,
+                    worktree_db_ids,
+                    job_count_rx,
+                    job_count_tx_longlived,
+                );
+
+                for op in worktree_files {
+                    let _ = project_state.job_queue_tx.try_send(op);
+                }
+
+                this.projects.insert(project.downgrade(), project_state);
+            });
+            Result::<(), _>::Ok(())
+        })
+    }
+
+    /// Requests a flush of the project's queued index operations.
+    ///
+    /// Resolves to the outstanding job count read when this method was called, together
+    /// with a receiver cloned from the project's state for observing that count.
+    ///
+    /// # Errors
+    ///
+    /// Returns an already-failed task when no state is registered for `project`, i.e. when
+    /// `initialize_project` has not yet inserted it into `self.projects`.
+    pub fn index_project(
+        &mut self,
+        project: ModelHandle<Project>,
+        cx: &mut ModelContext<Self>,
+    ) -> Task<Result<(usize, watch::Receiver<usize>)>> {
+        let Some(state) = self.projects.get(&project.downgrade()) else {
+            return Task::Ready(Some(Err(anyhow!("Project not yet initialized"))));
+        };
+
+        // Both values are captured before the flush request is sent.
+        let job_count_rx = state.outstanding_job_count_rx.clone();
+        let count = state.get_outstanding_count();
+
+        cx.spawn(|this, mut cx| async move {
+            this.update(&mut cx, |this, _| {
+                // The state is looked up again inside the task; if it is gone, the flush
+                // is skipped.
+                if let Some(state) = this.projects.get(&project.downgrade()) {
+                    let _ = state.job_queue_tx.try_send(IndexOperation::FlushQueue);
+                }
+            });
+
+            Ok((count, job_count_rx))
         })
     }
 

crates/semantic_index/src/semantic_index_tests.rs 🔗

@@ -86,6 +86,13 @@ async fn test_semantic_index(cx: &mut TestAppContext) {
     .unwrap();
 
     let project = Project::test(fs.clone(), ["/the-root".as_ref()], cx).await;
+
+    let _ = store
+        .update(cx, |store, cx| {
+            store.initialize_project(project.clone(), cx)
+        })
+        .await;
+
     let (file_count, outstanding_file_count) = store
         .update(cx, |store, cx| store.index_project(project.clone(), cx))
         .await