Semantic index eval (#2988)

Kyle Caverly created

v0 of the Semantic Index evaluate test suite

Release Notes:

- Added eval.rs as an example to the semantic-index crates
- Generates test metrics for two small projects, as a starting point to
systematically evaluate retrieval quality

Change summary

Cargo.lock                                   | 297 ++++++-----
Cargo.toml                                   |   1 
crates/fs/Cargo.toml                         |   2 
crates/git/Cargo.toml                        |   2 
crates/project/Cargo.toml                    |   2 
crates/semantic_index/Cargo.toml             |   7 
crates/semantic_index/README.md              |  39 -
crates/semantic_index/eval/gpt-engineer.json | 114 ++++
crates/semantic_index/eval/tree-sitter.json  | 104 ++++
crates/semantic_index/examples/eval.rs       | 531 ++++++++++++++++++++++
crates/semantic_index/src/semantic_index.rs  |   6 
crates/util/Cargo.toml                       |   4 
script/evaluate_semantic_index               |   3 
13 files changed, 935 insertions(+), 177 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -130,7 +130,7 @@ source = "git+https://github.com/zed-industries/alacritty?rev=33306142195b354ef3
 dependencies = [
  "log",
  "serde",
- "toml 0.7.6",
+ "toml 0.7.8",
 ]
 
 [[package]]
@@ -140,7 +140,7 @@ source = "git+https://github.com/zed-industries/alacritty?rev=33306142195b354ef3
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -159,14 +159,14 @@ dependencies = [
  "mio-anonymous-pipes",
  "mio-extras",
  "miow 0.3.7",
- "nix 0.26.2",
+ "nix 0.26.4",
  "parking_lot 0.12.1",
  "regex-automata 0.1.10",
  "serde",
  "serde_yaml",
  "signal-hook",
  "signal-hook-mio",
- "toml 0.7.6",
+ "toml 0.7.8",
  "unicode-width",
  "vte",
  "windows-sys",
@@ -229,24 +229,23 @@ dependencies = [
 
 [[package]]
 name = "anstream"
-version = "0.3.2"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
+checksum = "b1f58811cfac344940f1a400b6e6231ce35171f614f26439e80f8c1465c5cc0c"
 dependencies = [
  "anstyle",
  "anstyle-parse",
  "anstyle-query",
  "anstyle-wincon",
  "colorchoice",
- "is-terminal 0.4.9",
  "utf8parse",
 ]
 
 [[package]]
 name = "anstyle"
-version = "1.0.2"
+version = "1.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea"
+checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46"
 
 [[package]]
 name = "anstyle-parse"
@@ -268,9 +267,9 @@ dependencies = [
 
 [[package]]
 name = "anstyle-wincon"
-version = "1.0.2"
+version = "2.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c"
+checksum = "58f54d10c6dfa51283a066ceab3ec1ab78d13fae00aa49243a45e4571fb79dfd"
 dependencies = [
  "anstyle",
  "windows-sys",
@@ -337,7 +336,7 @@ dependencies = [
  "futures-core",
  "futures-io",
  "once_cell",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "tokio",
 ]
 
@@ -351,7 +350,7 @@ dependencies = [
  "futures-core",
  "futures-io",
  "memchr",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
 ]
 
 [[package]]
@@ -476,13 +475,13 @@ dependencies = [
 
 [[package]]
 name = "async-recursion"
-version = "1.0.4"
+version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba"
+checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -505,7 +504,7 @@ dependencies = [
  "log",
  "memchr",
  "once_cell",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "pin-utils",
  "slab",
  "wasm-bindgen-futures",
@@ -519,7 +518,7 @@ checksum = "cd56dd203fef61ac097dd65721a419ddccb106b2d2b70ba60a6b529f03961a51"
 dependencies = [
  "async-stream-impl",
  "futures-core",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
 ]
 
 [[package]]
@@ -530,7 +529,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -573,7 +572,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -586,7 +585,7 @@ dependencies = [
  "futures-io",
  "futures-util",
  "log",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "tungstenite 0.16.0",
 ]
 
@@ -675,7 +674,7 @@ dependencies = [
  "axum-core",
  "base64 0.13.1",
  "bitflags 1.3.2",
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "futures-util",
  "headers",
  "http",
@@ -686,7 +685,7 @@ dependencies = [
  "memchr",
  "mime",
  "percent-encoding",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "serde",
  "serde_json",
  "serde_urlencoded",
@@ -707,7 +706,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37e5939e02c56fecd5c017c37df4238c0a839fa76b7f97acdd7efb804fd181cc"
 dependencies = [
  "async-trait",
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "futures-util",
  "http",
  "http-body",
@@ -723,11 +722,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69034b3b0fd97923eee2ce8a47540edb21e07f48f87f67d44bb4271cec622bdb"
 dependencies = [
  "axum",
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "futures-util",
  "http",
  "mime",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "serde",
  "serde_json",
  "tokio",
@@ -748,7 +747,7 @@ dependencies = [
  "cfg-if 1.0.0",
  "libc",
  "miniz_oxide 0.7.1",
- "object 0.32.0",
+ "object 0.32.1",
  "rustc-demangle",
 ]
 
@@ -773,9 +772,9 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8"
 
 [[package]]
 name = "base64"
-version = "0.21.2"
+version = "0.21.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d"
+checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2"
 
 [[package]]
 name = "base64ct"
@@ -831,7 +830,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.29",
+ "syn 2.0.37",
  "which",
 ]
 
@@ -968,7 +967,7 @@ dependencies = [
  "collections",
  "editor",
  "gpui",
- "itertools",
+ "itertools 0.10.5",
  "language",
  "outline",
  "project",
@@ -991,20 +990,20 @@ dependencies = [
 
 [[package]]
 name = "bstr"
-version = "1.6.0"
+version = "1.6.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
+checksum = "4c2f7349907b712260e64b0afe2f84692af14a454be26187d9df565c7f69266a"
 dependencies = [
  "memchr",
- "regex-automata 0.3.6",
+ "regex-automata 0.3.8",
  "serde",
 ]
 
 [[package]]
 name = "bumpalo"
-version = "3.13.0"
+version = "3.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1"
+checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec"
 
 [[package]]
 name = "bytecheck"
@@ -1030,9 +1029,9 @@ dependencies = [
 
 [[package]]
 name = "bytemuck"
-version = "1.13.1"
+version = "1.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea"
+checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6"
 
 [[package]]
 name = "byteorder"
@@ -1052,9 +1051,9 @@ dependencies = [
 
 [[package]]
 name = "bytes"
-version = "1.4.0"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be"
+checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
 
 [[package]]
 name = "call"
@@ -1220,7 +1219,7 @@ dependencies = [
  "tempfile",
  "text",
  "thiserror",
- "time 0.3.27",
+ "time",
  "tiny_http",
  "url",
  "util",
@@ -1229,18 +1228,17 @@ dependencies = [
 
 [[package]]
 name = "chrono"
-version = "0.4.26"
+version = "0.4.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec837a71355b28f6556dbd569b37b3f363091c0bd4b2e735674521b4c5fd9bc5"
+checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
 dependencies = [
  "android-tzdata",
  "iana-time-zone",
  "js-sys",
  "num-traits",
  "serde",
- "time 0.1.45",
  "wasm-bindgen",
- "winapi 0.3.9",
+ "windows-targets 0.48.5",
 ]
 
 [[package]]
@@ -1288,24 +1286,23 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.3.24"
+version = "4.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb690e81c7840c0d7aade59f242ea3b41b9bc27bcd5997890e7702ae4b32e487"
+checksum = "b1d7b8d5ec32af0fadc644bf1fd509a688c2103b185644bb1e29d164e0703136"
 dependencies = [
  "clap_builder",
- "clap_derive 4.3.12",
- "once_cell",
+ "clap_derive 4.4.2",
 ]
 
 [[package]]
 name = "clap_builder"
-version = "4.3.24"
+version = "4.4.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ed2e96bc16d8d740f6f48d663eddf4b8a0983e79210fd55479b7bcd0a69860e"
+checksum = "5179bb514e4d7c2051749d8fcefa2ed6d06a9f4e6d69faf3805f5d80b8cf8d56"
 dependencies = [
  "anstream",
  "anstyle",
- "clap_lex 0.5.0",
+ "clap_lex 0.5.1",
  "strsim",
 ]
 
@@ -1324,14 +1321,14 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.3.12"
+version = "4.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050"
+checksum = "0862016ff20d69b84ef8247369fabf5c008a7417002411897d40ee1f4532b873"
 dependencies = [
  "heck 0.4.1",
  "proc-macro2",
  "quote",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -1345,9 +1342,9 @@ dependencies = [
 
 [[package]]
 name = "clap_lex"
-version = "0.5.0"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b"
+checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961"
 
 [[package]]
 name = "cli"
@@ -1393,7 +1390,7 @@ dependencies = [
  "tempfile",
  "text",
  "thiserror",
- "time 0.3.27",
+ "time",
  "tiny_http",
  "url",
  "util",
@@ -1501,7 +1498,7 @@ dependencies = [
  "sqlx",
  "text",
  "theme",
- "time 0.3.27",
+ "time",
  "tokio",
  "tokio-tungstenite",
  "toml 0.5.11",
@@ -1547,7 +1544,7 @@ dependencies = [
  "settings",
  "theme",
  "theme_selector",
- "time 0.3.27",
+ "time",
  "util",
  "vcs_menu",
  "workspace",
@@ -1579,7 +1576,7 @@ version = "4.6.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "memchr",
 ]
 
@@ -1748,9 +1745,9 @@ dependencies = [
 
 [[package]]
 name = "core-services"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "51b344b958cae90858bf6086f49599ecc5ec8698eacad0ea155509ba11fab347"
+checksum = "92567e81db522550ebaf742c5d875624ec7820c2c7ee5f8c60e4ce7c2ae3c0fd"
 dependencies = [
  "core-foundation",
 ]
@@ -1919,7 +1916,7 @@ dependencies = [
  "cranelift-codegen",
  "cranelift-entity",
  "cranelift-frontend",
- "itertools",
+ "itertools 0.10.5",
  "log",
  "smallvec",
  "wasmparser",
@@ -2050,9 +2047,9 @@ dependencies = [
 
 [[package]]
 name = "curl-sys"
-version = "0.4.65+curl-8.2.1"
+version = "0.4.66+curl-8.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "961ba061c9ef2fe34bbd12b807152d96f0badd2bebe7b90ce6c8c8b7572a0986"
+checksum = "70c44a72e830f0e40ad90dda8a6ab6ed6314d39776599a58a2e5e37fbc6db5b9"
 dependencies = [
  "cc",
  "libc",
@@ -2060,14 +2057,14 @@ dependencies = [
  "openssl-sys",
  "pkg-config",
  "vcpkg",
- "winapi 0.3.9",
+ "windows-sys",
 ]
 
 [[package]]
 name = "dashmap"
-version = "5.5.1"
+version = "5.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "edd72493923899c6f10c641bdbdeddc7183d6396641d99c1a0d1597f37f92e28"
+checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
 dependencies = [
  "cfg-if 1.0.0",
  "hashbrown 0.14.0",
@@ -2315,9 +2312,9 @@ dependencies = [
 
 [[package]]
 name = "dyn-clone"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbfc4744c1b8f2a09adc0e55242f60b1af195d88596bd8700be74418c056c555"
+checksum = "23d2f3407d9a573d666de4b5bdf10569d73ca9478087346697dcbae6244bfbcd"
 
 [[package]]
 name = "editor"
@@ -2340,7 +2337,7 @@ dependencies = [
  "git",
  "gpui",
  "indoc",
- "itertools",
+ "itertools 0.10.5",
  "language",
  "lazy_static",
  "log",
@@ -2430,9 +2427,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
 
 [[package]]
 name = "erased-serde"
-version = "0.3.29"
+version = "0.3.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc978899517288e3ebbd1a3bfc1d9537dbb87eeab149e53ea490e63bcdff561a"
+checksum = "6c138974f9d5e7fe373eb04df7cae98833802ae4b11c24ac7039a21d5af4b26c"
 dependencies = [
  "serde",
 ]
@@ -2450,9 +2447,9 @@ dependencies = [
 
 [[package]]
 name = "errno"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f"
+checksum = "136526188508e25c6fef639d7927dfb3e0e3084488bf202267829cf7fc23dbdd"
 dependencies = [
  "errno-dragonfly",
  "libc",
@@ -2613,6 +2610,12 @@ dependencies = [
  "windows-sys",
 ]
 
+[[package]]
+name = "finl_unicode"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6"
+
 [[package]]
 name = "fixedbitset"
 version = "0.4.2"
@@ -2765,7 +2768,7 @@ dependencies = [
  "sum_tree",
  "tempfile",
  "text",
- "time 0.3.27",
+ "time",
  "util",
 ]
 
@@ -2903,7 +2906,7 @@ dependencies = [
  "futures-io",
  "memchr",
  "parking",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "waker-fn",
 ]
 
@@ -2915,7 +2918,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -2944,7 +2947,7 @@ dependencies = [
  "futures-sink",
  "futures-task",
  "memchr",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "pin-utils",
  "slab",
  "tokio-io",
@@ -3130,7 +3133,7 @@ dependencies = [
  "futures 0.3.28",
  "gpui_macros",
  "image",
- "itertools",
+ "itertools 0.10.5",
  "lazy_static",
  "log",
  "media",
@@ -3159,7 +3162,7 @@ dependencies = [
  "sum_tree",
  "taffy",
  "thiserror",
- "time 0.3.27",
+ "time",
  "tiny-skia",
  "usvg",
  "util",
@@ -3219,7 +3222,7 @@ version = "0.3.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "fnv",
  "futures-core",
  "futures-sink",
@@ -3280,22 +3283,21 @@ dependencies = [
 
 [[package]]
 name = "hashlink"
-version = "0.8.3"
+version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "312f66718a2d7789ffef4f4b7b213138ed9f1eb3aa1d0d82fc99f88fb3ffd26f"
+checksum = "e8094feaf31ff591f651a2664fb9cfd92bba7a60ce3197265e9482ebe753c8f7"
 dependencies = [
  "hashbrown 0.14.0",
 ]
 
 [[package]]
 name = "headers"
-version = "0.3.8"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3e372db8e5c0d213e0cd0b9be18be2aca3d44cf2fe30a9d46a65581cd454584"
+checksum = "06683b93020a07e3dbcf5f8c0f6d40080d725bea7936fc01ad345c01b97dc270"
 dependencies = [
- "base64 0.13.1",
- "bitflags 1.3.2",
- "bytes 1.4.0",
+ "base64 0.21.4",
+ "bytes 1.5.0",
  "headers-core",
  "http",
  "httpdate",
@@ -3409,7 +3411,7 @@ version = "0.2.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "fnv",
  "itoa",
 ]
@@ -3420,9 +3422,9 @@ version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "http",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
 ]
 
 [[package]]
@@ -3445,9 +3447,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
 
 [[package]]
 name = "human_bytes"
-version = "0.4.2"
+version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "27e2b089f28ad15597b48d8c0a8fe94eeb1c1cb26ca99b6f66ac9582ae10c5e6"
+checksum = "91f255a4535024abf7640cb288260811fc14794f62b063652ed349f9a6c2348e"
 
 [[package]]
 name = "humantime"
@@ -3461,7 +3463,7 @@ version = "0.14.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "futures-channel",
  "futures-core",
  "futures-util",
@@ -3471,7 +3473,7 @@ dependencies = [
  "httparse",
  "httpdate",
  "itoa",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "socket2 0.4.9",
  "tokio",
  "tower-service",
@@ -3486,7 +3488,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
 dependencies = [
  "hyper",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "tokio",
  "tokio-io-timeout",
 ]
@@ -3497,7 +3499,7 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "hyper",
  "native-tls",
  "tokio",
@@ -3704,7 +3706,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
 dependencies = [
  "hermit-abi 0.3.2",
- "rustix 0.38.8",
+ "rustix 0.38.13",
  "windows-sys",
 ]
 
@@ -3744,6 +3746,15 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.9"
@@ -3998,9 +4009,9 @@ checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67"
 
 [[package]]
 name = "libc"
-version = "0.2.147"
+version = "0.2.148"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
+checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b"
 
 [[package]]
 name = "libgit2-sys"
@@ -4092,9 +4103,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.4.5"
+version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
+checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128"
 
 [[package]]
 name = "lipsum"
@@ -4115,7 +4126,7 @@ dependencies = [
  "async-trait",
  "block",
  "byteorder",
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "cocoa",
  "collections",
  "core-foundation",
@@ -4293,7 +4304,7 @@ dependencies = [
  "anyhow",
  "bindgen 0.65.1",
  "block",
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "core-foundation",
  "foreign-types",
  "metal",
@@ -4302,9 +4313,9 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.6.0"
+version = "2.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76fc44e2588d5b436dbc3c6cf62aef290f90dab6235744a93dfe1cc18f451e2c"
+checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
 
 [[package]]
 name = "memfd"
@@ -4592,14 +4603,13 @@ dependencies = [
 
 [[package]]
 name = "nix"
-version = "0.26.2"
+version = "0.26.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"
+checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
 dependencies = [
  "bitflags 1.3.2",
  "cfg-if 1.0.0",
  "libc",
- "static_assertions",
 ]
 
 [[package]]
@@ -4820,8 +4830,8 @@ dependencies = [
 
 [[package]]
 name = "nvim-rs"
-version = "0.5.0"
-source = "git+https://github.com/KillTheMule/nvim-rs?branch=master#d701c2790dcb2579f8f4d7003ba30e2100a7d25b"
+version = "0.6.0-pre"
+source = "git+https://github.com/KillTheMule/nvim-rs?branch=master#0d2b1c884f3c39a76b5b7aac0b429f4624843954"
 dependencies = [
  "async-trait",
  "futures 0.3.28",
@@ -4866,9 +4876,9 @@ dependencies = [
 
 [[package]]
 name = "object"
-version = "0.32.0"
+version = "0.32.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77ac5bbd07aea88c60a577a1ce218075ffd59208b2d7ca97adf9bfc5aeb21ebe"
+checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0"
 dependencies = [
  "memchr",
 ]
@@ -4910,11 +4920,11 @@ checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5"
 
 [[package]]
 name = "openssl"
-version = "0.10.56"
+version = "0.10.57"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "729b745ad4a5575dd06a3e1af1414bd330ee561c01b3899eb584baeaa8def17e"
+checksum = "bac25ee399abb46215765b1cb35bc0212377e58a061560d8b29b024fd0430e7c"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags 2.4.0",
  "cfg-if 1.0.0",
  "foreign-types",
  "libc",
@@ -4931,7 +4941,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -4942,9 +4952,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
 
 [[package]]
 name = "openssl-sys"
-version = "0.9.91"
+version = "0.9.93"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "866b5f16f90776b9bb8dc1e1802ac6f0513de3a7a7465867bfbc563dc737faac"
+checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d"
 dependencies = [
  "cc",
  "libc",
@@ -5172,10 +5182,11 @@ checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
 
 [[package]]
 name = "pest"
-version = "2.7.2"
+version = "2.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1acb4a4365a13f749a93f1a094a7805e5cfa0955373a9de860d962eaa3a5fe5a"
+checksum = "d7a4d085fd991ac8d5b05a147b437791b4260b76326baf0fc60cf7c9c27ecd33"
 dependencies = [
+ "memchr",
  "thiserror",
  "ucd-trie",
 ]
@@ -5230,7 +5241,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -5241,9 +5252,9 @@ checksum = "257b64915a082f7811703966789728173279bdebb956b143dbcd23f6f970a777"
 
 [[package]]
 name = "pin-project-lite"
-version = "0.2.12"
+version = "0.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12cc1b0bf1727a77a54b6654e7b5f1af8604923edc8b81885f8ec92f9e3f0a05"
+checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
 
 [[package]]
 name = "pin-utils"
@@ -5263,12 +5274,12 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bdc0001cfea3db57a2e24bc0d818e9e20e554b5f97fabb9bc231dc240269ae06"
 dependencies = [
- "base64 0.21.2",
+ "base64 0.21.4",
  "indexmap 1.9.3",
  "line-wrap",
  "quick-xml",
  "serde",
- "time 0.3.27",
+ "time",
 ]
 
 [[package]]
@@ -5333,7 +5344,7 @@ dependencies = [
  "concurrent-queue",
  "libc",
  "log",
- "pin-project-lite 0.2.12",
+ "pin-project-lite 0.2.13",
  "windows-sys",
 ]
 
@@ -5378,12 +5389,12 @@ dependencies = [
 
 [[package]]
 name = "prettyplease"
-version = "0.2.12"
+version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c64d9ba0963cdcea2e1b2230fbae2bab30eb25a174be395c41e764bfb65dd62"
+checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d"
 dependencies = [
  "proc-macro2",
- "syn 2.0.29",
+ "syn 2.0.37",
 ]
 
 [[package]]
@@ -5431,9 +5442,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.66"
+version = "1.0.67"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
+checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328"
 dependencies = [
  "unicode-ident",
 ]
@@ -5473,7 +5484,7 @@ dependencies = [
  "globset",
  "gpui",
  "ignore",
- "itertools",
+ "itertools 0.10.5",
  "language",
  "lazy_static",
  "log",
@@ -5575,7 +5586,7 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "de5e2533f59d08fcf364fd374ebda0692a70bd6d7e66ef97f306f45c6c5d8020"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "prost-derive 0.8.0",
 ]
 
@@ -5585,7 +5596,7 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "444879275cb4fd84958b1a1d5420d15e6fcf7c235fe47f053c9c2a80aceb6001"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "prost-derive 0.9.0",
 ]
 
@@ -5595,9 +5606,9 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62941722fb675d463659e49c4f3fe1fe792ff24fe5bbaa9c08cd3b98a1c354f5"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "heck 0.3.3",
- "itertools",
+ "itertools 0.10.5",
  "lazy_static",
  "log",
  "multimap",
@@ -5616,7 +5627,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "600d2f334aa05acb02a755e217ef1ab6dea4d51b58b7846588b747edec04efba"
 dependencies = [
  "anyhow",
- "itertools",
+ "itertools 0.10.5",
  "proc-macro2",
  "quote",
  "syn 1.0.109",
@@ -5629,7 +5640,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f9cc1a3263e07e0bf68e96268f37665207b49560d98739662cdfaae215c720fe"
 dependencies = [
  "anyhow",
- "itertools",
+ "itertools 0.10.5",
  "proc-macro2",
  "quote",
  "syn 1.0.109",
@@ -5641,7 +5652,7 @@ version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "603bbd6394701d13f3f25aada59c7de9d35a6a5887cfc156181234a44002771b"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "prost 0.8.0",
 ]
 
@@ -5651,7 +5662,7 @@ version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "534b7a0e836e3c482d2693070f982e39e7611da9695d4d1f5a4b186b51faef0a"
 dependencies = [
- "bytes 1.4.0",
+ "bytes 1.5.0",
  "prost 0.9.0",
 ]
 

Cargo.toml 🔗

@@ -116,6 +116,7 @@ toml = { version = "0.5" }
 tree-sitter = "0.20"
 unindent = { version = "0.1.7" }
 pretty_assertions = "1.3.0"
+git2 = { version = "0.15", default-features = false}
 
 tree-sitter-bash = { git = "https://github.com/tree-sitter/tree-sitter-bash", rev = "1b0321ee85701d5036c334a6f04761cdc672e64c" }
 tree-sitter-c = "0.20.1"

crates/fs/Cargo.toml 🔗

@@ -26,7 +26,7 @@ lazy_static.workspace = true
 parking_lot.workspace = true
 smol.workspace = true
 regex.workspace = true
-git2 = { version = "0.15", default-features = false }
+git2.workspace = true
 serde.workspace = true
 serde_derive.workspace = true
 serde_json.workspace = true

crates/git/Cargo.toml 🔗

@@ -20,7 +20,7 @@ smol.workspace = true
 parking_lot.workspace = true
 async-trait.workspace = true
 futures.workspace = true
-git2 = { version = "0.15", default-features = false }
+git2.workspace = true
 
 [dev-dependencies]
 unindent.workspace = true

crates/project/Cargo.toml 🔗

@@ -75,6 +75,6 @@ lsp = { path = "../lsp", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"] }
 util = { path = "../util", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
-git2 = { version = "0.15", default-features = false }
+git2.workspace = true
 tempdir.workspace = true
 unindent.workspace = true

crates/semantic_index/Cargo.toml 🔗

@@ -51,6 +51,10 @@ project = { path = "../project", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
+rust-embed = { version = "8.0", features = ["include-exclude"] }
+client = { path = "../client" }
+zed = { path = "../zed"}
+node_runtime = { path = "../node_runtime"}
 
 pretty_assertions.workspace = true
 rand.workspace = true
@@ -68,3 +72,6 @@ tree-sitter-elixir.workspace = true
 tree-sitter-lua.workspace = true
 tree-sitter-ruby.workspace = true
 tree-sitter-php.workspace = true
+
+[[example]]
+name = "eval"

crates/semantic_index/README.md 🔗

@@ -1,31 +1,20 @@
 
-WIP: Sample SQL Queries
-/*
+# Semantic Index
 
-create table "files" (
-"id" INTEGER PRIMARY KEY,
-"path" VARCHAR,
-"sha1" VARCHAR,
-);
+## Evaluation
 
-create table symbols (
-"file_id" INTEGER REFERENCES("files", "id") ON CASCADE DELETE,
-"offset" INTEGER,
-"embedding" VECTOR,
-);
+### Metrics
 
-insert into "files" ("path", "sha1") values ("src/main.rs", "sha1") return id;
-insert into symbols (
-"file_id",
-"start",
-"end",
-"embedding"
-) values (
-(id,),
-(id,),
-(id,),
-(id,),
-)
+nDCG@k:
+- "The value of NDCG is determined by comparing the relevance of the items returned by the search engine to the relevance of the item that a hypothetical "ideal" search engine would return.
+- "The relevance of result is represented by a score (also known as a 'grade') that is assigned to the search query. The scores of these results are then discounted based on their position in the search results -- did they get recommended first or last?"
 
+MRR@k:
+- "Mean reciprocal rank quantifies the rank of the first relevant item found in teh recommendation list."
 
-*/
+MAP@k:
+- "Mean average precision averages the precision@k metric at each relevant item position in the recommendation list.
+
+Resources:
+- [Evaluating recommendation metrics](https://www.shaped.ai/blog/evaluating-recommendation-systems-map-mmr-ndcg)
+- [Math Walkthrough](https://towardsdatascience.com/demystifying-ndcg-bee3be58cfe0)

crates/semantic_index/eval/gpt-engineer.json 🔗

@@ -0,0 +1,114 @@
+{
+  "repo": "https://github.com/AntonOsika/gpt-engineer.git",
+  "commit": "7735a6445bae3611c62f521e6464c67c957f87c2",
+  "assertions": [
+    {
+      "query": "How do I contribute to this project?",
+      "matches": [
+        ".github/CONTRIBUTING.md:1",
+        "ROADMAP.md:48"
+      ]
+    },
+    {
+      "query": "What version of the openai package is active?",
+      "matches": [
+        "pyproject.toml:14"
+      ]
+    },
+    {
+      "query": "Ask user for clarification",
+      "matches": [
+        "gpt_engineer/steps.py:69"
+      ]
+    },
+    {
+      "query": "generate tests for python code",
+      "matches": [
+        "gpt_engineer/steps.py:153"
+      ]
+    },
+    {
+      "query": "get item from database based on key",
+      "matches": [
+        "gpt_engineer/db.py:42",
+        "gpt_engineer/db.py:68"
+      ]
+    },
+    {
+      "query": "prompt user to select files",
+      "matches": [
+        "gpt_engineer/file_selector.py:171",
+        "gpt_engineer/file_selector.py:306",
+        "gpt_engineer/file_selector.py:289",
+        "gpt_engineer/file_selector.py:234"
+      ]
+    },
+    {
+      "query": "send to rudderstack",
+      "matches": [
+        "gpt_engineer/collect.py:11",
+        "gpt_engineer/collect.py:38"
+      ]
+    },
+    {
+      "query": "parse code blocks from chat messages",
+      "matches": [
+        "gpt_engineer/chat_to_files.py:10",
+        "docs/intro/chat_parsing.md:1"
+      ]
+    },
+    {
+      "query": "how do I use the docker cli?",
+      "matches": [
+        "docker/README.md:1"
+      ]
+    },
+    {
+      "query": "ask the user if the code ran successfully?",
+      "matches": [
+        "gpt_engineer/learning.py:54"
+      ]
+    },
+    {
+      "query": "how is consent granted by the user?",
+      "matches": [
+        "gpt_engineer/learning.py:107",
+        "gpt_engineer/learning.py:130",
+        "gpt_engineer/learning.py:152"
+      ]
+    },
+    {
+      "query": "what are all the different steps the agent can take?",
+      "matches": [
+        "docs/intro/steps_module.md:1",
+        "gpt_engineer/steps.py:391"
+      ]
+    },
+    {
+      "query": "ask the user for clarification?",
+      "matches": [
+        "gpt_engineer/steps.py:69"
+      ]
+    },
+    {
+      "query": "what models are available?",
+      "matches": [
+        "gpt_engineer/ai.py:315",
+        "gpt_engineer/ai.py:341",
+        "docs/open-models.md:1"
+      ]
+    },
+    {
+      "query": "what is the current focus of the project?",
+      "matches": [
+        "ROADMAP.md:11"
+      ]
+    },
+    {
+      "query": "does the agent know how to fix code?",
+      "matches": [
+        "gpt_engineer/steps.py:367"
+      ]
+    }
+  ]
+}

crates/semantic_index/eval/tree-sitter.json 🔗

@@ -0,0 +1,104 @@
+{
+  "repo": "https://github.com/tree-sitter/tree-sitter.git",
+  "commit": "46af27796a76c72d8466627d499f2bca4af958ee",
+  "assertions": [
+    {
+      "query": "What attributes are available for the tags configuration struct?",
+      "matches": [
+        "tags/src/lib.rs:24"
+      ]
+    },
+    {
+      "query": "create a new tag configuration",
+      "matches": [
+        "tags/src/lib.rs:119"
+      ]
+    },
+    {
+      "query": "generate tags based on config",
+      "matches": [
+        "tags/src/lib.rs:261"
+      ]
+    },
+    {
+      "query": "match on ts quantifier in rust",
+      "matches": [
+        "lib/binding_rust/lib.rs:139"
+      ]
+    },
+    {
+      "query": "cli command to generate tags",
+      "matches": [
+        "cli/src/tags.rs:10"
+      ]
+    },
+    {
+      "query": "what version of the tree-sitter-tags package is active?",
+      "matches": [
+        "tags/Cargo.toml:4"
+      ]
+    },
+    {
+      "query": "Insert a new parse state",
+      "matches": [
+        "cli/src/generate/build_tables/build_parse_table.rs:153"
+      ]
+    },
+    {
+      "query": "Handle conflict when numerous actions occur on the same symbol",
+      "matches": [
+        "cli/src/generate/build_tables/build_parse_table.rs:363",
+        "cli/src/generate/build_tables/build_parse_table.rs:442"
+      ]
+    },
+    {
+      "query": "Match based on associativity of actions",
+      "matches": [
+        "cri/src/generate/build_tables/build_parse_table.rs:542"
+      ]
+    },
+    {
+      "query": "Format token set display",
+      "matches": [
+        "cli/src/generate/build_tables/item.rs:246"
+      ]
+    },
+    {
+      "query": "extract choices from rule",
+      "matches": [
+        "cli/src/generate/prepare_grammar/flatten_grammar.rs:124"
+      ]
+    },
+    {
+      "query": "How do we identify if a symbol is being used?",
+      "matches": [
+        "cli/src/generate/prepare_grammar/flatten_grammar.rs:175"
+      ]
+    },
+    {
+      "query": "How do we launch the playground?",
+      "matches": [
+        "cli/src/playground.rs:46"
+      ]
+    },
+    {
+      "query": "How do we test treesitter query matches in rust?",
+      "matches": [
+        "cli/src/query_testing.rs:152",
+        "cli/src/tests/query_test.rs:781",
+        "cli/src/tests/query_test.rs:2163",
+        "cli/src/tests/query_test.rs:3781",
+        "cli/src/tests/query_test.rs:887"
+      ]
+    },
+    {
+      "query": "What does the CLI do?",
+      "matches": [
+        "cli/README.md:10",
+        "cli/loader/README.md:3",
+        "docs/section-5-implementation.md:14",
+        "docs/section-5-implementation.md:18"
+      ]
+    }
+  ]
+}

crates/semantic_index/examples/eval.rs 🔗

@@ -0,0 +1,531 @@
+use anyhow::{anyhow, Result};
+use client::{self, UserStore};
+use gpui::{AsyncAppContext, ModelHandle, Task};
+use language::LanguageRegistry;
+use node_runtime::RealNodeRuntime;
+use project::{Project, RealFs};
+use semantic_index::embedding::OpenAIEmbeddings;
+use semantic_index::semantic_index_settings::SemanticIndexSettings;
+use semantic_index::{SearchResult, SemanticIndex};
+use serde::{Deserialize, Serialize};
+use settings::{default_settings, SettingsStore};
+use std::path::{Path, PathBuf};
+use std::process::Command;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+use std::{cmp, env, fs};
+use util::channel::{RELEASE_CHANNEL, RELEASE_CHANNEL_NAME};
+use util::http::{self};
+use util::paths::EMBEDDINGS_DIR;
+use zed::languages;
+
+#[derive(Deserialize, Clone, Serialize)]
+struct EvaluationQuery {
+    query: String,
+    matches: Vec<String>,
+}
+
+impl EvaluationQuery {
+    fn match_pairs(&self) -> Vec<(PathBuf, u32)> {
+        let mut pairs = Vec::new();
+        for match_identifier in self.matches.iter() {
+            let mut match_parts = match_identifier.split(":");
+
+            if let Some(file_path) = match_parts.next() {
+                if let Some(row_number) = match_parts.next() {
+                    pairs.push((PathBuf::from(file_path), row_number.parse::<u32>().unwrap()));
+                }
+            }
+        }
+        pairs
+    }
+}
+
+#[derive(Deserialize, Clone)]
+struct RepoEval {
+    repo: String,
+    commit: String,
+    assertions: Vec<EvaluationQuery>,
+}
+
+const TMP_REPO_PATH: &str = "eval_repos";
+
+fn parse_eval() -> anyhow::Result<Vec<RepoEval>> {
+    let eval_folder = env::current_dir()?
+        .as_path()
+        .parent()
+        .unwrap()
+        .join("crates/semantic_index/eval");
+
+    let mut repo_evals: Vec<RepoEval> = Vec::new();
+    for entry in fs::read_dir(eval_folder)? {
+        let file_path = entry.unwrap().path();
+        if let Some(extension) = file_path.extension() {
+            if extension == "json" {
+                if let Ok(file) = fs::read_to_string(file_path) {
+                    let repo_eval = serde_json::from_str(file.as_str());
+
+                    match repo_eval {
+                        Ok(repo_eval) => {
+                            repo_evals.push(repo_eval);
+                        }
+                        Err(err) => {
+                            println!("Err: {:?}", err);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(repo_evals)
+}
+
+fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<(String, PathBuf)> {
+    let repo_name = Path::new(repo_eval.repo.as_str())
+        .file_name()
+        .unwrap()
+        .to_str()
+        .unwrap()
+        .to_owned()
+        .replace(".git", "");
+
+    let clone_path = fs::canonicalize(env::current_dir()?)?
+        .parent()
+        .ok_or(anyhow!("path canonicalization failed"))?
+        .parent()
+        .unwrap()
+        .join(TMP_REPO_PATH);
+
+    // Delete Clone Path if already exists
+    let _ = fs::remove_dir_all(&clone_path);
+    let _ = fs::create_dir(&clone_path);
+
+    let _ = Command::new("git")
+        .args(["clone", repo_eval.repo.as_str()])
+        .current_dir(clone_path.clone())
+        .output()?;
+    // Update clone path to be new directory housing the repo.
+    let clone_path = clone_path.join(repo_name.clone());
+    let _ = Command::new("git")
+        .args(["checkout", repo_eval.commit.as_str()])
+        .current_dir(clone_path.clone())
+        .output()?;
+
+    Ok((repo_name, clone_path))
+}
+
+fn dcg(hits: Vec<usize>) -> f32 {
+    let mut result = 0.0;
+    for (idx, hit) in hits.iter().enumerate() {
+        result += *hit as f32 / (2.0 + idx as f32).log2();
+    }
+
+    result
+}
+
+fn get_hits(
+    eval_query: EvaluationQuery,
+    search_results: Vec<SearchResult>,
+    k: usize,
+    cx: &AsyncAppContext,
+) -> (Vec<usize>, Vec<usize>) {
+    let ideal = vec![1; cmp::min(eval_query.matches.len(), k)];
+
+    let mut hits = Vec::new();
+    for result in search_results {
+        let (path, start_row, end_row) = result.buffer.read_with(cx, |buffer, _cx| {
+            let path = buffer.file().unwrap().path().to_path_buf();
+            let start_row = buffer.offset_to_point(result.range.start.offset).row;
+            let end_row = buffer.offset_to_point(result.range.end.offset).row;
+            (path, start_row, end_row)
+        });
+
+        let match_pairs = eval_query.match_pairs();
+        let mut found = 0;
+        for (match_path, match_row) in match_pairs {
+            if match_path == path {
+                if match_row >= start_row && match_row <= end_row {
+                    found = 1;
+                    break;
+                }
+            }
+        }
+
+        hits.push(found);
+    }
+
+    // For now, we are calculating ideal_hits a bit different, as technically
+    // with overlapping ranges, one match can result in more than result.
+    let mut ideal_hits = hits.clone();
+    ideal_hits.retain(|x| x == &1);
+
+    let ideal = if ideal.len() > ideal_hits.len() {
+        ideal
+    } else {
+        ideal_hits
+    };
+
+    // Fill ideal to 10 length
+    let mut filled_ideal = [0; 10];
+    for (idx, i) in ideal.to_vec().into_iter().enumerate() {
+        filled_ideal[idx] = i;
+    }
+
+    (filled_ideal.to_vec(), hits)
+}
+
+fn evaluate_ndcg(hits: Vec<usize>, ideal: Vec<usize>) -> Vec<f32> {
+    // NDCG or Normalized Discounted Cumulative Gain, is determined by comparing the relevance of
+    // items returned by the search engine relative to the hypothetical ideal.
+    // Relevance is represented as a series of booleans, in which each search result returned
+    // is identified as being inside the test set of matches (1) or not (0).
+
+    // For example, if result 1, 3 and 5 match the 3 relevant results provided
+    // actual dcg is calculated against a vector of [1, 0, 1, 0, 1]
+    // whereas ideal dcg is calculated against a vector of [1, 1, 1, 0, 0]
+    // as this ideal vector assumes the 3 relevant results provided were returned first
+    // normalized dcg is then calculated as actual dcg / ideal dcg.
+
+    // NDCG ranges from 0 to 1, which higher values indicating better performance
+    // Commonly NDCG is expressed as NDCG@k, in which k represents the metric calculated
+    // including only the top k values returned.
+    // The @k metrics can help you identify, at what point does the relevant results start to fall off.
+    // Ie. a NDCG@1 of 0.9 and a NDCG@3 of 0.5 may indicate that the first result returned in usually
+    // very high quality, whereas rank results quickly drop off after the first result.
+
+    let mut ndcg = Vec::new();
+    for idx in 1..(hits.len() + 1) {
+        let hits_at_k = hits[0..idx].to_vec();
+        let ideal_at_k = ideal[0..idx].to_vec();
+
+        let at_k = dcg(hits_at_k.clone()) / dcg(ideal_at_k.clone());
+
+        ndcg.push(at_k);
+    }
+
+    ndcg
+}
+
+fn evaluate_map(hits: Vec<usize>) -> Vec<f32> {
+    let mut map_at_k = Vec::new();
+
+    let non_zero = hits.iter().sum::<usize>() as f32;
+    if non_zero == 0.0 {
+        return vec![0.0; hits.len()];
+    }
+
+    let mut rolling_non_zero = 0.0;
+    let mut rolling_map = 0.0;
+    for (idx, h) in hits.into_iter().enumerate() {
+        rolling_non_zero += h as f32;
+        if h == 1 {
+            rolling_map += rolling_non_zero / (idx + 1) as f32;
+        }
+        map_at_k.push(rolling_map / non_zero);
+    }
+
+    map_at_k
+}
+
+fn evaluate_mrr(hits: Vec<usize>) -> f32 {
+    for (idx, h) in hits.into_iter().enumerate() {
+        if h == 1 {
+            return 1.0 / (idx + 1) as f32;
+        }
+    }
+
+    return 0.0;
+}
+
+fn init_logger() {
+    env_logger::init();
+}
+
+#[derive(Serialize)]
+struct QueryMetrics {
+    query: EvaluationQuery,
+    millis_to_search: Duration,
+    ndcg: Vec<f32>,
+    map: Vec<f32>,
+    mrr: f32,
+    hits: Vec<usize>,
+    precision: Vec<f32>,
+    recall: Vec<f32>,
+}
+
+#[derive(Serialize)]
+struct SummaryMetrics {
+    millis_to_search: f32,
+    ndcg: Vec<f32>,
+    map: Vec<f32>,
+    mrr: f32,
+    precision: Vec<f32>,
+    recall: Vec<f32>,
+}
+
+#[derive(Serialize)]
+struct RepoEvaluationMetrics {
+    millis_to_index: Duration,
+    query_metrics: Vec<QueryMetrics>,
+    repo_metrics: Option<SummaryMetrics>,
+}
+
+impl RepoEvaluationMetrics {
+    fn new(millis_to_index: Duration) -> Self {
+        RepoEvaluationMetrics {
+            millis_to_index,
+            query_metrics: Vec::new(),
+            repo_metrics: None,
+        }
+    }
+
+    fn save(&self, repo_name: String) -> Result<()> {
+        let results_string = serde_json::to_string(&self)?;
+        fs::write(format!("./{}_evaluation.json", repo_name), results_string)
+            .expect("Unable to write file");
+        Ok(())
+    }
+
+    fn summarize(&mut self) {
+        let l = self.query_metrics.len() as f32;
+        let millis_to_search: f32 = self
+            .query_metrics
+            .iter()
+            .map(|metrics| metrics.millis_to_search.as_millis())
+            .sum::<u128>() as f32
+            / l;
+
+        let mut ndcg_sum = vec![0.0; 10];
+        let mut map_sum = vec![0.0; 10];
+        let mut precision_sum = vec![0.0; 10];
+        let mut recall_sum = vec![0.0; 10];
+        let mut mmr_sum = 0.0;
+
+        for query_metric in self.query_metrics.iter() {
+            for (ndcg, query_ndcg) in ndcg_sum.iter_mut().zip(query_metric.ndcg.clone()) {
+                *ndcg += query_ndcg;
+            }
+
+            for (mapp, query_map) in map_sum.iter_mut().zip(query_metric.map.clone()) {
+                *mapp += query_map;
+            }
+
+            for (pre, query_pre) in precision_sum.iter_mut().zip(query_metric.precision.clone()) {
+                *pre += query_pre;
+            }
+
+            for (rec, query_rec) in recall_sum.iter_mut().zip(query_metric.recall.clone()) {
+                *rec += query_rec;
+            }
+
+            mmr_sum += query_metric.mrr;
+        }
+
+        let ndcg = ndcg_sum.iter().map(|val| val / l).collect::<Vec<f32>>();
+        let map = map_sum.iter().map(|val| val / l).collect::<Vec<f32>>();
+        let precision = precision_sum
+            .iter()
+            .map(|val| val / l)
+            .collect::<Vec<f32>>();
+        let recall = recall_sum.iter().map(|val| val / l).collect::<Vec<f32>>();
+        let mrr = mmr_sum / l;
+
+        self.repo_metrics = Some(SummaryMetrics {
+            millis_to_search,
+            ndcg,
+            map,
+            mrr,
+            precision,
+            recall,
+        })
+    }
+}
+
+fn evaluate_precision(hits: Vec<usize>) -> Vec<f32> {
+    let mut rolling_hit: f32 = 0.0;
+    let mut precision = Vec::new();
+    for (idx, hit) in hits.into_iter().enumerate() {
+        rolling_hit += hit as f32;
+        precision.push(rolling_hit / ((idx as f32) + 1.0));
+    }
+
+    precision
+}
+
+fn evaluate_recall(hits: Vec<usize>, ideal: Vec<usize>) -> Vec<f32> {
+    let total_relevant = ideal.iter().sum::<usize>() as f32;
+    let mut recall = Vec::new();
+    let mut rolling_hit: f32 = 0.0;
+    for hit in hits {
+        rolling_hit += hit as f32;
+        recall.push(rolling_hit / total_relevant);
+    }
+
+    recall
+}
+
+async fn evaluate_repo(
+    repo_name: String,
+    index: ModelHandle<SemanticIndex>,
+    project: ModelHandle<Project>,
+    query_matches: Vec<EvaluationQuery>,
+    cx: &mut AsyncAppContext,
+) -> Result<RepoEvaluationMetrics> {
+    // Index Project
+    let index_t0 = Instant::now();
+    index
+        .update(cx, |index, cx| index.index_project(project.clone(), cx))
+        .await?;
+    let mut repo_metrics = RepoEvaluationMetrics::new(index_t0.elapsed());
+
+    for query in query_matches {
+        // Query each match in order
+        let search_t0 = Instant::now();
+        let search_results = index
+            .update(cx, |index, cx| {
+                index.search_project(project.clone(), query.clone().query, 10, vec![], vec![], cx)
+            })
+            .await?;
+        let millis_to_search = search_t0.elapsed();
+
+        // Get Hits/Ideal
+        let k = 10;
+        let (ideal, hits) = self::get_hits(query.clone(), search_results, k, cx);
+
+        // Evaluate ndcg@k, for k = 1, 3, 5, 10
+        let ndcg = evaluate_ndcg(hits.clone(), ideal.clone());
+
+        // Evaluate map@k, for k = 1, 3, 5, 10
+        let map = evaluate_map(hits.clone());
+
+        // Evaluate mrr
+        let mrr = evaluate_mrr(hits.clone());
+
+        // Evaluate precision
+        let precision = evaluate_precision(hits.clone());
+
+        // Evaluate Recall
+        let recall = evaluate_recall(hits.clone(), ideal);
+
+        let query_metrics = QueryMetrics {
+            query,
+            millis_to_search,
+            ndcg,
+            map,
+            mrr,
+            hits,
+            precision,
+            recall,
+        };
+
+        repo_metrics.query_metrics.push(query_metrics);
+    }
+
+    repo_metrics.summarize();
+    let _ = repo_metrics.save(repo_name);
+
+    anyhow::Ok(repo_metrics)
+}
+
+fn main() {
+    // Launch new repo as a new Zed workspace/project
+    let app = gpui::App::new(()).unwrap();
+    let fs = Arc::new(RealFs);
+    let http = http::client();
+    let http_client = http::client();
+    init_logger();
+
+    app.run(move |cx| {
+        cx.set_global(*RELEASE_CHANNEL);
+
+        let client = client::Client::new(http.clone(), cx);
+        let user_store = cx.add_model(|cx| UserStore::new(client.clone(), http_client.clone(), cx));
+
+        // Initialize Settings
+        let mut store = SettingsStore::default();
+        store
+            .set_default_settings(default_settings().as_ref(), cx)
+            .unwrap();
+        cx.set_global(store);
+
+        // Initialize Languages
+        let login_shell_env_loaded = Task::ready(());
+        let mut languages = LanguageRegistry::new(login_shell_env_loaded);
+        languages.set_executor(cx.background().clone());
+        let languages = Arc::new(languages);
+
+        let node_runtime = RealNodeRuntime::new(http.clone());
+        languages::init(languages.clone(), node_runtime.clone());
+        language::init(cx);
+
+        project::Project::init(&client, cx);
+        semantic_index::init(fs.clone(), http.clone(), languages.clone(), cx);
+
+        settings::register::<SemanticIndexSettings>(cx);
+
+        let db_file_path = EMBEDDINGS_DIR
+            .join(Path::new(RELEASE_CHANNEL_NAME.as_str()))
+            .join("embeddings_db");
+
+        let languages = languages.clone();
+        let fs = fs.clone();
+        cx.spawn(|mut cx| async move {
+            let semantic_index = SemanticIndex::new(
+                fs.clone(),
+                db_file_path,
+                Arc::new(OpenAIEmbeddings::new(http_client, cx.background())),
+                languages.clone(),
+                cx.clone(),
+            )
+            .await?;
+
+            if let Ok(repo_evals) = parse_eval() {
+                for repo in repo_evals {
+                    let cloned = clone_repo(repo.clone());
+                    match cloned {
+                        Ok((repo_name, clone_path)) => {
+                            println!(
+                                "Cloned {:?} @ {:?} into {:?}",
+                                repo.repo, repo.commit, &clone_path
+                            );
+
+                            // Create Project
+                            let project = cx.update(|cx| {
+                                Project::local(
+                                    client.clone(),
+                                    user_store.clone(),
+                                    languages.clone(),
+                                    fs.clone(),
+                                    cx,
+                                )
+                            });
+
+                            // Register Worktree
+                            let _ = project
+                                .update(&mut cx, |project, cx| {
+                                    project.find_or_create_local_worktree(clone_path, true, cx)
+                                })
+                                .await;
+
+                            let _ = evaluate_repo(
+                                repo_name,
+                                semantic_index.clone(),
+                                project,
+                                repo.assertions,
+                                &mut cx,
+                            )
+                            .await?;
+                        }
+                        Err(err) => {
+                            println!("Error cloning: {:?}", err);
+                        }
+                    }
+                }
+            }
+
+            anyhow::Ok(())
+        })
+        .detach();
+    });
+}

crates/semantic_index/src/semantic_index.rs 🔗

@@ -1,5 +1,5 @@
 mod db;
-mod embedding;
+pub mod embedding;
 mod embedding_queue;
 mod parsing;
 pub mod semantic_index_settings;
@@ -294,7 +294,7 @@ impl SemanticIndex {
         }
     }
 
-    async fn new(
+    pub async fn new(
         fs: Arc<dyn Fs>,
         database_path: PathBuf,
         embedding_provider: Arc<dyn EmbeddingProvider>,
@@ -966,8 +966,6 @@ impl SemanticIndex {
         cx: &mut ModelContext<Self>,
     ) -> Task<Result<()>> {
         if !self.projects.contains_key(&project.downgrade()) {
-            log::trace!("Registering Project for Semantic Index");
-
             let subscription = cx.subscribe(&project, |this, project, event, cx| match event {
                 project::Event::WorktreeAdded | project::Event::WorktreeRemoved(_) => {
                     this.project_worktrees_changed(project.clone(), cx);

crates/util/Cargo.toml 🔗

@@ -25,10 +25,10 @@ rust-embed.workspace = true
 tempdir = { workspace = true, optional = true }
 serde.workspace = true
 serde_json.workspace = true
-git2 = { version = "0.15", default-features = false, optional = true }
+git2 = { workspace = true, optional = true }
 dirs = "3.0"
 take-until = "0.2.0"
 
 [dev-dependencies]
 tempdir.workspace = true
-git2 = { version = "0.15", default-features = false }
+git2.workspace = true