start work on eval script for semantic_index

KCaverly created

Change summary

Cargo.lock                                  | 19 ++++
crates/semantic_index/Cargo.toml            |  4 
crates/semantic_index/eval/tree-sitter.json | 10 ++
crates/semantic_index/examples/eval.rs      | 97 +++++++++++++++++++++++
script/evaluate_semantic_index              |  3 
5 files changed, 133 insertions(+)

Detailed changes

Cargo.lock 🔗

@@ -3061,6 +3061,8 @@ dependencies = [
  "libc",
  "libgit2-sys",
  "log",
+ "openssl-probe",
+ "openssl-sys",
  "url",
 ]
 
@@ -4015,7 +4017,9 @@ checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
 dependencies = [
  "cc",
  "libc",
+ "libssh2-sys",
  "libz-sys",
+ "openssl-sys",
  "pkg-config",
 ]
 
@@ -4056,6 +4060,20 @@ dependencies = [
  "vcpkg",
 ]
 
+[[package]]
+name = "libssh2-sys"
+version = "0.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b094a36eb4b8b8c8a7b4b8ae43b2944502be3e59cd87687595cf6b0a71b3f4ca"
+dependencies = [
+ "cc",
+ "libc",
+ "libz-sys",
+ "openssl-sys",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "libz-sys"
 version = "1.1.12"
@@ -6731,6 +6749,7 @@ dependencies = [
  "editor",
  "env_logger 0.9.3",
  "futures 0.3.28",
+ "git2",
  "globset",
  "gpui",
  "isahc",

crates/semantic_index/Cargo.toml 🔗

@@ -50,6 +50,7 @@ project = { path = "../project", features = ["test-support"] }
 rpc = { path = "../rpc", features = ["test-support"] }
 workspace = { path = "../workspace", features = ["test-support"] }
 settings = { path = "../settings", features = ["test-support"]}
+git2 = { version = "0.15"}
 
 pretty_assertions.workspace = true
 rand.workspace = true
@@ -67,3 +68,6 @@ tree-sitter-elixir.workspace = true
 tree-sitter-lua.workspace = true
 tree-sitter-ruby.workspace = true
 tree-sitter-php.workspace = true
+
+[[example]]
+name = "eval"

crates/semantic_index/eval/tree-sitter.json 🔗

@@ -0,0 +1,10 @@
+{
+  "repo": "https://github.com/tree-sitter/tree-sitter.git",
+  "commit": "46af27796a76c72d8466627d499f2bca4af958ee",
+  "assertions": [
+    {
+      "query": "",
+      "matches": []
+    }
+  ]
+}

crates/semantic_index/examples/eval.rs 🔗

@@ -0,0 +1,97 @@
+use git2::{Object, Oid, Repository};
+use serde::Deserialize;
+use std::path::{Path, PathBuf};
+use std::{env, fs};
+
+// NOTE(review): nothing in this file reads `query` or `matches` yet; the
+// field meanings below are inferred from their names — confirm once the
+// evaluation step is implemented.
+/// One entry of the `assertions` array in an eval definition file.
+#[derive(Deserialize, Clone)]
+struct QueryMatches {
+    /// Search text for this assertion (presumably run against the index).
+    query: String,
+    /// Results associated with `query` (presumably the expected hits).
+    matches: Vec<String>,
+}
+
+/// One evaluation definition, deserialized by `parse_eval` from a `.json`
+/// file in the eval folder.
+#[derive(Deserialize, Clone)]
+struct RepoEval {
+    /// Repository location handed to git2 when cloning.
+    repo: String,
+    /// Hex object id of the commit that `clone_repo` checks out.
+    commit: String,
+    /// Query/match pairs attached to this repository; not read by this file yet.
+    assertions: Vec<QueryMatches>,
+}
+
+/// Directory under which `clone_repo` places its clones. The path is
+/// relative, so it resolves against the process's current working directory.
+const TMP_REPO_PATH: &str = "./target/eval_repos";
+
+/// Loads every evaluation definition found in the eval folder.
+///
+/// The folder is `crates/semantic_index/eval`, resolved against the parent of
+/// the current working directory. Each entry with a `json` extension is read
+/// and deserialized into a `RepoEval`. A file that cannot be read or parsed
+/// is reported on stdout and skipped, so one bad definition does not discard
+/// the others.
+///
+/// # Errors
+///
+/// Returns an error if the working directory cannot be determined or has no
+/// parent, or if the eval folder or one of its entries cannot be listed.
+fn parse_eval() -> anyhow::Result<Vec<RepoEval>> {
+    let current_dir = env::current_dir()?;
+    let eval_folder = current_dir
+        .parent()
+        .ok_or_else(|| anyhow::anyhow!("working directory {:?} has no parent", current_dir))?
+        .join("crates/semantic_index/eval");
+
+    let mut repo_evals: Vec<RepoEval> = Vec::new();
+    for entry in fs::read_dir(eval_folder)? {
+        let file_path = entry?.path();
+
+        // Only `.json` files are eval definitions; ignore everything else.
+        let is_json = file_path
+            .extension()
+            .map_or(false, |extension| extension == "json");
+        if !is_json {
+            continue;
+        }
+
+        // Report unreadable files the same way as unparsable ones instead of
+        // dropping them without a trace.
+        let contents = match fs::read_to_string(&file_path) {
+            Ok(contents) => contents,
+            Err(err) => {
+                println!("Err: failed to read {:?}: {:?}", file_path, err);
+                continue;
+            }
+        };
+
+        match serde_json::from_str::<RepoEval>(contents.as_str()) {
+            Ok(repo_eval) => repo_evals.push(repo_eval),
+            Err(err) => println!("Err: {:?}", err),
+        }
+    }
+
+    Ok(repo_evals)
+}
+
+/// Clones the repository described by `repo_eval` and checks out its commit.
+///
+/// The clone is placed in `TMP_REPO_PATH/<name>`, where `<name>` is the last
+/// path component of `repo_eval.repo` with a trailing `.git` removed. Any
+/// directory already at that path is deleted first so each run starts from a
+/// fresh clone. After cloning, the working tree is switched to
+/// `repo_eval.commit` and `HEAD` is left detached at that commit.
+///
+/// Returns the path of the clone.
+///
+/// # Errors
+///
+/// Returns an error if no directory name can be derived from
+/// `repo_eval.repo`, if a previous clone cannot be removed, if the clone
+/// itself fails, or if `repo_eval.commit` is not a valid object id naming a
+/// commit in the cloned repository.
+fn clone_repo(repo_eval: RepoEval) -> anyhow::Result<PathBuf> {
+    let repo_file_name = Path::new(repo_eval.repo.as_str())
+        .file_name()
+        .and_then(|name| name.to_str())
+        .ok_or_else(|| {
+            anyhow::anyhow!("cannot derive a directory name from {:?}", repo_eval.repo)
+        })?;
+    // Strip only a trailing ".git"; removing every occurrence would mangle
+    // names such as "foo.github.io.git".
+    let repo_name = repo_file_name
+        .strip_suffix(".git")
+        .unwrap_or(repo_file_name);
+    // An empty name would make `clone_path` the shared root itself, and the
+    // cleanup below would then wipe every other clone.
+    if repo_name.is_empty() {
+        return Err(anyhow::anyhow!(
+            "cannot derive a directory name from {:?}",
+            repo_eval.repo
+        ));
+    }
+    let clone_path = Path::new(TMP_REPO_PATH).join(repo_name);
+
+    // Delete the clone path if it already exists; a missing directory is the
+    // expected case on a first run.
+    match fs::remove_dir_all(&clone_path) {
+        Ok(()) => {}
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
+        Err(err) => {
+            return Err(anyhow::anyhow!(
+                "failed to remove previous clone at {:?}: {}",
+                clone_path,
+                err
+            ));
+        }
+    }
+
+    // The builder hands back the opened repository, so it does not need to be
+    // re-opened from disk.
+    let repo: Repository = git2::build::RepoBuilder::new()
+        .clone(repo_eval.repo.as_str(), clone_path.as_path())?;
+
+    let obj: Object = repo
+        .find_commit(Oid::from_str(repo_eval.commit.as_str())?)?
+        .into_object();
+    repo.checkout_tree(&obj, None)?;
+    repo.set_head_detached(obj.id())?;
+
+    Ok(clone_path)
+}
+
+/// Entry point: loads the eval definitions and clones each repository,
+/// printing the outcome of every clone.
+///
+/// A failed clone is reported and the remaining repositories are still
+/// attempted. If the definitions cannot be loaded at all, the error is
+/// printed to stderr and the process exits with status 1 instead of ending
+/// silently with a success status.
+fn main() {
+    let repo_evals = match parse_eval() {
+        Ok(repo_evals) => repo_evals,
+        Err(err) => {
+            eprintln!("Error loading evals: {:?}", err);
+            std::process::exit(1);
+        }
+    };
+
+    for repo in repo_evals {
+        // `clone_repo` takes ownership, and `repo` is still needed for the
+        // report below.
+        match clone_repo(repo.clone()) {
+            Ok(clone_path) => {
+                println!(
+                    "Cloned {:?} @ {:?} into {:?}",
+                    repo.repo, repo.commit, clone_path
+                );
+            }
+            Err(err) => {
+                println!("Error Cloning: {:?}", err);
+            }
+        }
+    }
+}