From e6214b2b718fc91e057d7dc00da8eae8646cd15c Mon Sep 17 00:00:00 2001 From: Kian Kasad Date: Mon, 8 Dec 2025 08:48:04 -0500 Subject: [PATCH] extensions: Don't recompile Tree-sitter parsers when up-to-date (#43442) When installing local "dev" extensions that provide tree-sitter grammars, compiling the parser can be quite intensive[^anecdote]. This PR changes the logic to only compile the parser if the WASM object doesn't exist or the source files are newer than the object (just like `make(1)` would do). [^anecdote]: The tree-sitter parser for LLVM IR takes >10 minutes to compile and uses 20 GB of memory on my laptop. Release Notes: - N/A --------- Co-authored-by: Finn Evers --- Cargo.lock | 1 + crates/extension/Cargo.toml | 1 + crates/extension/src/extension_builder.rs | 114 ++++++++++++++++++---- 3 files changed, 97 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a671c1797f7b9abd9a7ec5262965a68132cef8ee..a0a5c7e27cf76822a3f7f985332d6c59a3871806 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5809,6 +5809,7 @@ dependencies = [ "serde", "serde_json", "task", + "tempfile", "toml 0.8.23", "url", "util", diff --git a/crates/extension/Cargo.toml b/crates/extension/Cargo.toml index ed084a0c1c0d6ea237dd06391330d8e41917b574..a00406c43e796b1b2bafafc3d0481b09ee5fd43a 100644 --- a/crates/extension/Cargo.toml +++ b/crates/extension/Cargo.toml @@ -38,3 +38,4 @@ wasmparser.workspace = true [dev-dependencies] pretty_assertions.workspace = true +tempfile.workspace = true diff --git a/crates/extension/src/extension_builder.rs b/crates/extension/src/extension_builder.rs index 1385ec488a2de36f5894ab91ac0ae45881aa625f..b6d4cc0c4da3d1f7d998512f099eba6c9c04e1a5 100644 --- a/crates/extension/src/extension_builder.rs +++ b/crates/extension/src/extension_builder.rs @@ -247,26 +247,34 @@ impl ExtensionBuilder { let parser_path = src_path.join("parser.c"); let scanner_path = src_path.join("scanner.c"); - log::info!("compiling {grammar_name} parser"); - let clang_output = util::command::new_smol_command(&clang_path) - .args(["-fPIC", "-shared", "-Os"]) - .arg(format!("-Wl,--export=tree_sitter_{grammar_name}")) - .arg("-o") - .arg(&grammar_wasm_path) - .arg("-I") - .arg(&src_path) - .arg(&parser_path) - .args(scanner_path.exists().then_some(scanner_path)) - .output() - .await - .context("failed to run clang")?; - - if !clang_output.status.success() { - bail!( - "failed to compile {} parser with clang: {}", - grammar_name, - String::from_utf8_lossy(&clang_output.stderr), + // Skip recompiling if the WASM object is already newer than the source files + if file_newer_than_deps(&grammar_wasm_path, &[&parser_path, &scanner_path]).unwrap_or(false) + { + log::info!( + "skipping compilation of {grammar_name} parser because the existing compiled grammar is up to date" ); + } else { + log::info!("compiling {grammar_name} parser"); + let clang_output = util::command::new_smol_command(&clang_path) + .args(["-fPIC", "-shared", "-Os"]) + .arg(format!("-Wl,--export=tree_sitter_{grammar_name}")) + .arg("-o") + .arg(&grammar_wasm_path) + .arg("-I") + .arg(&src_path) + .arg(&parser_path) + .args(scanner_path.exists().then_some(scanner_path)) + .output() + .await + .context("failed to run clang")?; + + if !clang_output.status.success() { + bail!( + "failed to compile {} parser with clang: {}", + grammar_name, + String::from_utf8_lossy(&clang_output.stderr), + ); + } } Ok(()) @@ -643,3 +651,71 @@ fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Ok(()) } + +/// Returns `true` if the target exists and its last modified time is greater than that +/// of each dependency which exists (i.e., dependency paths which do not exist are ignored). +/// +/// # Errors +/// +/// Returns `Err` if any of the underlying file I/O operations fail. +fn file_newer_than_deps(target: &Path, dependencies: &[&Path]) -> Result { + if !target.try_exists()? { + return Ok(false); + } + let target_modified = target.metadata()?.modified()?; + for dependency in dependencies { + if !dependency.try_exists()? { + continue; + } + let dep_modified = dependency.metadata()?.modified()?; + if target_modified < dep_modified { + return Ok(false); + } + } + Ok(true) +} + +#[cfg(test)] +mod tests { + use super::*; + + use std::{fs, thread::sleep, time::Duration}; + + #[test] + fn test_file_newer_than_deps() { + // Don't use TempTree because we need to guarantee the order + let tmpdir = tempfile::tempdir().unwrap(); + let target = tmpdir.path().join("target.wasm"); + let dep1 = tmpdir.path().join("parser.c"); + let dep2 = tmpdir.path().join("scanner.c"); + + assert!( + !file_newer_than_deps(&target, &[&dep1, &dep2]).unwrap(), + "target doesn't exist" + ); + fs::write(&target, "foo").unwrap(); // Create target + assert!( + file_newer_than_deps(&target, &[&dep1, &dep2]).unwrap(), + "dependencies don't exist; target is newer" + ); + sleep(Duration::from_secs(1)); + fs::write(&dep1, "foo").unwrap(); // Create dep1 (newer than target) + // Dependency is newer + assert!( + !file_newer_than_deps(&target, &[&dep1, &dep2]).unwrap(), + "a dependency is newer (target {:?}, dep1 {:?})", + target.metadata().unwrap().modified().unwrap(), + dep1.metadata().unwrap().modified().unwrap(), + ); + sleep(Duration::from_secs(1)); + fs::write(&dep2, "foo").unwrap(); // Create dep2 + sleep(Duration::from_secs(1)); + fs::write(&target, "foobar").unwrap(); // Update target + assert!( + file_newer_than_deps(&target, &[&dep1, &dep2]).unwrap(), + "target is newer than dependencies (target {:?}, dep2 {:?})", + target.metadata().unwrap().modified().unwrap(), + dep2.metadata().unwrap().modified().unwrap(), + ); + } +}