build_extension.rs

  1use crate::ExtensionManifest;
  2use crate::{extension_manifest::ExtensionLibraryKind, GrammarManifestEntry};
  3use anyhow::{anyhow, bail, Context as _, Result};
  4use async_compression::futures::bufread::GzipDecoder;
  5use async_tar::Archive;
  6use futures::io::BufReader;
  7use futures::AsyncReadExt;
  8use serde::Deserialize;
  9use std::{
 10    env, fs,
 11    path::{Path, PathBuf},
 12    process::{Command, Stdio},
 13    sync::Arc,
 14};
 15use util::http::{AsyncBody, HttpClient};
 16use wit_component::ComponentEncoder;
 17
 18/// Currently, we compile with Rust's `wasm32-wasi` target, which works with WASI `preview1`.
 19/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
 20/// module, which implements the `preview1` interface in terms of `preview2`.
 21///
 22/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
 23/// not need the adapter anymore.
 24const RUST_TARGET: &str = "wasm32-wasi";
 25const WASI_ADAPTER_URL: &str =
 26    "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
 27
 28/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
 29/// and clang's runtime library. The `wasi-sdk` provides these binaries.
 30///
 31/// Once Clang 17 and its wasm target are available via system package managers, we won't need
 32/// to download this.
 33const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";
 34const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "macos") {
 35    Some("wasi-sdk-21.0-macos.tar.gz")
 36} else if cfg!(target_os = "linux") {
 37    Some("wasi-sdk-21.0-linux.tar.gz")
 38} else {
 39    None
 40};
 41
 42pub struct ExtensionBuilder {
 43    cache_dir: PathBuf,
 44    pub http: Arc<dyn HttpClient>,
 45}
 46
 47pub struct CompileExtensionOptions {
 48    pub release: bool,
 49}
 50
 51#[derive(Deserialize)]
 52struct CargoToml {
 53    package: CargoTomlPackage,
 54}
 55
 56#[derive(Deserialize)]
 57struct CargoTomlPackage {
 58    name: String,
 59}
 60
 61impl ExtensionBuilder {
 62    pub fn new(cache_dir: PathBuf, http: Arc<dyn HttpClient>) -> Self {
 63        Self { cache_dir, http }
 64    }
 65
 66    pub async fn compile_extension(
 67        &self,
 68        extension_dir: &Path,
 69        options: CompileExtensionOptions,
 70    ) -> Result<()> {
 71        fs::create_dir_all(&self.cache_dir)?;
 72        let extension_toml_path = extension_dir.join("extension.toml");
 73        let extension_toml_content = fs::read_to_string(&extension_toml_path)?;
 74        let extension_toml: ExtensionManifest = toml::from_str(&extension_toml_content)?;
 75
 76        let cargo_toml_path = extension_dir.join("Cargo.toml");
 77        if extension_toml.lib.kind == Some(ExtensionLibraryKind::Rust)
 78            || fs::metadata(&cargo_toml_path)?.is_file()
 79        {
 80            self.compile_rust_extension(extension_dir, options).await?;
 81        }
 82
 83        for (grammar_name, grammar_metadata) in extension_toml.grammars {
 84            self.compile_grammar(extension_dir, grammar_name, grammar_metadata)
 85                .await?;
 86        }
 87
 88        log::info!("finished compiling extension {}", extension_dir.display());
 89        Ok(())
 90    }
 91
 92    async fn compile_rust_extension(
 93        &self,
 94        extension_dir: &Path,
 95        options: CompileExtensionOptions,
 96    ) -> Result<(), anyhow::Error> {
 97        self.install_rust_wasm_target_if_needed()?;
 98        let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
 99
100        let cargo_toml_content = fs::read_to_string(&extension_dir.join("Cargo.toml"))?;
101        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
102
103        log::info!("compiling rust extension {}", extension_dir.display());
104        let output = Command::new("cargo")
105            .args(["build", "--target", RUST_TARGET])
106            .args(options.release.then_some("--release"))
107            .arg("--target-dir")
108            .arg(extension_dir.join("target"))
109            .current_dir(&extension_dir)
110            .output()
111            .context("failed to run `cargo`")?;
112        if !output.status.success() {
113            bail!(
114                "failed to build extension {}",
115                String::from_utf8_lossy(&output.stderr)
116            );
117        }
118
119        let mut wasm_path = PathBuf::from(extension_dir);
120        wasm_path.extend([
121            "target",
122            RUST_TARGET,
123            if options.release { "release" } else { "debug" },
124            cargo_toml.package.name.as_str(),
125        ]);
126        wasm_path.set_extension("wasm");
127
128        let wasm_bytes = fs::read(&wasm_path)
129            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
130
131        let encoder = ComponentEncoder::default()
132            .module(&wasm_bytes)?
133            .adapter("wasi_snapshot_preview1", &adapter_bytes)
134            .context("failed to load adapter module")?
135            .validate(true);
136
137        let component_bytes = encoder
138            .encode()
139            .context("failed to encode wasm component")?;
140
141        fs::write(extension_dir.join("extension.wasm"), &component_bytes)
142            .context("failed to write extension.wasm")?;
143
144        Ok(())
145    }
146
147    async fn compile_grammar(
148        &self,
149        extension_dir: &Path,
150        grammar_name: Arc<str>,
151        grammar_metadata: GrammarManifestEntry,
152    ) -> Result<()> {
153        let clang_path = self.install_wasi_sdk_if_needed().await?;
154
155        let mut grammar_repo_dir = extension_dir.to_path_buf();
156        grammar_repo_dir.extend(["grammars", grammar_name.as_ref()]);
157
158        let mut grammar_wasm_path = grammar_repo_dir.clone();
159        grammar_wasm_path.set_extension("wasm");
160
161        log::info!("checking out {grammar_name} parser");
162        self.checkout_repo(
163            &grammar_repo_dir,
164            &grammar_metadata.repository,
165            &grammar_metadata.rev,
166        )?;
167
168        let src_path = grammar_repo_dir.join("src");
169        let parser_path = src_path.join("parser.c");
170        let scanner_path = src_path.join("scanner.c");
171
172        log::info!("compiling {grammar_name} parser");
173        let clang_output = Command::new(&clang_path)
174            .args(["-fPIC", "-shared", "-Os"])
175            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
176            .arg("-o")
177            .arg(&grammar_wasm_path)
178            .arg("-I")
179            .arg(&src_path)
180            .arg(&parser_path)
181            .args(scanner_path.exists().then_some(scanner_path))
182            .output()
183            .context("failed to run clang")?;
184        if !clang_output.status.success() {
185            bail!(
186                "failed to compile {} parser with clang: {}",
187                grammar_name,
188                String::from_utf8_lossy(&clang_output.stderr),
189            );
190        }
191
192        Ok(())
193    }
194
195    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
196        let git_dir = directory.join(".git");
197
198        if directory.exists() {
199            let remotes_output = Command::new("git")
200                .arg("--git-dir")
201                .arg(&git_dir)
202                .args(["remote", "-v"])
203                .output()?;
204            let has_remote = remotes_output.status.success()
205                && String::from_utf8_lossy(&remotes_output.stdout)
206                    .lines()
207                    .any(|line| {
208                        let mut parts = line.split(|c: char| c.is_whitespace());
209                        parts.next() == Some("origin") && parts.any(|part| part == url)
210                    });
211            if !has_remote {
212                bail!(
213                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
214                    directory.display(),
215                    url
216                );
217            }
218        } else {
219            fs::create_dir_all(&directory).with_context(|| {
220                format!("failed to create grammar directory {}", directory.display(),)
221            })?;
222            let init_output = Command::new("git")
223                .arg("init")
224                .current_dir(&directory)
225                .output()?;
226            if !init_output.status.success() {
227                bail!(
228                    "failed to run `git init` in directory '{}'",
229                    directory.display()
230                );
231            }
232
233            let remote_add_output = Command::new("git")
234                .arg("--git-dir")
235                .arg(&git_dir)
236                .args(["remote", "add", "origin", url])
237                .output()
238                .context("failed to execute `git remote add`")?;
239            if !remote_add_output.status.success() {
240                bail!(
241                    "failed to add remote {url} for git repository {}",
242                    git_dir.display()
243                );
244            }
245        }
246
247        let fetch_output = Command::new("git")
248            .arg("--git-dir")
249            .arg(&git_dir)
250            .args(["fetch", "--depth", "1", "origin", &rev])
251            .output()
252            .context("failed to execute `git fetch`")?;
253        if !fetch_output.status.success() {
254            bail!(
255                "failed to fetch revision {} in directory '{}'",
256                rev,
257                directory.display()
258            );
259        }
260
261        let checkout_output = Command::new("git")
262            .arg("--git-dir")
263            .arg(&git_dir)
264            .args(["checkout", &rev])
265            .current_dir(&directory)
266            .output()
267            .context("failed to execute `git checkout`")?;
268        if !checkout_output.status.success() {
269            bail!(
270                "failed to checkout revision {} in directory '{}'",
271                rev,
272                directory.display()
273            );
274        }
275
276        Ok(())
277    }
278
279    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
280        let rustc_output = Command::new("rustc")
281            .arg("--print")
282            .arg("sysroot")
283            .output()
284            .context("failed to run rustc")?;
285        if !rustc_output.status.success() {
286            bail!(
287                "failed to retrieve rust sysroot: {}",
288                String::from_utf8_lossy(&rustc_output.stderr)
289            );
290        }
291
292        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
293        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
294            return Ok(());
295        }
296
297        let output = Command::new("rustup")
298            .args(["target", "add", RUST_TARGET])
299            .stderr(Stdio::inherit())
300            .stdout(Stdio::inherit())
301            .output()
302            .context("failed to run `rustup target add`")?;
303        if !output.status.success() {
304            bail!("failed to install the `{RUST_TARGET}` target");
305        }
306
307        Ok(())
308    }
309
310    async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
311        let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
312        if let Ok(content) = fs::read(&cache_path) {
313            if wasmparser::Parser::is_core_wasm(&content) {
314                return Ok(content);
315            }
316        }
317
318        fs::remove_file(&cache_path).ok();
319
320        log::info!("downloading wasi adapter module");
321        let mut response = self
322            .http
323            .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
324            .await?;
325
326        let mut content = Vec::new();
327        let mut body = BufReader::new(response.body_mut());
328        body.read_to_end(&mut content).await?;
329
330        fs::write(&cache_path, &content)
331            .with_context(|| format!("failed to save file {}", cache_path.display()))?;
332
333        if !wasmparser::Parser::is_core_wasm(&content) {
334            bail!("downloaded wasi adapter is invalid");
335        }
336        Ok(content)
337    }
338
339    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
340        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
341            format!("{WASI_SDK_URL}/{asset_name}")
342        } else {
343            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
344        };
345
346        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
347        let mut clang_path = wasi_sdk_dir.clone();
348        clang_path.extend(["bin", "clang-17"]);
349
350        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
351            return Ok(clang_path);
352        }
353
354        let mut tar_out_dir = wasi_sdk_dir.clone();
355        tar_out_dir.set_extension("archive");
356
357        fs::remove_dir_all(&wasi_sdk_dir).ok();
358        fs::remove_dir_all(&tar_out_dir).ok();
359
360        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
361        let body = BufReader::new(response.body_mut());
362        let body = GzipDecoder::new(body);
363        let tar = Archive::new(body);
364        tar.unpack(&tar_out_dir)
365            .await
366            .context("failed to unpack wasi-sdk archive")?;
367
368        let inner_dir = fs::read_dir(&tar_out_dir)?
369            .next()
370            .ok_or_else(|| anyhow!("no content"))?
371            .context("failed to read contents of extracted wasi archive directory")?
372            .path();
373        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
374        fs::remove_dir_all(&tar_out_dir).ok();
375
376        Ok(clang_path)
377    }
378}