// extension_builder.rs

  1use crate::ExtensionManifest;
  2use crate::{extension_manifest::ExtensionLibraryKind, GrammarManifestEntry};
  3use anyhow::{anyhow, bail, Context as _, Result};
  4use async_compression::futures::bufread::GzipDecoder;
  5use async_tar::Archive;
  6use futures::io::BufReader;
  7use futures::AsyncReadExt;
  8use serde::Deserialize;
  9use std::{
 10    env, fs, mem,
 11    path::{Path, PathBuf},
 12    process::{Command, Stdio},
 13    sync::Arc,
 14};
 15use util::http::{self, AsyncBody, HttpClient};
 16use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 17use wasmparser::Parser;
 18use wit_component::ComponentEncoder;
 19
 20/// Currently, we compile with Rust's `wasm32-wasi` target, which works with WASI `preview1`.
 21/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
 22/// module, which implements the `preview1` interface in terms of `preview2`.
 23///
 24/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
 25/// not need the adapter anymore.
 26const RUST_TARGET: &str = "wasm32-wasi";
 27const WASI_ADAPTER_URL: &str =
 28    "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
 29
 30/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
 31/// and clang's runtime library. The `wasi-sdk` provides these binaries.
 32///
 33/// Once Clang 17 and its wasm target are available via system package managers, we won't need
 34/// to download this.
 35const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";
 36const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "macos") {
 37    Some("wasi-sdk-21.0-macos.tar.gz")
 38} else if cfg!(target_os = "linux") {
 39    Some("wasi-sdk-21.0-linux.tar.gz")
 40} else {
 41    None
 42};
 43
 44pub struct ExtensionBuilder {
 45    cache_dir: PathBuf,
 46    pub http: Arc<dyn HttpClient>,
 47}
 48
 49pub struct CompileExtensionOptions {
 50    pub release: bool,
 51}
 52
 53#[derive(Deserialize)]
 54struct CargoToml {
 55    package: CargoTomlPackage,
 56}
 57
 58#[derive(Deserialize)]
 59struct CargoTomlPackage {
 60    name: String,
 61}
 62
 63impl ExtensionBuilder {
 64    pub fn new(cache_dir: PathBuf) -> Self {
 65        Self {
 66            cache_dir,
 67            http: http::client(),
 68        }
 69    }
 70
 71    pub async fn compile_extension(
 72        &self,
 73        extension_dir: &Path,
 74        extension_manifest: &ExtensionManifest,
 75        options: CompileExtensionOptions,
 76    ) -> Result<()> {
 77        if extension_dir.is_relative() {
 78            bail!(
 79                "extension dir {} is not an absolute path",
 80                extension_dir.display()
 81            );
 82        }
 83
 84        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 85
 86        let cargo_toml_path = extension_dir.join("Cargo.toml");
 87        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust)
 88            || fs::metadata(&cargo_toml_path)
 89                .ok()
 90                .map(|metadata| metadata.is_file())
 91                .unwrap_or(false)
 92        {
 93            log::info!("compiling Rust extension {}", extension_dir.display());
 94            self.compile_rust_extension(extension_dir, options)
 95                .await
 96                .context("failed to compile Rust extension")?;
 97        }
 98
 99        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
100            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
101                .await
102                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
103        }
104
105        log::info!("finished compiling extension {}", extension_dir.display());
106        Ok(())
107    }
108
109    async fn compile_rust_extension(
110        &self,
111        extension_dir: &Path,
112        options: CompileExtensionOptions,
113    ) -> Result<(), anyhow::Error> {
114        self.install_rust_wasm_target_if_needed()?;
115        let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
116
117        let cargo_toml_content = fs::read_to_string(&extension_dir.join("Cargo.toml"))?;
118        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
119
120        log::info!("compiling rust extension {}", extension_dir.display());
121        let output = Command::new("cargo")
122            .args(["build", "--target", RUST_TARGET])
123            .args(options.release.then_some("--release"))
124            .arg("--target-dir")
125            .arg(extension_dir.join("target"))
126            .current_dir(&extension_dir)
127            .output()
128            .context("failed to run `cargo`")?;
129        if !output.status.success() {
130            bail!(
131                "failed to build extension {}",
132                String::from_utf8_lossy(&output.stderr)
133            );
134        }
135
136        let mut wasm_path = PathBuf::from(extension_dir);
137        wasm_path.extend([
138            "target",
139            RUST_TARGET,
140            if options.release { "release" } else { "debug" },
141            &cargo_toml
142                .package
143                .name
144                // The wasm32-wasi target normalizes `-` in package names to `_` in the resulting `.wasm` file.
145                .replace('-', "_"),
146        ]);
147        wasm_path.set_extension("wasm");
148
149        let wasm_bytes = fs::read(&wasm_path)
150            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
151
152        let encoder = ComponentEncoder::default()
153            .module(&wasm_bytes)?
154            .adapter("wasi_snapshot_preview1", &adapter_bytes)
155            .context("failed to load adapter module")?
156            .validate(true);
157
158        let component_bytes = encoder
159            .encode()
160            .context("failed to encode wasm component")?;
161
162        let component_bytes = self
163            .strip_custom_sections(&component_bytes)
164            .context("failed to strip debug sections from wasm component")?;
165
166        fs::write(extension_dir.join("extension.wasm"), &component_bytes)
167            .context("failed to write extension.wasm")?;
168
169        Ok(())
170    }
171
172    async fn compile_grammar(
173        &self,
174        extension_dir: &Path,
175        grammar_name: &str,
176        grammar_metadata: &GrammarManifestEntry,
177    ) -> Result<()> {
178        let clang_path = self.install_wasi_sdk_if_needed().await?;
179
180        let mut grammar_repo_dir = extension_dir.to_path_buf();
181        grammar_repo_dir.extend(["grammars", grammar_name]);
182
183        let mut grammar_wasm_path = grammar_repo_dir.clone();
184        grammar_wasm_path.set_extension("wasm");
185
186        log::info!("checking out {grammar_name} parser");
187        self.checkout_repo(
188            &grammar_repo_dir,
189            &grammar_metadata.repository,
190            &grammar_metadata.rev,
191        )?;
192
193        let src_path = grammar_repo_dir.join("src");
194        let parser_path = src_path.join("parser.c");
195        let scanner_path = src_path.join("scanner.c");
196
197        log::info!("compiling {grammar_name} parser");
198        let clang_output = Command::new(&clang_path)
199            .args(["-fPIC", "-shared", "-Os"])
200            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
201            .arg("-o")
202            .arg(&grammar_wasm_path)
203            .arg("-I")
204            .arg(&src_path)
205            .arg(&parser_path)
206            .args(scanner_path.exists().then_some(scanner_path))
207            .output()
208            .context("failed to run clang")?;
209        if !clang_output.status.success() {
210            bail!(
211                "failed to compile {} parser with clang: {}",
212                grammar_name,
213                String::from_utf8_lossy(&clang_output.stderr),
214            );
215        }
216
217        Ok(())
218    }
219
220    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
221        let git_dir = directory.join(".git");
222
223        if directory.exists() {
224            let remotes_output = Command::new("git")
225                .arg("--git-dir")
226                .arg(&git_dir)
227                .args(["remote", "-v"])
228                .output()?;
229            let has_remote = remotes_output.status.success()
230                && String::from_utf8_lossy(&remotes_output.stdout)
231                    .lines()
232                    .any(|line| {
233                        let mut parts = line.split(|c: char| c.is_whitespace());
234                        parts.next() == Some("origin") && parts.any(|part| part == url)
235                    });
236            if !has_remote {
237                bail!(
238                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
239                    directory.display(),
240                    url
241                );
242            }
243        } else {
244            fs::create_dir_all(&directory).with_context(|| {
245                format!("failed to create grammar directory {}", directory.display(),)
246            })?;
247            let init_output = Command::new("git")
248                .arg("init")
249                .current_dir(&directory)
250                .output()?;
251            if !init_output.status.success() {
252                bail!(
253                    "failed to run `git init` in directory '{}'",
254                    directory.display()
255                );
256            }
257
258            let remote_add_output = Command::new("git")
259                .arg("--git-dir")
260                .arg(&git_dir)
261                .args(["remote", "add", "origin", url])
262                .output()
263                .context("failed to execute `git remote add`")?;
264            if !remote_add_output.status.success() {
265                bail!(
266                    "failed to add remote {url} for git repository {}",
267                    git_dir.display()
268                );
269            }
270        }
271
272        let fetch_output = Command::new("git")
273            .arg("--git-dir")
274            .arg(&git_dir)
275            .args(["fetch", "--depth", "1", "origin", &rev])
276            .output()
277            .context("failed to execute `git fetch`")?;
278
279        let checkout_output = Command::new("git")
280            .arg("--git-dir")
281            .arg(&git_dir)
282            .args(["checkout", &rev])
283            .current_dir(&directory)
284            .output()
285            .context("failed to execute `git checkout`")?;
286        if !checkout_output.status.success() {
287            if !fetch_output.status.success() {
288                bail!(
289                    "failed to fetch revision {} in directory '{}'",
290                    rev,
291                    directory.display()
292                );
293            }
294            bail!(
295                "failed to checkout revision {} in directory '{}': {}",
296                rev,
297                directory.display(),
298                String::from_utf8_lossy(&checkout_output.stderr)
299            );
300        }
301
302        Ok(())
303    }
304
305    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
306        let rustc_output = Command::new("rustc")
307            .arg("--print")
308            .arg("sysroot")
309            .output()
310            .context("failed to run rustc")?;
311        if !rustc_output.status.success() {
312            bail!(
313                "failed to retrieve rust sysroot: {}",
314                String::from_utf8_lossy(&rustc_output.stderr)
315            );
316        }
317
318        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
319        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
320            return Ok(());
321        }
322
323        let output = Command::new("rustup")
324            .args(["target", "add", RUST_TARGET])
325            .stderr(Stdio::inherit())
326            .stdout(Stdio::inherit())
327            .output()
328            .context("failed to run `rustup target add`")?;
329        if !output.status.success() {
330            bail!("failed to install the `{RUST_TARGET}` target");
331        }
332
333        Ok(())
334    }
335
336    async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
337        let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
338        if let Ok(content) = fs::read(&cache_path) {
339            if Parser::is_core_wasm(&content) {
340                return Ok(content);
341            }
342        }
343
344        fs::remove_file(&cache_path).ok();
345
346        log::info!(
347            "downloading wasi adapter module to {}",
348            cache_path.display()
349        );
350        let mut response = self
351            .http
352            .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
353            .await?;
354
355        let mut content = Vec::new();
356        let mut body = BufReader::new(response.body_mut());
357        body.read_to_end(&mut content).await?;
358
359        fs::write(&cache_path, &content)
360            .with_context(|| format!("failed to save file {}", cache_path.display()))?;
361
362        if !Parser::is_core_wasm(&content) {
363            bail!("downloaded wasi adapter is invalid");
364        }
365        Ok(content)
366    }
367
368    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
369        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
370            format!("{WASI_SDK_URL}/{asset_name}")
371        } else {
372            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
373        };
374
375        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
376        let mut clang_path = wasi_sdk_dir.clone();
377        clang_path.extend(["bin", "clang-17"]);
378
379        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
380            return Ok(clang_path);
381        }
382
383        let mut tar_out_dir = wasi_sdk_dir.clone();
384        tar_out_dir.set_extension("archive");
385
386        fs::remove_dir_all(&wasi_sdk_dir).ok();
387        fs::remove_dir_all(&tar_out_dir).ok();
388
389        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
390        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
391        let body = BufReader::new(response.body_mut());
392        let body = GzipDecoder::new(body);
393        let tar = Archive::new(body);
394        tar.unpack(&tar_out_dir)
395            .await
396            .context("failed to unpack wasi-sdk archive")?;
397
398        let inner_dir = fs::read_dir(&tar_out_dir)?
399            .next()
400            .ok_or_else(|| anyhow!("no content"))?
401            .context("failed to read contents of extracted wasi archive directory")?
402            .path();
403        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
404        fs::remove_dir_all(&tar_out_dir).ok();
405
406        Ok(clang_path)
407    }
408
409    // This was adapted from:
410    // https://github.com/bytecodealliance/wasm-tools/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
411    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
412        use wasmparser::Payload::*;
413
414        let strip_custom_section = |name: &str| name.starts_with(".debug");
415
416        let mut output = Vec::new();
417        let mut stack = Vec::new();
418
419        for payload in Parser::new(0).parse_all(input) {
420            let payload = payload?;
421
422            // Track nesting depth, so that we don't mess with inner producer sections:
423            match payload {
424                Version { encoding, .. } => {
425                    output.extend_from_slice(match encoding {
426                        wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
427                        wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
428                    });
429                }
430                ModuleSection { .. } | ComponentSection { .. } => {
431                    stack.push(mem::take(&mut output));
432                    continue;
433                }
434                End { .. } => {
435                    let mut parent = match stack.pop() {
436                        Some(c) => c,
437                        None => break,
438                    };
439                    if output.starts_with(&wasm_encoder::Component::HEADER) {
440                        parent.push(ComponentSectionId::Component as u8);
441                        output.encode(&mut parent);
442                    } else {
443                        parent.push(ComponentSectionId::CoreModule as u8);
444                        output.encode(&mut parent);
445                    }
446                    output = parent;
447                }
448                _ => {}
449            }
450
451            match &payload {
452                CustomSection(c) => {
453                    if strip_custom_section(c.name()) {
454                        continue;
455                    }
456                }
457
458                _ => {}
459            }
460
461            if let Some((id, range)) = payload.as_section() {
462                RawSection {
463                    id,
464                    data: &input[range],
465                }
466                .append_to(&mut output);
467            }
468        }
469
470        Ok(output)
471    }
472}