build_extension.rs

  1use crate::ExtensionManifest;
  2use crate::{extension_manifest::ExtensionLibraryKind, GrammarManifestEntry};
  3use anyhow::{anyhow, bail, Context as _, Result};
  4use async_compression::futures::bufread::GzipDecoder;
  5use async_tar::Archive;
  6use futures::io::BufReader;
  7use futures::AsyncReadExt;
  8use serde::Deserialize;
  9use std::mem;
 10use std::{
 11    env, fs,
 12    path::{Path, PathBuf},
 13    process::{Command, Stdio},
 14    sync::Arc,
 15};
 16use util::http::{self, AsyncBody, HttpClient};
 17use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 18use wasmparser::Parser;
 19use wit_component::ComponentEncoder;
 20
 21/// Currently, we compile with Rust's `wasm32-wasi` target, which works with WASI `preview1`.
 22/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
 23/// module, which implements the `preview1` interface in terms of `preview2`.
 24///
 25/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
 26/// not need the adapter anymore.
 27const RUST_TARGET: &str = "wasm32-wasi";
 28const WASI_ADAPTER_URL: &str =
 29    "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
 30
 31/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
 32/// and clang's runtime library. The `wasi-sdk` provides these binaries.
 33///
 34/// Once Clang 17 and its wasm target are available via system package managers, we won't need
 35/// to download this.
 36const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";
 37const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "macos") {
 38    Some("wasi-sdk-21.0-macos.tar.gz")
 39} else if cfg!(target_os = "linux") {
 40    Some("wasi-sdk-21.0-linux.tar.gz")
 41} else {
 42    None
 43};
 44
 45pub struct ExtensionBuilder {
 46    cache_dir: PathBuf,
 47    pub http: Arc<dyn HttpClient>,
 48}
 49
 50pub struct CompileExtensionOptions {
 51    pub release: bool,
 52}
 53
 54#[derive(Deserialize)]
 55struct CargoToml {
 56    package: CargoTomlPackage,
 57}
 58
 59#[derive(Deserialize)]
 60struct CargoTomlPackage {
 61    name: String,
 62}
 63
 64impl ExtensionBuilder {
 65    pub fn new(cache_dir: PathBuf) -> Self {
 66        Self {
 67            cache_dir,
 68            http: http::client(),
 69        }
 70    }
 71
 72    pub async fn compile_extension(
 73        &self,
 74        extension_dir: &Path,
 75        options: CompileExtensionOptions,
 76    ) -> Result<()> {
 77        fs::create_dir_all(&self.cache_dir)?;
 78        let extension_toml_path = extension_dir.join("extension.toml");
 79        let extension_toml_content = fs::read_to_string(&extension_toml_path)?;
 80        let extension_toml: ExtensionManifest = toml::from_str(&extension_toml_content)?;
 81
 82        let cargo_toml_path = extension_dir.join("Cargo.toml");
 83        if extension_toml.lib.kind == Some(ExtensionLibraryKind::Rust)
 84            || fs::metadata(&cargo_toml_path)?.is_file()
 85        {
 86            self.compile_rust_extension(extension_dir, options).await?;
 87        }
 88
 89        for (grammar_name, grammar_metadata) in extension_toml.grammars {
 90            self.compile_grammar(extension_dir, grammar_name, grammar_metadata)
 91                .await?;
 92        }
 93
 94        log::info!("finished compiling extension {}", extension_dir.display());
 95        Ok(())
 96    }
 97
 98    async fn compile_rust_extension(
 99        &self,
100        extension_dir: &Path,
101        options: CompileExtensionOptions,
102    ) -> Result<(), anyhow::Error> {
103        self.install_rust_wasm_target_if_needed()?;
104        let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
105
106        let cargo_toml_content = fs::read_to_string(&extension_dir.join("Cargo.toml"))?;
107        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
108
109        log::info!("compiling rust extension {}", extension_dir.display());
110        let output = Command::new("cargo")
111            .args(["build", "--target", RUST_TARGET])
112            .args(options.release.then_some("--release"))
113            .arg("--target-dir")
114            .arg(extension_dir.join("target"))
115            .current_dir(&extension_dir)
116            .output()
117            .context("failed to run `cargo`")?;
118        if !output.status.success() {
119            bail!(
120                "failed to build extension {}",
121                String::from_utf8_lossy(&output.stderr)
122            );
123        }
124
125        let mut wasm_path = PathBuf::from(extension_dir);
126        wasm_path.extend([
127            "target",
128            RUST_TARGET,
129            if options.release { "release" } else { "debug" },
130            cargo_toml.package.name.as_str(),
131        ]);
132        wasm_path.set_extension("wasm");
133
134        let wasm_bytes = fs::read(&wasm_path)
135            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
136
137        let encoder = ComponentEncoder::default()
138            .module(&wasm_bytes)?
139            .adapter("wasi_snapshot_preview1", &adapter_bytes)
140            .context("failed to load adapter module")?
141            .validate(true);
142
143        let component_bytes = encoder
144            .encode()
145            .context("failed to encode wasm component")?;
146
147        let component_bytes = self
148            .strip_custom_sections(&component_bytes)
149            .context("failed to strip debug sections from wasm component")?;
150
151        fs::write(extension_dir.join("extension.wasm"), &component_bytes)
152            .context("failed to write extension.wasm")?;
153
154        Ok(())
155    }
156
157    async fn compile_grammar(
158        &self,
159        extension_dir: &Path,
160        grammar_name: Arc<str>,
161        grammar_metadata: GrammarManifestEntry,
162    ) -> Result<()> {
163        let clang_path = self.install_wasi_sdk_if_needed().await?;
164
165        let mut grammar_repo_dir = extension_dir.to_path_buf();
166        grammar_repo_dir.extend(["grammars", grammar_name.as_ref()]);
167
168        let mut grammar_wasm_path = grammar_repo_dir.clone();
169        grammar_wasm_path.set_extension("wasm");
170
171        log::info!("checking out {grammar_name} parser");
172        self.checkout_repo(
173            &grammar_repo_dir,
174            &grammar_metadata.repository,
175            &grammar_metadata.rev,
176        )?;
177
178        let src_path = grammar_repo_dir.join("src");
179        let parser_path = src_path.join("parser.c");
180        let scanner_path = src_path.join("scanner.c");
181
182        log::info!("compiling {grammar_name} parser");
183        let clang_output = Command::new(&clang_path)
184            .args(["-fPIC", "-shared", "-Os"])
185            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
186            .arg("-o")
187            .arg(&grammar_wasm_path)
188            .arg("-I")
189            .arg(&src_path)
190            .arg(&parser_path)
191            .args(scanner_path.exists().then_some(scanner_path))
192            .output()
193            .context("failed to run clang")?;
194        if !clang_output.status.success() {
195            bail!(
196                "failed to compile {} parser with clang: {}",
197                grammar_name,
198                String::from_utf8_lossy(&clang_output.stderr),
199            );
200        }
201
202        Ok(())
203    }
204
205    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
206        let git_dir = directory.join(".git");
207
208        if directory.exists() {
209            let remotes_output = Command::new("git")
210                .arg("--git-dir")
211                .arg(&git_dir)
212                .args(["remote", "-v"])
213                .output()?;
214            let has_remote = remotes_output.status.success()
215                && String::from_utf8_lossy(&remotes_output.stdout)
216                    .lines()
217                    .any(|line| {
218                        let mut parts = line.split(|c: char| c.is_whitespace());
219                        parts.next() == Some("origin") && parts.any(|part| part == url)
220                    });
221            if !has_remote {
222                bail!(
223                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
224                    directory.display(),
225                    url
226                );
227            }
228        } else {
229            fs::create_dir_all(&directory).with_context(|| {
230                format!("failed to create grammar directory {}", directory.display(),)
231            })?;
232            let init_output = Command::new("git")
233                .arg("init")
234                .current_dir(&directory)
235                .output()?;
236            if !init_output.status.success() {
237                bail!(
238                    "failed to run `git init` in directory '{}'",
239                    directory.display()
240                );
241            }
242
243            let remote_add_output = Command::new("git")
244                .arg("--git-dir")
245                .arg(&git_dir)
246                .args(["remote", "add", "origin", url])
247                .output()
248                .context("failed to execute `git remote add`")?;
249            if !remote_add_output.status.success() {
250                bail!(
251                    "failed to add remote {url} for git repository {}",
252                    git_dir.display()
253                );
254            }
255        }
256
257        let fetch_output = Command::new("git")
258            .arg("--git-dir")
259            .arg(&git_dir)
260            .args(["fetch", "--depth", "1", "origin", &rev])
261            .output()
262            .context("failed to execute `git fetch`")?;
263
264        let checkout_output = Command::new("git")
265            .arg("--git-dir")
266            .arg(&git_dir)
267            .args(["checkout", &rev])
268            .current_dir(&directory)
269            .output()
270            .context("failed to execute `git checkout`")?;
271        if !checkout_output.status.success() {
272            if !fetch_output.status.success() {
273                bail!(
274                    "failed to fetch revision {} in directory '{}'",
275                    rev,
276                    directory.display()
277                );
278            }
279            bail!(
280                "failed to checkout revision {} in directory '{}'",
281                rev,
282                directory.display()
283            );
284        }
285
286        Ok(())
287    }
288
289    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
290        let rustc_output = Command::new("rustc")
291            .arg("--print")
292            .arg("sysroot")
293            .output()
294            .context("failed to run rustc")?;
295        if !rustc_output.status.success() {
296            bail!(
297                "failed to retrieve rust sysroot: {}",
298                String::from_utf8_lossy(&rustc_output.stderr)
299            );
300        }
301
302        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
303        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
304            return Ok(());
305        }
306
307        let output = Command::new("rustup")
308            .args(["target", "add", RUST_TARGET])
309            .stderr(Stdio::inherit())
310            .stdout(Stdio::inherit())
311            .output()
312            .context("failed to run `rustup target add`")?;
313        if !output.status.success() {
314            bail!("failed to install the `{RUST_TARGET}` target");
315        }
316
317        Ok(())
318    }
319
320    async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
321        let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
322        if let Ok(content) = fs::read(&cache_path) {
323            if Parser::is_core_wasm(&content) {
324                return Ok(content);
325            }
326        }
327
328        fs::remove_file(&cache_path).ok();
329
330        log::info!(
331            "downloading wasi adapter module to {}",
332            cache_path.display()
333        );
334        let mut response = self
335            .http
336            .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
337            .await?;
338
339        let mut content = Vec::new();
340        let mut body = BufReader::new(response.body_mut());
341        body.read_to_end(&mut content).await?;
342
343        fs::write(&cache_path, &content)
344            .with_context(|| format!("failed to save file {}", cache_path.display()))?;
345
346        if !Parser::is_core_wasm(&content) {
347            bail!("downloaded wasi adapter is invalid");
348        }
349        Ok(content)
350    }
351
352    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
353        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
354            format!("{WASI_SDK_URL}/{asset_name}")
355        } else {
356            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
357        };
358
359        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
360        let mut clang_path = wasi_sdk_dir.clone();
361        clang_path.extend(["bin", "clang-17"]);
362
363        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
364            return Ok(clang_path);
365        }
366
367        let mut tar_out_dir = wasi_sdk_dir.clone();
368        tar_out_dir.set_extension("archive");
369
370        fs::remove_dir_all(&wasi_sdk_dir).ok();
371        fs::remove_dir_all(&tar_out_dir).ok();
372
373        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
374        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
375        let body = BufReader::new(response.body_mut());
376        let body = GzipDecoder::new(body);
377        let tar = Archive::new(body);
378        tar.unpack(&tar_out_dir)
379            .await
380            .context("failed to unpack wasi-sdk archive")?;
381
382        let inner_dir = fs::read_dir(&tar_out_dir)?
383            .next()
384            .ok_or_else(|| anyhow!("no content"))?
385            .context("failed to read contents of extracted wasi archive directory")?
386            .path();
387        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
388        fs::remove_dir_all(&tar_out_dir).ok();
389
390        Ok(clang_path)
391    }
392
393    // This was adapted from:
394    // https://github.com/bytecodealliance/wasm-tools/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
395    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
396        use wasmparser::Payload::*;
397
398        let strip_custom_section = |name: &str| name.starts_with(".debug");
399
400        let mut output = Vec::new();
401        let mut stack = Vec::new();
402
403        for payload in Parser::new(0).parse_all(input) {
404            let payload = payload?;
405
406            // Track nesting depth, so that we don't mess with inner producer sections:
407            match payload {
408                Version { encoding, .. } => {
409                    output.extend_from_slice(match encoding {
410                        wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
411                        wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
412                    });
413                }
414                ModuleSection { .. } | ComponentSection { .. } => {
415                    stack.push(mem::take(&mut output));
416                    continue;
417                }
418                End { .. } => {
419                    let mut parent = match stack.pop() {
420                        Some(c) => c,
421                        None => break,
422                    };
423                    if output.starts_with(&wasm_encoder::Component::HEADER) {
424                        parent.push(ComponentSectionId::Component as u8);
425                        output.encode(&mut parent);
426                    } else {
427                        parent.push(ComponentSectionId::CoreModule as u8);
428                        output.encode(&mut parent);
429                    }
430                    output = parent;
431                }
432                _ => {}
433            }
434
435            match &payload {
436                CustomSection(c) => {
437                    if strip_custom_section(c.name()) {
438                        continue;
439                    }
440                }
441
442                _ => {}
443            }
444
445            if let Some((id, range)) = payload.as_section() {
446                RawSection {
447                    id,
448                    data: &input[range],
449                }
450                .append_to(&mut output);
451            }
452        }
453
454        Ok(output)
455    }
456}