1use crate::ExtensionManifest;
2use crate::{extension_manifest::ExtensionLibraryKind, GrammarManifestEntry};
3use anyhow::{anyhow, bail, Context as _, Result};
4use async_compression::futures::bufread::GzipDecoder;
5use async_tar::Archive;
6use futures::io::BufReader;
7use futures::AsyncReadExt;
8use serde::Deserialize;
9use std::{
10 env, fs, mem,
11 path::{Path, PathBuf},
12 process::{Command, Stdio},
13 sync::Arc,
14};
15use util::http::{self, AsyncBody, HttpClient};
16use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
17use wasmparser::Parser;
18use wit_component::ComponentEncoder;
19
/// The Rust target triple that extensions are compiled for.
///
/// Currently, we compile with Rust's `wasm32-wasi` target, which works with WASI `preview1`.
/// The WASM component model, however, is based on WASI `preview2`, so we need an 'adapter'
/// WASM module which implements the `preview1` interface in terms of `preview2`.
///
/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
/// not need the adapter anymore.
const RUST_TARGET: &str = "wasm32-wasi";

/// Download URL of the WASI `preview1` adapter module (an asset of the wasmtime v18.0.2
/// release).
const WASI_ADAPTER_URL: &str = concat!(
    "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/",
    "wasi_snapshot_preview1.reactor.wasm",
);
29
/// Base URL of the `wasi-sdk` release that provides the C toolchain.
///
/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
/// and clang's runtime library. The `wasi-sdk` provides these binaries.
///
/// Once Clang 17 and its wasm target are available via system package managers, we won't need
/// to download this.
const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";

/// File name of the `wasi-sdk` release archive for macOS hosts.
#[cfg(target_os = "macos")]
const WASI_SDK_ASSET_NAME: Option<&str> = Some("wasi-sdk-21.0-macos.tar.gz");

/// File name of the `wasi-sdk` release archive for Linux hosts.
#[cfg(target_os = "linux")]
const WASI_SDK_ASSET_NAME: Option<&str> = Some("wasi-sdk-21.0-linux.tar.gz");

/// No `wasi-sdk` release archive is selected for any other platform.
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
const WASI_SDK_ASSET_NAME: Option<&str> = None;
43
44pub struct ExtensionBuilder {
45 cache_dir: PathBuf,
46 pub http: Arc<dyn HttpClient>,
47}
48
/// Options controlling how an extension is compiled.
///
/// The default value requests a debug (non-release) build.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct CompileExtensionOptions {
    /// When `true`, the Rust part of the extension is built with `cargo build --release`;
    /// otherwise cargo's default (debug) profile is used.
    pub release: bool,
}
52
53#[derive(Deserialize)]
54struct CargoToml {
55 package: CargoTomlPackage,
56}
57
58#[derive(Deserialize)]
59struct CargoTomlPackage {
60 name: String,
61}
62
63impl ExtensionBuilder {
64 pub fn new(cache_dir: PathBuf) -> Self {
65 Self {
66 cache_dir,
67 http: http::client(),
68 }
69 }
70
71 pub async fn compile_extension(
72 &self,
73 extension_dir: &Path,
74 extension_manifest: &ExtensionManifest,
75 options: CompileExtensionOptions,
76 ) -> Result<()> {
77 if extension_dir.is_relative() {
78 bail!(
79 "extension dir {} is not an absolute path",
80 extension_dir.display()
81 );
82 }
83
84 fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
85
86 let cargo_toml_path = extension_dir.join("Cargo.toml");
87 if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust)
88 || fs::metadata(&cargo_toml_path)
89 .ok()
90 .map(|metadata| metadata.is_file())
91 .unwrap_or(false)
92 {
93 log::info!("compiling Rust extension {}", extension_dir.display());
94 self.compile_rust_extension(extension_dir, options)
95 .await
96 .context("failed to compile Rust extension")?;
97 }
98
99 for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
100 self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
101 .await
102 .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
103 }
104
105 log::info!("finished compiling extension {}", extension_dir.display());
106 Ok(())
107 }
108
109 async fn compile_rust_extension(
110 &self,
111 extension_dir: &Path,
112 options: CompileExtensionOptions,
113 ) -> Result<(), anyhow::Error> {
114 self.install_rust_wasm_target_if_needed()?;
115 let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
116
117 let cargo_toml_content = fs::read_to_string(&extension_dir.join("Cargo.toml"))?;
118 let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
119
120 log::info!("compiling rust extension {}", extension_dir.display());
121 let output = Command::new("cargo")
122 .args(["build", "--target", RUST_TARGET])
123 .args(options.release.then_some("--release"))
124 .arg("--target-dir")
125 .arg(extension_dir.join("target"))
126 .current_dir(&extension_dir)
127 .output()
128 .context("failed to run `cargo`")?;
129 if !output.status.success() {
130 bail!(
131 "failed to build extension {}",
132 String::from_utf8_lossy(&output.stderr)
133 );
134 }
135
136 let mut wasm_path = PathBuf::from(extension_dir);
137 wasm_path.extend([
138 "target",
139 RUST_TARGET,
140 if options.release { "release" } else { "debug" },
141 &cargo_toml
142 .package
143 .name
144 // The wasm32-wasi target normalizes `-` in package names to `_` in the resulting `.wasm` file.
145 .replace('-', "_"),
146 ]);
147 wasm_path.set_extension("wasm");
148
149 let wasm_bytes = fs::read(&wasm_path)
150 .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
151
152 let encoder = ComponentEncoder::default()
153 .module(&wasm_bytes)?
154 .adapter("wasi_snapshot_preview1", &adapter_bytes)
155 .context("failed to load adapter module")?
156 .validate(true);
157
158 let component_bytes = encoder
159 .encode()
160 .context("failed to encode wasm component")?;
161
162 let component_bytes = self
163 .strip_custom_sections(&component_bytes)
164 .context("failed to strip debug sections from wasm component")?;
165
166 fs::write(extension_dir.join("extension.wasm"), &component_bytes)
167 .context("failed to write extension.wasm")?;
168
169 Ok(())
170 }
171
172 async fn compile_grammar(
173 &self,
174 extension_dir: &Path,
175 grammar_name: &str,
176 grammar_metadata: &GrammarManifestEntry,
177 ) -> Result<()> {
178 let clang_path = self.install_wasi_sdk_if_needed().await?;
179
180 let mut grammar_repo_dir = extension_dir.to_path_buf();
181 grammar_repo_dir.extend(["grammars", grammar_name]);
182
183 let mut grammar_wasm_path = grammar_repo_dir.clone();
184 grammar_wasm_path.set_extension("wasm");
185
186 log::info!("checking out {grammar_name} parser");
187 self.checkout_repo(
188 &grammar_repo_dir,
189 &grammar_metadata.repository,
190 &grammar_metadata.rev,
191 )?;
192
193 let src_path = grammar_repo_dir.join("src");
194 let parser_path = src_path.join("parser.c");
195 let scanner_path = src_path.join("scanner.c");
196
197 log::info!("compiling {grammar_name} parser");
198 let clang_output = Command::new(&clang_path)
199 .args(["-fPIC", "-shared", "-Os"])
200 .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
201 .arg("-o")
202 .arg(&grammar_wasm_path)
203 .arg("-I")
204 .arg(&src_path)
205 .arg(&parser_path)
206 .args(scanner_path.exists().then_some(scanner_path))
207 .output()
208 .context("failed to run clang")?;
209 if !clang_output.status.success() {
210 bail!(
211 "failed to compile {} parser with clang: {}",
212 grammar_name,
213 String::from_utf8_lossy(&clang_output.stderr),
214 );
215 }
216
217 Ok(())
218 }
219
220 fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
221 let git_dir = directory.join(".git");
222
223 if directory.exists() {
224 let remotes_output = Command::new("git")
225 .arg("--git-dir")
226 .arg(&git_dir)
227 .args(["remote", "-v"])
228 .output()?;
229 let has_remote = remotes_output.status.success()
230 && String::from_utf8_lossy(&remotes_output.stdout)
231 .lines()
232 .any(|line| {
233 let mut parts = line.split(|c: char| c.is_whitespace());
234 parts.next() == Some("origin") && parts.any(|part| part == url)
235 });
236 if !has_remote {
237 bail!(
238 "grammar directory '{}' already exists, but is not a git clone of '{}'",
239 directory.display(),
240 url
241 );
242 }
243 } else {
244 fs::create_dir_all(&directory).with_context(|| {
245 format!("failed to create grammar directory {}", directory.display(),)
246 })?;
247 let init_output = Command::new("git")
248 .arg("init")
249 .current_dir(&directory)
250 .output()?;
251 if !init_output.status.success() {
252 bail!(
253 "failed to run `git init` in directory '{}'",
254 directory.display()
255 );
256 }
257
258 let remote_add_output = Command::new("git")
259 .arg("--git-dir")
260 .arg(&git_dir)
261 .args(["remote", "add", "origin", url])
262 .output()
263 .context("failed to execute `git remote add`")?;
264 if !remote_add_output.status.success() {
265 bail!(
266 "failed to add remote {url} for git repository {}",
267 git_dir.display()
268 );
269 }
270 }
271
272 let fetch_output = Command::new("git")
273 .arg("--git-dir")
274 .arg(&git_dir)
275 .args(["fetch", "--depth", "1", "origin", &rev])
276 .output()
277 .context("failed to execute `git fetch`")?;
278
279 let checkout_output = Command::new("git")
280 .arg("--git-dir")
281 .arg(&git_dir)
282 .args(["checkout", &rev])
283 .current_dir(&directory)
284 .output()
285 .context("failed to execute `git checkout`")?;
286 if !checkout_output.status.success() {
287 if !fetch_output.status.success() {
288 bail!(
289 "failed to fetch revision {} in directory '{}'",
290 rev,
291 directory.display()
292 );
293 }
294 bail!(
295 "failed to checkout revision {} in directory '{}': {}",
296 rev,
297 directory.display(),
298 String::from_utf8_lossy(&checkout_output.stderr)
299 );
300 }
301
302 Ok(())
303 }
304
305 fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
306 let rustc_output = Command::new("rustc")
307 .arg("--print")
308 .arg("sysroot")
309 .output()
310 .context("failed to run rustc")?;
311 if !rustc_output.status.success() {
312 bail!(
313 "failed to retrieve rust sysroot: {}",
314 String::from_utf8_lossy(&rustc_output.stderr)
315 );
316 }
317
318 let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
319 if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
320 return Ok(());
321 }
322
323 let output = Command::new("rustup")
324 .args(["target", "add", RUST_TARGET])
325 .stderr(Stdio::inherit())
326 .stdout(Stdio::inherit())
327 .output()
328 .context("failed to run `rustup target add`")?;
329 if !output.status.success() {
330 bail!("failed to install the `{RUST_TARGET}` target");
331 }
332
333 Ok(())
334 }
335
336 async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
337 let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
338 if let Ok(content) = fs::read(&cache_path) {
339 if Parser::is_core_wasm(&content) {
340 return Ok(content);
341 }
342 }
343
344 fs::remove_file(&cache_path).ok();
345
346 log::info!(
347 "downloading wasi adapter module to {}",
348 cache_path.display()
349 );
350 let mut response = self
351 .http
352 .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
353 .await?;
354
355 let mut content = Vec::new();
356 let mut body = BufReader::new(response.body_mut());
357 body.read_to_end(&mut content).await?;
358
359 fs::write(&cache_path, &content)
360 .with_context(|| format!("failed to save file {}", cache_path.display()))?;
361
362 if !Parser::is_core_wasm(&content) {
363 bail!("downloaded wasi adapter is invalid");
364 }
365 Ok(content)
366 }
367
368 async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
369 let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
370 format!("{WASI_SDK_URL}/{asset_name}")
371 } else {
372 bail!("wasi-sdk is not available for platform {}", env::consts::OS);
373 };
374
375 let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
376 let mut clang_path = wasi_sdk_dir.clone();
377 clang_path.extend(["bin", "clang-17"]);
378
379 if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
380 return Ok(clang_path);
381 }
382
383 let mut tar_out_dir = wasi_sdk_dir.clone();
384 tar_out_dir.set_extension("archive");
385
386 fs::remove_dir_all(&wasi_sdk_dir).ok();
387 fs::remove_dir_all(&tar_out_dir).ok();
388
389 log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
390 let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
391 let body = BufReader::new(response.body_mut());
392 let body = GzipDecoder::new(body);
393 let tar = Archive::new(body);
394 tar.unpack(&tar_out_dir)
395 .await
396 .context("failed to unpack wasi-sdk archive")?;
397
398 let inner_dir = fs::read_dir(&tar_out_dir)?
399 .next()
400 .ok_or_else(|| anyhow!("no content"))?
401 .context("failed to read contents of extracted wasi archive directory")?
402 .path();
403 fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
404 fs::remove_dir_all(&tar_out_dir).ok();
405
406 Ok(clang_path)
407 }
408
409 // This was adapted from:
410 // https://github.com/bytecodealliance/wasm-tools/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
411 fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
412 use wasmparser::Payload::*;
413
414 let strip_custom_section = |name: &str| name.starts_with(".debug");
415
416 let mut output = Vec::new();
417 let mut stack = Vec::new();
418
419 for payload in Parser::new(0).parse_all(input) {
420 let payload = payload?;
421
422 // Track nesting depth, so that we don't mess with inner producer sections:
423 match payload {
424 Version { encoding, .. } => {
425 output.extend_from_slice(match encoding {
426 wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
427 wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
428 });
429 }
430 ModuleSection { .. } | ComponentSection { .. } => {
431 stack.push(mem::take(&mut output));
432 continue;
433 }
434 End { .. } => {
435 let mut parent = match stack.pop() {
436 Some(c) => c,
437 None => break,
438 };
439 if output.starts_with(&wasm_encoder::Component::HEADER) {
440 parent.push(ComponentSectionId::Component as u8);
441 output.encode(&mut parent);
442 } else {
443 parent.push(ComponentSectionId::CoreModule as u8);
444 output.encode(&mut parent);
445 }
446 output = parent;
447 }
448 _ => {}
449 }
450
451 match &payload {
452 CustomSection(c) => {
453 if strip_custom_section(c.name()) {
454 continue;
455 }
456 }
457
458 _ => {}
459 }
460
461 if let Some((id, range)) = payload.as_section() {
462 RawSection {
463 id,
464 data: &input[range],
465 }
466 .append_to(&mut output);
467 }
468 }
469
470 Ok(output)
471 }
472}