1use crate::ExtensionManifest;
2use crate::{extension_manifest::ExtensionLibraryKind, GrammarManifestEntry};
3use anyhow::{anyhow, bail, Context as _, Result};
4use async_compression::futures::bufread::GzipDecoder;
5use async_tar::Archive;
6use futures::io::BufReader;
7use futures::AsyncReadExt;
8use serde::Deserialize;
9use std::mem;
10use std::{
11 env, fs,
12 path::{Path, PathBuf},
13 process::{Command, Stdio},
14 sync::Arc,
15};
16use util::http::{self, AsyncBody, HttpClient};
17use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
18use wasmparser::Parser;
19use wit_component::ComponentEncoder;
20
/// The Rust compilation target used when building extension crates.
///
/// Currently, we compile with Rust's `wasm32-wasi` target, which works with WASI `preview1`.
/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
/// module, which implements the `preview1` interface in terms of `preview2`.
///
/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
/// not need the adapter anymore.
const RUST_TARGET: &str = "wasm32-wasi";
/// Download URL of the `preview1` adapter module described above. It is fetched once and
/// cached in the builder's cache directory.
const WASI_ADAPTER_URL: &str =
    "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
30
/// Base URL of the `wasi-sdk` release from which the platform-specific archive is downloaded.
///
/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
/// and clang's runtime library. The `wasi-sdk` provides these binaries.
///
/// Once Clang 17 and its wasm target are available via system package managers, we won't need
/// to download this.
const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";
/// Name of the `wasi-sdk` release archive for the platform this crate is compiled for.
///
/// `None` on any platform other than macOS and Linux.
const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "macos") {
    Some("wasi-sdk-21.0-macos.tar.gz")
} else if cfg!(target_os = "linux") {
    Some("wasi-sdk-21.0-linux.tar.gz")
} else {
    None
};
44
/// Compiles extensions: their Rust library into a WASM component and their Tree-sitter
/// grammars into WASM modules.
pub struct ExtensionBuilder {
    /// Directory in which downloaded tooling (the WASI adapter module and the `wasi-sdk`)
    /// is cached between builds.
    cache_dir: PathBuf,
    /// HTTP client used to download the tooling.
    pub http: Arc<dyn HttpClient>,
}
49
/// Options controlling how an extension's Rust code is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, `cargo build` is invoked with `--release` and the output is read from
    /// the `release` directory instead of `debug`.
    pub release: bool,
}
53
/// The subset of an extension's `Cargo.toml` that the builder deserializes.
#[derive(Deserialize)]
struct CargoToml {
    /// The `[package]` table.
    package: CargoTomlPackage,
}
58
/// The subset of the `[package]` table of `Cargo.toml` that the builder deserializes.
#[derive(Deserialize)]
struct CargoTomlPackage {
    /// The package name; used to locate the compiled `.wasm` file in the target directory.
    name: String,
}
63
64impl ExtensionBuilder {
65 pub fn new(cache_dir: PathBuf) -> Self {
66 Self {
67 cache_dir,
68 http: http::client(),
69 }
70 }
71
72 pub async fn compile_extension(
73 &self,
74 extension_dir: &Path,
75 options: CompileExtensionOptions,
76 ) -> Result<()> {
77 fs::create_dir_all(&self.cache_dir)?;
78 let extension_toml_path = extension_dir.join("extension.toml");
79 let extension_toml_content = fs::read_to_string(&extension_toml_path)?;
80 let extension_toml: ExtensionManifest = toml::from_str(&extension_toml_content)?;
81
82 let cargo_toml_path = extension_dir.join("Cargo.toml");
83 if extension_toml.lib.kind == Some(ExtensionLibraryKind::Rust)
84 || fs::metadata(&cargo_toml_path)?.is_file()
85 {
86 self.compile_rust_extension(extension_dir, options).await?;
87 }
88
89 for (grammar_name, grammar_metadata) in extension_toml.grammars {
90 self.compile_grammar(extension_dir, grammar_name, grammar_metadata)
91 .await?;
92 }
93
94 log::info!("finished compiling extension {}", extension_dir.display());
95 Ok(())
96 }
97
98 async fn compile_rust_extension(
99 &self,
100 extension_dir: &Path,
101 options: CompileExtensionOptions,
102 ) -> Result<(), anyhow::Error> {
103 self.install_rust_wasm_target_if_needed()?;
104 let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
105
106 let cargo_toml_content = fs::read_to_string(&extension_dir.join("Cargo.toml"))?;
107 let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
108
109 log::info!("compiling rust extension {}", extension_dir.display());
110 let output = Command::new("cargo")
111 .args(["build", "--target", RUST_TARGET])
112 .args(options.release.then_some("--release"))
113 .arg("--target-dir")
114 .arg(extension_dir.join("target"))
115 .current_dir(&extension_dir)
116 .output()
117 .context("failed to run `cargo`")?;
118 if !output.status.success() {
119 bail!(
120 "failed to build extension {}",
121 String::from_utf8_lossy(&output.stderr)
122 );
123 }
124
125 let mut wasm_path = PathBuf::from(extension_dir);
126 wasm_path.extend([
127 "target",
128 RUST_TARGET,
129 if options.release { "release" } else { "debug" },
130 cargo_toml.package.name.as_str(),
131 ]);
132 wasm_path.set_extension("wasm");
133
134 let wasm_bytes = fs::read(&wasm_path)
135 .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
136
137 let encoder = ComponentEncoder::default()
138 .module(&wasm_bytes)?
139 .adapter("wasi_snapshot_preview1", &adapter_bytes)
140 .context("failed to load adapter module")?
141 .validate(true);
142
143 let component_bytes = encoder
144 .encode()
145 .context("failed to encode wasm component")?;
146
147 let component_bytes = self
148 .strip_custom_sections(&component_bytes)
149 .context("failed to strip debug sections from wasm component")?;
150
151 fs::write(extension_dir.join("extension.wasm"), &component_bytes)
152 .context("failed to write extension.wasm")?;
153
154 Ok(())
155 }
156
157 async fn compile_grammar(
158 &self,
159 extension_dir: &Path,
160 grammar_name: Arc<str>,
161 grammar_metadata: GrammarManifestEntry,
162 ) -> Result<()> {
163 let clang_path = self.install_wasi_sdk_if_needed().await?;
164
165 let mut grammar_repo_dir = extension_dir.to_path_buf();
166 grammar_repo_dir.extend(["grammars", grammar_name.as_ref()]);
167
168 let mut grammar_wasm_path = grammar_repo_dir.clone();
169 grammar_wasm_path.set_extension("wasm");
170
171 log::info!("checking out {grammar_name} parser");
172 self.checkout_repo(
173 &grammar_repo_dir,
174 &grammar_metadata.repository,
175 &grammar_metadata.rev,
176 )?;
177
178 let src_path = grammar_repo_dir.join("src");
179 let parser_path = src_path.join("parser.c");
180 let scanner_path = src_path.join("scanner.c");
181
182 log::info!("compiling {grammar_name} parser");
183 let clang_output = Command::new(&clang_path)
184 .args(["-fPIC", "-shared", "-Os"])
185 .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
186 .arg("-o")
187 .arg(&grammar_wasm_path)
188 .arg("-I")
189 .arg(&src_path)
190 .arg(&parser_path)
191 .args(scanner_path.exists().then_some(scanner_path))
192 .output()
193 .context("failed to run clang")?;
194 if !clang_output.status.success() {
195 bail!(
196 "failed to compile {} parser with clang: {}",
197 grammar_name,
198 String::from_utf8_lossy(&clang_output.stderr),
199 );
200 }
201
202 Ok(())
203 }
204
205 fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
206 let git_dir = directory.join(".git");
207
208 if directory.exists() {
209 let remotes_output = Command::new("git")
210 .arg("--git-dir")
211 .arg(&git_dir)
212 .args(["remote", "-v"])
213 .output()?;
214 let has_remote = remotes_output.status.success()
215 && String::from_utf8_lossy(&remotes_output.stdout)
216 .lines()
217 .any(|line| {
218 let mut parts = line.split(|c: char| c.is_whitespace());
219 parts.next() == Some("origin") && parts.any(|part| part == url)
220 });
221 if !has_remote {
222 bail!(
223 "grammar directory '{}' already exists, but is not a git clone of '{}'",
224 directory.display(),
225 url
226 );
227 }
228 } else {
229 fs::create_dir_all(&directory).with_context(|| {
230 format!("failed to create grammar directory {}", directory.display(),)
231 })?;
232 let init_output = Command::new("git")
233 .arg("init")
234 .current_dir(&directory)
235 .output()?;
236 if !init_output.status.success() {
237 bail!(
238 "failed to run `git init` in directory '{}'",
239 directory.display()
240 );
241 }
242
243 let remote_add_output = Command::new("git")
244 .arg("--git-dir")
245 .arg(&git_dir)
246 .args(["remote", "add", "origin", url])
247 .output()
248 .context("failed to execute `git remote add`")?;
249 if !remote_add_output.status.success() {
250 bail!(
251 "failed to add remote {url} for git repository {}",
252 git_dir.display()
253 );
254 }
255 }
256
257 let fetch_output = Command::new("git")
258 .arg("--git-dir")
259 .arg(&git_dir)
260 .args(["fetch", "--depth", "1", "origin", &rev])
261 .output()
262 .context("failed to execute `git fetch`")?;
263
264 let checkout_output = Command::new("git")
265 .arg("--git-dir")
266 .arg(&git_dir)
267 .args(["checkout", &rev])
268 .current_dir(&directory)
269 .output()
270 .context("failed to execute `git checkout`")?;
271 if !checkout_output.status.success() {
272 if !fetch_output.status.success() {
273 bail!(
274 "failed to fetch revision {} in directory '{}'",
275 rev,
276 directory.display()
277 );
278 }
279 bail!(
280 "failed to checkout revision {} in directory '{}'",
281 rev,
282 directory.display()
283 );
284 }
285
286 Ok(())
287 }
288
289 fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
290 let rustc_output = Command::new("rustc")
291 .arg("--print")
292 .arg("sysroot")
293 .output()
294 .context("failed to run rustc")?;
295 if !rustc_output.status.success() {
296 bail!(
297 "failed to retrieve rust sysroot: {}",
298 String::from_utf8_lossy(&rustc_output.stderr)
299 );
300 }
301
302 let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
303 if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
304 return Ok(());
305 }
306
307 let output = Command::new("rustup")
308 .args(["target", "add", RUST_TARGET])
309 .stderr(Stdio::inherit())
310 .stdout(Stdio::inherit())
311 .output()
312 .context("failed to run `rustup target add`")?;
313 if !output.status.success() {
314 bail!("failed to install the `{RUST_TARGET}` target");
315 }
316
317 Ok(())
318 }
319
320 async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
321 let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
322 if let Ok(content) = fs::read(&cache_path) {
323 if Parser::is_core_wasm(&content) {
324 return Ok(content);
325 }
326 }
327
328 fs::remove_file(&cache_path).ok();
329
330 log::info!(
331 "downloading wasi adapter module to {}",
332 cache_path.display()
333 );
334 let mut response = self
335 .http
336 .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
337 .await?;
338
339 let mut content = Vec::new();
340 let mut body = BufReader::new(response.body_mut());
341 body.read_to_end(&mut content).await?;
342
343 fs::write(&cache_path, &content)
344 .with_context(|| format!("failed to save file {}", cache_path.display()))?;
345
346 if !Parser::is_core_wasm(&content) {
347 bail!("downloaded wasi adapter is invalid");
348 }
349 Ok(content)
350 }
351
352 async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
353 let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
354 format!("{WASI_SDK_URL}/{asset_name}")
355 } else {
356 bail!("wasi-sdk is not available for platform {}", env::consts::OS);
357 };
358
359 let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
360 let mut clang_path = wasi_sdk_dir.clone();
361 clang_path.extend(["bin", "clang-17"]);
362
363 if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
364 return Ok(clang_path);
365 }
366
367 let mut tar_out_dir = wasi_sdk_dir.clone();
368 tar_out_dir.set_extension("archive");
369
370 fs::remove_dir_all(&wasi_sdk_dir).ok();
371 fs::remove_dir_all(&tar_out_dir).ok();
372
373 log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
374 let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
375 let body = BufReader::new(response.body_mut());
376 let body = GzipDecoder::new(body);
377 let tar = Archive::new(body);
378 tar.unpack(&tar_out_dir)
379 .await
380 .context("failed to unpack wasi-sdk archive")?;
381
382 let inner_dir = fs::read_dir(&tar_out_dir)?
383 .next()
384 .ok_or_else(|| anyhow!("no content"))?
385 .context("failed to read contents of extracted wasi archive directory")?
386 .path();
387 fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
388 fs::remove_dir_all(&tar_out_dir).ok();
389
390 Ok(clang_path)
391 }
392
393 // This was adapted from:
394 // https://github.com/bytecodealliance/wasm-tools/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
395 fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
396 use wasmparser::Payload::*;
397
398 let strip_custom_section = |name: &str| name.starts_with(".debug");
399
400 let mut output = Vec::new();
401 let mut stack = Vec::new();
402
403 for payload in Parser::new(0).parse_all(input) {
404 let payload = payload?;
405
406 // Track nesting depth, so that we don't mess with inner producer sections:
407 match payload {
408 Version { encoding, .. } => {
409 output.extend_from_slice(match encoding {
410 wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
411 wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
412 });
413 }
414 ModuleSection { .. } | ComponentSection { .. } => {
415 stack.push(mem::take(&mut output));
416 continue;
417 }
418 End { .. } => {
419 let mut parent = match stack.pop() {
420 Some(c) => c,
421 None => break,
422 };
423 if output.starts_with(&wasm_encoder::Component::HEADER) {
424 parent.push(ComponentSectionId::Component as u8);
425 output.encode(&mut parent);
426 } else {
427 parent.push(ComponentSectionId::CoreModule as u8);
428 output.encode(&mut parent);
429 }
430 output = parent;
431 }
432 _ => {}
433 }
434
435 match &payload {
436 CustomSection(c) => {
437 if strip_custom_section(c.name()) {
438 continue;
439 }
440 }
441
442 _ => {}
443 }
444
445 if let Some((id, range)) = payload.as_section() {
446 RawSection {
447 id,
448 data: &input[range],
449 }
450 .append_to(&mut output);
451 }
452 }
453
454 Ok(output)
455 }
456}