1use crate::ExtensionManifest;
2use crate::{extension_manifest::ExtensionLibraryKind, GrammarManifestEntry};
3use anyhow::{anyhow, bail, Context as _, Result};
4use async_compression::futures::bufread::GzipDecoder;
5use async_tar::Archive;
6use futures::io::BufReader;
7use futures::AsyncReadExt;
8use serde::Deserialize;
9use std::{
10 env, fs,
11 path::{Path, PathBuf},
12 process::{Command, Stdio},
13 sync::Arc,
14};
15use util::http::{AsyncBody, HttpClient};
16use wit_component::ComponentEncoder;
17
/// The Rust target that extensions are currently compiled for. It works with WASI
/// `preview1`, whereas the WASM component model is based on WASI `preview2`, so an
/// 'adapter' WASM module that implements the `preview1` interface in terms of `preview2`
/// is needed as well.
///
/// Once Rust 1.78 is released, a `wasm32-wasip2` target will be available and the adapter
/// will no longer be needed.
const RUST_TARGET: &str = "wasm32-wasi";

/// Location from which the WASI `preview1` adapter module is downloaded.
const WASI_ADAPTER_URL: &str = concat!(
    "https://github.com/bytecodealliance/wasmtime/releases/download/",
    "v18.0.2/wasi_snapshot_preview1.reactor.wasm",
);
27
/// Base URL of the `wasi-sdk` release that is downloaded.
///
/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, together with a WASM
/// build of libc and of clang's runtime library. The `wasi-sdk` provides these binaries.
///
/// Once Clang 17 and its wasm target are available via system package managers, this
/// download will no longer be needed.
const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";

/// Name of the `wasi-sdk` release asset for the current platform, or `None` when no asset
/// is listed for it.
const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "linux") {
    Some("wasi-sdk-21.0-linux.tar.gz")
} else if cfg!(target_os = "macos") {
    Some("wasi-sdk-21.0-macos.tar.gz")
} else {
    None
};
41
42pub struct ExtensionBuilder {
43 cache_dir: PathBuf,
44 pub http: Arc<dyn HttpClient>,
45}
46
/// Options that control how an extension is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, the Rust library is built with `--release` and the resulting module is
    /// taken from the `release` output directory; otherwise the `debug` directory is used.
    pub release: bool,
}
50
51#[derive(Deserialize)]
52struct CargoToml {
53 package: CargoTomlPackage,
54}
55
56#[derive(Deserialize)]
57struct CargoTomlPackage {
58 name: String,
59}
60
61impl ExtensionBuilder {
62 pub fn new(cache_dir: PathBuf, http: Arc<dyn HttpClient>) -> Self {
63 Self { cache_dir, http }
64 }
65
66 pub async fn compile_extension(
67 &self,
68 extension_dir: &Path,
69 options: CompileExtensionOptions,
70 ) -> Result<()> {
71 fs::create_dir_all(&self.cache_dir)?;
72 let extension_toml_path = extension_dir.join("extension.toml");
73 let extension_toml_content = fs::read_to_string(&extension_toml_path)?;
74 let extension_toml: ExtensionManifest = toml::from_str(&extension_toml_content)?;
75
76 let cargo_toml_path = extension_dir.join("Cargo.toml");
77 if extension_toml.lib.kind == Some(ExtensionLibraryKind::Rust)
78 || fs::metadata(&cargo_toml_path)?.is_file()
79 {
80 self.compile_rust_extension(extension_dir, options).await?;
81 }
82
83 for (grammar_name, grammar_metadata) in extension_toml.grammars {
84 self.compile_grammar(extension_dir, grammar_name, grammar_metadata)
85 .await?;
86 }
87
88 log::info!("finished compiling extension {}", extension_dir.display());
89 Ok(())
90 }
91
92 async fn compile_rust_extension(
93 &self,
94 extension_dir: &Path,
95 options: CompileExtensionOptions,
96 ) -> Result<(), anyhow::Error> {
97 self.install_rust_wasm_target_if_needed()?;
98 let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
99
100 let cargo_toml_content = fs::read_to_string(&extension_dir.join("Cargo.toml"))?;
101 let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
102
103 log::info!("compiling rust extension {}", extension_dir.display());
104 let output = Command::new("cargo")
105 .args(["build", "--target", RUST_TARGET])
106 .args(options.release.then_some("--release"))
107 .arg("--target-dir")
108 .arg(extension_dir.join("target"))
109 .current_dir(&extension_dir)
110 .output()
111 .context("failed to run `cargo`")?;
112 if !output.status.success() {
113 bail!(
114 "failed to build extension {}",
115 String::from_utf8_lossy(&output.stderr)
116 );
117 }
118
119 let mut wasm_path = PathBuf::from(extension_dir);
120 wasm_path.extend([
121 "target",
122 RUST_TARGET,
123 if options.release { "release" } else { "debug" },
124 cargo_toml.package.name.as_str(),
125 ]);
126 wasm_path.set_extension("wasm");
127
128 let wasm_bytes = fs::read(&wasm_path)
129 .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
130
131 let encoder = ComponentEncoder::default()
132 .module(&wasm_bytes)?
133 .adapter("wasi_snapshot_preview1", &adapter_bytes)
134 .context("failed to load adapter module")?
135 .validate(true);
136
137 let component_bytes = encoder
138 .encode()
139 .context("failed to encode wasm component")?;
140
141 fs::write(extension_dir.join("extension.wasm"), &component_bytes)
142 .context("failed to write extension.wasm")?;
143
144 Ok(())
145 }
146
147 async fn compile_grammar(
148 &self,
149 extension_dir: &Path,
150 grammar_name: Arc<str>,
151 grammar_metadata: GrammarManifestEntry,
152 ) -> Result<()> {
153 let clang_path = self.install_wasi_sdk_if_needed().await?;
154
155 let mut grammar_repo_dir = extension_dir.to_path_buf();
156 grammar_repo_dir.extend(["grammars", grammar_name.as_ref()]);
157
158 let mut grammar_wasm_path = grammar_repo_dir.clone();
159 grammar_wasm_path.set_extension("wasm");
160
161 log::info!("checking out {grammar_name} parser");
162 self.checkout_repo(
163 &grammar_repo_dir,
164 &grammar_metadata.repository,
165 &grammar_metadata.rev,
166 )?;
167
168 let src_path = grammar_repo_dir.join("src");
169 let parser_path = src_path.join("parser.c");
170 let scanner_path = src_path.join("scanner.c");
171
172 log::info!("compiling {grammar_name} parser");
173 let clang_output = Command::new(&clang_path)
174 .args(["-fPIC", "-shared", "-Os"])
175 .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
176 .arg("-o")
177 .arg(&grammar_wasm_path)
178 .arg("-I")
179 .arg(&src_path)
180 .arg(&parser_path)
181 .args(scanner_path.exists().then_some(scanner_path))
182 .output()
183 .context("failed to run clang")?;
184 if !clang_output.status.success() {
185 bail!(
186 "failed to compile {} parser with clang: {}",
187 grammar_name,
188 String::from_utf8_lossy(&clang_output.stderr),
189 );
190 }
191
192 Ok(())
193 }
194
195 fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
196 let git_dir = directory.join(".git");
197
198 if directory.exists() {
199 let remotes_output = Command::new("git")
200 .arg("--git-dir")
201 .arg(&git_dir)
202 .args(["remote", "-v"])
203 .output()?;
204 let has_remote = remotes_output.status.success()
205 && String::from_utf8_lossy(&remotes_output.stdout)
206 .lines()
207 .any(|line| {
208 let mut parts = line.split(|c: char| c.is_whitespace());
209 parts.next() == Some("origin") && parts.any(|part| part == url)
210 });
211 if !has_remote {
212 bail!(
213 "grammar directory '{}' already exists, but is not a git clone of '{}'",
214 directory.display(),
215 url
216 );
217 }
218 } else {
219 fs::create_dir_all(&directory).with_context(|| {
220 format!("failed to create grammar directory {}", directory.display(),)
221 })?;
222 let init_output = Command::new("git")
223 .arg("init")
224 .current_dir(&directory)
225 .output()?;
226 if !init_output.status.success() {
227 bail!(
228 "failed to run `git init` in directory '{}'",
229 directory.display()
230 );
231 }
232
233 let remote_add_output = Command::new("git")
234 .arg("--git-dir")
235 .arg(&git_dir)
236 .args(["remote", "add", "origin", url])
237 .output()
238 .context("failed to execute `git remote add`")?;
239 if !remote_add_output.status.success() {
240 bail!(
241 "failed to add remote {url} for git repository {}",
242 git_dir.display()
243 );
244 }
245 }
246
247 let fetch_output = Command::new("git")
248 .arg("--git-dir")
249 .arg(&git_dir)
250 .args(["fetch", "--depth", "1", "origin", &rev])
251 .output()
252 .context("failed to execute `git fetch`")?;
253 if !fetch_output.status.success() {
254 bail!(
255 "failed to fetch revision {} in directory '{}'",
256 rev,
257 directory.display()
258 );
259 }
260
261 let checkout_output = Command::new("git")
262 .arg("--git-dir")
263 .arg(&git_dir)
264 .args(["checkout", &rev])
265 .current_dir(&directory)
266 .output()
267 .context("failed to execute `git checkout`")?;
268 if !checkout_output.status.success() {
269 bail!(
270 "failed to checkout revision {} in directory '{}'",
271 rev,
272 directory.display()
273 );
274 }
275
276 Ok(())
277 }
278
279 fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
280 let rustc_output = Command::new("rustc")
281 .arg("--print")
282 .arg("sysroot")
283 .output()
284 .context("failed to run rustc")?;
285 if !rustc_output.status.success() {
286 bail!(
287 "failed to retrieve rust sysroot: {}",
288 String::from_utf8_lossy(&rustc_output.stderr)
289 );
290 }
291
292 let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
293 if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
294 return Ok(());
295 }
296
297 let output = Command::new("rustup")
298 .args(["target", "add", RUST_TARGET])
299 .stderr(Stdio::inherit())
300 .stdout(Stdio::inherit())
301 .output()
302 .context("failed to run `rustup target add`")?;
303 if !output.status.success() {
304 bail!("failed to install the `{RUST_TARGET}` target");
305 }
306
307 Ok(())
308 }
309
310 async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
311 let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
312 if let Ok(content) = fs::read(&cache_path) {
313 if wasmparser::Parser::is_core_wasm(&content) {
314 return Ok(content);
315 }
316 }
317
318 fs::remove_file(&cache_path).ok();
319
320 log::info!("downloading wasi adapter module");
321 let mut response = self
322 .http
323 .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
324 .await?;
325
326 let mut content = Vec::new();
327 let mut body = BufReader::new(response.body_mut());
328 body.read_to_end(&mut content).await?;
329
330 fs::write(&cache_path, &content)
331 .with_context(|| format!("failed to save file {}", cache_path.display()))?;
332
333 if !wasmparser::Parser::is_core_wasm(&content) {
334 bail!("downloaded wasi adapter is invalid");
335 }
336 Ok(content)
337 }
338
339 async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
340 let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
341 format!("{WASI_SDK_URL}/{asset_name}")
342 } else {
343 bail!("wasi-sdk is not available for platform {}", env::consts::OS);
344 };
345
346 let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
347 let mut clang_path = wasi_sdk_dir.clone();
348 clang_path.extend(["bin", "clang-17"]);
349
350 if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
351 return Ok(clang_path);
352 }
353
354 let mut tar_out_dir = wasi_sdk_dir.clone();
355 tar_out_dir.set_extension("archive");
356
357 fs::remove_dir_all(&wasi_sdk_dir).ok();
358 fs::remove_dir_all(&tar_out_dir).ok();
359
360 let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
361 let body = BufReader::new(response.body_mut());
362 let body = GzipDecoder::new(body);
363 let tar = Archive::new(body);
364 tar.unpack(&tar_out_dir)
365 .await
366 .context("failed to unpack wasi-sdk archive")?;
367
368 let inner_dir = fs::read_dir(&tar_out_dir)?
369 .next()
370 .ok_or_else(|| anyhow!("no content"))?
371 .context("failed to read contents of extracted wasi archive directory")?
372 .path();
373 fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
374 fs::remove_dir_all(&tar_out_dir).ok();
375
376 Ok(clang_path)
377 }
378}