extension_builder.rs

  1use crate::wasm_host::parse_wasm_extension_version;
  2use crate::ExtensionManifest;
  3use crate::{extension_manifest::ExtensionLibraryKind, GrammarManifestEntry};
  4use anyhow::{anyhow, bail, Context as _, Result};
  5use async_compression::futures::bufread::GzipDecoder;
  6use async_tar::Archive;
  7use futures::io::BufReader;
  8use futures::AsyncReadExt;
  9use serde::Deserialize;
 10use std::{
 11    env, fs, mem,
 12    path::{Path, PathBuf},
 13    process::{Command, Stdio},
 14    sync::Arc,
 15};
 16use util::http::{self, AsyncBody, HttpClient};
 17use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 18use wasmparser::Parser;
 19use wit_component::ComponentEncoder;
 20
/// The Rust compilation target used for extensions. It is passed to `cargo build --target`
/// and, when missing from the toolchain, to `rustup target add`.
///
/// Currently, we compile with Rust's `wasm32-wasi` target, which works with WASI `preview1`.
/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
/// module, which implements the `preview1` interface in terms of `preview2`.
///
/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
/// not need the adapter anymore.
const RUST_TARGET: &str = "wasm32-wasi";
/// Download location of the WASI `preview1` adapter module described above. The downloaded
/// module is handed to the component encoder under the name `wasi_snapshot_preview1`.
const WASI_ADAPTER_URL: &str =
    "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
 30
/// Base URL of the `wasi-sdk` release from which the SDK archive is downloaded. The archive
/// file name ([`WASI_SDK_ASSET_NAME`]) is appended to it.
///
/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
/// and clang's runtime library. The `wasi-sdk` provides these binaries.
///
/// Once Clang 17 and its wasm target are available via system package managers, we won't need
/// to download this.
const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";
/// File name of the `wasi-sdk` release archive for the operating system this crate is compiled
/// for, or `None` when that operating system is not macOS, Linux, or Windows.
const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "macos") {
    Some("wasi-sdk-21.0-macos.tar.gz")
} else if cfg!(target_os = "linux") {
    Some("wasi-sdk-21.0-linux.tar.gz")
} else if cfg!(target_os = "windows") {
    Some("wasi-sdk-21.0.m-mingw.tar.gz")
} else {
    None
};
 46
/// Compiles extensions: builds a Rust extension library into a WASM component and compiles
/// the Tree-sitter grammars listed in the extension manifest to WASM.
pub struct ExtensionBuilder {
    /// Directory in which downloaded build tools (the WASI adapter module and the `wasi-sdk`)
    /// are stored so that they can be reused by later builds.
    cache_dir: PathBuf,
    /// HTTP client used to download those build tools.
    pub http: Arc<dyn HttpClient>,
}
 51
/// Options controlling how an extension is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, the Rust extension is built with `cargo build --release` and the resulting
    /// module is read from the `release` output directory; otherwise from `debug`.
    pub release: bool,
}
 55
/// The subset of an extension's `Cargo.toml` that the builder deserializes.
#[derive(Deserialize)]
struct CargoToml {
    /// The `package` table of the manifest.
    package: CargoTomlPackage,
}
 60
/// The fields of the `package` table of `Cargo.toml` that the builder needs.
#[derive(Deserialize)]
struct CargoTomlPackage {
    /// The package name; used (with `-` replaced by `_`) to locate the compiled `.wasm` file.
    name: String,
}
 65
 66impl ExtensionBuilder {
 67    pub fn new(cache_dir: PathBuf) -> Self {
 68        Self {
 69            cache_dir,
 70            http: http::client(),
 71        }
 72    }
 73
 74    pub async fn compile_extension(
 75        &self,
 76        extension_dir: &Path,
 77        extension_manifest: &mut ExtensionManifest,
 78        options: CompileExtensionOptions,
 79    ) -> Result<()> {
 80        populate_defaults(extension_manifest, &extension_dir)?;
 81
 82        if extension_dir.is_relative() {
 83            bail!(
 84                "extension dir {} is not an absolute path",
 85                extension_dir.display()
 86            );
 87        }
 88
 89        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 90
 91        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
 92            log::info!("compiling Rust extension {}", extension_dir.display());
 93            self.compile_rust_extension(extension_dir, extension_manifest, options)
 94                .await
 95                .context("failed to compile Rust extension")?;
 96        }
 97
 98        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
 99            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
100                .await
101                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
102        }
103
104        log::info!("finished compiling extension {}", extension_dir.display());
105        Ok(())
106    }
107
108    async fn compile_rust_extension(
109        &self,
110        extension_dir: &Path,
111        manifest: &mut ExtensionManifest,
112        options: CompileExtensionOptions,
113    ) -> Result<(), anyhow::Error> {
114        self.install_rust_wasm_target_if_needed()?;
115        let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
116
117        let cargo_toml_content = fs::read_to_string(&extension_dir.join("Cargo.toml"))?;
118        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
119
120        log::info!("compiling rust extension {}", extension_dir.display());
121        let output = Command::new("cargo")
122            .args(["build", "--target", RUST_TARGET])
123            .args(options.release.then_some("--release"))
124            .arg("--target-dir")
125            .arg(extension_dir.join("target"))
126            .current_dir(&extension_dir)
127            .output()
128            .context("failed to run `cargo`")?;
129        if !output.status.success() {
130            bail!(
131                "failed to build extension {}",
132                String::from_utf8_lossy(&output.stderr)
133            );
134        }
135
136        let mut wasm_path = PathBuf::from(extension_dir);
137        wasm_path.extend([
138            "target",
139            RUST_TARGET,
140            if options.release { "release" } else { "debug" },
141            &cargo_toml
142                .package
143                .name
144                // The wasm32-wasi target normalizes `-` in package names to `_` in the resulting `.wasm` file.
145                .replace('-', "_"),
146        ]);
147        wasm_path.set_extension("wasm");
148
149        let wasm_bytes = fs::read(&wasm_path)
150            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
151
152        let encoder = ComponentEncoder::default()
153            .module(&wasm_bytes)?
154            .adapter("wasi_snapshot_preview1", &adapter_bytes)
155            .context("failed to load adapter module")?
156            .validate(true);
157
158        let component_bytes = encoder
159            .encode()
160            .context("failed to encode wasm component")?;
161
162        let component_bytes = self
163            .strip_custom_sections(&component_bytes)
164            .context("failed to strip debug sections from wasm component")?;
165
166        let wasm_extension_api_version =
167            parse_wasm_extension_version(&manifest.id, &component_bytes)
168                .context("compiled wasm did not contain a valid zed extension api version")?;
169        manifest.lib.version = Some(wasm_extension_api_version);
170
171        fs::write(extension_dir.join("extension.wasm"), &component_bytes)
172            .context("failed to write extension.wasm")?;
173
174        Ok(())
175    }
176
177    async fn compile_grammar(
178        &self,
179        extension_dir: &Path,
180        grammar_name: &str,
181        grammar_metadata: &GrammarManifestEntry,
182    ) -> Result<()> {
183        let clang_path = self.install_wasi_sdk_if_needed().await?;
184
185        let mut grammar_repo_dir = extension_dir.to_path_buf();
186        grammar_repo_dir.extend(["grammars", grammar_name]);
187
188        let mut grammar_wasm_path = grammar_repo_dir.clone();
189        grammar_wasm_path.set_extension("wasm");
190
191        log::info!("checking out {grammar_name} parser");
192        self.checkout_repo(
193            &grammar_repo_dir,
194            &grammar_metadata.repository,
195            &grammar_metadata.rev,
196        )?;
197
198        let src_path = grammar_repo_dir.join("src");
199        let parser_path = src_path.join("parser.c");
200        let scanner_path = src_path.join("scanner.c");
201
202        log::info!("compiling {grammar_name} parser");
203        let clang_output = Command::new(&clang_path)
204            .args(["-fPIC", "-shared", "-Os"])
205            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
206            .arg("-o")
207            .arg(&grammar_wasm_path)
208            .arg("-I")
209            .arg(&src_path)
210            .arg(&parser_path)
211            .args(scanner_path.exists().then_some(scanner_path))
212            .output()
213            .context("failed to run clang")?;
214        if !clang_output.status.success() {
215            bail!(
216                "failed to compile {} parser with clang: {}",
217                grammar_name,
218                String::from_utf8_lossy(&clang_output.stderr),
219            );
220        }
221
222        Ok(())
223    }
224
225    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
226        let git_dir = directory.join(".git");
227
228        if directory.exists() {
229            let remotes_output = Command::new("git")
230                .arg("--git-dir")
231                .arg(&git_dir)
232                .args(["remote", "-v"])
233                .output()?;
234            let has_remote = remotes_output.status.success()
235                && String::from_utf8_lossy(&remotes_output.stdout)
236                    .lines()
237                    .any(|line| {
238                        let mut parts = line.split(|c: char| c.is_whitespace());
239                        parts.next() == Some("origin") && parts.any(|part| part == url)
240                    });
241            if !has_remote {
242                bail!(
243                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
244                    directory.display(),
245                    url
246                );
247            }
248        } else {
249            fs::create_dir_all(&directory).with_context(|| {
250                format!("failed to create grammar directory {}", directory.display(),)
251            })?;
252            let init_output = Command::new("git")
253                .arg("init")
254                .current_dir(&directory)
255                .output()?;
256            if !init_output.status.success() {
257                bail!(
258                    "failed to run `git init` in directory '{}'",
259                    directory.display()
260                );
261            }
262
263            let remote_add_output = Command::new("git")
264                .arg("--git-dir")
265                .arg(&git_dir)
266                .args(["remote", "add", "origin", url])
267                .output()
268                .context("failed to execute `git remote add`")?;
269            if !remote_add_output.status.success() {
270                bail!(
271                    "failed to add remote {url} for git repository {}",
272                    git_dir.display()
273                );
274            }
275        }
276
277        let fetch_output = Command::new("git")
278            .arg("--git-dir")
279            .arg(&git_dir)
280            .args(["fetch", "--depth", "1", "origin", &rev])
281            .output()
282            .context("failed to execute `git fetch`")?;
283
284        let checkout_output = Command::new("git")
285            .arg("--git-dir")
286            .arg(&git_dir)
287            .args(["checkout", &rev])
288            .current_dir(&directory)
289            .output()
290            .context("failed to execute `git checkout`")?;
291        if !checkout_output.status.success() {
292            if !fetch_output.status.success() {
293                bail!(
294                    "failed to fetch revision {} in directory '{}'",
295                    rev,
296                    directory.display()
297                );
298            }
299            bail!(
300                "failed to checkout revision {} in directory '{}': {}",
301                rev,
302                directory.display(),
303                String::from_utf8_lossy(&checkout_output.stderr)
304            );
305        }
306
307        Ok(())
308    }
309
310    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
311        let rustc_output = Command::new("rustc")
312            .arg("--print")
313            .arg("sysroot")
314            .output()
315            .context("failed to run rustc")?;
316        if !rustc_output.status.success() {
317            bail!(
318                "failed to retrieve rust sysroot: {}",
319                String::from_utf8_lossy(&rustc_output.stderr)
320            );
321        }
322
323        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
324        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
325            return Ok(());
326        }
327
328        let output = Command::new("rustup")
329            .args(["target", "add", RUST_TARGET])
330            .stderr(Stdio::inherit())
331            .stdout(Stdio::inherit())
332            .output()
333            .context("failed to run `rustup target add`")?;
334        if !output.status.success() {
335            bail!("failed to install the `{RUST_TARGET}` target");
336        }
337
338        Ok(())
339    }
340
341    async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
342        let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
343        if let Ok(content) = fs::read(&cache_path) {
344            if Parser::is_core_wasm(&content) {
345                return Ok(content);
346            }
347        }
348
349        fs::remove_file(&cache_path).ok();
350
351        log::info!(
352            "downloading wasi adapter module to {}",
353            cache_path.display()
354        );
355        let mut response = self
356            .http
357            .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
358            .await?;
359
360        let mut content = Vec::new();
361        let mut body = BufReader::new(response.body_mut());
362        body.read_to_end(&mut content).await?;
363
364        fs::write(&cache_path, &content)
365            .with_context(|| format!("failed to save file {}", cache_path.display()))?;
366
367        if !Parser::is_core_wasm(&content) {
368            bail!("downloaded wasi adapter is invalid");
369        }
370        Ok(content)
371    }
372
373    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
374        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
375            format!("{WASI_SDK_URL}/{asset_name}")
376        } else {
377            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
378        };
379
380        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
381        let mut clang_path = wasi_sdk_dir.clone();
382        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
383
384        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
385            return Ok(clang_path);
386        }
387
388        let mut tar_out_dir = wasi_sdk_dir.clone();
389        tar_out_dir.set_extension("archive");
390
391        fs::remove_dir_all(&wasi_sdk_dir).ok();
392        fs::remove_dir_all(&tar_out_dir).ok();
393
394        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
395        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
396        let body = BufReader::new(response.body_mut());
397        let body = GzipDecoder::new(body);
398        let tar = Archive::new(body);
399        tar.unpack(&tar_out_dir)
400            .await
401            .context("failed to unpack wasi-sdk archive")?;
402
403        let inner_dir = fs::read_dir(&tar_out_dir)?
404            .next()
405            .ok_or_else(|| anyhow!("no content"))?
406            .context("failed to read contents of extracted wasi archive directory")?
407            .path();
408        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
409        fs::remove_dir_all(&tar_out_dir).ok();
410
411        Ok(clang_path)
412    }
413
414    // This was adapted from:
415    // https://github.com/bytecodealliance/wasm-tools/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
416    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
417        use wasmparser::Payload::*;
418
419        let strip_custom_section = |name: &str| name.starts_with(".debug");
420
421        let mut output = Vec::new();
422        let mut stack = Vec::new();
423
424        for payload in Parser::new(0).parse_all(input) {
425            let payload = payload?;
426
427            // Track nesting depth, so that we don't mess with inner producer sections:
428            match payload {
429                Version { encoding, .. } => {
430                    output.extend_from_slice(match encoding {
431                        wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
432                        wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
433                    });
434                }
435                ModuleSection { .. } | ComponentSection { .. } => {
436                    stack.push(mem::take(&mut output));
437                    continue;
438                }
439                End { .. } => {
440                    let mut parent = match stack.pop() {
441                        Some(c) => c,
442                        None => break,
443                    };
444                    if output.starts_with(&wasm_encoder::Component::HEADER) {
445                        parent.push(ComponentSectionId::Component as u8);
446                        output.encode(&mut parent);
447                    } else {
448                        parent.push(ComponentSectionId::CoreModule as u8);
449                        output.encode(&mut parent);
450                    }
451                    output = parent;
452                }
453                _ => {}
454            }
455
456            match &payload {
457                CustomSection(c) => {
458                    if strip_custom_section(c.name()) {
459                        continue;
460                    }
461                }
462
463                _ => {}
464            }
465
466            if let Some((id, range)) = payload.as_section() {
467                RawSection {
468                    id,
469                    data: &input[range],
470                }
471                .append_to(&mut output);
472            }
473        }
474
475        Ok(output)
476    }
477}
478
479fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
480    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
481    // contents of the computed fields, since we don't care what the existing values are.
482    if manifest.schema_version.is_v0() {
483        manifest.languages.clear();
484        manifest.grammars.clear();
485        manifest.themes.clear();
486    }
487
488    let cargo_toml_path = extension_path.join("Cargo.toml");
489    if cargo_toml_path.exists() {
490        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
491    }
492
493    let languages_dir = extension_path.join("languages");
494    if languages_dir.exists() {
495        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
496            let entry = entry?;
497            let language_dir = entry.path();
498            let config_path = language_dir.join("config.toml");
499            if config_path.exists() {
500                let relative_language_dir =
501                    language_dir.strip_prefix(extension_path)?.to_path_buf();
502                if !manifest.languages.contains(&relative_language_dir) {
503                    manifest.languages.push(relative_language_dir);
504                }
505            }
506        }
507    }
508
509    let themes_dir = extension_path.join("themes");
510    if themes_dir.exists() {
511        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
512            let entry = entry?;
513            let theme_path = entry.path();
514            if theme_path.extension() == Some("json".as_ref()) {
515                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
516                if !manifest.themes.contains(&relative_theme_path) {
517                    manifest.themes.push(relative_theme_path);
518                }
519            }
520        }
521    }
522
523    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
524    // the manifest using the contents of the `grammars` directory.
525    if manifest.schema_version.is_v0() {
526        let grammars_dir = extension_path.join("grammars");
527        if grammars_dir.exists() {
528            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
529                let entry = entry?;
530                let grammar_path = entry.path();
531                if grammar_path.extension() == Some("toml".as_ref()) {
532                    #[derive(Deserialize)]
533                    struct GrammarConfigToml {
534                        pub repository: String,
535                        pub commit: String,
536                    }
537
538                    let grammar_config = fs::read_to_string(&grammar_path)?;
539                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
540
541                    let grammar_name = grammar_path
542                        .file_stem()
543                        .and_then(|stem| stem.to_str())
544                        .ok_or_else(|| anyhow!("no grammar name"))?;
545                    if !manifest.grammars.contains_key(grammar_name) {
546                        manifest.grammars.insert(
547                            grammar_name.into(),
548                            GrammarManifestEntry {
549                                repository: grammar_config.repository,
550                                rev: grammar_config.commit,
551                            },
552                        );
553                    }
554                }
555            }
556        }
557    }
558
559    Ok(())
560}