extension_builder.rs

  1use crate::{
  2    parse_wasm_extension_version, ExtensionLibraryKind, ExtensionManifest, GrammarManifestEntry,
  3};
  4use anyhow::{anyhow, bail, Context as _, Result};
  5use async_compression::futures::bufread::GzipDecoder;
  6use async_tar::Archive;
  7use convert_case::{Case, Casing as _};
  8use futures::io::BufReader;
  9use futures::AsyncReadExt;
 10use http_client::{self, AsyncBody, HttpClient};
 11use serde::Deserialize;
 12use std::{
 13    env, fs, mem,
 14    path::{Path, PathBuf},
 15    process::Stdio,
 16    sync::Arc,
 17};
 18use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 19use wasmparser::Parser;
 20use wit_component::ComponentEncoder;
 21
/// Currently, we compile with Rust's `wasm32-wasip1` target, which works with WASI `preview1`.
/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
/// module, which implements the `preview1` interface in terms of `preview2`.
///
/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
/// not need the adapter anymore.
const RUST_TARGET: &str = "wasm32-wasip1";
/// Download location of the `preview1` adapter module: the `reactor` variant published with
/// the wasmtime `v18.0.2` release.
const WASI_ADAPTER_URL: &str = "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
 30
/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
/// and clang's runtime library. The `wasi-sdk` provides these binaries.
///
/// Once Clang 17 and its wasm target are available via system package managers, we won't need
/// to download this.
///
/// This is the base URL of the `wasi-sdk-21` release assets; note that it already ends with `/`.
const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";
/// File name of the `wasi-sdk` release archive to download for the OS this code is compiled
/// for, or `None` when none of the branches below matches. FreeBSD uses the Linux archive.
const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "macos") {
    Some("wasi-sdk-21.0-macos.tar.gz")
} else if cfg!(any(target_os = "linux", target_os = "freebsd")) {
    Some("wasi-sdk-21.0-linux.tar.gz")
} else if cfg!(target_os = "windows") {
    Some("wasi-sdk-21.0.m-mingw.tar.gz")
} else {
    None
};
 46
/// Compiles extensions: builds an extension's Rust crate into a WASM component and compiles
/// its Tree-sitter grammars to WASM, downloading the required tools on demand.
pub struct ExtensionBuilder {
    /// Directory in which downloaded tools (the WASI adapter module and `wasi-sdk`) are cached.
    cache_dir: PathBuf,
    /// HTTP client used to download those tools.
    pub http: Arc<dyn HttpClient>,
}
 51
/// Options that control how an extension is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, `cargo build` is invoked with `--release` and the compiled module is read
    /// from the `release` output directory instead of `debug`.
    pub release: bool,
}
 55
/// The subset of an extension's `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoToml {
    /// The `[package]` table.
    package: CargoTomlPackage,
}
 60
/// The subset of the `[package]` table of `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoTomlPackage {
    /// Package name; used to derive the file name of the compiled `.wasm` module.
    name: String,
}
 65
 66impl ExtensionBuilder {
 67    pub fn new(http_client: Arc<dyn HttpClient>, cache_dir: PathBuf) -> Self {
 68        Self {
 69            cache_dir,
 70            http: http_client,
 71        }
 72    }
 73
 74    pub async fn compile_extension(
 75        &self,
 76        extension_dir: &Path,
 77        extension_manifest: &mut ExtensionManifest,
 78        options: CompileExtensionOptions,
 79    ) -> Result<()> {
 80        populate_defaults(extension_manifest, extension_dir)?;
 81
 82        if extension_dir.is_relative() {
 83            bail!(
 84                "extension dir {} is not an absolute path",
 85                extension_dir.display()
 86            );
 87        }
 88
 89        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 90
 91        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
 92            log::info!("compiling Rust extension {}", extension_dir.display());
 93            self.compile_rust_extension(extension_dir, extension_manifest, options)
 94                .await
 95                .context("failed to compile Rust extension")?;
 96            log::info!("compiled Rust extension {}", extension_dir.display());
 97        }
 98
 99        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
100            let snake_cased_grammar_name = grammar_name.to_case(Case::Snake);
101            if grammar_name.as_ref() != snake_cased_grammar_name.as_str() {
102                bail!(
103                    "grammar name '{grammar_name}' must be written in snake_case: {snake_cased_grammar_name}"
104                );
105            }
106
107            log::info!(
108                "compiling grammar {grammar_name} for extension {}",
109                extension_dir.display()
110            );
111            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
112                .await
113                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
114            log::info!(
115                "compiled grammar {grammar_name} for extension {}",
116                extension_dir.display()
117            );
118        }
119
120        log::info!("finished compiling extension {}", extension_dir.display());
121        Ok(())
122    }
123
124    async fn compile_rust_extension(
125        &self,
126        extension_dir: &Path,
127        manifest: &mut ExtensionManifest,
128        options: CompileExtensionOptions,
129    ) -> Result<(), anyhow::Error> {
130        self.install_rust_wasm_target_if_needed()?;
131        let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
132
133        let cargo_toml_content = fs::read_to_string(extension_dir.join("Cargo.toml"))?;
134        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
135
136        log::info!(
137            "compiling Rust crate for extension {}",
138            extension_dir.display()
139        );
140        let output = util::command::new_std_command("cargo")
141            .args(["build", "--target", RUST_TARGET])
142            .args(options.release.then_some("--release"))
143            .arg("--target-dir")
144            .arg(extension_dir.join("target"))
145            // WASI builds do not work with sccache and just stuck, so disable it.
146            .env("RUSTC_WRAPPER", "")
147            .current_dir(extension_dir)
148            .output()
149            .context("failed to run `cargo`")?;
150        if !output.status.success() {
151            bail!(
152                "failed to build extension {}",
153                String::from_utf8_lossy(&output.stderr)
154            );
155        }
156
157        log::info!(
158            "compiled Rust crate for extension {}",
159            extension_dir.display()
160        );
161
162        let mut wasm_path = PathBuf::from(extension_dir);
163        wasm_path.extend([
164            "target",
165            RUST_TARGET,
166            if options.release { "release" } else { "debug" },
167            &cargo_toml
168                .package
169                .name
170                // The wasm32-wasip1 target normalizes `-` in package names to `_` in the resulting `.wasm` file.
171                .replace('-', "_"),
172        ]);
173        wasm_path.set_extension("wasm");
174
175        let wasm_bytes = fs::read(&wasm_path)
176            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
177
178        let mut encoder = ComponentEncoder::default()
179            .module(&wasm_bytes)?
180            .adapter("wasi_snapshot_preview1", &adapter_bytes)
181            .context("failed to load adapter module")?
182            .validate(true);
183
184        log::info!(
185            "encoding wasm component for extension {}",
186            extension_dir.display()
187        );
188
189        let component_bytes = encoder
190            .encode()
191            .context("failed to encode wasm component")?;
192
193        let component_bytes = self
194            .strip_custom_sections(&component_bytes)
195            .context("failed to strip debug sections from wasm component")?;
196
197        let wasm_extension_api_version =
198            parse_wasm_extension_version(&manifest.id, &component_bytes)
199                .context("compiled wasm did not contain a valid zed extension api version")?;
200        manifest.lib.version = Some(wasm_extension_api_version);
201
202        let extension_file = extension_dir.join("extension.wasm");
203        fs::write(extension_file.clone(), &component_bytes)
204            .context("failed to write extension.wasm")?;
205
206        log::info!(
207            "extension {} written to {}",
208            extension_dir.display(),
209            extension_file.display()
210        );
211
212        Ok(())
213    }
214
215    async fn compile_grammar(
216        &self,
217        extension_dir: &Path,
218        grammar_name: &str,
219        grammar_metadata: &GrammarManifestEntry,
220    ) -> Result<()> {
221        let clang_path = self.install_wasi_sdk_if_needed().await?;
222
223        let mut grammar_repo_dir = extension_dir.to_path_buf();
224        grammar_repo_dir.extend(["grammars", grammar_name]);
225
226        let mut grammar_wasm_path = grammar_repo_dir.clone();
227        grammar_wasm_path.set_extension("wasm");
228
229        log::info!("checking out {grammar_name} parser");
230        self.checkout_repo(
231            &grammar_repo_dir,
232            &grammar_metadata.repository,
233            &grammar_metadata.rev,
234        )?;
235
236        let base_grammar_path = grammar_metadata
237            .path
238            .as_ref()
239            .map(|path| grammar_repo_dir.join(path))
240            .unwrap_or(grammar_repo_dir);
241
242        let src_path = base_grammar_path.join("src");
243        let parser_path = src_path.join("parser.c");
244        let scanner_path = src_path.join("scanner.c");
245
246        log::info!("compiling {grammar_name} parser");
247        let clang_output = util::command::new_std_command(&clang_path)
248            .args(["-fPIC", "-shared", "-Os"])
249            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
250            .arg("-o")
251            .arg(&grammar_wasm_path)
252            .arg("-I")
253            .arg(&src_path)
254            .arg(&parser_path)
255            .args(scanner_path.exists().then_some(scanner_path))
256            .output()
257            .context("failed to run clang")?;
258
259        if !clang_output.status.success() {
260            bail!(
261                "failed to compile {} parser with clang: {}",
262                grammar_name,
263                String::from_utf8_lossy(&clang_output.stderr),
264            );
265        }
266
267        Ok(())
268    }
269
270    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
271        let git_dir = directory.join(".git");
272
273        if directory.exists() {
274            let remotes_output = util::command::new_std_command("git")
275                .arg("--git-dir")
276                .arg(&git_dir)
277                .args(["remote", "-v"])
278                .output()?;
279            let has_remote = remotes_output.status.success()
280                && String::from_utf8_lossy(&remotes_output.stdout)
281                    .lines()
282                    .any(|line| {
283                        let mut parts = line.split(|c: char| c.is_whitespace());
284                        parts.next() == Some("origin") && parts.any(|part| part == url)
285                    });
286            if !has_remote {
287                bail!(
288                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
289                    directory.display(),
290                    url
291                );
292            }
293        } else {
294            fs::create_dir_all(directory).with_context(|| {
295                format!("failed to create grammar directory {}", directory.display(),)
296            })?;
297            let init_output = util::command::new_std_command("git")
298                .arg("init")
299                .current_dir(directory)
300                .output()?;
301            if !init_output.status.success() {
302                bail!(
303                    "failed to run `git init` in directory '{}'",
304                    directory.display()
305                );
306            }
307
308            let remote_add_output = util::command::new_std_command("git")
309                .arg("--git-dir")
310                .arg(&git_dir)
311                .args(["remote", "add", "origin", url])
312                .output()
313                .context("failed to execute `git remote add`")?;
314            if !remote_add_output.status.success() {
315                bail!(
316                    "failed to add remote {url} for git repository {}",
317                    git_dir.display()
318                );
319            }
320        }
321
322        let fetch_output = util::command::new_std_command("git")
323            .arg("--git-dir")
324            .arg(&git_dir)
325            .args(["fetch", "--depth", "1", "origin", rev])
326            .output()
327            .context("failed to execute `git fetch`")?;
328
329        let checkout_output = util::command::new_std_command("git")
330            .arg("--git-dir")
331            .arg(&git_dir)
332            .args(["checkout", rev])
333            .current_dir(directory)
334            .output()
335            .context("failed to execute `git checkout`")?;
336        if !checkout_output.status.success() {
337            if !fetch_output.status.success() {
338                bail!(
339                    "failed to fetch revision {} in directory '{}'",
340                    rev,
341                    directory.display()
342                );
343            }
344            bail!(
345                "failed to checkout revision {} in directory '{}': {}",
346                rev,
347                directory.display(),
348                String::from_utf8_lossy(&checkout_output.stderr)
349            );
350        }
351
352        Ok(())
353    }
354
355    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
356        let rustc_output = util::command::new_std_command("rustc")
357            .arg("--print")
358            .arg("sysroot")
359            .output()
360            .context("failed to run rustc")?;
361        if !rustc_output.status.success() {
362            bail!(
363                "failed to retrieve rust sysroot: {}",
364                String::from_utf8_lossy(&rustc_output.stderr)
365            );
366        }
367
368        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
369        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
370            return Ok(());
371        }
372
373        let output = util::command::new_std_command("rustup")
374            .args(["target", "add", RUST_TARGET])
375            .stderr(Stdio::piped())
376            .stdout(Stdio::inherit())
377            .output()
378            .context("failed to run `rustup target add`")?;
379        if !output.status.success() {
380            bail!(
381                "failed to install the `{RUST_TARGET}` target: {}",
382                String::from_utf8_lossy(&rustc_output.stderr)
383            );
384        }
385
386        Ok(())
387    }
388
389    async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
390        let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
391        if let Ok(content) = fs::read(&cache_path) {
392            if Parser::is_core_wasm(&content) {
393                return Ok(content);
394            }
395        }
396
397        fs::remove_file(&cache_path).ok();
398
399        log::info!(
400            "downloading wasi adapter module to {}",
401            cache_path.display()
402        );
403        let mut response = self
404            .http
405            .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
406            .await?;
407
408        let mut content = Vec::new();
409        let mut body = BufReader::new(response.body_mut());
410        body.read_to_end(&mut content).await?;
411
412        fs::write(&cache_path, &content)
413            .with_context(|| format!("failed to save file {}", cache_path.display()))?;
414
415        if !Parser::is_core_wasm(&content) {
416            bail!("downloaded wasi adapter is invalid");
417        }
418        Ok(content)
419    }
420
421    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
422        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
423            format!("{WASI_SDK_URL}/{asset_name}")
424        } else {
425            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
426        };
427
428        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
429        let mut clang_path = wasi_sdk_dir.clone();
430        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
431
432        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
433            return Ok(clang_path);
434        }
435
436        let mut tar_out_dir = wasi_sdk_dir.clone();
437        tar_out_dir.set_extension("archive");
438
439        fs::remove_dir_all(&wasi_sdk_dir).ok();
440        fs::remove_dir_all(&tar_out_dir).ok();
441
442        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
443        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
444        let body = BufReader::new(response.body_mut());
445        let body = GzipDecoder::new(body);
446        let tar = Archive::new(body);
447
448        tar.unpack(&tar_out_dir)
449            .await
450            .context("failed to unpack wasi-sdk archive")?;
451
452        let inner_dir = fs::read_dir(&tar_out_dir)?
453            .next()
454            .ok_or_else(|| anyhow!("no content"))?
455            .context("failed to read contents of extracted wasi archive directory")?
456            .path();
457        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
458        fs::remove_dir_all(&tar_out_dir).ok();
459
460        Ok(clang_path)
461    }
462
463    // This was adapted from:
464    // https://github.com/bytecodealliance/wasm-tools/blob/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
465    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
466        use wasmparser::Payload::*;
467
468        let strip_custom_section = |name: &str| name.starts_with(".debug");
469
470        let mut output = Vec::new();
471        let mut stack = Vec::new();
472
473        for payload in Parser::new(0).parse_all(input) {
474            let payload = payload?;
475            let component_header = wasm_encoder::Component::HEADER;
476            let module_header = wasm_encoder::Module::HEADER;
477
478            // Track nesting depth, so that we don't mess with inner producer sections:
479            match payload {
480                Version { encoding, .. } => {
481                    output.extend_from_slice(match encoding {
482                        wasmparser::Encoding::Component => &component_header,
483                        wasmparser::Encoding::Module => &module_header,
484                    });
485                }
486                ModuleSection { .. } | ComponentSection { .. } => {
487                    stack.push(mem::take(&mut output));
488                    continue;
489                }
490                End { .. } => {
491                    let mut parent = match stack.pop() {
492                        Some(c) => c,
493                        None => break,
494                    };
495                    if output.starts_with(&component_header) {
496                        parent.push(ComponentSectionId::Component as u8);
497                        output.encode(&mut parent);
498                    } else {
499                        parent.push(ComponentSectionId::CoreModule as u8);
500                        output.encode(&mut parent);
501                    }
502                    output = parent;
503                }
504                _ => {}
505            }
506
507            if let CustomSection(c) = &payload {
508                if strip_custom_section(c.name()) {
509                    continue;
510                }
511            }
512
513            if let Some((id, range)) = payload.as_section() {
514                RawSection {
515                    id,
516                    data: &input[range],
517                }
518                .append_to(&mut output);
519            }
520        }
521
522        Ok(output)
523    }
524}
525
526fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
527    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
528    // contents of the computed fields, since we don't care what the existing values are.
529    if manifest.schema_version.is_v0() {
530        manifest.languages.clear();
531        manifest.grammars.clear();
532        manifest.themes.clear();
533    }
534
535    let cargo_toml_path = extension_path.join("Cargo.toml");
536    if cargo_toml_path.exists() {
537        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
538    }
539
540    let languages_dir = extension_path.join("languages");
541    if languages_dir.exists() {
542        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
543            let entry = entry?;
544            let language_dir = entry.path();
545            let config_path = language_dir.join("config.toml");
546            if config_path.exists() {
547                let relative_language_dir =
548                    language_dir.strip_prefix(extension_path)?.to_path_buf();
549                if !manifest.languages.contains(&relative_language_dir) {
550                    manifest.languages.push(relative_language_dir);
551                }
552            }
553        }
554    }
555
556    let themes_dir = extension_path.join("themes");
557    if themes_dir.exists() {
558        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
559            let entry = entry?;
560            let theme_path = entry.path();
561            if theme_path.extension() == Some("json".as_ref()) {
562                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
563                if !manifest.themes.contains(&relative_theme_path) {
564                    manifest.themes.push(relative_theme_path);
565                }
566            }
567        }
568    }
569
570    let icon_themes_dir = extension_path.join("icon_themes");
571    if icon_themes_dir.exists() {
572        for entry in fs::read_dir(&icon_themes_dir).context("failed to list icon themes dir")? {
573            let entry = entry?;
574            let icon_theme_path = entry.path();
575            if icon_theme_path.extension() == Some("json".as_ref()) {
576                let relative_icon_theme_path =
577                    icon_theme_path.strip_prefix(extension_path)?.to_path_buf();
578                if !manifest.icon_themes.contains(&relative_icon_theme_path) {
579                    manifest.icon_themes.push(relative_icon_theme_path);
580                }
581            }
582        }
583    }
584
585    let snippets_json_path = extension_path.join("snippets.json");
586    if snippets_json_path.exists() {
587        manifest.snippets = Some(snippets_json_path);
588    }
589
590    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
591    // the manifest using the contents of the `grammars` directory.
592    if manifest.schema_version.is_v0() {
593        let grammars_dir = extension_path.join("grammars");
594        if grammars_dir.exists() {
595            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
596                let entry = entry?;
597                let grammar_path = entry.path();
598                if grammar_path.extension() == Some("toml".as_ref()) {
599                    #[derive(Deserialize)]
600                    struct GrammarConfigToml {
601                        pub repository: String,
602                        pub commit: String,
603                        #[serde(default)]
604                        pub path: Option<String>,
605                    }
606
607                    let grammar_config = fs::read_to_string(&grammar_path)?;
608                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
609
610                    let grammar_name = grammar_path
611                        .file_stem()
612                        .and_then(|stem| stem.to_str())
613                        .ok_or_else(|| anyhow!("no grammar name"))?;
614                    if !manifest.grammars.contains_key(grammar_name) {
615                        manifest.grammars.insert(
616                            grammar_name.into(),
617                            GrammarManifestEntry {
618                                repository: grammar_config.repository,
619                                rev: grammar_config.commit,
620                                path: grammar_config.path,
621                            },
622                        );
623                    }
624                }
625            }
626        }
627    }
628
629    Ok(())
630}