extension_builder.rs

  1use crate::{
  2    parse_wasm_extension_version, ExtensionLibraryKind, ExtensionManifest, GrammarManifestEntry,
  3};
  4use anyhow::{anyhow, bail, Context as _, Result};
  5use async_compression::futures::bufread::GzipDecoder;
  6use async_tar::Archive;
  7use convert_case::{Case, Casing as _};
  8use futures::io::BufReader;
  9use futures::AsyncReadExt;
 10use http_client::{self, AsyncBody, HttpClient};
 11use serde::Deserialize;
 12use std::{
 13    env, fs, mem,
 14    path::{Path, PathBuf},
 15    process::Stdio,
 16    sync::Arc,
 17};
 18use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 19use wasmparser::Parser;
 20use wit_component::ComponentEncoder;
 21
/// Currently, we compile with Rust's `wasm32-wasip1` target, which works with WASI `preview1`.
/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
/// module, which implements the `preview1` interface in terms of `preview2`.
///
/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
/// not need the adapter anymore.
const RUST_TARGET: &str = "wasm32-wasip1";
/// Download location of the WASI `preview1` adapter module. It is fetched by
/// `install_wasi_preview1_adapter_if_needed` and cached in the builder's cache directory.
const WASI_ADAPTER_URL: &str =
    "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
 31
/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
/// and clang's runtime library. The `wasi-sdk` provides these binaries.
///
/// Once Clang 17 and its wasm target are available via system package managers, we won't need
/// to download this.
///
/// This is the base URL of the release; note that it already ends with a `/`.
const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";
/// Name of the `wasi-sdk` release archive for the host platform, or `None` when no archive is
/// listed for the platform this crate is compiled for. FreeBSD uses the Linux archive.
const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "macos") {
    Some("wasi-sdk-21.0-macos.tar.gz")
} else if cfg!(any(target_os = "linux", target_os = "freebsd")) {
    Some("wasi-sdk-21.0-linux.tar.gz")
} else if cfg!(target_os = "windows") {
    Some("wasi-sdk-21.0.m-mingw.tar.gz")
} else {
    None
};
 47
/// Compiles extensions: the Rust library (into a WASM component) and any Tree-sitter grammars
/// (into WASM modules), downloading the required toolchain pieces on demand.
pub struct ExtensionBuilder {
    /// Directory where downloaded build dependencies (the WASI adapter module and the
    /// `wasi-sdk`) are stored between builds.
    cache_dir: PathBuf,
    /// HTTP client used to download those build dependencies.
    pub http: Arc<dyn HttpClient>,
}
 52
/// Options controlling how an extension's Rust crate is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, `cargo build` is run with `--release` and the output is read from the
    /// `release` target directory instead of `debug`.
    pub release: bool,
}
 56
/// The subset of an extension's `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoToml {
    /// The `[package]` table.
    package: CargoTomlPackage,
}
 61
/// The subset of the `[package]` table of `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoTomlPackage {
    /// Crate name; used to locate the compiled `.wasm` file in the target directory.
    name: String,
}
 66
 67impl ExtensionBuilder {
 68    pub fn new(http_client: Arc<dyn HttpClient>, cache_dir: PathBuf) -> Self {
 69        Self {
 70            cache_dir,
 71            http: http_client,
 72        }
 73    }
 74
 75    pub async fn compile_extension(
 76        &self,
 77        extension_dir: &Path,
 78        extension_manifest: &mut ExtensionManifest,
 79        options: CompileExtensionOptions,
 80    ) -> Result<()> {
 81        populate_defaults(extension_manifest, extension_dir)?;
 82
 83        if extension_dir.is_relative() {
 84            bail!(
 85                "extension dir {} is not an absolute path",
 86                extension_dir.display()
 87            );
 88        }
 89
 90        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 91
 92        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
 93            log::info!("compiling Rust extension {}", extension_dir.display());
 94            self.compile_rust_extension(extension_dir, extension_manifest, options)
 95                .await
 96                .context("failed to compile Rust extension")?;
 97            log::info!("compiled Rust extension {}", extension_dir.display());
 98        }
 99
100        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
101            let snake_cased_grammar_name = grammar_name.to_case(Case::Snake);
102            if grammar_name.as_ref() != snake_cased_grammar_name.as_str() {
103                bail!("grammar name '{grammar_name}' must be written in snake_case: {snake_cased_grammar_name}");
104            }
105
106            log::info!(
107                "compiling grammar {grammar_name} for extension {}",
108                extension_dir.display()
109            );
110            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
111                .await
112                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
113            log::info!(
114                "compiled grammar {grammar_name} for extension {}",
115                extension_dir.display()
116            );
117        }
118
119        log::info!("finished compiling extension {}", extension_dir.display());
120        Ok(())
121    }
122
123    async fn compile_rust_extension(
124        &self,
125        extension_dir: &Path,
126        manifest: &mut ExtensionManifest,
127        options: CompileExtensionOptions,
128    ) -> Result<(), anyhow::Error> {
129        self.install_rust_wasm_target_if_needed()?;
130        let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
131
132        let cargo_toml_content = fs::read_to_string(extension_dir.join("Cargo.toml"))?;
133        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
134
135        log::info!(
136            "compiling Rust crate for extension {}",
137            extension_dir.display()
138        );
139        let output = util::command::new_std_command("cargo")
140            .args(["build", "--target", RUST_TARGET])
141            .args(options.release.then_some("--release"))
142            .arg("--target-dir")
143            .arg(extension_dir.join("target"))
144            // WASI builds do not work with sccache and just stuck, so disable it.
145            .env("RUSTC_WRAPPER", "")
146            .current_dir(extension_dir)
147            .output()
148            .context("failed to run `cargo`")?;
149        if !output.status.success() {
150            bail!(
151                "failed to build extension {}",
152                String::from_utf8_lossy(&output.stderr)
153            );
154        }
155
156        log::info!(
157            "compiled Rust crate for extension {}",
158            extension_dir.display()
159        );
160
161        let mut wasm_path = PathBuf::from(extension_dir);
162        wasm_path.extend([
163            "target",
164            RUST_TARGET,
165            if options.release { "release" } else { "debug" },
166            &cargo_toml
167                .package
168                .name
169                // The wasm32-wasip1 target normalizes `-` in package names to `_` in the resulting `.wasm` file.
170                .replace('-', "_"),
171        ]);
172        wasm_path.set_extension("wasm");
173
174        let wasm_bytes = fs::read(&wasm_path)
175            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
176
177        let mut encoder = ComponentEncoder::default()
178            .module(&wasm_bytes)?
179            .adapter("wasi_snapshot_preview1", &adapter_bytes)
180            .context("failed to load adapter module")?
181            .validate(true);
182
183        log::info!(
184            "encoding wasm component for extension {}",
185            extension_dir.display()
186        );
187
188        let component_bytes = encoder
189            .encode()
190            .context("failed to encode wasm component")?;
191
192        let component_bytes = self
193            .strip_custom_sections(&component_bytes)
194            .context("failed to strip debug sections from wasm component")?;
195
196        let wasm_extension_api_version =
197            parse_wasm_extension_version(&manifest.id, &component_bytes)
198                .context("compiled wasm did not contain a valid zed extension api version")?;
199        manifest.lib.version = Some(wasm_extension_api_version);
200
201        let extension_file = extension_dir.join("extension.wasm");
202        fs::write(extension_file.clone(), &component_bytes)
203            .context("failed to write extension.wasm")?;
204
205        log::info!(
206            "extension {} written to {}",
207            extension_dir.display(),
208            extension_file.display()
209        );
210
211        Ok(())
212    }
213
214    async fn compile_grammar(
215        &self,
216        extension_dir: &Path,
217        grammar_name: &str,
218        grammar_metadata: &GrammarManifestEntry,
219    ) -> Result<()> {
220        let clang_path = self.install_wasi_sdk_if_needed().await?;
221
222        let mut grammar_repo_dir = extension_dir.to_path_buf();
223        grammar_repo_dir.extend(["grammars", grammar_name]);
224
225        let mut grammar_wasm_path = grammar_repo_dir.clone();
226        grammar_wasm_path.set_extension("wasm");
227
228        log::info!("checking out {grammar_name} parser");
229        self.checkout_repo(
230            &grammar_repo_dir,
231            &grammar_metadata.repository,
232            &grammar_metadata.rev,
233        )?;
234
235        let base_grammar_path = grammar_metadata
236            .path
237            .as_ref()
238            .map(|path| grammar_repo_dir.join(path))
239            .unwrap_or(grammar_repo_dir);
240
241        let src_path = base_grammar_path.join("src");
242        let parser_path = src_path.join("parser.c");
243        let scanner_path = src_path.join("scanner.c");
244
245        log::info!("compiling {grammar_name} parser");
246        let clang_output = util::command::new_std_command(&clang_path)
247            .args(["-fPIC", "-shared", "-Os"])
248            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
249            .arg("-o")
250            .arg(&grammar_wasm_path)
251            .arg("-I")
252            .arg(&src_path)
253            .arg(&parser_path)
254            .args(scanner_path.exists().then_some(scanner_path))
255            .output()
256            .context("failed to run clang")?;
257
258        if !clang_output.status.success() {
259            bail!(
260                "failed to compile {} parser with clang: {}",
261                grammar_name,
262                String::from_utf8_lossy(&clang_output.stderr),
263            );
264        }
265
266        Ok(())
267    }
268
269    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
270        let git_dir = directory.join(".git");
271
272        if directory.exists() {
273            let remotes_output = util::command::new_std_command("git")
274                .arg("--git-dir")
275                .arg(&git_dir)
276                .args(["remote", "-v"])
277                .output()?;
278            let has_remote = remotes_output.status.success()
279                && String::from_utf8_lossy(&remotes_output.stdout)
280                    .lines()
281                    .any(|line| {
282                        let mut parts = line.split(|c: char| c.is_whitespace());
283                        parts.next() == Some("origin") && parts.any(|part| part == url)
284                    });
285            if !has_remote {
286                bail!(
287                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
288                    directory.display(),
289                    url
290                );
291            }
292        } else {
293            fs::create_dir_all(directory).with_context(|| {
294                format!("failed to create grammar directory {}", directory.display(),)
295            })?;
296            let init_output = util::command::new_std_command("git")
297                .arg("init")
298                .current_dir(directory)
299                .output()?;
300            if !init_output.status.success() {
301                bail!(
302                    "failed to run `git init` in directory '{}'",
303                    directory.display()
304                );
305            }
306
307            let remote_add_output = util::command::new_std_command("git")
308                .arg("--git-dir")
309                .arg(&git_dir)
310                .args(["remote", "add", "origin", url])
311                .output()
312                .context("failed to execute `git remote add`")?;
313            if !remote_add_output.status.success() {
314                bail!(
315                    "failed to add remote {url} for git repository {}",
316                    git_dir.display()
317                );
318            }
319        }
320
321        let fetch_output = util::command::new_std_command("git")
322            .arg("--git-dir")
323            .arg(&git_dir)
324            .args(["fetch", "--depth", "1", "origin", rev])
325            .output()
326            .context("failed to execute `git fetch`")?;
327
328        let checkout_output = util::command::new_std_command("git")
329            .arg("--git-dir")
330            .arg(&git_dir)
331            .args(["checkout", rev])
332            .current_dir(directory)
333            .output()
334            .context("failed to execute `git checkout`")?;
335        if !checkout_output.status.success() {
336            if !fetch_output.status.success() {
337                bail!(
338                    "failed to fetch revision {} in directory '{}'",
339                    rev,
340                    directory.display()
341                );
342            }
343            bail!(
344                "failed to checkout revision {} in directory '{}': {}",
345                rev,
346                directory.display(),
347                String::from_utf8_lossy(&checkout_output.stderr)
348            );
349        }
350
351        Ok(())
352    }
353
354    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
355        let rustc_output = util::command::new_std_command("rustc")
356            .arg("--print")
357            .arg("sysroot")
358            .output()
359            .context("failed to run rustc")?;
360        if !rustc_output.status.success() {
361            bail!(
362                "failed to retrieve rust sysroot: {}",
363                String::from_utf8_lossy(&rustc_output.stderr)
364            );
365        }
366
367        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
368        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
369            return Ok(());
370        }
371
372        let output = util::command::new_std_command("rustup")
373            .args(["target", "add", RUST_TARGET])
374            .stderr(Stdio::piped())
375            .stdout(Stdio::inherit())
376            .output()
377            .context("failed to run `rustup target add`")?;
378        if !output.status.success() {
379            bail!(
380                "failed to install the `{RUST_TARGET}` target: {}",
381                String::from_utf8_lossy(&rustc_output.stderr)
382            );
383        }
384
385        Ok(())
386    }
387
388    async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
389        let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
390        if let Ok(content) = fs::read(&cache_path) {
391            if Parser::is_core_wasm(&content) {
392                return Ok(content);
393            }
394        }
395
396        fs::remove_file(&cache_path).ok();
397
398        log::info!(
399            "downloading wasi adapter module to {}",
400            cache_path.display()
401        );
402        let mut response = self
403            .http
404            .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
405            .await?;
406
407        let mut content = Vec::new();
408        let mut body = BufReader::new(response.body_mut());
409        body.read_to_end(&mut content).await?;
410
411        fs::write(&cache_path, &content)
412            .with_context(|| format!("failed to save file {}", cache_path.display()))?;
413
414        if !Parser::is_core_wasm(&content) {
415            bail!("downloaded wasi adapter is invalid");
416        }
417        Ok(content)
418    }
419
420    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
421        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
422            format!("{WASI_SDK_URL}/{asset_name}")
423        } else {
424            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
425        };
426
427        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
428        let mut clang_path = wasi_sdk_dir.clone();
429        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
430
431        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
432            return Ok(clang_path);
433        }
434
435        let mut tar_out_dir = wasi_sdk_dir.clone();
436        tar_out_dir.set_extension("archive");
437
438        fs::remove_dir_all(&wasi_sdk_dir).ok();
439        fs::remove_dir_all(&tar_out_dir).ok();
440
441        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
442        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
443        let body = BufReader::new(response.body_mut());
444        let body = GzipDecoder::new(body);
445        let tar = Archive::new(body);
446
447        tar.unpack(&tar_out_dir)
448            .await
449            .context("failed to unpack wasi-sdk archive")?;
450
451        let inner_dir = fs::read_dir(&tar_out_dir)?
452            .next()
453            .ok_or_else(|| anyhow!("no content"))?
454            .context("failed to read contents of extracted wasi archive directory")?
455            .path();
456        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
457        fs::remove_dir_all(&tar_out_dir).ok();
458
459        Ok(clang_path)
460    }
461
462    // This was adapted from:
463    // https://github.com/bytecodealliance/wasm-tools/blob/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
464    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
465        use wasmparser::Payload::*;
466
467        let strip_custom_section = |name: &str| name.starts_with(".debug");
468
469        let mut output = Vec::new();
470        let mut stack = Vec::new();
471
472        for payload in Parser::new(0).parse_all(input) {
473            let payload = payload?;
474            let component_header = wasm_encoder::Component::HEADER;
475            let module_header = wasm_encoder::Module::HEADER;
476
477            // Track nesting depth, so that we don't mess with inner producer sections:
478            match payload {
479                Version { encoding, .. } => {
480                    output.extend_from_slice(match encoding {
481                        wasmparser::Encoding::Component => &component_header,
482                        wasmparser::Encoding::Module => &module_header,
483                    });
484                }
485                ModuleSection { .. } | ComponentSection { .. } => {
486                    stack.push(mem::take(&mut output));
487                    continue;
488                }
489                End { .. } => {
490                    let mut parent = match stack.pop() {
491                        Some(c) => c,
492                        None => break,
493                    };
494                    if output.starts_with(&component_header) {
495                        parent.push(ComponentSectionId::Component as u8);
496                        output.encode(&mut parent);
497                    } else {
498                        parent.push(ComponentSectionId::CoreModule as u8);
499                        output.encode(&mut parent);
500                    }
501                    output = parent;
502                }
503                _ => {}
504            }
505
506            if let CustomSection(c) = &payload {
507                if strip_custom_section(c.name()) {
508                    continue;
509                }
510            }
511
512            if let Some((id, range)) = payload.as_section() {
513                RawSection {
514                    id,
515                    data: &input[range],
516                }
517                .append_to(&mut output);
518            }
519        }
520
521        Ok(output)
522    }
523}
524
525fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
526    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
527    // contents of the computed fields, since we don't care what the existing values are.
528    if manifest.schema_version.is_v0() {
529        manifest.languages.clear();
530        manifest.grammars.clear();
531        manifest.themes.clear();
532    }
533
534    let cargo_toml_path = extension_path.join("Cargo.toml");
535    if cargo_toml_path.exists() {
536        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
537    }
538
539    let languages_dir = extension_path.join("languages");
540    if languages_dir.exists() {
541        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
542            let entry = entry?;
543            let language_dir = entry.path();
544            let config_path = language_dir.join("config.toml");
545            if config_path.exists() {
546                let relative_language_dir =
547                    language_dir.strip_prefix(extension_path)?.to_path_buf();
548                if !manifest.languages.contains(&relative_language_dir) {
549                    manifest.languages.push(relative_language_dir);
550                }
551            }
552        }
553    }
554
555    let themes_dir = extension_path.join("themes");
556    if themes_dir.exists() {
557        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
558            let entry = entry?;
559            let theme_path = entry.path();
560            if theme_path.extension() == Some("json".as_ref()) {
561                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
562                if !manifest.themes.contains(&relative_theme_path) {
563                    manifest.themes.push(relative_theme_path);
564                }
565            }
566        }
567    }
568
569    let icon_themes_dir = extension_path.join("icon_themes");
570    if icon_themes_dir.exists() {
571        for entry in fs::read_dir(&icon_themes_dir).context("failed to list icon themes dir")? {
572            let entry = entry?;
573            let icon_theme_path = entry.path();
574            if icon_theme_path.extension() == Some("json".as_ref()) {
575                let relative_icon_theme_path =
576                    icon_theme_path.strip_prefix(extension_path)?.to_path_buf();
577                if !manifest.icon_themes.contains(&relative_icon_theme_path) {
578                    manifest.icon_themes.push(relative_icon_theme_path);
579                }
580            }
581        }
582    }
583
584    let snippets_json_path = extension_path.join("snippets.json");
585    if snippets_json_path.exists() {
586        manifest.snippets = Some(snippets_json_path);
587    }
588
589    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
590    // the manifest using the contents of the `grammars` directory.
591    if manifest.schema_version.is_v0() {
592        let grammars_dir = extension_path.join("grammars");
593        if grammars_dir.exists() {
594            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
595                let entry = entry?;
596                let grammar_path = entry.path();
597                if grammar_path.extension() == Some("toml".as_ref()) {
598                    #[derive(Deserialize)]
599                    struct GrammarConfigToml {
600                        pub repository: String,
601                        pub commit: String,
602                        #[serde(default)]
603                        pub path: Option<String>,
604                    }
605
606                    let grammar_config = fs::read_to_string(&grammar_path)?;
607                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
608
609                    let grammar_name = grammar_path
610                        .file_stem()
611                        .and_then(|stem| stem.to_str())
612                        .ok_or_else(|| anyhow!("no grammar name"))?;
613                    if !manifest.grammars.contains_key(grammar_name) {
614                        manifest.grammars.insert(
615                            grammar_name.into(),
616                            GrammarManifestEntry {
617                                repository: grammar_config.repository,
618                                rev: grammar_config.commit,
619                                path: grammar_config.path,
620                            },
621                        );
622                    }
623                }
624            }
625        }
626    }
627
628    Ok(())
629}