extension_builder.rs

  1use crate::{
  2    ExtensionLibraryKind, ExtensionManifest, GrammarManifestEntry, parse_wasm_extension_version,
  3};
  4use anyhow::{Context as _, Result, anyhow, bail};
  5use async_compression::futures::bufread::GzipDecoder;
  6use async_tar::Archive;
  7use futures::io::BufReader;
  8use heck::ToSnakeCase;
  9use http_client::{self, AsyncBody, HttpClient};
 10use serde::Deserialize;
 11use std::{
 12    env, fs, mem,
 13    path::{Path, PathBuf},
 14    process::Stdio,
 15    sync::Arc,
 16};
 17use wasi_preview1_component_adapter_provider::WASI_SNAPSHOT_PREVIEW1_REACTOR_ADAPTER;
 18use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 19use wasmparser::Parser;
 20use wit_component::ComponentEncoder;
 21
 22/// Currently, we compile with Rust's `wasm32-wasip1` target, which works with WASI `preview1`.
 23/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
 24/// module, which implements the `preview1` interface in terms of `preview2`.
 25///
 26/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
 27/// not need the adapter anymore.
 28const RUST_TARGET: &str = "wasm32-wasip1";
 29
 30/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
 31/// and clang's runtime library. The `wasi-sdk` provides these binaries.
 32///
 33/// Once Clang 17 and its wasm target are available via system package managers, we won't need
 34/// to download this.
 35const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/";
 36const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(all(target_os = "macos", target_arch = "x86_64"))
 37{
 38    Some("wasi-sdk-25.0-x86_64-macos.tar.gz")
 39} else if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
 40    Some("wasi-sdk-25.0-arm64-macos.tar.gz")
 41} else if cfg!(all(target_os = "linux", target_arch = "x86_64")) {
 42    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
 43} else if cfg!(all(target_os = "linux", target_arch = "aarch64")) {
 44    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
 45} else if cfg!(all(target_os = "freebsd", target_arch = "x86_64")) {
 46    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
 47} else if cfg!(all(target_os = "freebsd", target_arch = "aarch64")) {
 48    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
 49} else if cfg!(all(target_os = "windows", target_arch = "x86_64")) {
 50    Some("wasi-sdk-25.0-x86_64-windows.tar.gz")
 51} else {
 52    None
 53};
 54
 55pub struct ExtensionBuilder {
 56    cache_dir: PathBuf,
 57    pub http: Arc<dyn HttpClient>,
 58}
 59
 60pub struct CompileExtensionOptions {
 61    pub release: bool,
 62}
 63
 64#[derive(Deserialize)]
 65struct CargoToml {
 66    package: CargoTomlPackage,
 67}
 68
 69#[derive(Deserialize)]
 70struct CargoTomlPackage {
 71    name: String,
 72}
 73
 74impl ExtensionBuilder {
 75    pub fn new(http_client: Arc<dyn HttpClient>, cache_dir: PathBuf) -> Self {
 76        Self {
 77            cache_dir,
 78            http: http_client,
 79        }
 80    }
 81
 82    pub async fn compile_extension(
 83        &self,
 84        extension_dir: &Path,
 85        extension_manifest: &mut ExtensionManifest,
 86        options: CompileExtensionOptions,
 87    ) -> Result<()> {
 88        populate_defaults(extension_manifest, extension_dir)?;
 89
 90        if extension_dir.is_relative() {
 91            bail!(
 92                "extension dir {} is not an absolute path",
 93                extension_dir.display()
 94            );
 95        }
 96
 97        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 98
 99        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
100            log::info!("compiling Rust extension {}", extension_dir.display());
101            self.compile_rust_extension(extension_dir, extension_manifest, options)
102                .await
103                .context("failed to compile Rust extension")?;
104            log::info!("compiled Rust extension {}", extension_dir.display());
105        }
106
107        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
108            let snake_cased_grammar_name = grammar_name.to_snake_case();
109            if grammar_name.as_ref() != snake_cased_grammar_name.as_str() {
110                bail!(
111                    "grammar name '{grammar_name}' must be written in snake_case: {snake_cased_grammar_name}"
112                );
113            }
114
115            log::info!(
116                "compiling grammar {grammar_name} for extension {}",
117                extension_dir.display()
118            );
119            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
120                .await
121                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
122            log::info!(
123                "compiled grammar {grammar_name} for extension {}",
124                extension_dir.display()
125            );
126        }
127
128        log::info!("finished compiling extension {}", extension_dir.display());
129        Ok(())
130    }
131
132    async fn compile_rust_extension(
133        &self,
134        extension_dir: &Path,
135        manifest: &mut ExtensionManifest,
136        options: CompileExtensionOptions,
137    ) -> Result<(), anyhow::Error> {
138        self.install_rust_wasm_target_if_needed()?;
139
140        let cargo_toml_content = fs::read_to_string(extension_dir.join("Cargo.toml"))?;
141        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
142
143        log::info!(
144            "compiling Rust crate for extension {}",
145            extension_dir.display()
146        );
147        let output = util::command::new_std_command("cargo")
148            .args(["build", "--target", RUST_TARGET])
149            .args(options.release.then_some("--release"))
150            .arg("--target-dir")
151            .arg(extension_dir.join("target"))
152            // WASI builds do not work with sccache and just stuck, so disable it.
153            .env("RUSTC_WRAPPER", "")
154            .current_dir(extension_dir)
155            .output()
156            .context("failed to run `cargo`")?;
157        if !output.status.success() {
158            bail!(
159                "failed to build extension {}",
160                String::from_utf8_lossy(&output.stderr)
161            );
162        }
163
164        log::info!(
165            "compiled Rust crate for extension {}",
166            extension_dir.display()
167        );
168
169        let mut wasm_path = PathBuf::from(extension_dir);
170        wasm_path.extend([
171            "target",
172            RUST_TARGET,
173            if options.release { "release" } else { "debug" },
174            &cargo_toml
175                .package
176                .name
177                // The wasm32-wasip1 target normalizes `-` in package names to `_` in the resulting `.wasm` file.
178                .replace('-', "_"),
179        ]);
180        wasm_path.set_extension("wasm");
181
182        let wasm_bytes = fs::read(&wasm_path)
183            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
184
185        let mut encoder = ComponentEncoder::default()
186            .module(&wasm_bytes)?
187            .adapter(
188                "wasi_snapshot_preview1",
189                WASI_SNAPSHOT_PREVIEW1_REACTOR_ADAPTER,
190            )
191            .context("failed to load adapter module")?
192            .validate(true);
193
194        log::info!(
195            "encoding wasm component for extension {}",
196            extension_dir.display()
197        );
198
199        let component_bytes = encoder
200            .encode()
201            .context("failed to encode wasm component")?;
202
203        let component_bytes = self
204            .strip_custom_sections(&component_bytes)
205            .context("failed to strip debug sections from wasm component")?;
206
207        let wasm_extension_api_version =
208            parse_wasm_extension_version(&manifest.id, &component_bytes)
209                .context("compiled wasm did not contain a valid zed extension api version")?;
210        manifest.lib.version = Some(wasm_extension_api_version);
211
212        let extension_file = extension_dir.join("extension.wasm");
213        fs::write(extension_file.clone(), &component_bytes)
214            .context("failed to write extension.wasm")?;
215
216        log::info!(
217            "extension {} written to {}",
218            extension_dir.display(),
219            extension_file.display()
220        );
221
222        Ok(())
223    }
224
225    async fn compile_grammar(
226        &self,
227        extension_dir: &Path,
228        grammar_name: &str,
229        grammar_metadata: &GrammarManifestEntry,
230    ) -> Result<()> {
231        let clang_path = self.install_wasi_sdk_if_needed().await?;
232
233        let mut grammar_repo_dir = extension_dir.to_path_buf();
234        grammar_repo_dir.extend(["grammars", grammar_name]);
235
236        let mut grammar_wasm_path = grammar_repo_dir.clone();
237        grammar_wasm_path.set_extension("wasm");
238
239        log::info!("checking out {grammar_name} parser");
240        self.checkout_repo(
241            &grammar_repo_dir,
242            &grammar_metadata.repository,
243            &grammar_metadata.rev,
244        )?;
245
246        let base_grammar_path = grammar_metadata
247            .path
248            .as_ref()
249            .map(|path| grammar_repo_dir.join(path))
250            .unwrap_or(grammar_repo_dir);
251
252        let src_path = base_grammar_path.join("src");
253        let parser_path = src_path.join("parser.c");
254        let scanner_path = src_path.join("scanner.c");
255
256        log::info!("compiling {grammar_name} parser");
257        let clang_output = util::command::new_std_command(&clang_path)
258            .args(["-fPIC", "-shared", "-Os"])
259            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
260            .arg("-o")
261            .arg(&grammar_wasm_path)
262            .arg("-I")
263            .arg(&src_path)
264            .arg(&parser_path)
265            .args(scanner_path.exists().then_some(scanner_path))
266            .output()
267            .context("failed to run clang")?;
268
269        if !clang_output.status.success() {
270            bail!(
271                "failed to compile {} parser with clang: {}",
272                grammar_name,
273                String::from_utf8_lossy(&clang_output.stderr),
274            );
275        }
276
277        Ok(())
278    }
279
280    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
281        let git_dir = directory.join(".git");
282
283        if directory.exists() {
284            let remotes_output = util::command::new_std_command("git")
285                .arg("--git-dir")
286                .arg(&git_dir)
287                .args(["remote", "-v"])
288                .output()?;
289            let has_remote = remotes_output.status.success()
290                && String::from_utf8_lossy(&remotes_output.stdout)
291                    .lines()
292                    .any(|line| {
293                        let mut parts = line.split(|c: char| c.is_whitespace());
294                        parts.next() == Some("origin") && parts.any(|part| part == url)
295                    });
296            if !has_remote {
297                bail!(
298                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
299                    directory.display(),
300                    url
301                );
302            }
303        } else {
304            fs::create_dir_all(directory).with_context(|| {
305                format!("failed to create grammar directory {}", directory.display(),)
306            })?;
307            let init_output = util::command::new_std_command("git")
308                .arg("init")
309                .current_dir(directory)
310                .output()?;
311            if !init_output.status.success() {
312                bail!(
313                    "failed to run `git init` in directory '{}'",
314                    directory.display()
315                );
316            }
317
318            let remote_add_output = util::command::new_std_command("git")
319                .arg("--git-dir")
320                .arg(&git_dir)
321                .args(["remote", "add", "origin", url])
322                .output()
323                .context("failed to execute `git remote add`")?;
324            if !remote_add_output.status.success() {
325                bail!(
326                    "failed to add remote {url} for git repository {}",
327                    git_dir.display()
328                );
329            }
330        }
331
332        let fetch_output = util::command::new_std_command("git")
333            .arg("--git-dir")
334            .arg(&git_dir)
335            .args(["fetch", "--depth", "1", "origin", rev])
336            .output()
337            .context("failed to execute `git fetch`")?;
338
339        let checkout_output = util::command::new_std_command("git")
340            .arg("--git-dir")
341            .arg(&git_dir)
342            .args(["checkout", rev])
343            .current_dir(directory)
344            .output()
345            .context("failed to execute `git checkout`")?;
346        if !checkout_output.status.success() {
347            if !fetch_output.status.success() {
348                bail!(
349                    "failed to fetch revision {} in directory '{}'",
350                    rev,
351                    directory.display()
352                );
353            }
354            bail!(
355                "failed to checkout revision {} in directory '{}': {}",
356                rev,
357                directory.display(),
358                String::from_utf8_lossy(&checkout_output.stderr)
359            );
360        }
361
362        Ok(())
363    }
364
365    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
366        let rustc_output = util::command::new_std_command("rustc")
367            .arg("--print")
368            .arg("sysroot")
369            .output()
370            .context("failed to run rustc")?;
371        if !rustc_output.status.success() {
372            bail!(
373                "failed to retrieve rust sysroot: {}",
374                String::from_utf8_lossy(&rustc_output.stderr)
375            );
376        }
377
378        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
379        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
380            return Ok(());
381        }
382
383        let output = util::command::new_std_command("rustup")
384            .args(["target", "add", RUST_TARGET])
385            .stderr(Stdio::piped())
386            .stdout(Stdio::inherit())
387            .output()
388            .context("failed to run `rustup target add`")?;
389        if !output.status.success() {
390            bail!(
391                "failed to install the `{RUST_TARGET}` target: {}",
392                String::from_utf8_lossy(&rustc_output.stderr)
393            );
394        }
395
396        Ok(())
397    }
398
399    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
400        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
401            format!("{WASI_SDK_URL}{asset_name}")
402        } else {
403            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
404        };
405
406        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
407        let mut clang_path = wasi_sdk_dir.clone();
408        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
409
410        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
411            return Ok(clang_path);
412        }
413
414        let mut tar_out_dir = wasi_sdk_dir.clone();
415        tar_out_dir.set_extension("archive");
416
417        fs::remove_dir_all(&wasi_sdk_dir).ok();
418        fs::remove_dir_all(&tar_out_dir).ok();
419
420        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
421        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
422        let body = BufReader::new(response.body_mut());
423        let body = GzipDecoder::new(body);
424        let tar = Archive::new(body);
425
426        tar.unpack(&tar_out_dir)
427            .await
428            .context("failed to unpack wasi-sdk archive")?;
429
430        let inner_dir = fs::read_dir(&tar_out_dir)?
431            .next()
432            .ok_or_else(|| anyhow!("no content"))?
433            .context("failed to read contents of extracted wasi archive directory")?
434            .path();
435        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
436        fs::remove_dir_all(&tar_out_dir).ok();
437
438        Ok(clang_path)
439    }
440
441    // This was adapted from:
442    // https://github.com/bytecodealliance/wasm-tools/blob/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
443    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
444        use wasmparser::Payload::*;
445
446        let strip_custom_section = |name: &str| name.starts_with(".debug");
447
448        let mut output = Vec::new();
449        let mut stack = Vec::new();
450
451        for payload in Parser::new(0).parse_all(input) {
452            let payload = payload?;
453            let component_header = wasm_encoder::Component::HEADER;
454            let module_header = wasm_encoder::Module::HEADER;
455
456            // Track nesting depth, so that we don't mess with inner producer sections:
457            match payload {
458                Version { encoding, .. } => {
459                    output.extend_from_slice(match encoding {
460                        wasmparser::Encoding::Component => &component_header,
461                        wasmparser::Encoding::Module => &module_header,
462                    });
463                }
464                ModuleSection { .. } | ComponentSection { .. } => {
465                    stack.push(mem::take(&mut output));
466                    continue;
467                }
468                End { .. } => {
469                    let mut parent = match stack.pop() {
470                        Some(c) => c,
471                        None => break,
472                    };
473                    if output.starts_with(&component_header) {
474                        parent.push(ComponentSectionId::Component as u8);
475                        output.encode(&mut parent);
476                    } else {
477                        parent.push(ComponentSectionId::CoreModule as u8);
478                        output.encode(&mut parent);
479                    }
480                    output = parent;
481                }
482                _ => {}
483            }
484
485            if let CustomSection(c) = &payload {
486                if strip_custom_section(c.name()) {
487                    continue;
488                }
489            }
490
491            if let Some((id, range)) = payload.as_section() {
492                RawSection {
493                    id,
494                    data: &input[range],
495                }
496                .append_to(&mut output);
497            }
498        }
499
500        Ok(output)
501    }
502}
503
504fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
505    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
506    // contents of the computed fields, since we don't care what the existing values are.
507    if manifest.schema_version.is_v0() {
508        manifest.languages.clear();
509        manifest.grammars.clear();
510        manifest.themes.clear();
511    }
512
513    let cargo_toml_path = extension_path.join("Cargo.toml");
514    if cargo_toml_path.exists() {
515        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
516    }
517
518    let languages_dir = extension_path.join("languages");
519    if languages_dir.exists() {
520        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
521            let entry = entry?;
522            let language_dir = entry.path();
523            let config_path = language_dir.join("config.toml");
524            if config_path.exists() {
525                let relative_language_dir =
526                    language_dir.strip_prefix(extension_path)?.to_path_buf();
527                if !manifest.languages.contains(&relative_language_dir) {
528                    manifest.languages.push(relative_language_dir);
529                }
530            }
531        }
532    }
533
534    let themes_dir = extension_path.join("themes");
535    if themes_dir.exists() {
536        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
537            let entry = entry?;
538            let theme_path = entry.path();
539            if theme_path.extension() == Some("json".as_ref()) {
540                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
541                if !manifest.themes.contains(&relative_theme_path) {
542                    manifest.themes.push(relative_theme_path);
543                }
544            }
545        }
546    }
547
548    let icon_themes_dir = extension_path.join("icon_themes");
549    if icon_themes_dir.exists() {
550        for entry in fs::read_dir(&icon_themes_dir).context("failed to list icon themes dir")? {
551            let entry = entry?;
552            let icon_theme_path = entry.path();
553            if icon_theme_path.extension() == Some("json".as_ref()) {
554                let relative_icon_theme_path =
555                    icon_theme_path.strip_prefix(extension_path)?.to_path_buf();
556                if !manifest.icon_themes.contains(&relative_icon_theme_path) {
557                    manifest.icon_themes.push(relative_icon_theme_path);
558                }
559            }
560        }
561    }
562
563    let snippets_json_path = extension_path.join("snippets.json");
564    if snippets_json_path.exists() {
565        manifest.snippets = Some(snippets_json_path);
566    }
567
568    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
569    // the manifest using the contents of the `grammars` directory.
570    if manifest.schema_version.is_v0() {
571        let grammars_dir = extension_path.join("grammars");
572        if grammars_dir.exists() {
573            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
574                let entry = entry?;
575                let grammar_path = entry.path();
576                if grammar_path.extension() == Some("toml".as_ref()) {
577                    #[derive(Deserialize)]
578                    struct GrammarConfigToml {
579                        pub repository: String,
580                        pub commit: String,
581                        #[serde(default)]
582                        pub path: Option<String>,
583                    }
584
585                    let grammar_config = fs::read_to_string(&grammar_path)?;
586                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
587
588                    let grammar_name = grammar_path
589                        .file_stem()
590                        .and_then(|stem| stem.to_str())
591                        .ok_or_else(|| anyhow!("no grammar name"))?;
592                    if !manifest.grammars.contains_key(grammar_name) {
593                        manifest.grammars.insert(
594                            grammar_name.into(),
595                            GrammarManifestEntry {
596                                repository: grammar_config.repository,
597                                rev: grammar_config.commit,
598                                path: grammar_config.path,
599                            },
600                        );
601                    }
602                }
603            }
604        }
605    }
606
607    Ok(())
608}