extension_builder.rs

  1use crate::{
  2    ExtensionLibraryKind, ExtensionManifest, GrammarManifestEntry, parse_wasm_extension_version,
  3};
  4use anyhow::{Context as _, Result, bail};
  5use async_compression::futures::bufread::GzipDecoder;
  6use async_tar::Archive;
  7use futures::io::BufReader;
  8use heck::ToSnakeCase;
  9use http_client::{self, AsyncBody, HttpClient};
 10use serde::Deserialize;
 11use std::{
 12    env, fs, mem,
 13    path::{Path, PathBuf},
 14    process::Stdio,
 15    sync::Arc,
 16};
 17use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 18use wasmparser::Parser;
 19
/// The Rust compilation target used for extension crates.
///
/// Currently, we compile with Rust's `wasm32-wasip2` target, which works with WASI `preview2` and the component model.
///
/// The same string is passed to `cargo build --target`, used as the directory name when locating
/// the build output under `target/`, and passed to `rustup target add` when the target is missing
/// from the sysroot.
const RUST_TARGET: &str = "wasm32-wasip2";
 22
/// Base URL (with trailing slash) of the `wasi-sdk` release that is downloaded on demand; the
/// platform-specific asset name is appended to it to form the download URL.
///
/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
/// and clang's runtime library. The `wasi-sdk` provides these binaries.
///
/// Once Clang 17 and its wasm target are available via system package managers, we won't need
/// to download this.
const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/";
/// File name of the `wasi-sdk` release archive for the platform this crate is compiled for,
/// selected at compile time from the target OS and architecture.
///
/// `None` means no archive is listed for the current OS/architecture combination.
const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(all(target_os = "macos", target_arch = "x86_64"))
{
    Some("wasi-sdk-25.0-x86_64-macos.tar.gz")
} else if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
    Some("wasi-sdk-25.0-arm64-macos.tar.gz")
} else if cfg!(all(target_os = "linux", target_arch = "x86_64")) {
    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
} else if cfg!(all(target_os = "linux", target_arch = "aarch64")) {
    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
// NOTE(review): FreeBSD reuses the Linux archives — presumably this relies on FreeBSD's Linux
// binary compatibility layer; confirm.
} else if cfg!(all(target_os = "freebsd", target_arch = "x86_64")) {
    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
} else if cfg!(all(target_os = "freebsd", target_arch = "aarch64")) {
    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
} else if cfg!(all(target_os = "windows", target_arch = "x86_64")) {
    Some("wasi-sdk-25.0-x86_64-windows.tar.gz")
} else {
    None
};
 47
/// Compiles extensions: builds an extension's Rust crate into a WASM component and compiles its
/// Tree-sitter grammars to WASM.
pub struct ExtensionBuilder {
    /// Directory for downloaded build tooling; the wasi-sdk is unpacked into `<cache_dir>/wasi-sdk`.
    cache_dir: PathBuf,
    /// HTTP client used to download the wasi-sdk archive.
    pub http: Arc<dyn HttpClient>,
}
 52
/// Options controlling how an extension is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, the Rust crate is built with `cargo build --release` and the output is read
    /// from the `release` directory; otherwise a debug build is produced and read from `debug`.
    pub release: bool,
}
 56
/// The subset of an extension's `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoToml {
    /// The `[package]` table.
    package: CargoTomlPackage,
}
 61
/// The subset of the `[package]` table of `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoTomlPackage {
    /// Crate name; used (with `-` replaced by `_`) to locate the compiled `.wasm` file.
    name: String,
}
 66
 67impl ExtensionBuilder {
 68    pub fn new(http_client: Arc<dyn HttpClient>, cache_dir: PathBuf) -> Self {
 69        Self {
 70            cache_dir,
 71            http: http_client,
 72        }
 73    }
 74
 75    pub async fn compile_extension(
 76        &self,
 77        extension_dir: &Path,
 78        extension_manifest: &mut ExtensionManifest,
 79        options: CompileExtensionOptions,
 80    ) -> Result<()> {
 81        populate_defaults(extension_manifest, extension_dir)?;
 82
 83        if extension_dir.is_relative() {
 84            bail!(
 85                "extension dir {} is not an absolute path",
 86                extension_dir.display()
 87            );
 88        }
 89
 90        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 91
 92        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
 93            log::info!("compiling Rust extension {}", extension_dir.display());
 94            self.compile_rust_extension(extension_dir, extension_manifest, options)
 95                .await
 96                .context("failed to compile Rust extension")?;
 97            log::info!("compiled Rust extension {}", extension_dir.display());
 98        }
 99
100        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
101            let snake_cased_grammar_name = grammar_name.to_snake_case();
102            if grammar_name.as_ref() != snake_cased_grammar_name.as_str() {
103                bail!(
104                    "grammar name '{grammar_name}' must be written in snake_case: {snake_cased_grammar_name}"
105                );
106            }
107
108            log::info!(
109                "compiling grammar {grammar_name} for extension {}",
110                extension_dir.display()
111            );
112            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
113                .await
114                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
115            log::info!(
116                "compiled grammar {grammar_name} for extension {}",
117                extension_dir.display()
118            );
119        }
120
121        log::info!("finished compiling extension {}", extension_dir.display());
122        Ok(())
123    }
124
125    async fn compile_rust_extension(
126        &self,
127        extension_dir: &Path,
128        manifest: &mut ExtensionManifest,
129        options: CompileExtensionOptions,
130    ) -> anyhow::Result<()> {
131        self.install_rust_wasm_target_if_needed()?;
132
133        let cargo_toml_content = fs::read_to_string(extension_dir.join("Cargo.toml"))?;
134        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
135
136        log::info!(
137            "compiling Rust crate for extension {}",
138            extension_dir.display()
139        );
140        let output = util::command::new_std_command("cargo")
141            .args(["build", "--target", RUST_TARGET])
142            .args(options.release.then_some("--release"))
143            .arg("--target-dir")
144            .arg(extension_dir.join("target"))
145            // WASI builds do not work with sccache and just stuck, so disable it.
146            .env("RUSTC_WRAPPER", "")
147            .current_dir(extension_dir)
148            .output()
149            .context("failed to run `cargo`")?;
150        if !output.status.success() {
151            bail!(
152                "failed to build extension {}",
153                String::from_utf8_lossy(&output.stderr)
154            );
155        }
156
157        log::info!(
158            "compiled Rust crate for extension {}",
159            extension_dir.display()
160        );
161
162        let mut wasm_path = PathBuf::from(extension_dir);
163        wasm_path.extend([
164            "target",
165            RUST_TARGET,
166            if options.release { "release" } else { "debug" },
167            &cargo_toml
168                .package
169                .name
170                // The wasm32-wasip2 target normalizes `-` in package names to `_` in the resulting `.wasm` file.
171                .replace('-', "_"),
172        ]);
173        wasm_path.set_extension("wasm");
174
175        log::info!(
176            "encoding wasm component for extension {}",
177            extension_dir.display()
178        );
179
180        let component_bytes = fs::read(&wasm_path)
181            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
182
183        let component_bytes = self
184            .strip_custom_sections(&component_bytes)
185            .context("failed to strip debug sections from wasm component")?;
186
187        let wasm_extension_api_version =
188            parse_wasm_extension_version(&manifest.id, &component_bytes)
189                .context("compiled wasm did not contain a valid zed extension api version")?;
190        manifest.lib.version = Some(wasm_extension_api_version);
191
192        let extension_file = extension_dir.join("extension.wasm");
193        fs::write(extension_file.clone(), &component_bytes)
194            .context("failed to write extension.wasm")?;
195
196        log::info!(
197            "extension {} written to {}",
198            extension_dir.display(),
199            extension_file.display()
200        );
201
202        Ok(())
203    }
204
205    async fn compile_grammar(
206        &self,
207        extension_dir: &Path,
208        grammar_name: &str,
209        grammar_metadata: &GrammarManifestEntry,
210    ) -> Result<()> {
211        let clang_path = self.install_wasi_sdk_if_needed().await?;
212
213        let mut grammar_repo_dir = extension_dir.to_path_buf();
214        grammar_repo_dir.extend(["grammars", grammar_name]);
215
216        let mut grammar_wasm_path = grammar_repo_dir.clone();
217        grammar_wasm_path.set_extension("wasm");
218
219        log::info!("checking out {grammar_name} parser");
220        self.checkout_repo(
221            &grammar_repo_dir,
222            &grammar_metadata.repository,
223            &grammar_metadata.rev,
224        )?;
225
226        let base_grammar_path = grammar_metadata
227            .path
228            .as_ref()
229            .map(|path| grammar_repo_dir.join(path))
230            .unwrap_or(grammar_repo_dir);
231
232        let src_path = base_grammar_path.join("src");
233        let parser_path = src_path.join("parser.c");
234        let scanner_path = src_path.join("scanner.c");
235
236        log::info!("compiling {grammar_name} parser");
237        let clang_output = util::command::new_std_command(&clang_path)
238            .args(["-fPIC", "-shared", "-Os"])
239            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
240            .arg("-o")
241            .arg(&grammar_wasm_path)
242            .arg("-I")
243            .arg(&src_path)
244            .arg(&parser_path)
245            .args(scanner_path.exists().then_some(scanner_path))
246            .output()
247            .context("failed to run clang")?;
248
249        if !clang_output.status.success() {
250            bail!(
251                "failed to compile {} parser with clang: {}",
252                grammar_name,
253                String::from_utf8_lossy(&clang_output.stderr),
254            );
255        }
256
257        Ok(())
258    }
259
260    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
261        let git_dir = directory.join(".git");
262
263        if directory.exists() {
264            let remotes_output = util::command::new_std_command("git")
265                .arg("--git-dir")
266                .arg(&git_dir)
267                .args(["remote", "-v"])
268                .output()?;
269            let has_remote = remotes_output.status.success()
270                && String::from_utf8_lossy(&remotes_output.stdout)
271                    .lines()
272                    .any(|line| {
273                        let mut parts = line.split(|c: char| c.is_whitespace());
274                        parts.next() == Some("origin") && parts.any(|part| part == url)
275                    });
276            if !has_remote {
277                bail!(
278                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
279                    directory.display(),
280                    url
281                );
282            }
283        } else {
284            fs::create_dir_all(directory).with_context(|| {
285                format!("failed to create grammar directory {}", directory.display(),)
286            })?;
287            let init_output = util::command::new_std_command("git")
288                .arg("init")
289                .current_dir(directory)
290                .output()?;
291            if !init_output.status.success() {
292                bail!(
293                    "failed to run `git init` in directory '{}'",
294                    directory.display()
295                );
296            }
297
298            let remote_add_output = util::command::new_std_command("git")
299                .arg("--git-dir")
300                .arg(&git_dir)
301                .args(["remote", "add", "origin", url])
302                .output()
303                .context("failed to execute `git remote add`")?;
304            if !remote_add_output.status.success() {
305                bail!(
306                    "failed to add remote {url} for git repository {}",
307                    git_dir.display()
308                );
309            }
310        }
311
312        let fetch_output = util::command::new_std_command("git")
313            .arg("--git-dir")
314            .arg(&git_dir)
315            .args(["fetch", "--depth", "1", "origin", rev])
316            .output()
317            .context("failed to execute `git fetch`")?;
318
319        let checkout_output = util::command::new_std_command("git")
320            .arg("--git-dir")
321            .arg(&git_dir)
322            .args(["checkout", rev])
323            .current_dir(directory)
324            .output()
325            .context("failed to execute `git checkout`")?;
326        if !checkout_output.status.success() {
327            if !fetch_output.status.success() {
328                bail!(
329                    "failed to fetch revision {} in directory '{}'",
330                    rev,
331                    directory.display()
332                );
333            }
334            bail!(
335                "failed to checkout revision {} in directory '{}': {}",
336                rev,
337                directory.display(),
338                String::from_utf8_lossy(&checkout_output.stderr)
339            );
340        }
341
342        Ok(())
343    }
344
345    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
346        let rustc_output = util::command::new_std_command("rustc")
347            .arg("--print")
348            .arg("sysroot")
349            .output()
350            .context("failed to run rustc")?;
351        if !rustc_output.status.success() {
352            bail!(
353                "failed to retrieve rust sysroot: {}",
354                String::from_utf8_lossy(&rustc_output.stderr)
355            );
356        }
357
358        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
359        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
360            return Ok(());
361        }
362
363        let output = util::command::new_std_command("rustup")
364            .args(["target", "add", RUST_TARGET])
365            .stderr(Stdio::piped())
366            .stdout(Stdio::inherit())
367            .output()
368            .context("failed to run `rustup target add`")?;
369        if !output.status.success() {
370            bail!(
371                "failed to install the `{RUST_TARGET}` target: {}",
372                String::from_utf8_lossy(&rustc_output.stderr)
373            );
374        }
375
376        Ok(())
377    }
378
379    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
380        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
381            format!("{WASI_SDK_URL}{asset_name}")
382        } else {
383            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
384        };
385
386        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
387        let mut clang_path = wasi_sdk_dir.clone();
388        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
389
390        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
391            return Ok(clang_path);
392        }
393
394        let mut tar_out_dir = wasi_sdk_dir.clone();
395        tar_out_dir.set_extension("archive");
396
397        fs::remove_dir_all(&wasi_sdk_dir).ok();
398        fs::remove_dir_all(&tar_out_dir).ok();
399
400        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
401        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
402        let body = BufReader::new(response.body_mut());
403        let body = GzipDecoder::new(body);
404        let tar = Archive::new(body);
405
406        tar.unpack(&tar_out_dir)
407            .await
408            .context("failed to unpack wasi-sdk archive")?;
409
410        let inner_dir = fs::read_dir(&tar_out_dir)?
411            .next()
412            .context("no content")?
413            .context("failed to read contents of extracted wasi archive directory")?
414            .path();
415        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
416        fs::remove_dir_all(&tar_out_dir).ok();
417
418        Ok(clang_path)
419    }
420
421    // This was adapted from:
422    // https://github.com/bytecodealliance/wasm-tools/blob/e8809bb17fcf69aa8c85cd5e6db7cff5cf36b1de/src/bin/wasm-tools/strip.rs
423    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
424        use wasmparser::Payload::*;
425
426        let strip_custom_section = |name: &str| {
427            // Default strip everything but:
428            // * the `name` section
429            // * any `component-type` sections
430            // * the `dylink.0` section
431            // * our custom version section
432            name != "name"
433                && !name.starts_with("component-type:")
434                && name != "dylink.0"
435                && name != "zed:api-version"
436        };
437
438        let mut output = Vec::new();
439        let mut stack = Vec::new();
440
441        for payload in Parser::new(0).parse_all(&input) {
442            let payload = payload?;
443
444            // Track nesting depth, so that we don't mess with inner producer sections:
445            match payload {
446                Version { encoding, .. } => {
447                    output.extend_from_slice(match encoding {
448                        wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
449                        wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
450                    });
451                }
452                ModuleSection { .. } | ComponentSection { .. } => {
453                    stack.push(mem::take(&mut output));
454                    continue;
455                }
456                End { .. } => {
457                    let mut parent = match stack.pop() {
458                        Some(c) => c,
459                        None => break,
460                    };
461                    if output.starts_with(&wasm_encoder::Component::HEADER) {
462                        parent.push(ComponentSectionId::Component as u8);
463                        output.encode(&mut parent);
464                    } else {
465                        parent.push(ComponentSectionId::CoreModule as u8);
466                        output.encode(&mut parent);
467                    }
468                    output = parent;
469                }
470                _ => {}
471            }
472
473            match &payload {
474                CustomSection(c) => {
475                    if strip_custom_section(c.name()) {
476                        continue;
477                    }
478                }
479
480                _ => {}
481            }
482            if let Some((id, range)) = payload.as_section() {
483                RawSection {
484                    id,
485                    data: &input[range],
486                }
487                .append_to(&mut output);
488            }
489        }
490
491        Ok(output)
492    }
493}
494
495fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
496    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
497    // contents of the computed fields, since we don't care what the existing values are.
498    if manifest.schema_version.is_v0() {
499        manifest.languages.clear();
500        manifest.grammars.clear();
501        manifest.themes.clear();
502    }
503
504    let cargo_toml_path = extension_path.join("Cargo.toml");
505    if cargo_toml_path.exists() {
506        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
507    }
508
509    let languages_dir = extension_path.join("languages");
510    if languages_dir.exists() {
511        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
512            let entry = entry?;
513            let language_dir = entry.path();
514            let config_path = language_dir.join("config.toml");
515            if config_path.exists() {
516                let relative_language_dir =
517                    language_dir.strip_prefix(extension_path)?.to_path_buf();
518                if !manifest.languages.contains(&relative_language_dir) {
519                    manifest.languages.push(relative_language_dir);
520                }
521            }
522        }
523    }
524
525    let themes_dir = extension_path.join("themes");
526    if themes_dir.exists() {
527        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
528            let entry = entry?;
529            let theme_path = entry.path();
530            if theme_path.extension() == Some("json".as_ref()) {
531                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
532                if !manifest.themes.contains(&relative_theme_path) {
533                    manifest.themes.push(relative_theme_path);
534                }
535            }
536        }
537    }
538
539    let icon_themes_dir = extension_path.join("icon_themes");
540    if icon_themes_dir.exists() {
541        for entry in fs::read_dir(&icon_themes_dir).context("failed to list icon themes dir")? {
542            let entry = entry?;
543            let icon_theme_path = entry.path();
544            if icon_theme_path.extension() == Some("json".as_ref()) {
545                let relative_icon_theme_path =
546                    icon_theme_path.strip_prefix(extension_path)?.to_path_buf();
547                if !manifest.icon_themes.contains(&relative_icon_theme_path) {
548                    manifest.icon_themes.push(relative_icon_theme_path);
549                }
550            }
551        }
552    }
553
554    let snippets_json_path = extension_path.join("snippets.json");
555    if snippets_json_path.exists() {
556        manifest.snippets = Some(snippets_json_path);
557    }
558
559    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
560    // the manifest using the contents of the `grammars` directory.
561    if manifest.schema_version.is_v0() {
562        let grammars_dir = extension_path.join("grammars");
563        if grammars_dir.exists() {
564            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
565                let entry = entry?;
566                let grammar_path = entry.path();
567                if grammar_path.extension() == Some("toml".as_ref()) {
568                    #[derive(Deserialize)]
569                    struct GrammarConfigToml {
570                        pub repository: String,
571                        pub commit: String,
572                        #[serde(default)]
573                        pub path: Option<String>,
574                    }
575
576                    let grammar_config = fs::read_to_string(&grammar_path)?;
577                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
578
579                    let grammar_name = grammar_path
580                        .file_stem()
581                        .and_then(|stem| stem.to_str())
582                        .context("no grammar name")?;
583                    if !manifest.grammars.contains_key(grammar_name) {
584                        manifest.grammars.insert(
585                            grammar_name.into(),
586                            GrammarManifestEntry {
587                                repository: grammar_config.repository,
588                                rev: grammar_config.commit,
589                                path: grammar_config.path,
590                            },
591                        );
592                    }
593                }
594            }
595        }
596    }
597
598    Ok(())
599}