extension_builder.rs

  1use crate::{
  2    ExtensionLibraryKind, ExtensionManifest, GrammarManifestEntry, parse_wasm_extension_version,
  3};
  4use anyhow::{Context as _, Result, bail};
  5use async_compression::futures::bufread::GzipDecoder;
  6use async_tar::Archive;
  7use futures::io::BufReader;
  8use heck::ToSnakeCase;
  9use http_client::{self, AsyncBody, HttpClient};
 10use serde::Deserialize;
 11use std::{
 12    env, fs, mem,
 13    path::{Path, PathBuf},
 14    process::Stdio,
 15    str::FromStr,
 16    sync::Arc,
 17};
 18use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 19use wasmparser::Parser;
 20
/// Currently, we compile with Rust's `wasm32-wasip2` target, which works with WASI `preview2` and the component model.
const RUST_TARGET: &str = "wasm32-wasip2";

/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
/// and clang's runtime library. The `wasi-sdk` provides these binaries.
///
/// Once Clang 17 and its wasm target are available via system package managers, we won't need
/// to download this.
///
/// This is the base URL of the release; the platform-specific asset name is appended to it
/// to form the download URL.
const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/";
/// File name of the `wasi-sdk` release archive for the platform this crate is compiled for,
/// or `None` when no archive is mapped for that OS/architecture combination.
///
/// NOTE(review): FreeBSD is mapped to the Linux archives, presumably relying on Linux binary
/// compatibility on the host; confirm this is intended.
const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(all(target_os = "macos", target_arch = "x86_64"))
{
    Some("wasi-sdk-25.0-x86_64-macos.tar.gz")
} else if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
    Some("wasi-sdk-25.0-arm64-macos.tar.gz")
} else if cfg!(all(target_os = "linux", target_arch = "x86_64")) {
    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
} else if cfg!(all(target_os = "linux", target_arch = "aarch64")) {
    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
} else if cfg!(all(target_os = "freebsd", target_arch = "x86_64")) {
    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
} else if cfg!(all(target_os = "freebsd", target_arch = "aarch64")) {
    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
} else if cfg!(all(target_os = "windows", target_arch = "x86_64")) {
    Some("wasi-sdk-25.0-x86_64-windows.tar.gz")
} else {
    None
};
 48
/// Compiles an extension's Rust library and Tree-sitter grammars to WebAssembly.
pub struct ExtensionBuilder {
    /// Directory in which downloaded tooling (the `wasi-sdk`) is stored and reused.
    cache_dir: PathBuf,
    /// HTTP client used to download the `wasi-sdk` archive.
    pub http: Arc<dyn HttpClient>,
}
 53
/// Options controlling how an extension is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, the Rust crate is built with `cargo build --release` and the output is
    /// read from the `release` directory; otherwise the `debug` directory is used.
    pub release: bool,
}
 57
/// The subset of an extension's `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoToml {
    /// The `[package]` table.
    package: CargoTomlPackage,
}
 62
/// The subset of the `[package]` table of `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoTomlPackage {
    /// Package name; used (with `-` replaced by `_`) to locate the compiled `.wasm` file.
    name: String,
}
 67
 68impl ExtensionBuilder {
 69    pub fn new(http_client: Arc<dyn HttpClient>, cache_dir: PathBuf) -> Self {
 70        Self {
 71            cache_dir,
 72            http: http_client,
 73        }
 74    }
 75
 76    pub async fn compile_extension(
 77        &self,
 78        extension_dir: &Path,
 79        extension_manifest: &mut ExtensionManifest,
 80        options: CompileExtensionOptions,
 81    ) -> Result<()> {
 82        populate_defaults(extension_manifest, extension_dir)?;
 83
 84        if extension_dir.is_relative() {
 85            bail!(
 86                "extension dir {} is not an absolute path",
 87                extension_dir.display()
 88            );
 89        }
 90
 91        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 92
 93        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
 94            log::info!("compiling Rust extension {}", extension_dir.display());
 95            self.compile_rust_extension(extension_dir, extension_manifest, options)
 96                .await
 97                .context("failed to compile Rust extension")?;
 98            log::info!("compiled Rust extension {}", extension_dir.display());
 99        }
100
101        for (debug_adapter_name, meta) in &mut extension_manifest.debug_adapters {
102            let debug_adapter_relative_schema_path =
103                meta.schema_path.clone().unwrap_or_else(|| {
104                    Path::new("debug_adapter_schemas")
105                        .join(Path::new(debug_adapter_name.as_ref()).with_extension("json"))
106                });
107            let debug_adapter_schema_path = extension_dir.join(debug_adapter_relative_schema_path);
108
109            let debug_adapter_schema = fs::read_to_string(&debug_adapter_schema_path)
110                .with_context(|| {
111                    format!("failed to read debug adapter schema for `{debug_adapter_name}` from `{debug_adapter_schema_path:?}`")
112                })?;
113            _ = serde_json::Value::from_str(&debug_adapter_schema).with_context(|| {
114                format!("Debug adapter schema for `{debug_adapter_name}` (path: `{debug_adapter_schema_path:?}`) is not a valid JSON")
115            })?;
116        }
117        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
118            let snake_cased_grammar_name = grammar_name.to_snake_case();
119            if grammar_name.as_ref() != snake_cased_grammar_name.as_str() {
120                bail!(
121                    "grammar name '{grammar_name}' must be written in snake_case: {snake_cased_grammar_name}"
122                );
123            }
124
125            log::info!(
126                "compiling grammar {grammar_name} for extension {}",
127                extension_dir.display()
128            );
129            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
130                .await
131                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
132            log::info!(
133                "compiled grammar {grammar_name} for extension {}",
134                extension_dir.display()
135            );
136        }
137
138        log::info!("finished compiling extension {}", extension_dir.display());
139        Ok(())
140    }
141
142    async fn compile_rust_extension(
143        &self,
144        extension_dir: &Path,
145        manifest: &mut ExtensionManifest,
146        options: CompileExtensionOptions,
147    ) -> anyhow::Result<()> {
148        self.install_rust_wasm_target_if_needed()?;
149
150        let cargo_toml_content = fs::read_to_string(extension_dir.join("Cargo.toml"))?;
151        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
152
153        log::info!(
154            "compiling Rust crate for extension {}",
155            extension_dir.display()
156        );
157        let output = util::command::new_std_command("cargo")
158            .args(["build", "--target", RUST_TARGET])
159            .args(options.release.then_some("--release"))
160            .arg("--target-dir")
161            .arg(extension_dir.join("target"))
162            // WASI builds do not work with sccache and just stuck, so disable it.
163            .env("RUSTC_WRAPPER", "")
164            .current_dir(extension_dir)
165            .output()
166            .context("failed to run `cargo`")?;
167        if !output.status.success() {
168            bail!(
169                "failed to build extension {}",
170                String::from_utf8_lossy(&output.stderr)
171            );
172        }
173
174        log::info!(
175            "compiled Rust crate for extension {}",
176            extension_dir.display()
177        );
178
179        let mut wasm_path = PathBuf::from(extension_dir);
180        wasm_path.extend([
181            "target",
182            RUST_TARGET,
183            if options.release { "release" } else { "debug" },
184            &cargo_toml
185                .package
186                .name
187                // The wasm32-wasip2 target normalizes `-` in package names to `_` in the resulting `.wasm` file.
188                .replace('-', "_"),
189        ]);
190        wasm_path.set_extension("wasm");
191
192        log::info!(
193            "encoding wasm component for extension {}",
194            extension_dir.display()
195        );
196
197        let component_bytes = fs::read(&wasm_path)
198            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
199
200        let component_bytes = self
201            .strip_custom_sections(&component_bytes)
202            .context("failed to strip debug sections from wasm component")?;
203
204        let wasm_extension_api_version =
205            parse_wasm_extension_version(&manifest.id, &component_bytes)
206                .context("compiled wasm did not contain a valid zed extension api version")?;
207        manifest.lib.version = Some(wasm_extension_api_version);
208
209        let extension_file = extension_dir.join("extension.wasm");
210        fs::write(extension_file.clone(), &component_bytes)
211            .context("failed to write extension.wasm")?;
212
213        log::info!(
214            "extension {} written to {}",
215            extension_dir.display(),
216            extension_file.display()
217        );
218
219        Ok(())
220    }
221
222    async fn compile_grammar(
223        &self,
224        extension_dir: &Path,
225        grammar_name: &str,
226        grammar_metadata: &GrammarManifestEntry,
227    ) -> Result<()> {
228        let clang_path = self.install_wasi_sdk_if_needed().await?;
229
230        let mut grammar_repo_dir = extension_dir.to_path_buf();
231        grammar_repo_dir.extend(["grammars", grammar_name]);
232
233        let mut grammar_wasm_path = grammar_repo_dir.clone();
234        grammar_wasm_path.set_extension("wasm");
235
236        log::info!("checking out {grammar_name} parser");
237        self.checkout_repo(
238            &grammar_repo_dir,
239            &grammar_metadata.repository,
240            &grammar_metadata.rev,
241        )?;
242
243        let base_grammar_path = grammar_metadata
244            .path
245            .as_ref()
246            .map(|path| grammar_repo_dir.join(path))
247            .unwrap_or(grammar_repo_dir);
248
249        let src_path = base_grammar_path.join("src");
250        let parser_path = src_path.join("parser.c");
251        let scanner_path = src_path.join("scanner.c");
252
253        log::info!("compiling {grammar_name} parser");
254        let clang_output = util::command::new_std_command(&clang_path)
255            .args(["-fPIC", "-shared", "-Os"])
256            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
257            .arg("-o")
258            .arg(&grammar_wasm_path)
259            .arg("-I")
260            .arg(&src_path)
261            .arg(&parser_path)
262            .args(scanner_path.exists().then_some(scanner_path))
263            .output()
264            .context("failed to run clang")?;
265
266        if !clang_output.status.success() {
267            bail!(
268                "failed to compile {} parser with clang: {}",
269                grammar_name,
270                String::from_utf8_lossy(&clang_output.stderr),
271            );
272        }
273
274        Ok(())
275    }
276
277    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
278        let git_dir = directory.join(".git");
279
280        if directory.exists() {
281            let remotes_output = util::command::new_std_command("git")
282                .arg("--git-dir")
283                .arg(&git_dir)
284                .args(["remote", "-v"])
285                .output()?;
286            let has_remote = remotes_output.status.success()
287                && String::from_utf8_lossy(&remotes_output.stdout)
288                    .lines()
289                    .any(|line| {
290                        let mut parts = line.split(|c: char| c.is_whitespace());
291                        parts.next() == Some("origin") && parts.any(|part| part == url)
292                    });
293            if !has_remote {
294                bail!(
295                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
296                    directory.display(),
297                    url
298                );
299            }
300        } else {
301            fs::create_dir_all(directory).with_context(|| {
302                format!("failed to create grammar directory {}", directory.display(),)
303            })?;
304            let init_output = util::command::new_std_command("git")
305                .arg("init")
306                .current_dir(directory)
307                .output()?;
308            if !init_output.status.success() {
309                bail!(
310                    "failed to run `git init` in directory '{}'",
311                    directory.display()
312                );
313            }
314
315            let remote_add_output = util::command::new_std_command("git")
316                .arg("--git-dir")
317                .arg(&git_dir)
318                .args(["remote", "add", "origin", url])
319                .output()
320                .context("failed to execute `git remote add`")?;
321            if !remote_add_output.status.success() {
322                bail!(
323                    "failed to add remote {url} for git repository {}",
324                    git_dir.display()
325                );
326            }
327        }
328
329        let fetch_output = util::command::new_std_command("git")
330            .arg("--git-dir")
331            .arg(&git_dir)
332            .args(["fetch", "--depth", "1", "origin", rev])
333            .output()
334            .context("failed to execute `git fetch`")?;
335
336        let checkout_output = util::command::new_std_command("git")
337            .arg("--git-dir")
338            .arg(&git_dir)
339            .args(["checkout", rev])
340            .current_dir(directory)
341            .output()
342            .context("failed to execute `git checkout`")?;
343        if !checkout_output.status.success() {
344            if !fetch_output.status.success() {
345                bail!(
346                    "failed to fetch revision {} in directory '{}'",
347                    rev,
348                    directory.display()
349                );
350            }
351            bail!(
352                "failed to checkout revision {} in directory '{}': {}",
353                rev,
354                directory.display(),
355                String::from_utf8_lossy(&checkout_output.stderr)
356            );
357        }
358
359        Ok(())
360    }
361
362    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
363        let rustc_output = util::command::new_std_command("rustc")
364            .arg("--print")
365            .arg("sysroot")
366            .output()
367            .context("failed to run rustc")?;
368        if !rustc_output.status.success() {
369            bail!(
370                "failed to retrieve rust sysroot: {}",
371                String::from_utf8_lossy(&rustc_output.stderr)
372            );
373        }
374
375        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
376        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
377            return Ok(());
378        }
379
380        let output = util::command::new_std_command("rustup")
381            .args(["target", "add", RUST_TARGET])
382            .stderr(Stdio::piped())
383            .stdout(Stdio::inherit())
384            .output()
385            .context("failed to run `rustup target add`")?;
386        if !output.status.success() {
387            bail!(
388                "failed to install the `{RUST_TARGET}` target: {}",
389                String::from_utf8_lossy(&rustc_output.stderr)
390            );
391        }
392
393        Ok(())
394    }
395
396    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
397        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
398            format!("{WASI_SDK_URL}{asset_name}")
399        } else {
400            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
401        };
402
403        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
404        let mut clang_path = wasi_sdk_dir.clone();
405        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
406
407        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
408            return Ok(clang_path);
409        }
410
411        let mut tar_out_dir = wasi_sdk_dir.clone();
412        tar_out_dir.set_extension("archive");
413
414        fs::remove_dir_all(&wasi_sdk_dir).ok();
415        fs::remove_dir_all(&tar_out_dir).ok();
416
417        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
418        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
419        let body = BufReader::new(response.body_mut());
420        let body = GzipDecoder::new(body);
421        let tar = Archive::new(body);
422
423        tar.unpack(&tar_out_dir)
424            .await
425            .context("failed to unpack wasi-sdk archive")?;
426
427        let inner_dir = fs::read_dir(&tar_out_dir)?
428            .next()
429            .context("no content")?
430            .context("failed to read contents of extracted wasi archive directory")?
431            .path();
432        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
433        fs::remove_dir_all(&tar_out_dir).ok();
434
435        Ok(clang_path)
436    }
437
438    // This was adapted from:
439    // https://github.com/bytecodealliance/wasm-tools/blob/e8809bb17fcf69aa8c85cd5e6db7cff5cf36b1de/src/bin/wasm-tools/strip.rs
440    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
441        use wasmparser::Payload::*;
442
443        let strip_custom_section = |name: &str| {
444            // Default strip everything but:
445            // * the `name` section
446            // * any `component-type` sections
447            // * the `dylink.0` section
448            // * our custom version section
449            name != "name"
450                && !name.starts_with("component-type:")
451                && name != "dylink.0"
452                && name != "zed:api-version"
453        };
454
455        let mut output = Vec::new();
456        let mut stack = Vec::new();
457
458        for payload in Parser::new(0).parse_all(&input) {
459            let payload = payload?;
460
461            // Track nesting depth, so that we don't mess with inner producer sections:
462            match payload {
463                Version { encoding, .. } => {
464                    output.extend_from_slice(match encoding {
465                        wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
466                        wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
467                    });
468                }
469                ModuleSection { .. } | ComponentSection { .. } => {
470                    stack.push(mem::take(&mut output));
471                    continue;
472                }
473                End { .. } => {
474                    let mut parent = match stack.pop() {
475                        Some(c) => c,
476                        None => break,
477                    };
478                    if output.starts_with(&wasm_encoder::Component::HEADER) {
479                        parent.push(ComponentSectionId::Component as u8);
480                        output.encode(&mut parent);
481                    } else {
482                        parent.push(ComponentSectionId::CoreModule as u8);
483                        output.encode(&mut parent);
484                    }
485                    output = parent;
486                }
487                _ => {}
488            }
489
490            match &payload {
491                CustomSection(c) => {
492                    if strip_custom_section(c.name()) {
493                        continue;
494                    }
495                }
496
497                _ => {}
498            }
499            if let Some((id, range)) = payload.as_section() {
500                RawSection {
501                    id,
502                    data: &input[range],
503                }
504                .append_to(&mut output);
505            }
506        }
507
508        Ok(output)
509    }
510}
511
512fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
513    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
514    // contents of the computed fields, since we don't care what the existing values are.
515    if manifest.schema_version.is_v0() {
516        manifest.languages.clear();
517        manifest.grammars.clear();
518        manifest.themes.clear();
519    }
520
521    let cargo_toml_path = extension_path.join("Cargo.toml");
522    if cargo_toml_path.exists() {
523        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
524    }
525
526    let languages_dir = extension_path.join("languages");
527    if languages_dir.exists() {
528        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
529            let entry = entry?;
530            let language_dir = entry.path();
531            let config_path = language_dir.join("config.toml");
532            if config_path.exists() {
533                let relative_language_dir =
534                    language_dir.strip_prefix(extension_path)?.to_path_buf();
535                if !manifest.languages.contains(&relative_language_dir) {
536                    manifest.languages.push(relative_language_dir);
537                }
538            }
539        }
540    }
541
542    let themes_dir = extension_path.join("themes");
543    if themes_dir.exists() {
544        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
545            let entry = entry?;
546            let theme_path = entry.path();
547            if theme_path.extension() == Some("json".as_ref()) {
548                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
549                if !manifest.themes.contains(&relative_theme_path) {
550                    manifest.themes.push(relative_theme_path);
551                }
552            }
553        }
554    }
555
556    let icon_themes_dir = extension_path.join("icon_themes");
557    if icon_themes_dir.exists() {
558        for entry in fs::read_dir(&icon_themes_dir).context("failed to list icon themes dir")? {
559            let entry = entry?;
560            let icon_theme_path = entry.path();
561            if icon_theme_path.extension() == Some("json".as_ref()) {
562                let relative_icon_theme_path =
563                    icon_theme_path.strip_prefix(extension_path)?.to_path_buf();
564                if !manifest.icon_themes.contains(&relative_icon_theme_path) {
565                    manifest.icon_themes.push(relative_icon_theme_path);
566                }
567            }
568        }
569    }
570
571    let snippets_json_path = extension_path.join("snippets.json");
572    if snippets_json_path.exists() {
573        manifest.snippets = Some(snippets_json_path);
574    }
575
576    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
577    // the manifest using the contents of the `grammars` directory.
578    if manifest.schema_version.is_v0() {
579        let grammars_dir = extension_path.join("grammars");
580        if grammars_dir.exists() {
581            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
582                let entry = entry?;
583                let grammar_path = entry.path();
584                if grammar_path.extension() == Some("toml".as_ref()) {
585                    #[derive(Deserialize)]
586                    struct GrammarConfigToml {
587                        pub repository: String,
588                        pub commit: String,
589                        #[serde(default)]
590                        pub path: Option<String>,
591                    }
592
593                    let grammar_config = fs::read_to_string(&grammar_path)?;
594                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
595
596                    let grammar_name = grammar_path
597                        .file_stem()
598                        .and_then(|stem| stem.to_str())
599                        .context("no grammar name")?;
600                    if !manifest.grammars.contains_key(grammar_name) {
601                        manifest.grammars.insert(
602                            grammar_name.into(),
603                            GrammarManifestEntry {
604                                repository: grammar_config.repository,
605                                rev: grammar_config.commit,
606                                path: grammar_config.path,
607                            },
608                        );
609                    }
610                }
611            }
612        }
613    }
614
615    Ok(())
616}