extension_builder.rs

  1use crate::{
  2    ExtensionLibraryKind, ExtensionManifest, GrammarManifestEntry, build_debug_adapter_schema_path,
  3    parse_wasm_extension_version,
  4};
  5use anyhow::{Context as _, Result, bail};
  6use futures::AsyncReadExt;
  7use heck::ToSnakeCase;
  8use http_client::{self, AsyncBody, HttpClient};
  9use serde::Deserialize;
 10use std::{
 11    env, fs, mem,
 12    path::{Path, PathBuf},
 13    process::Stdio,
 14    str::FromStr,
 15    sync::Arc,
 16};
 17use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 18use wasmparser::Parser;
 19
/// The `--target` passed to `cargo build` when compiling an extension's Rust crate.
///
/// Currently, we compile with Rust's `wasm32-wasip2` target, which works with WASI `preview2` and the component model.
const RUST_TARGET: &str = "wasm32-wasip2";
 22
 23/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
 24/// and clang's runtime library. The `wasi-sdk` provides these binaries.
 25///
 26/// Once Clang 17 and its wasm target are available via system package managers, we won't need
 27/// to download this.
 28const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/";
 29const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(all(target_os = "macos", target_arch = "x86_64"))
 30{
 31    Some("wasi-sdk-25.0-x86_64-macos.tar.gz")
 32} else if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
 33    Some("wasi-sdk-25.0-arm64-macos.tar.gz")
 34} else if cfg!(all(target_os = "linux", target_arch = "x86_64")) {
 35    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
 36} else if cfg!(all(target_os = "linux", target_arch = "aarch64")) {
 37    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
 38} else if cfg!(all(target_os = "freebsd", target_arch = "x86_64")) {
 39    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
 40} else if cfg!(all(target_os = "freebsd", target_arch = "aarch64")) {
 41    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
 42} else if cfg!(all(target_os = "windows", target_arch = "x86_64")) {
 43    Some("wasi-sdk-25.0-x86_64-windows.tar.gz")
 44} else {
 45    None
 46};
 47
/// Compiles extensions: builds the extension's Rust crate into a WebAssembly component and
/// compiles its Tree-sitter grammars to WASM, fetching the required toolchain on demand.
pub struct ExtensionBuilder {
    /// Directory that is created on demand and holds the downloaded `wasi-sdk`.
    cache_dir: PathBuf,
    /// HTTP client used to download the `wasi-sdk` archive.
    pub http: Arc<dyn HttpClient>,
}
 52
/// Options controlling how an extension's Rust crate is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, `cargo build` is run with `--release` and the output is read from the
    /// `release` directory; otherwise the `debug` directory is used.
    pub release: bool,
}
 56
/// The subset of an extension's `Cargo.toml` that the builder deserializes.
#[derive(Deserialize)]
struct CargoToml {
    /// The `package` table of the manifest.
    package: CargoTomlPackage,
}
 61
/// The subset of the `package` table in `Cargo.toml` that the builder deserializes.
#[derive(Deserialize)]
struct CargoTomlPackage {
    /// Package name; used (with `-` replaced by `_`) to locate the compiled `.wasm` file.
    name: String,
}
 66
 67impl ExtensionBuilder {
 68    pub fn new(http_client: Arc<dyn HttpClient>, cache_dir: PathBuf) -> Self {
 69        Self {
 70            cache_dir,
 71            http: http_client,
 72        }
 73    }
 74
 75    pub async fn compile_extension(
 76        &self,
 77        extension_dir: &Path,
 78        extension_manifest: &mut ExtensionManifest,
 79        options: CompileExtensionOptions,
 80    ) -> Result<()> {
 81        populate_defaults(extension_manifest, extension_dir)?;
 82
 83        if extension_dir.is_relative() {
 84            bail!(
 85                "extension dir {} is not an absolute path",
 86                extension_dir.display()
 87            );
 88        }
 89
 90        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 91
 92        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
 93            log::info!("compiling Rust extension {}", extension_dir.display());
 94            self.compile_rust_extension(extension_dir, extension_manifest, options)
 95                .await
 96                .context("failed to compile Rust extension")?;
 97            log::info!("compiled Rust extension {}", extension_dir.display());
 98        }
 99
100        for (debug_adapter_name, meta) in &mut extension_manifest.debug_adapters {
101            let debug_adapter_schema_path =
102                extension_dir.join(build_debug_adapter_schema_path(debug_adapter_name, meta));
103
104            let debug_adapter_schema = fs::read_to_string(&debug_adapter_schema_path)
105                .with_context(|| {
106                    format!("failed to read debug adapter schema for `{debug_adapter_name}` from `{debug_adapter_schema_path:?}`")
107                })?;
108            _ = serde_json::Value::from_str(&debug_adapter_schema).with_context(|| {
109                format!("Debug adapter schema for `{debug_adapter_name}` (path: `{debug_adapter_schema_path:?}`) is not a valid JSON")
110            })?;
111        }
112        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
113            let snake_cased_grammar_name = grammar_name.to_snake_case();
114            if grammar_name.as_ref() != snake_cased_grammar_name.as_str() {
115                bail!(
116                    "grammar name '{grammar_name}' must be written in snake_case: {snake_cased_grammar_name}"
117                );
118            }
119
120            log::info!(
121                "compiling grammar {grammar_name} for extension {}",
122                extension_dir.display()
123            );
124            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
125                .await
126                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
127            log::info!(
128                "compiled grammar {grammar_name} for extension {}",
129                extension_dir.display()
130            );
131        }
132
133        log::info!("finished compiling extension {}", extension_dir.display());
134        Ok(())
135    }
136
137    async fn compile_rust_extension(
138        &self,
139        extension_dir: &Path,
140        manifest: &mut ExtensionManifest,
141        options: CompileExtensionOptions,
142    ) -> anyhow::Result<()> {
143        self.install_rust_wasm_target_if_needed().await?;
144
145        let cargo_toml_content = fs::read_to_string(extension_dir.join("Cargo.toml"))?;
146        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
147
148        log::info!(
149            "compiling Rust crate for extension {}",
150            extension_dir.display()
151        );
152        let output = util::command::new_smol_command("cargo")
153            .args(["build", "--target", RUST_TARGET])
154            .args(options.release.then_some("--release"))
155            .arg("--target-dir")
156            .arg(extension_dir.join("target"))
157            // WASI builds do not work with sccache and just stuck, so disable it.
158            .env("RUSTC_WRAPPER", "")
159            .current_dir(extension_dir)
160            .output()
161            .await
162            .context("failed to run `cargo`")?;
163        if !output.status.success() {
164            bail!(
165                "failed to build extension {}",
166                String::from_utf8_lossy(&output.stderr)
167            );
168        }
169
170        log::info!(
171            "compiled Rust crate for extension {}",
172            extension_dir.display()
173        );
174
175        let mut wasm_path = PathBuf::from(extension_dir);
176        wasm_path.extend([
177            "target",
178            RUST_TARGET,
179            if options.release { "release" } else { "debug" },
180            &cargo_toml
181                .package
182                .name
183                // The wasm32-wasip2 target normalizes `-` in package names to `_` in the resulting `.wasm` file.
184                .replace('-', "_"),
185        ]);
186        wasm_path.set_extension("wasm");
187
188        log::info!(
189            "encoding wasm component for extension {}",
190            extension_dir.display()
191        );
192
193        let component_bytes = fs::read(&wasm_path)
194            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
195
196        let component_bytes = self
197            .strip_custom_sections(&component_bytes)
198            .context("failed to strip debug sections from wasm component")?;
199
200        let wasm_extension_api_version =
201            parse_wasm_extension_version(&manifest.id, &component_bytes)
202                .context("compiled wasm did not contain a valid zed extension api version")?;
203        manifest.lib.version = Some(wasm_extension_api_version);
204
205        let extension_file = extension_dir.join("extension.wasm");
206        fs::write(extension_file.clone(), &component_bytes)
207            .context("failed to write extension.wasm")?;
208
209        log::info!(
210            "extension {} written to {}",
211            extension_dir.display(),
212            extension_file.display()
213        );
214
215        Ok(())
216    }
217
218    async fn compile_grammar(
219        &self,
220        extension_dir: &Path,
221        grammar_name: &str,
222        grammar_metadata: &GrammarManifestEntry,
223    ) -> Result<()> {
224        let clang_path = self.install_wasi_sdk_if_needed().await?;
225
226        let mut grammar_repo_dir = extension_dir.to_path_buf();
227        grammar_repo_dir.extend(["grammars", grammar_name]);
228
229        let mut grammar_wasm_path = grammar_repo_dir.clone();
230        grammar_wasm_path.set_extension("wasm");
231
232        log::info!("checking out {grammar_name} parser");
233        self.checkout_repo(
234            &grammar_repo_dir,
235            &grammar_metadata.repository,
236            &grammar_metadata.rev,
237        )
238        .await?;
239
240        let base_grammar_path = grammar_metadata
241            .path
242            .as_ref()
243            .map(|path| grammar_repo_dir.join(path))
244            .unwrap_or(grammar_repo_dir);
245
246        let src_path = base_grammar_path.join("src");
247        let parser_path = src_path.join("parser.c");
248        let scanner_path = src_path.join("scanner.c");
249
250        log::info!("compiling {grammar_name} parser");
251        let clang_output = util::command::new_smol_command(&clang_path)
252            .args(["-fPIC", "-shared", "-Os"])
253            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
254            .arg("-o")
255            .arg(&grammar_wasm_path)
256            .arg("-I")
257            .arg(&src_path)
258            .arg(&parser_path)
259            .args(scanner_path.exists().then_some(scanner_path))
260            .output()
261            .await
262            .context("failed to run clang")?;
263
264        if !clang_output.status.success() {
265            bail!(
266                "failed to compile {} parser with clang: {}",
267                grammar_name,
268                String::from_utf8_lossy(&clang_output.stderr),
269            );
270        }
271
272        Ok(())
273    }
274
275    async fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
276        let git_dir = directory.join(".git");
277
278        if directory.exists() {
279            let remotes_output = util::command::new_smol_command("git")
280                .arg("--git-dir")
281                .arg(&git_dir)
282                .args(["remote", "-v"])
283                .output()
284                .await?;
285            let has_remote = remotes_output.status.success()
286                && String::from_utf8_lossy(&remotes_output.stdout)
287                    .lines()
288                    .any(|line| {
289                        let mut parts = line.split(|c: char| c.is_whitespace());
290                        parts.next() == Some("origin") && parts.any(|part| part == url)
291                    });
292            if !has_remote {
293                bail!(
294                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
295                    directory.display(),
296                    url
297                );
298            }
299        } else {
300            fs::create_dir_all(directory).with_context(|| {
301                format!("failed to create grammar directory {}", directory.display(),)
302            })?;
303            let init_output = util::command::new_smol_command("git")
304                .arg("init")
305                .current_dir(directory)
306                .output()
307                .await?;
308            if !init_output.status.success() {
309                bail!(
310                    "failed to run `git init` in directory '{}'",
311                    directory.display()
312                );
313            }
314
315            let remote_add_output = util::command::new_smol_command("git")
316                .arg("--git-dir")
317                .arg(&git_dir)
318                .args(["remote", "add", "origin", url])
319                .output()
320                .await
321                .context("failed to execute `git remote add`")?;
322            if !remote_add_output.status.success() {
323                bail!(
324                    "failed to add remote {url} for git repository {}",
325                    git_dir.display()
326                );
327            }
328        }
329
330        let fetch_output = util::command::new_smol_command("git")
331            .arg("--git-dir")
332            .arg(&git_dir)
333            .args(["fetch", "--depth", "1", "origin", rev])
334            .output()
335            .await
336            .context("failed to execute `git fetch`")?;
337
338        let checkout_output = util::command::new_smol_command("git")
339            .arg("--git-dir")
340            .arg(&git_dir)
341            .args(["checkout", rev])
342            .current_dir(directory)
343            .output()
344            .await
345            .context("failed to execute `git checkout`")?;
346        if !checkout_output.status.success() {
347            if !fetch_output.status.success() {
348                bail!(
349                    "failed to fetch revision {} in directory '{}'",
350                    rev,
351                    directory.display()
352                );
353            }
354            bail!(
355                "failed to checkout revision {} in directory '{}': {}",
356                rev,
357                directory.display(),
358                String::from_utf8_lossy(&checkout_output.stderr)
359            );
360        }
361
362        Ok(())
363    }
364
365    async fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
366        let rustc_output = util::command::new_smol_command("rustc")
367            .arg("--print")
368            .arg("sysroot")
369            .output()
370            .await
371            .context("failed to run rustc")?;
372        if !rustc_output.status.success() {
373            bail!(
374                "failed to retrieve rust sysroot: {}",
375                String::from_utf8_lossy(&rustc_output.stderr)
376            );
377        }
378
379        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
380        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
381            return Ok(());
382        }
383
384        let output = util::command::new_smol_command("rustup")
385            .args(["target", "add", RUST_TARGET])
386            .stderr(Stdio::piped())
387            .stdout(Stdio::inherit())
388            .output()
389            .await
390            .context("failed to run `rustup target add`")?;
391        if !output.status.success() {
392            bail!(
393                "failed to install the `{RUST_TARGET}` target: {}",
394                String::from_utf8_lossy(&rustc_output.stderr)
395            );
396        }
397
398        Ok(())
399    }
400
401    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
402        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
403            format!("{WASI_SDK_URL}{asset_name}")
404        } else {
405            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
406        };
407
408        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
409        let mut clang_path = wasi_sdk_dir.clone();
410        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
411
412        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
413
414        if fs::metadata(&clang_path).is_ok_and(|metadata| metadata.is_file()) {
415            return Ok(clang_path);
416        }
417
418        let tar_out_dir = self.cache_dir.join("wasi-sdk-temp");
419
420        fs::remove_dir_all(&wasi_sdk_dir).ok();
421        fs::remove_dir_all(&tar_out_dir).ok();
422        fs::create_dir_all(&tar_out_dir).context("failed to create extraction directory")?;
423
424        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
425
426        // Write the response to a temporary file
427        let tar_gz_path = self.cache_dir.join("wasi-sdk.tar.gz");
428        let mut tar_gz_file =
429            fs::File::create(&tar_gz_path).context("failed to create temporary tar.gz file")?;
430        let response_body = response.body_mut();
431        let mut body_bytes = Vec::new();
432        response_body.read_to_end(&mut body_bytes).await?;
433        std::io::Write::write_all(&mut tar_gz_file, &body_bytes)?;
434        drop(tar_gz_file);
435
436        log::info!("un-tarring wasi-sdk to {}", tar_out_dir.display());
437
438        // Shell out to tar to extract the archive
439        let tar_output = util::command::new_smol_command("tar")
440            .arg("-xzf")
441            .arg(&tar_gz_path)
442            .arg("-C")
443            .arg(&tar_out_dir)
444            .output()
445            .await
446            .context("failed to run tar")?;
447
448        if !tar_output.status.success() {
449            bail!(
450                "failed to extract wasi-sdk archive: {}",
451                String::from_utf8_lossy(&tar_output.stderr)
452            );
453        }
454
455        log::info!("finished downloading wasi-sdk");
456
457        // Clean up the temporary tar.gz file
458        fs::remove_file(&tar_gz_path).ok();
459
460        let inner_dir = fs::read_dir(&tar_out_dir)?
461            .next()
462            .context("no content")?
463            .context("failed to read contents of extracted wasi archive directory")?
464            .path();
465        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
466        fs::remove_dir_all(&tar_out_dir).ok();
467
468        Ok(clang_path)
469    }
470
471    // This was adapted from:
472    // https://github.com/bytecodealliance/wasm-tools/blob/e8809bb17fcf69aa8c85cd5e6db7cff5cf36b1de/src/bin/wasm-tools/strip.rs
473    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
474        use wasmparser::Payload::*;
475
476        let strip_custom_section = |name: &str| {
477            // Default strip everything but:
478            // * the `name` section
479            // * any `component-type` sections
480            // * the `dylink.0` section
481            // * our custom version section
482            name != "name"
483                && !name.starts_with("component-type:")
484                && name != "dylink.0"
485                && name != "zed:api-version"
486        };
487
488        let mut output = Vec::new();
489        let mut stack = Vec::new();
490
491        for payload in Parser::new(0).parse_all(input) {
492            let payload = payload?;
493
494            // Track nesting depth, so that we don't mess with inner producer sections:
495            match payload {
496                Version { encoding, .. } => {
497                    output.extend_from_slice(match encoding {
498                        wasmparser::Encoding::Component => &wasm_encoder::Component::HEADER,
499                        wasmparser::Encoding::Module => &wasm_encoder::Module::HEADER,
500                    });
501                }
502                ModuleSection { .. } | ComponentSection { .. } => {
503                    stack.push(mem::take(&mut output));
504                    continue;
505                }
506                End { .. } => {
507                    let mut parent = match stack.pop() {
508                        Some(c) => c,
509                        None => break,
510                    };
511                    if output.starts_with(&wasm_encoder::Component::HEADER) {
512                        parent.push(ComponentSectionId::Component as u8);
513                        output.encode(&mut parent);
514                    } else {
515                        parent.push(ComponentSectionId::CoreModule as u8);
516                        output.encode(&mut parent);
517                    }
518                    output = parent;
519                }
520                _ => {}
521            }
522
523            if let CustomSection(c) = &payload
524                && strip_custom_section(c.name())
525            {
526                continue;
527            }
528            if let Some((id, range)) = payload.as_section() {
529                RawSection {
530                    id,
531                    data: &input[range],
532                }
533                .append_to(&mut output);
534            }
535        }
536
537        Ok(output)
538    }
539}
540
541fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
542    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
543    // contents of the computed fields, since we don't care what the existing values are.
544    if manifest.schema_version.is_v0() {
545        manifest.languages.clear();
546        manifest.grammars.clear();
547        manifest.themes.clear();
548    }
549
550    let cargo_toml_path = extension_path.join("Cargo.toml");
551    if cargo_toml_path.exists() {
552        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
553    }
554
555    let languages_dir = extension_path.join("languages");
556    if languages_dir.exists() {
557        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
558            let entry = entry?;
559            let language_dir = entry.path();
560            let config_path = language_dir.join("config.toml");
561            if config_path.exists() {
562                let relative_language_dir =
563                    language_dir.strip_prefix(extension_path)?.to_path_buf();
564                if !manifest.languages.contains(&relative_language_dir) {
565                    manifest.languages.push(relative_language_dir);
566                }
567            }
568        }
569    }
570
571    let themes_dir = extension_path.join("themes");
572    if themes_dir.exists() {
573        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
574            let entry = entry?;
575            let theme_path = entry.path();
576            if theme_path.extension() == Some("json".as_ref()) {
577                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
578                if !manifest.themes.contains(&relative_theme_path) {
579                    manifest.themes.push(relative_theme_path);
580                }
581            }
582        }
583    }
584
585    let icon_themes_dir = extension_path.join("icon_themes");
586    if icon_themes_dir.exists() {
587        for entry in fs::read_dir(&icon_themes_dir).context("failed to list icon themes dir")? {
588            let entry = entry?;
589            let icon_theme_path = entry.path();
590            if icon_theme_path.extension() == Some("json".as_ref()) {
591                let relative_icon_theme_path =
592                    icon_theme_path.strip_prefix(extension_path)?.to_path_buf();
593                if !manifest.icon_themes.contains(&relative_icon_theme_path) {
594                    manifest.icon_themes.push(relative_icon_theme_path);
595                }
596            }
597        }
598    }
599
600    let snippets_json_path = extension_path.join("snippets.json");
601    if snippets_json_path.exists() {
602        manifest.snippets = Some(snippets_json_path);
603    }
604
605    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
606    // the manifest using the contents of the `grammars` directory.
607    if manifest.schema_version.is_v0() {
608        let grammars_dir = extension_path.join("grammars");
609        if grammars_dir.exists() {
610            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
611                let entry = entry?;
612                let grammar_path = entry.path();
613                if grammar_path.extension() == Some("toml".as_ref()) {
614                    #[derive(Deserialize)]
615                    struct GrammarConfigToml {
616                        pub repository: String,
617                        pub commit: String,
618                        #[serde(default)]
619                        pub path: Option<String>,
620                    }
621
622                    let grammar_config = fs::read_to_string(&grammar_path)?;
623                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
624
625                    let grammar_name = grammar_path
626                        .file_stem()
627                        .and_then(|stem| stem.to_str())
628                        .context("no grammar name")?;
629                    if !manifest.grammars.contains_key(grammar_name) {
630                        manifest.grammars.insert(
631                            grammar_name.into(),
632                            GrammarManifestEntry {
633                                repository: grammar_config.repository,
634                                rev: grammar_config.commit,
635                                path: grammar_config.path,
636                            },
637                        );
638                    }
639                }
640            }
641        }
642    }
643
644    Ok(())
645}