extension_builder.rs

  1use crate::{
  2    ExtensionLibraryKind, ExtensionManifest, GrammarManifestEntry, parse_wasm_extension_version,
  3};
  4use anyhow::{Context as _, Result, anyhow, bail};
  5use async_compression::futures::bufread::GzipDecoder;
  6use async_tar::Archive;
  7use futures::AsyncReadExt;
  8use futures::io::BufReader;
  9use heck::ToSnakeCase;
 10use http_client::{self, AsyncBody, HttpClient};
 11use serde::Deserialize;
 12use std::{
 13    env, fs, mem,
 14    path::{Path, PathBuf},
 15    process::Stdio,
 16    sync::Arc,
 17};
 18use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 19use wasmparser::Parser;
 20use wit_component::ComponentEncoder;
 21
 22/// Currently, we compile with Rust's `wasm32-wasip1` target, which works with WASI `preview1`.
 23/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
 24/// module, which implements the `preview1` interface in terms of `preview2`.
 25///
 26/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
 27/// not need the adapter anymore.
 28const RUST_TARGET: &str = "wasm32-wasip1";
 29const WASI_ADAPTER_URL: &str = "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
 30
 31/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
 32/// and clang's runtime library. The `wasi-sdk` provides these binaries.
 33///
 34/// Once Clang 17 and its wasm target are available via system package managers, we won't need
 35/// to download this.
 36const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-25/";
 37const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(all(target_os = "macos", target_arch = "x86_64"))
 38{
 39    Some("wasi-sdk-25.0-x86_64-macos.tar.gz")
 40} else if cfg!(all(target_os = "macos", target_arch = "aarch64")) {
 41    Some("wasi-sdk-25.0-arm64-macos.tar.gz")
 42} else if cfg!(all(target_os = "linux", target_arch = "x86_64")) {
 43    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
 44} else if cfg!(all(target_os = "linux", target_arch = "aarch64")) {
 45    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
 46} else if cfg!(all(target_os = "freebsd", target_arch = "x86_64")) {
 47    Some("wasi-sdk-25.0-x86_64-linux.tar.gz")
 48} else if cfg!(all(target_os = "freebsd", target_arch = "aarch64")) {
 49    Some("wasi-sdk-25.0-arm64-linux.tar.gz")
 50} else if cfg!(all(target_os = "windows", target_arch = "x86_64")) {
 51    Some("wasi-sdk-25.0-x86_64-windows.tar.gz")
 52} else {
 53    None
 54};
 55
/// Builds extensions: compiles an extension's Rust crate into a WASM component and compiles
/// its Tree-sitter grammars to WASM.
pub struct ExtensionBuilder {
    /// Directory where downloaded tooling (the WASI adapter and `wasi-sdk`) is cached.
    cache_dir: PathBuf,
    /// HTTP client used to download that tooling.
    pub http: Arc<dyn HttpClient>,
}
 60
/// Options controlling how an extension's Rust crate is compiled.
pub struct CompileExtensionOptions {
    /// When `true`, `cargo build` is invoked with `--release` and the compiled module is read
    /// from the `release` output directory instead of `debug`.
    pub release: bool,
}
 64
/// The subset of an extension's `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoToml {
    /// The `[package]` table.
    package: CargoTomlPackage,
}
 69
/// The subset of the `[package]` table of `Cargo.toml` that the builder reads.
#[derive(Deserialize)]
struct CargoTomlPackage {
    /// Package name; used to locate the compiled `.wasm` file in the cargo target directory.
    name: String,
}
 74
 75impl ExtensionBuilder {
 76    pub fn new(http_client: Arc<dyn HttpClient>, cache_dir: PathBuf) -> Self {
 77        Self {
 78            cache_dir,
 79            http: http_client,
 80        }
 81    }
 82
 83    pub async fn compile_extension(
 84        &self,
 85        extension_dir: &Path,
 86        extension_manifest: &mut ExtensionManifest,
 87        options: CompileExtensionOptions,
 88    ) -> Result<()> {
 89        populate_defaults(extension_manifest, extension_dir)?;
 90
 91        if extension_dir.is_relative() {
 92            bail!(
 93                "extension dir {} is not an absolute path",
 94                extension_dir.display()
 95            );
 96        }
 97
 98        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 99
100        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
101            log::info!("compiling Rust extension {}", extension_dir.display());
102            self.compile_rust_extension(extension_dir, extension_manifest, options)
103                .await
104                .context("failed to compile Rust extension")?;
105            log::info!("compiled Rust extension {}", extension_dir.display());
106        }
107
108        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
109            let snake_cased_grammar_name = grammar_name.to_snake_case();
110            if grammar_name.as_ref() != snake_cased_grammar_name.as_str() {
111                bail!(
112                    "grammar name '{grammar_name}' must be written in snake_case: {snake_cased_grammar_name}"
113                );
114            }
115
116            log::info!(
117                "compiling grammar {grammar_name} for extension {}",
118                extension_dir.display()
119            );
120            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
121                .await
122                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
123            log::info!(
124                "compiled grammar {grammar_name} for extension {}",
125                extension_dir.display()
126            );
127        }
128
129        log::info!("finished compiling extension {}", extension_dir.display());
130        Ok(())
131    }
132
133    async fn compile_rust_extension(
134        &self,
135        extension_dir: &Path,
136        manifest: &mut ExtensionManifest,
137        options: CompileExtensionOptions,
138    ) -> Result<(), anyhow::Error> {
139        self.install_rust_wasm_target_if_needed()?;
140        let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
141
142        let cargo_toml_content = fs::read_to_string(extension_dir.join("Cargo.toml"))?;
143        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
144
145        log::info!(
146            "compiling Rust crate for extension {}",
147            extension_dir.display()
148        );
149        let output = util::command::new_std_command("cargo")
150            .args(["build", "--target", RUST_TARGET])
151            .args(options.release.then_some("--release"))
152            .arg("--target-dir")
153            .arg(extension_dir.join("target"))
154            // WASI builds do not work with sccache and just stuck, so disable it.
155            .env("RUSTC_WRAPPER", "")
156            .current_dir(extension_dir)
157            .output()
158            .context("failed to run `cargo`")?;
159        if !output.status.success() {
160            bail!(
161                "failed to build extension {}",
162                String::from_utf8_lossy(&output.stderr)
163            );
164        }
165
166        log::info!(
167            "compiled Rust crate for extension {}",
168            extension_dir.display()
169        );
170
171        let mut wasm_path = PathBuf::from(extension_dir);
172        wasm_path.extend([
173            "target",
174            RUST_TARGET,
175            if options.release { "release" } else { "debug" },
176            &cargo_toml
177                .package
178                .name
179                // The wasm32-wasip1 target normalizes `-` in package names to `_` in the resulting `.wasm` file.
180                .replace('-', "_"),
181        ]);
182        wasm_path.set_extension("wasm");
183
184        let wasm_bytes = fs::read(&wasm_path)
185            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
186
187        let mut encoder = ComponentEncoder::default()
188            .module(&wasm_bytes)?
189            .adapter("wasi_snapshot_preview1", &adapter_bytes)
190            .context("failed to load adapter module")?
191            .validate(true);
192
193        log::info!(
194            "encoding wasm component for extension {}",
195            extension_dir.display()
196        );
197
198        let component_bytes = encoder
199            .encode()
200            .context("failed to encode wasm component")?;
201
202        let component_bytes = self
203            .strip_custom_sections(&component_bytes)
204            .context("failed to strip debug sections from wasm component")?;
205
206        let wasm_extension_api_version =
207            parse_wasm_extension_version(&manifest.id, &component_bytes)
208                .context("compiled wasm did not contain a valid zed extension api version")?;
209        manifest.lib.version = Some(wasm_extension_api_version);
210
211        let extension_file = extension_dir.join("extension.wasm");
212        fs::write(extension_file.clone(), &component_bytes)
213            .context("failed to write extension.wasm")?;
214
215        log::info!(
216            "extension {} written to {}",
217            extension_dir.display(),
218            extension_file.display()
219        );
220
221        Ok(())
222    }
223
224    async fn compile_grammar(
225        &self,
226        extension_dir: &Path,
227        grammar_name: &str,
228        grammar_metadata: &GrammarManifestEntry,
229    ) -> Result<()> {
230        let clang_path = self.install_wasi_sdk_if_needed().await?;
231
232        let mut grammar_repo_dir = extension_dir.to_path_buf();
233        grammar_repo_dir.extend(["grammars", grammar_name]);
234
235        let mut grammar_wasm_path = grammar_repo_dir.clone();
236        grammar_wasm_path.set_extension("wasm");
237
238        log::info!("checking out {grammar_name} parser");
239        self.checkout_repo(
240            &grammar_repo_dir,
241            &grammar_metadata.repository,
242            &grammar_metadata.rev,
243        )?;
244
245        let base_grammar_path = grammar_metadata
246            .path
247            .as_ref()
248            .map(|path| grammar_repo_dir.join(path))
249            .unwrap_or(grammar_repo_dir);
250
251        let src_path = base_grammar_path.join("src");
252        let parser_path = src_path.join("parser.c");
253        let scanner_path = src_path.join("scanner.c");
254
255        log::info!("compiling {grammar_name} parser");
256        let clang_output = util::command::new_std_command(&clang_path)
257            .args(["-fPIC", "-shared", "-Os"])
258            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
259            .arg("-o")
260            .arg(&grammar_wasm_path)
261            .arg("-I")
262            .arg(&src_path)
263            .arg(&parser_path)
264            .args(scanner_path.exists().then_some(scanner_path))
265            .output()
266            .context("failed to run clang")?;
267
268        if !clang_output.status.success() {
269            bail!(
270                "failed to compile {} parser with clang: {}",
271                grammar_name,
272                String::from_utf8_lossy(&clang_output.stderr),
273            );
274        }
275
276        Ok(())
277    }
278
279    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
280        let git_dir = directory.join(".git");
281
282        if directory.exists() {
283            let remotes_output = util::command::new_std_command("git")
284                .arg("--git-dir")
285                .arg(&git_dir)
286                .args(["remote", "-v"])
287                .output()?;
288            let has_remote = remotes_output.status.success()
289                && String::from_utf8_lossy(&remotes_output.stdout)
290                    .lines()
291                    .any(|line| {
292                        let mut parts = line.split(|c: char| c.is_whitespace());
293                        parts.next() == Some("origin") && parts.any(|part| part == url)
294                    });
295            if !has_remote {
296                bail!(
297                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
298                    directory.display(),
299                    url
300                );
301            }
302        } else {
303            fs::create_dir_all(directory).with_context(|| {
304                format!("failed to create grammar directory {}", directory.display(),)
305            })?;
306            let init_output = util::command::new_std_command("git")
307                .arg("init")
308                .current_dir(directory)
309                .output()?;
310            if !init_output.status.success() {
311                bail!(
312                    "failed to run `git init` in directory '{}'",
313                    directory.display()
314                );
315            }
316
317            let remote_add_output = util::command::new_std_command("git")
318                .arg("--git-dir")
319                .arg(&git_dir)
320                .args(["remote", "add", "origin", url])
321                .output()
322                .context("failed to execute `git remote add`")?;
323            if !remote_add_output.status.success() {
324                bail!(
325                    "failed to add remote {url} for git repository {}",
326                    git_dir.display()
327                );
328            }
329        }
330
331        let fetch_output = util::command::new_std_command("git")
332            .arg("--git-dir")
333            .arg(&git_dir)
334            .args(["fetch", "--depth", "1", "origin", rev])
335            .output()
336            .context("failed to execute `git fetch`")?;
337
338        let checkout_output = util::command::new_std_command("git")
339            .arg("--git-dir")
340            .arg(&git_dir)
341            .args(["checkout", rev])
342            .current_dir(directory)
343            .output()
344            .context("failed to execute `git checkout`")?;
345        if !checkout_output.status.success() {
346            if !fetch_output.status.success() {
347                bail!(
348                    "failed to fetch revision {} in directory '{}'",
349                    rev,
350                    directory.display()
351                );
352            }
353            bail!(
354                "failed to checkout revision {} in directory '{}': {}",
355                rev,
356                directory.display(),
357                String::from_utf8_lossy(&checkout_output.stderr)
358            );
359        }
360
361        Ok(())
362    }
363
364    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
365        let rustc_output = util::command::new_std_command("rustc")
366            .arg("--print")
367            .arg("sysroot")
368            .output()
369            .context("failed to run rustc")?;
370        if !rustc_output.status.success() {
371            bail!(
372                "failed to retrieve rust sysroot: {}",
373                String::from_utf8_lossy(&rustc_output.stderr)
374            );
375        }
376
377        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
378        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
379            return Ok(());
380        }
381
382        let output = util::command::new_std_command("rustup")
383            .args(["target", "add", RUST_TARGET])
384            .stderr(Stdio::piped())
385            .stdout(Stdio::inherit())
386            .output()
387            .context("failed to run `rustup target add`")?;
388        if !output.status.success() {
389            bail!(
390                "failed to install the `{RUST_TARGET}` target: {}",
391                String::from_utf8_lossy(&rustc_output.stderr)
392            );
393        }
394
395        Ok(())
396    }
397
398    async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
399        let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
400        if let Ok(content) = fs::read(&cache_path) {
401            if Parser::is_core_wasm(&content) {
402                return Ok(content);
403            }
404        }
405
406        fs::remove_file(&cache_path).ok();
407
408        log::info!(
409            "downloading wasi adapter module to {}",
410            cache_path.display()
411        );
412        let mut response = self
413            .http
414            .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
415            .await?;
416
417        let mut content = Vec::new();
418        let mut body = BufReader::new(response.body_mut());
419        body.read_to_end(&mut content).await?;
420
421        fs::write(&cache_path, &content)
422            .with_context(|| format!("failed to save file {}", cache_path.display()))?;
423
424        if !Parser::is_core_wasm(&content) {
425            bail!("downloaded wasi adapter is invalid");
426        }
427        Ok(content)
428    }
429
430    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
431        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
432            format!("{WASI_SDK_URL}/{asset_name}")
433        } else {
434            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
435        };
436
437        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
438        let mut clang_path = wasi_sdk_dir.clone();
439        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
440
441        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
442            return Ok(clang_path);
443        }
444
445        let mut tar_out_dir = wasi_sdk_dir.clone();
446        tar_out_dir.set_extension("archive");
447
448        fs::remove_dir_all(&wasi_sdk_dir).ok();
449        fs::remove_dir_all(&tar_out_dir).ok();
450
451        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
452        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
453        let body = BufReader::new(response.body_mut());
454        let body = GzipDecoder::new(body);
455        let tar = Archive::new(body);
456
457        tar.unpack(&tar_out_dir)
458            .await
459            .context("failed to unpack wasi-sdk archive")?;
460
461        let inner_dir = fs::read_dir(&tar_out_dir)?
462            .next()
463            .ok_or_else(|| anyhow!("no content"))?
464            .context("failed to read contents of extracted wasi archive directory")?
465            .path();
466        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
467        fs::remove_dir_all(&tar_out_dir).ok();
468
469        Ok(clang_path)
470    }
471
472    // This was adapted from:
473    // https://github.com/bytecodealliance/wasm-tools/blob/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
474    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
475        use wasmparser::Payload::*;
476
477        let strip_custom_section = |name: &str| name.starts_with(".debug");
478
479        let mut output = Vec::new();
480        let mut stack = Vec::new();
481
482        for payload in Parser::new(0).parse_all(input) {
483            let payload = payload?;
484            let component_header = wasm_encoder::Component::HEADER;
485            let module_header = wasm_encoder::Module::HEADER;
486
487            // Track nesting depth, so that we don't mess with inner producer sections:
488            match payload {
489                Version { encoding, .. } => {
490                    output.extend_from_slice(match encoding {
491                        wasmparser::Encoding::Component => &component_header,
492                        wasmparser::Encoding::Module => &module_header,
493                    });
494                }
495                ModuleSection { .. } | ComponentSection { .. } => {
496                    stack.push(mem::take(&mut output));
497                    continue;
498                }
499                End { .. } => {
500                    let mut parent = match stack.pop() {
501                        Some(c) => c,
502                        None => break,
503                    };
504                    if output.starts_with(&component_header) {
505                        parent.push(ComponentSectionId::Component as u8);
506                        output.encode(&mut parent);
507                    } else {
508                        parent.push(ComponentSectionId::CoreModule as u8);
509                        output.encode(&mut parent);
510                    }
511                    output = parent;
512                }
513                _ => {}
514            }
515
516            if let CustomSection(c) = &payload {
517                if strip_custom_section(c.name()) {
518                    continue;
519                }
520            }
521
522            if let Some((id, range)) = payload.as_section() {
523                RawSection {
524                    id,
525                    data: &input[range],
526                }
527                .append_to(&mut output);
528            }
529        }
530
531        Ok(output)
532    }
533}
534
535fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
536    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
537    // contents of the computed fields, since we don't care what the existing values are.
538    if manifest.schema_version.is_v0() {
539        manifest.languages.clear();
540        manifest.grammars.clear();
541        manifest.themes.clear();
542    }
543
544    let cargo_toml_path = extension_path.join("Cargo.toml");
545    if cargo_toml_path.exists() {
546        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
547    }
548
549    let languages_dir = extension_path.join("languages");
550    if languages_dir.exists() {
551        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
552            let entry = entry?;
553            let language_dir = entry.path();
554            let config_path = language_dir.join("config.toml");
555            if config_path.exists() {
556                let relative_language_dir =
557                    language_dir.strip_prefix(extension_path)?.to_path_buf();
558                if !manifest.languages.contains(&relative_language_dir) {
559                    manifest.languages.push(relative_language_dir);
560                }
561            }
562        }
563    }
564
565    let themes_dir = extension_path.join("themes");
566    if themes_dir.exists() {
567        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
568            let entry = entry?;
569            let theme_path = entry.path();
570            if theme_path.extension() == Some("json".as_ref()) {
571                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
572                if !manifest.themes.contains(&relative_theme_path) {
573                    manifest.themes.push(relative_theme_path);
574                }
575            }
576        }
577    }
578
579    let icon_themes_dir = extension_path.join("icon_themes");
580    if icon_themes_dir.exists() {
581        for entry in fs::read_dir(&icon_themes_dir).context("failed to list icon themes dir")? {
582            let entry = entry?;
583            let icon_theme_path = entry.path();
584            if icon_theme_path.extension() == Some("json".as_ref()) {
585                let relative_icon_theme_path =
586                    icon_theme_path.strip_prefix(extension_path)?.to_path_buf();
587                if !manifest.icon_themes.contains(&relative_icon_theme_path) {
588                    manifest.icon_themes.push(relative_icon_theme_path);
589                }
590            }
591        }
592    }
593
594    let snippets_json_path = extension_path.join("snippets.json");
595    if snippets_json_path.exists() {
596        manifest.snippets = Some(snippets_json_path);
597    }
598
599    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
600    // the manifest using the contents of the `grammars` directory.
601    if manifest.schema_version.is_v0() {
602        let grammars_dir = extension_path.join("grammars");
603        if grammars_dir.exists() {
604            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
605                let entry = entry?;
606                let grammar_path = entry.path();
607                if grammar_path.extension() == Some("toml".as_ref()) {
608                    #[derive(Deserialize)]
609                    struct GrammarConfigToml {
610                        pub repository: String,
611                        pub commit: String,
612                        #[serde(default)]
613                        pub path: Option<String>,
614                    }
615
616                    let grammar_config = fs::read_to_string(&grammar_path)?;
617                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
618
619                    let grammar_name = grammar_path
620                        .file_stem()
621                        .and_then(|stem| stem.to_str())
622                        .ok_or_else(|| anyhow!("no grammar name"))?;
623                    if !manifest.grammars.contains_key(grammar_name) {
624                        manifest.grammars.insert(
625                            grammar_name.into(),
626                            GrammarManifestEntry {
627                                repository: grammar_config.repository,
628                                rev: grammar_config.commit,
629                                path: grammar_config.path,
630                            },
631                        );
632                    }
633                }
634            }
635        }
636    }
637
638    Ok(())
639}