extension_builder.rs

  1use crate::wasm_host::parse_wasm_extension_version;
  2use crate::ExtensionManifest;
  3use crate::{extension_manifest::ExtensionLibraryKind, GrammarManifestEntry};
  4use anyhow::{anyhow, bail, Context as _, Result};
  5use async_compression::futures::bufread::GzipDecoder;
  6use async_tar::Archive;
  7use futures::io::BufReader;
  8use futures::AsyncReadExt;
  9use http_client::{self, AsyncBody, HttpClient};
 10use serde::Deserialize;
 11use std::{
 12    env, fs, mem,
 13    path::{Path, PathBuf},
 14    process::{Command, Stdio},
 15    sync::Arc,
 16};
 17use wasm_encoder::{ComponentSectionId, Encode as _, RawSection, Section as _};
 18use wasmparser::Parser;
 19use wit_component::ComponentEncoder;
 20
 21/// Currently, we compile with Rust's `wasm32-wasip1` target, which works with WASI `preview1`.
 22/// But the WASM component model is based on WASI `preview2`. So we need an 'adapter' WASM
 23/// module, which implements the `preview1` interface in terms of `preview2`.
 24///
 25/// Once Rust 1.78 is released, there will be a `wasm32-wasip2` target available, so we will
 26/// not need the adapter anymore.
 27const RUST_TARGET: &str = "wasm32-wasip1";
 28const WASI_ADAPTER_URL: &str =
 29    "https://github.com/bytecodealliance/wasmtime/releases/download/v18.0.2/wasi_snapshot_preview1.reactor.wasm";
 30
 31/// Compiling Tree-sitter parsers from C to WASM requires Clang 17, and a WASM build of libc
 32/// and clang's runtime library. The `wasi-sdk` provides these binaries.
 33///
 34/// Once Clang 17 and its wasm target are available via system package managers, we won't need
 35/// to download this.
 36const WASI_SDK_URL: &str = "https://github.com/WebAssembly/wasi-sdk/releases/download/wasi-sdk-21/";
 37const WASI_SDK_ASSET_NAME: Option<&str> = if cfg!(target_os = "macos") {
 38    Some("wasi-sdk-21.0-macos.tar.gz")
 39} else if cfg!(target_os = "linux") {
 40    Some("wasi-sdk-21.0-linux.tar.gz")
 41} else if cfg!(target_os = "windows") {
 42    Some("wasi-sdk-21.0.m-mingw.tar.gz")
 43} else {
 44    None
 45};
 46
 47pub struct ExtensionBuilder {
 48    cache_dir: PathBuf,
 49    pub http: Arc<dyn HttpClient>,
 50}
 51
 52pub struct CompileExtensionOptions {
 53    pub release: bool,
 54}
 55
 56#[derive(Deserialize)]
 57struct CargoToml {
 58    package: CargoTomlPackage,
 59}
 60
 61#[derive(Deserialize)]
 62struct CargoTomlPackage {
 63    name: String,
 64}
 65
 66impl ExtensionBuilder {
 67    pub fn new(http_client: Arc<dyn HttpClient>, cache_dir: PathBuf) -> Self {
 68        Self {
 69            cache_dir,
 70            http: http_client,
 71        }
 72    }
 73
 74    pub async fn compile_extension(
 75        &self,
 76        extension_dir: &Path,
 77        extension_manifest: &mut ExtensionManifest,
 78        options: CompileExtensionOptions,
 79    ) -> Result<()> {
 80        populate_defaults(extension_manifest, extension_dir)?;
 81
 82        if extension_dir.is_relative() {
 83            bail!(
 84                "extension dir {} is not an absolute path",
 85                extension_dir.display()
 86            );
 87        }
 88
 89        fs::create_dir_all(&self.cache_dir).context("failed to create cache dir")?;
 90
 91        if extension_manifest.lib.kind == Some(ExtensionLibraryKind::Rust) {
 92            log::info!("compiling Rust extension {}", extension_dir.display());
 93            self.compile_rust_extension(extension_dir, extension_manifest, options)
 94                .await
 95                .context("failed to compile Rust extension")?;
 96            log::info!("compiled Rust extension {}", extension_dir.display());
 97        }
 98
 99        for (grammar_name, grammar_metadata) in &extension_manifest.grammars {
100            log::info!(
101                "compiling grammar {grammar_name} for extension {}",
102                extension_dir.display()
103            );
104            self.compile_grammar(extension_dir, grammar_name.as_ref(), grammar_metadata)
105                .await
106                .with_context(|| format!("failed to compile grammar '{grammar_name}'"))?;
107            log::info!(
108                "compiled grammar {grammar_name} for extension {}",
109                extension_dir.display()
110            );
111        }
112
113        log::info!("finished compiling extension {}", extension_dir.display());
114        Ok(())
115    }
116
117    async fn compile_rust_extension(
118        &self,
119        extension_dir: &Path,
120        manifest: &mut ExtensionManifest,
121        options: CompileExtensionOptions,
122    ) -> Result<(), anyhow::Error> {
123        self.install_rust_wasm_target_if_needed()?;
124        let adapter_bytes = self.install_wasi_preview1_adapter_if_needed().await?;
125
126        let cargo_toml_content = fs::read_to_string(extension_dir.join("Cargo.toml"))?;
127        let cargo_toml: CargoToml = toml::from_str(&cargo_toml_content)?;
128
129        log::info!(
130            "compiling Rust crate for extension {}",
131            extension_dir.display()
132        );
133        let output = Command::new("cargo")
134            .args(["build", "--target", RUST_TARGET])
135            .args(options.release.then_some("--release"))
136            .arg("--target-dir")
137            .arg(extension_dir.join("target"))
138            .current_dir(extension_dir)
139            .output()
140            .context("failed to run `cargo`")?;
141        if !output.status.success() {
142            bail!(
143                "failed to build extension {}",
144                String::from_utf8_lossy(&output.stderr)
145            );
146        }
147
148        log::info!(
149            "compiled Rust crate for extension {}",
150            extension_dir.display()
151        );
152
153        let mut wasm_path = PathBuf::from(extension_dir);
154        wasm_path.extend([
155            "target",
156            RUST_TARGET,
157            if options.release { "release" } else { "debug" },
158            &cargo_toml
159                .package
160                .name
161                // The wasm32-wasip1 target normalizes `-` in package names to `_` in the resulting `.wasm` file.
162                .replace('-', "_"),
163        ]);
164        wasm_path.set_extension("wasm");
165
166        let wasm_bytes = fs::read(&wasm_path)
167            .with_context(|| format!("failed to read output module `{}`", wasm_path.display()))?;
168
169        let encoder = ComponentEncoder::default()
170            .module(&wasm_bytes)?
171            .adapter("wasi_snapshot_preview1", &adapter_bytes)
172            .context("failed to load adapter module")?
173            .validate(true);
174
175        log::info!(
176            "encoding wasm component for extension {}",
177            extension_dir.display()
178        );
179
180        let component_bytes = encoder
181            .encode()
182            .context("failed to encode wasm component")?;
183
184        let component_bytes = self
185            .strip_custom_sections(&component_bytes)
186            .context("failed to strip debug sections from wasm component")?;
187
188        let wasm_extension_api_version =
189            parse_wasm_extension_version(&manifest.id, &component_bytes)
190                .context("compiled wasm did not contain a valid zed extension api version")?;
191        manifest.lib.version = Some(wasm_extension_api_version);
192
193        let extension_file = extension_dir.join("extension.wasm");
194        fs::write(extension_file.clone(), &component_bytes)
195            .context("failed to write extension.wasm")?;
196
197        log::info!(
198            "extension {} written to {}",
199            extension_dir.display(),
200            extension_file.display()
201        );
202
203        Ok(())
204    }
205
206    async fn compile_grammar(
207        &self,
208        extension_dir: &Path,
209        grammar_name: &str,
210        grammar_metadata: &GrammarManifestEntry,
211    ) -> Result<()> {
212        let clang_path = self.install_wasi_sdk_if_needed().await?;
213
214        let mut grammar_repo_dir = extension_dir.to_path_buf();
215        grammar_repo_dir.extend(["grammars", grammar_name]);
216
217        let mut grammar_wasm_path = grammar_repo_dir.clone();
218        grammar_wasm_path.set_extension("wasm");
219
220        log::info!("checking out {grammar_name} parser");
221        self.checkout_repo(
222            &grammar_repo_dir,
223            &grammar_metadata.repository,
224            &grammar_metadata.rev,
225        )?;
226
227        let base_grammar_path = grammar_metadata
228            .path
229            .as_ref()
230            .map(|path| grammar_repo_dir.join(path))
231            .unwrap_or(grammar_repo_dir);
232
233        let src_path = base_grammar_path.join("src");
234        let parser_path = src_path.join("parser.c");
235        let scanner_path = src_path.join("scanner.c");
236
237        log::info!("compiling {grammar_name} parser");
238        let clang_output = Command::new(&clang_path)
239            .args(["-fPIC", "-shared", "-Os"])
240            .arg(format!("-Wl,--export=tree_sitter_{grammar_name}"))
241            .arg("-o")
242            .arg(&grammar_wasm_path)
243            .arg("-I")
244            .arg(&src_path)
245            .arg(&parser_path)
246            .args(scanner_path.exists().then_some(scanner_path))
247            .output()
248            .context("failed to run clang")?;
249
250        if !clang_output.status.success() {
251            bail!(
252                "failed to compile {} parser with clang: {}",
253                grammar_name,
254                String::from_utf8_lossy(&clang_output.stderr),
255            );
256        }
257
258        Ok(())
259    }
260
261    fn checkout_repo(&self, directory: &Path, url: &str, rev: &str) -> Result<()> {
262        let git_dir = directory.join(".git");
263
264        if directory.exists() {
265            let remotes_output = Command::new("git")
266                .arg("--git-dir")
267                .arg(&git_dir)
268                .args(["remote", "-v"])
269                .output()?;
270            let has_remote = remotes_output.status.success()
271                && String::from_utf8_lossy(&remotes_output.stdout)
272                    .lines()
273                    .any(|line| {
274                        let mut parts = line.split(|c: char| c.is_whitespace());
275                        parts.next() == Some("origin") && parts.any(|part| part == url)
276                    });
277            if !has_remote {
278                bail!(
279                    "grammar directory '{}' already exists, but is not a git clone of '{}'",
280                    directory.display(),
281                    url
282                );
283            }
284        } else {
285            fs::create_dir_all(directory).with_context(|| {
286                format!("failed to create grammar directory {}", directory.display(),)
287            })?;
288            let init_output = Command::new("git")
289                .arg("init")
290                .current_dir(directory)
291                .output()?;
292            if !init_output.status.success() {
293                bail!(
294                    "failed to run `git init` in directory '{}'",
295                    directory.display()
296                );
297            }
298
299            let remote_add_output = Command::new("git")
300                .arg("--git-dir")
301                .arg(&git_dir)
302                .args(["remote", "add", "origin", url])
303                .output()
304                .context("failed to execute `git remote add`")?;
305            if !remote_add_output.status.success() {
306                bail!(
307                    "failed to add remote {url} for git repository {}",
308                    git_dir.display()
309                );
310            }
311        }
312
313        let fetch_output = Command::new("git")
314            .arg("--git-dir")
315            .arg(&git_dir)
316            .args(["fetch", "--depth", "1", "origin", rev])
317            .output()
318            .context("failed to execute `git fetch`")?;
319
320        let checkout_output = Command::new("git")
321            .arg("--git-dir")
322            .arg(&git_dir)
323            .args(["checkout", rev])
324            .current_dir(directory)
325            .output()
326            .context("failed to execute `git checkout`")?;
327        if !checkout_output.status.success() {
328            if !fetch_output.status.success() {
329                bail!(
330                    "failed to fetch revision {} in directory '{}'",
331                    rev,
332                    directory.display()
333                );
334            }
335            bail!(
336                "failed to checkout revision {} in directory '{}': {}",
337                rev,
338                directory.display(),
339                String::from_utf8_lossy(&checkout_output.stderr)
340            );
341        }
342
343        Ok(())
344    }
345
346    fn install_rust_wasm_target_if_needed(&self) -> Result<()> {
347        let rustc_output = Command::new("rustc")
348            .arg("--print")
349            .arg("sysroot")
350            .output()
351            .context("failed to run rustc")?;
352        if !rustc_output.status.success() {
353            bail!(
354                "failed to retrieve rust sysroot: {}",
355                String::from_utf8_lossy(&rustc_output.stderr)
356            );
357        }
358
359        let sysroot = PathBuf::from(String::from_utf8(rustc_output.stdout)?.trim());
360        if sysroot.join("lib/rustlib").join(RUST_TARGET).exists() {
361            return Ok(());
362        }
363
364        let output = Command::new("rustup")
365            .args(["target", "add", RUST_TARGET])
366            .stderr(Stdio::inherit())
367            .stdout(Stdio::inherit())
368            .output()
369            .context("failed to run `rustup target add`")?;
370        if !output.status.success() {
371            bail!("failed to install the `{RUST_TARGET}` target");
372        }
373
374        Ok(())
375    }
376
377    async fn install_wasi_preview1_adapter_if_needed(&self) -> Result<Vec<u8>> {
378        let cache_path = self.cache_dir.join("wasi_snapshot_preview1.reactor.wasm");
379        if let Ok(content) = fs::read(&cache_path) {
380            if Parser::is_core_wasm(&content) {
381                return Ok(content);
382            }
383        }
384
385        fs::remove_file(&cache_path).ok();
386
387        log::info!(
388            "downloading wasi adapter module to {}",
389            cache_path.display()
390        );
391        let mut response = self
392            .http
393            .get(WASI_ADAPTER_URL, AsyncBody::default(), true)
394            .await?;
395
396        let mut content = Vec::new();
397        let mut body = BufReader::new(response.body_mut());
398        body.read_to_end(&mut content).await?;
399
400        fs::write(&cache_path, &content)
401            .with_context(|| format!("failed to save file {}", cache_path.display()))?;
402
403        if !Parser::is_core_wasm(&content) {
404            bail!("downloaded wasi adapter is invalid");
405        }
406        Ok(content)
407    }
408
409    async fn install_wasi_sdk_if_needed(&self) -> Result<PathBuf> {
410        let url = if let Some(asset_name) = WASI_SDK_ASSET_NAME {
411            format!("{WASI_SDK_URL}/{asset_name}")
412        } else {
413            bail!("wasi-sdk is not available for platform {}", env::consts::OS);
414        };
415
416        let wasi_sdk_dir = self.cache_dir.join("wasi-sdk");
417        let mut clang_path = wasi_sdk_dir.clone();
418        clang_path.extend(["bin", &format!("clang{}", env::consts::EXE_SUFFIX)]);
419
420        if fs::metadata(&clang_path).map_or(false, |metadata| metadata.is_file()) {
421            return Ok(clang_path);
422        }
423
424        let mut tar_out_dir = wasi_sdk_dir.clone();
425        tar_out_dir.set_extension("archive");
426
427        fs::remove_dir_all(&wasi_sdk_dir).ok();
428        fs::remove_dir_all(&tar_out_dir).ok();
429
430        log::info!("downloading wasi-sdk to {}", wasi_sdk_dir.display());
431        let mut response = self.http.get(&url, AsyncBody::default(), true).await?;
432        let body = BufReader::new(response.body_mut());
433        let body = GzipDecoder::new(body);
434        let tar = Archive::new(body);
435
436        tar.unpack(&tar_out_dir)
437            .await
438            .context("failed to unpack wasi-sdk archive")?;
439
440        let inner_dir = fs::read_dir(&tar_out_dir)?
441            .next()
442            .ok_or_else(|| anyhow!("no content"))?
443            .context("failed to read contents of extracted wasi archive directory")?
444            .path();
445        fs::rename(&inner_dir, &wasi_sdk_dir).context("failed to move extracted wasi dir")?;
446        fs::remove_dir_all(&tar_out_dir).ok();
447
448        Ok(clang_path)
449    }
450
451    // This was adapted from:
452    // https://github.com/bytecodealliance/wasm-tools/blob/1791a8f139722e9f8679a2bd3d8e423e55132b22/src/bin/wasm-tools/strip.rs
453    fn strip_custom_sections(&self, input: &Vec<u8>) -> Result<Vec<u8>> {
454        use wasmparser::Payload::*;
455
456        let strip_custom_section = |name: &str| name.starts_with(".debug");
457
458        let mut output = Vec::new();
459        let mut stack = Vec::new();
460
461        for payload in Parser::new(0).parse_all(input) {
462            let payload = payload?;
463            let component_header = wasm_encoder::Component::HEADER;
464            let module_header = wasm_encoder::Module::HEADER;
465
466            // Track nesting depth, so that we don't mess with inner producer sections:
467            match payload {
468                Version { encoding, .. } => {
469                    output.extend_from_slice(match encoding {
470                        wasmparser::Encoding::Component => &component_header,
471                        wasmparser::Encoding::Module => &module_header,
472                    });
473                }
474                ModuleSection { .. } | ComponentSection { .. } => {
475                    stack.push(mem::take(&mut output));
476                    continue;
477                }
478                End { .. } => {
479                    let mut parent = match stack.pop() {
480                        Some(c) => c,
481                        None => break,
482                    };
483                    if output.starts_with(&component_header) {
484                        parent.push(ComponentSectionId::Component as u8);
485                        output.encode(&mut parent);
486                    } else {
487                        parent.push(ComponentSectionId::CoreModule as u8);
488                        output.encode(&mut parent);
489                    }
490                    output = parent;
491                }
492                _ => {}
493            }
494
495            if let CustomSection(c) = &payload {
496                if strip_custom_section(c.name()) {
497                    continue;
498                }
499            }
500
501            if let Some((id, range)) = payload.as_section() {
502                RawSection {
503                    id,
504                    data: &input[range],
505                }
506                .append_to(&mut output);
507            }
508        }
509
510        Ok(output)
511    }
512}
513
514fn populate_defaults(manifest: &mut ExtensionManifest, extension_path: &Path) -> Result<()> {
515    // For legacy extensions on the v0 schema (aka, using `extension.json`), clear out any existing
516    // contents of the computed fields, since we don't care what the existing values are.
517    if manifest.schema_version.is_v0() {
518        manifest.languages.clear();
519        manifest.grammars.clear();
520        manifest.themes.clear();
521    }
522
523    let cargo_toml_path = extension_path.join("Cargo.toml");
524    if cargo_toml_path.exists() {
525        manifest.lib.kind = Some(ExtensionLibraryKind::Rust);
526    }
527
528    let languages_dir = extension_path.join("languages");
529    if languages_dir.exists() {
530        for entry in fs::read_dir(&languages_dir).context("failed to list languages dir")? {
531            let entry = entry?;
532            let language_dir = entry.path();
533            let config_path = language_dir.join("config.toml");
534            if config_path.exists() {
535                let relative_language_dir =
536                    language_dir.strip_prefix(extension_path)?.to_path_buf();
537                if !manifest.languages.contains(&relative_language_dir) {
538                    manifest.languages.push(relative_language_dir);
539                }
540            }
541        }
542    }
543
544    let themes_dir = extension_path.join("themes");
545    if themes_dir.exists() {
546        for entry in fs::read_dir(&themes_dir).context("failed to list themes dir")? {
547            let entry = entry?;
548            let theme_path = entry.path();
549            if theme_path.extension() == Some("json".as_ref()) {
550                let relative_theme_path = theme_path.strip_prefix(extension_path)?.to_path_buf();
551                if !manifest.themes.contains(&relative_theme_path) {
552                    manifest.themes.push(relative_theme_path);
553                }
554            }
555        }
556    }
557
558    let snippets_json_path = extension_path.join("snippets.json");
559    if snippets_json_path.exists() {
560        manifest.snippets = Some(snippets_json_path);
561    }
562
563    // For legacy extensions on the v0 schema (aka, using `extension.json`), we want to populate the grammars in
564    // the manifest using the contents of the `grammars` directory.
565    if manifest.schema_version.is_v0() {
566        let grammars_dir = extension_path.join("grammars");
567        if grammars_dir.exists() {
568            for entry in fs::read_dir(&grammars_dir).context("failed to list grammars dir")? {
569                let entry = entry?;
570                let grammar_path = entry.path();
571                if grammar_path.extension() == Some("toml".as_ref()) {
572                    #[derive(Deserialize)]
573                    struct GrammarConfigToml {
574                        pub repository: String,
575                        pub commit: String,
576                        #[serde(default)]
577                        pub path: Option<String>,
578                    }
579
580                    let grammar_config = fs::read_to_string(&grammar_path)?;
581                    let grammar_config: GrammarConfigToml = toml::from_str(&grammar_config)?;
582
583                    let grammar_name = grammar_path
584                        .file_stem()
585                        .and_then(|stem| stem.to_str())
586                        .ok_or_else(|| anyhow!("no grammar name"))?;
587                    if !manifest.grammars.contains_key(grammar_name) {
588                        manifest.grammars.insert(
589                            grammar_name.into(),
590                            GrammarManifestEntry {
591                                repository: grammar_config.repository,
592                                rev: grammar_config.commit,
593                                path: grammar_config.path,
594                            },
595                        );
596                    }
597                }
598            }
599        }
600    }
601
602    Ok(())
603}