Canonicalize extension paths (#48632)

Mikayla Maki created

Release Notes:

- N/A

Change summary

crates/extension_host/src/wasm_host.rs                  | 173 ++++++++++
crates/extension_host/src/wasm_host/wit/since_v0_1_0.rs |   6 
crates/extension_host/src/wasm_host/wit/since_v0_8_0.rs |   6 
crates/util/src/archive.rs                              |  61 +++
4 files changed, 235 insertions(+), 11 deletions(-)

Detailed changes

crates/extension_host/src/wasm_host.rs 🔗

@@ -11,7 +11,7 @@ use extension::{
     ProjectDelegate, SlashCommand, SlashCommandArgumentCompletion, SlashCommandOutput, Symbol,
     WorktreeDelegate,
 };
-use fs::{Fs, normalize_path};
+use fs::Fs;
 use futures::future::LocalBoxFuture;
 use futures::{
     Future, FutureExt, StreamExt as _,
@@ -722,14 +722,57 @@ impl WasmHost {
         Ok(ctx.build())
     }
 
-    pub fn writeable_path_from_extension(&self, id: &Arc<str>, path: &Path) -> Result<PathBuf> {
-        let extension_work_dir = self.work_dir.join(id.as_ref());
-        let path = normalize_path(&extension_work_dir.join(path));
+    pub async fn writeable_path_from_extension(
+        &self,
+        id: &Arc<str>,
+        path: &Path,
+    ) -> Result<PathBuf> {
+        let canonical_work_dir = self
+            .fs
+            .canonicalize(&self.work_dir)
+            .await
+            .with_context(|| format!("canonicalizing work dir {:?}", self.work_dir))?;
+        let extension_work_dir = canonical_work_dir.join(id.as_ref());
+
+        let absolute = if path.is_relative() {
+            extension_work_dir.join(path)
+        } else {
+            path.to_path_buf()
+        };
+
+        let normalized = util::paths::normalize_lexically(&absolute)
+            .map_err(|_| anyhow!("path {path:?} escapes its parent"))?;
+
+        // Canonicalize the nearest existing ancestor to resolve any symlinks
+        // in the on-disk portion of the path. Components beyond that ancestor
+        // are re-appended, which lets this work for destinations that don't
+        // exist yet (e.g. nested directories created by tar extraction).
+        let mut existing = normalized.as_path();
+        let mut tail_components = Vec::new();
+        let canonical_prefix = loop {
+            match self.fs.canonicalize(existing).await {
+                Ok(canonical) => break canonical,
+                Err(_) => {
+                    if let Some(file_name) = existing.file_name() {
+                        tail_components.push(file_name.to_owned());
+                    }
+                    existing = existing
+                        .parent()
+                        .context(format!("cannot resolve path {path:?}"))?;
+                }
+            }
+        };
+
+        let mut resolved = canonical_prefix;
+        for component in tail_components.into_iter().rev() {
+            resolved.push(component);
+        }
+
         anyhow::ensure!(
-            path.starts_with(&extension_work_dir),
-            "cannot write to path {path:?}",
+            resolved.starts_with(&extension_work_dir),
+            "cannot write to path {resolved:?}",
         );
-        Ok(path)
+        Ok(resolved)
     }
 }
 
@@ -920,3 +963,119 @@ impl CacheStore for IncrementalCompilationCache {
         true
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use extension::ExtensionHostProxy;
+    use fs::FakeFs;
+    use gpui::TestAppContext;
+    use http_client::FakeHttpClient;
+    use node_runtime::NodeRuntime;
+    use serde_json::json;
+    use settings::SettingsStore;
+
+    fn init_test(cx: &mut TestAppContext) {
+        cx.update(|cx| {
+            let store = SettingsStore::test(cx);
+            cx.set_global(store);
+            release_channel::init(semver::Version::new(0, 0, 0), cx);
+            extension::init(cx);
+            gpui_tokio::init(cx);
+        });
+    }
+
+    #[gpui::test]
+    async fn test_writeable_path_rejects_escape_attempts(cx: &mut TestAppContext) {
+        init_test(cx);
+
+        let fs = FakeFs::new(cx.executor());
+        fs.insert_tree(
+            "/work",
+            json!({
+                "test-extension": {
+                    "legit.txt": "legitimate content"
+                }
+            }),
+        )
+        .await;
+        fs.insert_tree("/outside", json!({ "secret.txt": "sensitive data" }))
+            .await;
+        fs.insert_symlink("/work/test-extension/escape", PathBuf::from("/outside"))
+            .await;
+
+        let host = cx.update(|cx| {
+            WasmHost::new(
+                fs.clone(),
+                FakeHttpClient::with_200_response(),
+                NodeRuntime::unavailable(),
+                Arc::new(ExtensionHostProxy::default()),
+                PathBuf::from("/work"),
+                cx,
+            )
+        });
+
+        let extension_id: Arc<str> = "test-extension".into();
+
+        // A path traversing through a symlink that points outside the work dir
+        // must be rejected. Canonicalization resolves the symlink before the
+        // prefix check, so this is caught.
+        let result = host
+            .writeable_path_from_extension(
+                &extension_id,
+                Path::new("/work/test-extension/escape/secret.txt"),
+            )
+            .await;
+        assert!(
+            result.is_err(),
+            "symlink escape should be rejected, but got: {result:?}",
+        );
+
+        // A path using `..` to escape the extension work dir must be rejected.
+        let result = host
+            .writeable_path_from_extension(
+                &extension_id,
+                Path::new("/work/test-extension/../../outside/secret.txt"),
+            )
+            .await;
+        assert!(
+            result.is_err(),
+            "parent traversal escape should be rejected, but got: {result:?}",
+        );
+
+        // A legitimate path within the extension work dir should succeed.
+        let result = host
+            .writeable_path_from_extension(
+                &extension_id,
+                Path::new("/work/test-extension/legit.txt"),
+            )
+            .await;
+        assert!(
+            result.is_ok(),
+            "legitimate path should be accepted, but got: {result:?}",
+        );
+
+        // A relative path with non-existent intermediate directories should
+        // succeed, mirroring the integration test pattern where an extension
+        // downloads a tar to e.g. "gleam-v1.2.3" (creating the directory)
+        // and then references "gleam-v1.2.3/gleam" inside it.
+        let result = host
+            .writeable_path_from_extension(&extension_id, Path::new("new-dir/nested/binary"))
+            .await;
+        assert!(
+            result.is_ok(),
+            "relative path with non-existent parents should be accepted, but got: {result:?}",
+        );
+
+        // A symlink deeper than the immediate parent must still be caught.
+        // Here "escape" is a symlink to /outside, so "escape/deep/file.txt"
+        // has multiple non-existent components beyond the symlink.
+        let result = host
+            .writeable_path_from_extension(&extension_id, Path::new("escape/deep/nested/file.txt"))
+            .await;
+        assert!(
+            result.is_err(),
+            "symlink escape through deep non-existent path should be rejected, but got: {result:?}",
+        );
+    }
+}

crates/extension_host/src/wasm_host/wit/since_v0_1_0.rs 🔗

@@ -508,7 +508,8 @@ impl ExtensionImports for WasmState {
 
             let destination_path = self
                 .host
-                .writeable_path_from_extension(&self.manifest.id, &path)?;
+                .writeable_path_from_extension(&self.manifest.id, &path)
+                .await?;
 
             let mut response = self
                 .host
@@ -565,7 +566,8 @@ impl ExtensionImports for WasmState {
     async fn make_file_executable(&mut self, path: String) -> wasmtime::Result<Result<(), String>> {
         let path = self
             .host
-            .writeable_path_from_extension(&self.manifest.id, Path::new(&path))?;
+            .writeable_path_from_extension(&self.manifest.id, Path::new(&path))
+            .await?;
 
         make_file_executable(&path)
             .await

crates/extension_host/src/wasm_host/wit/since_v0_8_0.rs 🔗

@@ -1048,7 +1048,8 @@ impl ExtensionImports for WasmState {
 
             let destination_path = self
                 .host
-                .writeable_path_from_extension(&self.manifest.id, &path)?;
+                .writeable_path_from_extension(&self.manifest.id, &path)
+                .await?;
 
             let mut response = self
                 .host
@@ -1105,7 +1106,8 @@ impl ExtensionImports for WasmState {
     async fn make_file_executable(&mut self, path: String) -> wasmtime::Result<Result<(), String>> {
         let path = self
             .host
-            .writeable_path_from_extension(&self.manifest.id, Path::new(&path))?;
+            .writeable_path_from_extension(&self.manifest.id, Path::new(&path))
+            .await?;
 
         make_file_executable(&path)
             .await

crates/util/src/archive.rs 🔗

@@ -318,4 +318,65 @@ mod tests {
             );
         });
     }
+
+    #[test]
+    fn test_archive_path_is_normal_rejects_traversal() {
+        assert!(!archive_path_is_normal("../parent.txt"));
+        assert!(!archive_path_is_normal("foo/../../grandparent.txt"));
+        assert!(!archive_path_is_normal("/tmp/absolute.txt"));
+
+        assert!(archive_path_is_normal("foo/bar.txt"));
+        assert!(archive_path_is_normal("foo/bar/baz.txt"));
+        assert!(archive_path_is_normal("./foo/bar.txt"));
+        assert!(archive_path_is_normal("normal.txt"));
+    }
+
+    async fn build_zip_with_entries(entries: &[(&str, &[u8])]) -> Cursor<Vec<u8>> {
+        let mut buf = Cursor::new(Vec::new());
+        let mut writer = ZipFileWriter::new(&mut buf);
+        for (name, data) in entries {
+            let builder = ZipEntryBuilder::new((*name).into(), async_zip::Compression::Stored);
+            writer.write_entry_whole(builder, data).await.unwrap();
+        }
+        writer.close().await.unwrap();
+        buf.set_position(0);
+        buf
+    }
+
+    #[test]
+    fn test_extract_zip_skips_path_traversal_entries() {
+        smol::block_on(async {
+            let base_dir = tempfile::tempdir().unwrap();
+            let extract_dir = base_dir.path().join("subdir");
+            std::fs::create_dir_all(&extract_dir).unwrap();
+
+            let absolute_target = base_dir.path().join("absolute.txt");
+            let reader = build_zip_with_entries(&[
+                ("normal.txt", b"normal file"),
+                ("subdir/nested.txt", b"nested file"),
+                ("../parent.txt", b"parent file"),
+                ("foo/../../grandparent.txt", b"grandparent file"),
+                (absolute_target.to_str().unwrap(), b"absolute file"),
+            ])
+            .await;
+
+            extract_zip(&extract_dir, reader).await.unwrap();
+
+            assert_file_content(&extract_dir.join("normal.txt"), "normal file");
+            assert_file_content(&extract_dir.join("subdir/nested.txt"), "nested file");
+
+            assert!(
+                !base_dir.path().join("parent.txt").exists(),
+                "parent traversal entry should have been skipped"
+            );
+            assert!(
+                !base_dir.path().join("grandparent.txt").exists(),
+                "nested traversal entry should have been skipped"
+            );
+            assert!(
+                !absolute_target.exists(),
+                "absolute path entry should have been skipped"
+            );
+        });
+    }
 }