archive.rs

  1use std::path::Path;
  2
  3use anyhow::{Context as _, Result};
  4use async_zip::base::read;
  5#[cfg(not(windows))]
  6use futures::AsyncSeek;
  7use futures::{AsyncRead, io::BufReader};
  8
  9fn archive_path_is_normal(filename: &str) -> bool {
 10    Path::new(filename).components().all(|c| {
 11        matches!(
 12            c,
 13            std::path::Component::Normal(_) | std::path::Component::CurDir
 14        )
 15    })
 16}
 17
 18#[cfg(windows)]
 19pub async fn extract_zip<R: AsyncRead + Unpin>(destination: &Path, reader: R) -> Result<()> {
 20    let mut reader = read::stream::ZipFileReader::new(BufReader::new(reader));
 21
 22    let destination = &destination
 23        .canonicalize()
 24        .unwrap_or_else(|_| destination.to_path_buf());
 25
 26    while let Some(mut item) = reader.next_with_entry().await? {
 27        let entry_reader = item.reader_mut();
 28        let entry = entry_reader.entry();
 29        let filename = entry
 30            .filename()
 31            .as_str()
 32            .context("reading zip entry file name")?;
 33
 34        if !archive_path_is_normal(filename) {
 35            reader = item.skip().await.context("reading next zip entry")?;
 36            continue;
 37        }
 38
 39        let path = destination.join(filename);
 40
 41        if entry
 42            .dir()
 43            .with_context(|| format!("reading zip entry metadata for path {path:?}"))?
 44        {
 45            std::fs::create_dir_all(&path)
 46                .with_context(|| format!("creating directory {path:?}"))?;
 47        } else {
 48            let parent_dir = path
 49                .parent()
 50                .with_context(|| format!("no parent directory for {path:?}"))?;
 51            std::fs::create_dir_all(parent_dir)
 52                .with_context(|| format!("creating parent directory {parent_dir:?}"))?;
 53            let mut file = smol::fs::File::create(&path)
 54                .await
 55                .with_context(|| format!("creating file {path:?}"))?;
 56            futures::io::copy(entry_reader, &mut file)
 57                .await
 58                .with_context(|| format!("extracting into file {path:?}"))?;
 59        }
 60
 61        reader = item.skip().await.context("reading next zip entry")?;
 62    }
 63
 64    Ok(())
 65}
 66
 67#[cfg(not(windows))]
 68pub async fn extract_zip<R: AsyncRead + Unpin>(destination: &Path, reader: R) -> Result<()> {
 69    // Unix needs file permissions copied when extracting.
 70    // This is only possible to do when a reader impls `AsyncSeek` and `seek::ZipFileReader` is used.
 71    // `stream::ZipFileReader` also has the `unix_permissions` method, but it will always return `Some(0)`.
 72    //
 73    // A typical `reader` comes from a streaming network response, so cannot be sought right away,
 74    // and reading the entire archive into the memory seems wasteful.
 75    //
 76    // So, save the stream into a temporary file first and then get it read with a seeking reader.
 77    let mut file = async_fs::File::from(tempfile::tempfile().context("creating a temporary file")?);
 78    futures::io::copy(&mut BufReader::new(reader), &mut file)
 79        .await
 80        .context("saving archive contents into the temporary file")?;
 81    extract_seekable_zip(destination, file).await
 82}
 83
 84#[cfg(not(windows))]
 85pub async fn extract_seekable_zip<R: AsyncRead + AsyncSeek + Unpin>(
 86    destination: &Path,
 87    reader: R,
 88) -> Result<()> {
 89    let mut reader = read::seek::ZipFileReader::new(BufReader::new(reader))
 90        .await
 91        .context("reading the zip archive")?;
 92    let destination = &destination
 93        .canonicalize()
 94        .unwrap_or_else(|_| destination.to_path_buf());
 95    for (i, entry) in reader.file().entries().to_vec().into_iter().enumerate() {
 96        let filename = entry
 97            .filename()
 98            .as_str()
 99            .context("reading zip entry file name")?;
100
101        if !archive_path_is_normal(filename) {
102            continue;
103        }
104
105        let path = destination.join(filename);
106
107        if entry
108            .dir()
109            .with_context(|| format!("reading zip entry metadata for path {path:?}"))?
110        {
111            std::fs::create_dir_all(&path)
112                .with_context(|| format!("creating directory {path:?}"))?;
113        } else {
114            let parent_dir = path
115                .parent()
116                .with_context(|| format!("no parent directory for {path:?}"))?;
117            std::fs::create_dir_all(parent_dir)
118                .with_context(|| format!("creating parent directory {parent_dir:?}"))?;
119            let mut file = smol::fs::File::create(&path)
120                .await
121                .with_context(|| format!("creating file {path:?}"))?;
122            let mut entry_reader = reader
123                .reader_with_entry(i)
124                .await
125                .with_context(|| format!("reading entry for path {path:?}"))?;
126            futures::io::copy(&mut entry_reader, &mut file)
127                .await
128                .with_context(|| format!("extracting into file {path:?}"))?;
129
130            if let Some(perms) = entry.unix_permissions()
131                && perms != 0o000
132            {
133                use std::os::unix::fs::PermissionsExt;
134                let permissions = std::fs::Permissions::from_mode(u32::from(perms));
135                file.set_permissions(permissions)
136                    .await
137                    .with_context(|| format!("setting permissions for file {path:?}"))?;
138            }
139        }
140    }
141
142    Ok(())
143}
144
145#[cfg(test)]
146mod tests {
147    use async_zip::ZipEntryBuilder;
148    use async_zip::base::write::ZipFileWriter;
149    use futures::{AsyncSeek, AsyncWriteExt};
150    use smol::io::Cursor;
151    use tempfile::TempDir;
152
153    use super::*;
154
155    #[allow(unused_variables)]
156    async fn compress_zip(src_dir: &Path, dst: &Path, keep_file_permissions: bool) -> Result<()> {
157        let mut out = smol::fs::File::create(dst).await?;
158        let mut writer = ZipFileWriter::new(&mut out);
159
160        for entry in walkdir::WalkDir::new(src_dir) {
161            let entry = entry?;
162            let path = entry.path();
163
164            if path.is_dir() {
165                continue;
166            }
167
168            let relative_path = path.strip_prefix(src_dir)?;
169            let data = smol::fs::read(&path).await?;
170
171            let filename = relative_path.display().to_string();
172
173            #[cfg(unix)]
174            {
175                let mut builder =
176                    ZipEntryBuilder::new(filename.into(), async_zip::Compression::Deflate);
177                use std::os::unix::fs::PermissionsExt;
178                let metadata = std::fs::metadata(path)?;
179                let perms = keep_file_permissions.then(|| metadata.permissions().mode() as u16);
180                builder = builder.unix_permissions(perms.unwrap_or_default());
181                writer.write_entry_whole(builder, &data).await?;
182            }
183            #[cfg(not(unix))]
184            {
185                let builder =
186                    ZipEntryBuilder::new(filename.into(), async_zip::Compression::Deflate);
187                writer.write_entry_whole(builder, &data).await?;
188            }
189        }
190
191        writer.close().await?;
192        out.flush().await?;
193        out.sync_all().await?;
194
195        Ok(())
196    }
197
198    #[track_caller]
199    fn assert_file_content(path: &Path, content: &str) {
200        assert!(path.exists(), "file not found: {:?}", path);
201        let actual = std::fs::read_to_string(path).unwrap();
202        assert_eq!(actual, content);
203    }
204
205    #[track_caller]
206    fn make_test_data() -> TempDir {
207        let dir = tempfile::tempdir().unwrap();
208        let dst = dir.path();
209
210        std::fs::write(dst.join("test"), "Hello world.").unwrap();
211        std::fs::create_dir_all(dst.join("foo/bar")).unwrap();
212        std::fs::write(dst.join("foo/bar.txt"), "Foo bar.").unwrap();
213        std::fs::write(dst.join("foo/dar.md"), "Bar dar.").unwrap();
214        std::fs::write(dst.join("foo/bar/dar你好.txt"), "你好世界").unwrap();
215
216        dir
217    }
218
219    async fn read_archive(path: &Path) -> impl AsyncRead + AsyncSeek + Unpin {
220        let data = smol::fs::read(&path).await.unwrap();
221        Cursor::new(data)
222    }
223
224    #[test]
225    fn test_extract_zip() {
226        let test_dir = make_test_data();
227        let zip_file = test_dir.path().join("test.zip");
228
229        smol::block_on(async {
230            compress_zip(test_dir.path(), &zip_file, true)
231                .await
232                .unwrap();
233            let reader = read_archive(&zip_file).await;
234
235            let dir = tempfile::tempdir().unwrap();
236            let dst = dir.path();
237            extract_zip(dst, reader).await.unwrap();
238
239            assert_file_content(&dst.join("test"), "Hello world.");
240            assert_file_content(&dst.join("foo/bar.txt"), "Foo bar.");
241            assert_file_content(&dst.join("foo/dar.md"), "Bar dar.");
242            assert_file_content(&dst.join("foo/bar/dar你好.txt"), "你好世界");
243        });
244    }
245
246    #[cfg(unix)]
247    #[test]
248    fn test_extract_zip_preserves_executable_permissions() {
249        use std::os::unix::fs::PermissionsExt;
250
251        smol::block_on(async {
252            let test_dir = tempfile::tempdir().unwrap();
253            let executable_path = test_dir.path().join("my_script");
254
255            // Create an executable file
256            std::fs::write(&executable_path, "#!/bin/bash\necho 'Hello'").unwrap();
257            let mut perms = std::fs::metadata(&executable_path).unwrap().permissions();
258            perms.set_mode(0o755); // rwxr-xr-x
259            std::fs::set_permissions(&executable_path, perms).unwrap();
260
261            // Create zip
262            let zip_file = test_dir.path().join("test.zip");
263            compress_zip(test_dir.path(), &zip_file, true)
264                .await
265                .unwrap();
266
267            // Extract to new location
268            let extract_dir = tempfile::tempdir().unwrap();
269            let reader = read_archive(&zip_file).await;
270            extract_zip(extract_dir.path(), reader).await.unwrap();
271
272            // Check permissions are preserved
273            let extracted_path = extract_dir.path().join("my_script");
274            assert!(extracted_path.exists());
275            let extracted_perms = std::fs::metadata(&extracted_path).unwrap().permissions();
276            assert_eq!(extracted_perms.mode() & 0o777, 0o755);
277        });
278    }
279
280    #[cfg(unix)]
281    #[test]
282    fn test_extract_zip_sets_default_permissions() {
283        use std::os::unix::fs::PermissionsExt;
284
285        smol::block_on(async {
286            let test_dir = tempfile::tempdir().unwrap();
287            let file_path = test_dir.path().join("my_script");
288
289            std::fs::write(&file_path, "#!/bin/bash\necho 'Hello'").unwrap();
290            // The permissions will be shaped by the umask in the test environment
291            let original_perms = std::fs::metadata(&file_path).unwrap().permissions();
292
293            // Create zip
294            let zip_file = test_dir.path().join("test.zip");
295            compress_zip(test_dir.path(), &zip_file, false)
296                .await
297                .unwrap();
298
299            // Extract to new location
300            let extract_dir = tempfile::tempdir().unwrap();
301            let reader = read_archive(&zip_file).await;
302            extract_zip(extract_dir.path(), reader).await.unwrap();
303
304            // Permissions were not stored, so will be whatever the umask generates
305            // by default for new files. This should match what we saw when we previously wrote
306            // the file.
307            let extracted_path = extract_dir.path().join("my_script");
308            assert!(extracted_path.exists());
309            let extracted_perms = std::fs::metadata(&extracted_path).unwrap().permissions();
310            assert_eq!(
311                extracted_perms.mode(),
312                original_perms.mode(),
313                "Expected matching Unix file mode for unzipped file without keep_file_permissions"
314            );
315            assert_eq!(
316                extracted_perms, original_perms,
317                "Expected default set of permissions for unzipped file without keep_file_permissions"
318            );
319        });
320    }
321
322    #[test]
323    fn test_archive_path_is_normal_rejects_traversal() {
324        assert!(!archive_path_is_normal("../parent.txt"));
325        assert!(!archive_path_is_normal("foo/../../grandparent.txt"));
326        assert!(!archive_path_is_normal("/tmp/absolute.txt"));
327
328        assert!(archive_path_is_normal("foo/bar.txt"));
329        assert!(archive_path_is_normal("foo/bar/baz.txt"));
330        assert!(archive_path_is_normal("./foo/bar.txt"));
331        assert!(archive_path_is_normal("normal.txt"));
332    }
333
334    async fn build_zip_with_entries(entries: &[(&str, &[u8])]) -> Cursor<Vec<u8>> {
335        let mut buf = Cursor::new(Vec::new());
336        let mut writer = ZipFileWriter::new(&mut buf);
337        for (name, data) in entries {
338            let builder = ZipEntryBuilder::new((*name).into(), async_zip::Compression::Stored);
339            writer.write_entry_whole(builder, data).await.unwrap();
340        }
341        writer.close().await.unwrap();
342        buf.set_position(0);
343        buf
344    }
345
346    #[test]
347    fn test_extract_zip_skips_path_traversal_entries() {
348        smol::block_on(async {
349            let base_dir = tempfile::tempdir().unwrap();
350            let extract_dir = base_dir.path().join("subdir");
351            std::fs::create_dir_all(&extract_dir).unwrap();
352
353            let absolute_target = base_dir.path().join("absolute.txt");
354            let reader = build_zip_with_entries(&[
355                ("normal.txt", b"normal file"),
356                ("subdir/nested.txt", b"nested file"),
357                ("../parent.txt", b"parent file"),
358                ("foo/../../grandparent.txt", b"grandparent file"),
359                (absolute_target.to_str().unwrap(), b"absolute file"),
360            ])
361            .await;
362
363            extract_zip(&extract_dir, reader).await.unwrap();
364
365            assert_file_content(&extract_dir.join("normal.txt"), "normal file");
366            assert_file_content(&extract_dir.join("subdir/nested.txt"), "nested file");
367
368            assert!(
369                !base_dir.path().join("parent.txt").exists(),
370                "parent traversal entry should have been skipped"
371            );
372            assert!(
373                !base_dir.path().join("grandparent.txt").exists(),
374                "nested traversal entry should have been skipped"
375            );
376            assert!(
377                !absolute_target.exists(),
378                "absolute path entry should have been skipped"
379            );
380        });
381    }
382}