archive.rs

  1use std::path::Path;
  2
  3use anyhow::{Context as _, Result};
  4use async_zip::base::read;
  5#[cfg(not(windows))]
  6use futures::AsyncSeek;
  7use futures::{AsyncRead, io::BufReader};
  8
  9#[cfg(any(unix, windows))]
 10fn archive_path_is_normal(filename: &str) -> bool {
 11    Path::new(filename).components().all(|c| {
 12        matches!(
 13            c,
 14            std::path::Component::Normal(_) | std::path::Component::CurDir
 15        )
 16    })
 17}
 18
 19#[cfg(windows)]
 20pub async fn extract_zip<R: AsyncRead + Unpin>(destination: &Path, reader: R) -> Result<()> {
 21    let mut reader = read::stream::ZipFileReader::new(BufReader::new(reader));
 22
 23    let destination = &destination
 24        .canonicalize()
 25        .unwrap_or_else(|_| destination.to_path_buf());
 26
 27    while let Some(mut item) = reader.next_with_entry().await? {
 28        let entry_reader = item.reader_mut();
 29        let entry = entry_reader.entry();
 30        let filename = entry
 31            .filename()
 32            .as_str()
 33            .context("reading zip entry file name")?;
 34
 35        if !archive_path_is_normal(filename) {
 36            reader = item.skip().await.context("reading next zip entry")?;
 37            continue;
 38        }
 39
 40        let path = destination.join(filename);
 41
 42        if entry
 43            .dir()
 44            .with_context(|| format!("reading zip entry metadata for path {path:?}"))?
 45        {
 46            std::fs::create_dir_all(&path)
 47                .with_context(|| format!("creating directory {path:?}"))?;
 48        } else {
 49            let parent_dir = path
 50                .parent()
 51                .with_context(|| format!("no parent directory for {path:?}"))?;
 52            std::fs::create_dir_all(parent_dir)
 53                .with_context(|| format!("creating parent directory {parent_dir:?}"))?;
 54            let mut file = smol::fs::File::create(&path)
 55                .await
 56                .with_context(|| format!("creating file {path:?}"))?;
 57            futures::io::copy(entry_reader, &mut file)
 58                .await
 59                .with_context(|| format!("extracting into file {path:?}"))?;
 60        }
 61
 62        reader = item.skip().await.context("reading next zip entry")?;
 63    }
 64
 65    Ok(())
 66}
 67
 68#[cfg(unix)]
 69pub async fn extract_zip<R: AsyncRead + Unpin>(destination: &Path, reader: R) -> Result<()> {
 70    // Unix needs file permissions copied when extracting.
 71    // This is only possible to do when a reader impls `AsyncSeek` and `seek::ZipFileReader` is used.
 72    // `stream::ZipFileReader` also has the `unix_permissions` method, but it will always return `Some(0)`.
 73    //
 74    // A typical `reader` comes from a streaming network response, so cannot be sought right away,
 75    // and reading the entire archive into the memory seems wasteful.
 76    //
 77    // So, save the stream into a temporary file first and then get it read with a seeking reader.
 78    let mut file = async_fs::File::from(tempfile::tempfile().context("creating a temporary file")?);
 79    futures::io::copy(&mut BufReader::new(reader), &mut file)
 80        .await
 81        .context("saving archive contents into the temporary file")?;
 82    extract_seekable_zip(destination, file).await
 83}
 84
 85#[cfg(unix)]
 86pub async fn extract_seekable_zip<R: AsyncRead + AsyncSeek + Unpin>(
 87    destination: &Path,
 88    reader: R,
 89) -> Result<()> {
 90    let mut reader = read::seek::ZipFileReader::new(BufReader::new(reader))
 91        .await
 92        .context("reading the zip archive")?;
 93    let destination = &destination
 94        .canonicalize()
 95        .unwrap_or_else(|_| destination.to_path_buf());
 96    for (i, entry) in reader.file().entries().to_vec().into_iter().enumerate() {
 97        let filename = entry
 98            .filename()
 99            .as_str()
100            .context("reading zip entry file name")?;
101
102        if !archive_path_is_normal(filename) {
103            continue;
104        }
105
106        let path = destination.join(filename);
107
108        if entry
109            .dir()
110            .with_context(|| format!("reading zip entry metadata for path {path:?}"))?
111        {
112            std::fs::create_dir_all(&path)
113                .with_context(|| format!("creating directory {path:?}"))?;
114        } else {
115            let parent_dir = path
116                .parent()
117                .with_context(|| format!("no parent directory for {path:?}"))?;
118            std::fs::create_dir_all(parent_dir)
119                .with_context(|| format!("creating parent directory {parent_dir:?}"))?;
120            let mut file = smol::fs::File::create(&path)
121                .await
122                .with_context(|| format!("creating file {path:?}"))?;
123            let mut entry_reader = reader
124                .reader_with_entry(i)
125                .await
126                .with_context(|| format!("reading entry for path {path:?}"))?;
127            futures::io::copy(&mut entry_reader, &mut file)
128                .await
129                .with_context(|| format!("extracting into file {path:?}"))?;
130
131            if let Some(perms) = entry.unix_permissions()
132                && perms != 0o000
133            {
134                use std::os::unix::fs::PermissionsExt;
135                let permissions = std::fs::Permissions::from_mode(u32::from(perms));
136                file.set_permissions(permissions)
137                    .await
138                    .with_context(|| format!("setting permissions for file {path:?}"))?;
139            }
140        }
141    }
142
143    Ok(())
144}
145
146#[cfg(test)]
147mod tests {
148    use async_zip::ZipEntryBuilder;
149    use async_zip::base::write::ZipFileWriter;
150    use futures::{AsyncSeek, AsyncWriteExt};
151    use smol::io::Cursor;
152    use tempfile::TempDir;
153
154    use super::*;
155
156    #[allow(unused_variables)]
157    async fn compress_zip(src_dir: &Path, dst: &Path, keep_file_permissions: bool) -> Result<()> {
158        let mut out = smol::fs::File::create(dst).await?;
159        let mut writer = ZipFileWriter::new(&mut out);
160
161        for entry in walkdir::WalkDir::new(src_dir) {
162            let entry = entry?;
163            let path = entry.path();
164
165            if path.is_dir() {
166                continue;
167            }
168
169            let relative_path = path.strip_prefix(src_dir)?;
170            let data = smol::fs::read(&path).await?;
171
172            let filename = relative_path.display().to_string();
173
174            #[cfg(unix)]
175            {
176                let mut builder =
177                    ZipEntryBuilder::new(filename.into(), async_zip::Compression::Deflate);
178                use std::os::unix::fs::PermissionsExt;
179                let metadata = std::fs::metadata(path)?;
180                let perms = keep_file_permissions.then(|| metadata.permissions().mode() as u16);
181                builder = builder.unix_permissions(perms.unwrap_or_default());
182                writer.write_entry_whole(builder, &data).await?;
183            }
184            #[cfg(not(unix))]
185            {
186                let builder =
187                    ZipEntryBuilder::new(filename.into(), async_zip::Compression::Deflate);
188                writer.write_entry_whole(builder, &data).await?;
189            }
190        }
191
192        writer.close().await?;
193        out.flush().await?;
194        out.sync_all().await?;
195
196        Ok(())
197    }
198
199    #[track_caller]
200    fn assert_file_content(path: &Path, content: &str) {
201        assert!(path.exists(), "file not found: {:?}", path);
202        let actual = std::fs::read_to_string(path).unwrap();
203        assert_eq!(actual, content);
204    }
205
206    #[track_caller]
207    fn make_test_data() -> TempDir {
208        let dir = tempfile::tempdir().unwrap();
209        let dst = dir.path();
210
211        std::fs::write(dst.join("test"), "Hello world.").unwrap();
212        std::fs::create_dir_all(dst.join("foo/bar")).unwrap();
213        std::fs::write(dst.join("foo/bar.txt"), "Foo bar.").unwrap();
214        std::fs::write(dst.join("foo/dar.md"), "Bar dar.").unwrap();
215        std::fs::write(dst.join("foo/bar/dar你好.txt"), "你好世界").unwrap();
216
217        dir
218    }
219
220    async fn read_archive(path: &Path) -> impl AsyncRead + AsyncSeek + Unpin {
221        let data = smol::fs::read(&path).await.unwrap();
222        Cursor::new(data)
223    }
224
225    #[test]
226    fn test_extract_zip() {
227        let test_dir = make_test_data();
228        let zip_file = test_dir.path().join("test.zip");
229
230        smol::block_on(async {
231            compress_zip(test_dir.path(), &zip_file, true)
232                .await
233                .unwrap();
234            let reader = read_archive(&zip_file).await;
235
236            let dir = tempfile::tempdir().unwrap();
237            let dst = dir.path();
238            extract_zip(dst, reader).await.unwrap();
239
240            assert_file_content(&dst.join("test"), "Hello world.");
241            assert_file_content(&dst.join("foo/bar.txt"), "Foo bar.");
242            assert_file_content(&dst.join("foo/dar.md"), "Bar dar.");
243            assert_file_content(&dst.join("foo/bar/dar你好.txt"), "你好世界");
244        });
245    }
246
247    #[cfg(unix)]
248    #[test]
249    fn test_extract_zip_preserves_executable_permissions() {
250        use std::os::unix::fs::PermissionsExt;
251
252        smol::block_on(async {
253            let test_dir = tempfile::tempdir().unwrap();
254            let executable_path = test_dir.path().join("my_script");
255
256            // Create an executable file
257            std::fs::write(&executable_path, "#!/bin/bash\necho 'Hello'").unwrap();
258            let mut perms = std::fs::metadata(&executable_path).unwrap().permissions();
259            perms.set_mode(0o755); // rwxr-xr-x
260            std::fs::set_permissions(&executable_path, perms).unwrap();
261
262            // Create zip
263            let zip_file = test_dir.path().join("test.zip");
264            compress_zip(test_dir.path(), &zip_file, true)
265                .await
266                .unwrap();
267
268            // Extract to new location
269            let extract_dir = tempfile::tempdir().unwrap();
270            let reader = read_archive(&zip_file).await;
271            extract_zip(extract_dir.path(), reader).await.unwrap();
272
273            // Check permissions are preserved
274            let extracted_path = extract_dir.path().join("my_script");
275            assert!(extracted_path.exists());
276            let extracted_perms = std::fs::metadata(&extracted_path).unwrap().permissions();
277            assert_eq!(extracted_perms.mode() & 0o777, 0o755);
278        });
279    }
280
281    #[cfg(unix)]
282    #[test]
283    fn test_extract_zip_sets_default_permissions() {
284        use std::os::unix::fs::PermissionsExt;
285
286        smol::block_on(async {
287            let test_dir = tempfile::tempdir().unwrap();
288            let file_path = test_dir.path().join("my_script");
289
290            std::fs::write(&file_path, "#!/bin/bash\necho 'Hello'").unwrap();
291            // The permissions will be shaped by the umask in the test environment
292            let original_perms = std::fs::metadata(&file_path).unwrap().permissions();
293
294            // Create zip
295            let zip_file = test_dir.path().join("test.zip");
296            compress_zip(test_dir.path(), &zip_file, false)
297                .await
298                .unwrap();
299
300            // Extract to new location
301            let extract_dir = tempfile::tempdir().unwrap();
302            let reader = read_archive(&zip_file).await;
303            extract_zip(extract_dir.path(), reader).await.unwrap();
304
305            // Permissions were not stored, so will be whatever the umask generates
306            // by default for new files. This should match what we saw when we previously wrote
307            // the file.
308            let extracted_path = extract_dir.path().join("my_script");
309            assert!(extracted_path.exists());
310            let extracted_perms = std::fs::metadata(&extracted_path).unwrap().permissions();
311            assert_eq!(
312                extracted_perms.mode(),
313                original_perms.mode(),
314                "Expected matching Unix file mode for unzipped file without keep_file_permissions"
315            );
316            assert_eq!(
317                extracted_perms, original_perms,
318                "Expected default set of permissions for unzipped file without keep_file_permissions"
319            );
320        });
321    }
322
323    #[test]
324    fn test_archive_path_is_normal_rejects_traversal() {
325        assert!(!archive_path_is_normal("../parent.txt"));
326        assert!(!archive_path_is_normal("foo/../../grandparent.txt"));
327        assert!(!archive_path_is_normal("/tmp/absolute.txt"));
328
329        assert!(archive_path_is_normal("foo/bar.txt"));
330        assert!(archive_path_is_normal("foo/bar/baz.txt"));
331        assert!(archive_path_is_normal("./foo/bar.txt"));
332        assert!(archive_path_is_normal("normal.txt"));
333    }
334
335    async fn build_zip_with_entries(entries: &[(&str, &[u8])]) -> Cursor<Vec<u8>> {
336        let mut buf = Cursor::new(Vec::new());
337        let mut writer = ZipFileWriter::new(&mut buf);
338        for (name, data) in entries {
339            let builder = ZipEntryBuilder::new((*name).into(), async_zip::Compression::Stored);
340            writer.write_entry_whole(builder, data).await.unwrap();
341        }
342        writer.close().await.unwrap();
343        buf.set_position(0);
344        buf
345    }
346
347    #[test]
348    fn test_extract_zip_skips_path_traversal_entries() {
349        smol::block_on(async {
350            let base_dir = tempfile::tempdir().unwrap();
351            let extract_dir = base_dir.path().join("subdir");
352            std::fs::create_dir_all(&extract_dir).unwrap();
353
354            let absolute_target = base_dir.path().join("absolute.txt");
355            let reader = build_zip_with_entries(&[
356                ("normal.txt", b"normal file"),
357                ("subdir/nested.txt", b"nested file"),
358                ("../parent.txt", b"parent file"),
359                ("foo/../../grandparent.txt", b"grandparent file"),
360                (absolute_target.to_str().unwrap(), b"absolute file"),
361            ])
362            .await;
363
364            extract_zip(&extract_dir, reader).await.unwrap();
365
366            assert_file_content(&extract_dir.join("normal.txt"), "normal file");
367            assert_file_content(&extract_dir.join("subdir/nested.txt"), "nested file");
368
369            assert!(
370                !base_dir.path().join("parent.txt").exists(),
371                "parent traversal entry should have been skipped"
372            );
373            assert!(
374                !base_dir.path().join("grandparent.txt").exists(),
375                "nested traversal entry should have been skipped"
376            );
377            assert!(
378                !absolute_target.exists(),
379                "absolute path entry should have been skipped"
380            );
381        });
382    }
383}