// summary_backlog.rs

 1use collections::HashMap;
 2use std::{path::Path, sync::Arc, time::SystemTime};
 3
 4const MAX_FILES_BEFORE_RESUMMARIZE: usize = 4;
 5const MAX_BYTES_BEFORE_RESUMMARIZE: u64 = 1_000_000; // 1 MB
 6
 7#[derive(Default, Debug)]
 8pub struct SummaryBacklog {
 9    /// Key: path to a file that needs summarization, but that we haven't summarized yet. Value: that file's size on disk, in bytes, and its mtime.
10    files: HashMap<Arc<Path>, (u64, Option<SystemTime>)>,
11    /// Cache of the sum of all values in `files`, so we don't have to traverse the whole map to check if we're over the byte limit.
12    total_bytes: u64,
13}
14
15impl SummaryBacklog {
16    /// Store the given path in the backlog, along with how many bytes are in it.
17    pub fn insert(&mut self, path: Arc<Path>, bytes_on_disk: u64, mtime: Option<SystemTime>) {
18        let (prev_bytes, _) = self
19            .files
20            .insert(path, (bytes_on_disk, mtime))
21            .unwrap_or_default(); // Default to 0 prev_bytes
22
23        // Update the cached total by subtracting out the old amount and adding the new one.
24        self.total_bytes = self.total_bytes - prev_bytes + bytes_on_disk;
25    }
26
27    /// Returns true if the total number of bytes in the backlog exceeds a predefined threshold.
28    pub fn needs_drain(&self) -> bool {
29        self.files.len() > MAX_FILES_BEFORE_RESUMMARIZE ||
30        // The whole purpose of the cached total_bytes is to make this comparison cheap.
31        // Otherwise we'd have to traverse the entire dictionary every time we wanted this answer.
32        self.total_bytes > MAX_BYTES_BEFORE_RESUMMARIZE
33    }
34
35    /// Remove all the entries in the backlog and return the file paths as an iterator.
36    #[allow(clippy::needless_lifetimes)] // Clippy thinks this 'a can be elided, but eliding it gives a compile error
37    pub fn drain<'a>(&'a mut self) -> impl Iterator<Item = (Arc<Path>, Option<SystemTime>)> + 'a {
38        self.total_bytes = 0;
39
40        self.files
41            .drain()
42            .map(|(path, (_size, mtime))| (path, mtime))
43    }
44
45    pub fn len(&self) -> usize {
46        self.files.len()
47    }
48}