Add VecMap and use it in ProjectGroupBuilder (#52596)

Eric Holk created

This adds a collection that has a map-like API but is backed by vectors.
For small collections, this often outperforms a HashMap because you
don't have the overhead of hashing things and everything is guaranteed
to be contiguous in memory.

I hand-rolled one of these in `ProjectGroupBuilder` but this factors it
into its own collection. This implements the API that
`ProjectGroupBuilder` needed, but if this becomes more widely used we
can expand to include more of the `HashMap` API.

Self-Review Checklist:

- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable

Release Notes:

- N/A

Change summary

Cargo.lock                                  |   1 
crates/collections/Cargo.toml               |   2 
crates/collections/src/collections.rs       |   4 
crates/collections/src/vecmap.rs            | 119 +++++++++++++++++++++++
crates/collections/src/vecmap_tests.rs      |  96 ++++++++++++++++++
crates/sidebar/Cargo.toml                   |   1 
crates/sidebar/src/project_group_builder.rs |  24 +---
7 files changed, 228 insertions(+), 19 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -16022,6 +16022,7 @@ dependencies = [
  "anyhow",
  "assistant_text_thread",
  "chrono",
+ "collections",
  "editor",
  "feature_flags",
  "fs",

crates/collections/Cargo.toml 🔗

@@ -4,7 +4,7 @@ version = "0.1.0"
 edition.workspace = true
 publish = false
 license = "Apache-2.0"
-description = "Standard collection type re-exports used by Zed and GPUI"
+description = "Standard collection types used by Zed and GPUI"
 
 [lints]
 workspace = true

crates/collections/src/collections.rs 🔗

@@ -7,3 +7,7 @@ pub use indexmap::Equivalent;
 pub use rustc_hash::FxHasher;
 pub use rustc_hash::{FxHashMap, FxHashSet};
 pub use std::collections::*;
+
+pub mod vecmap;
+#[cfg(test)]
+mod vecmap_tests;

crates/collections/src/vecmap.rs 🔗

@@ -0,0 +1,119 @@
+/// A collection that provides a map interface but is backed by vectors.
+///
+/// This is suitable for small key-value stores where the item count is not
+/// large enough to overcome the overhead of a more complex algorithm.
+///
+/// If this meets your use cases, then [`VecMap`] should be a drop-in
+/// replacement for [`std::collections::HashMap`] or [`crate::HashMap`]. Note
+/// that we are adding APIs on an as-needed basis. If the API you need is not
+/// present yet, please add it!
+///
+/// Because it uses vectors as a backing store, the map also iterates over items
+/// in insertion order, like [`crate::IndexMap`].
+///
+/// This struct uses a struct-of-arrays (SoA) representation which tends to be
+/// more cache efficient and promotes autovectorization when using simple key or
+/// value types.
+#[derive(Default)]
+pub struct VecMap<K, V> {
+    keys: Vec<K>,
+    values: Vec<V>,
+}
+
+impl<K, V> VecMap<K, V> {
+    pub fn new() -> Self {
+        Self {
+            keys: Vec::new(),
+            values: Vec::new(),
+        }
+    }
+
+    pub fn iter(&self) -> Iter<'_, K, V> {
+        Iter {
+            iter: self.keys.iter().zip(self.values.iter()),
+        }
+    }
+}
+
+impl<K: Eq, V> VecMap<K, V> {
+    pub fn entry(&mut self, key: K) -> Entry<'_, K, V> {
+        match self.keys.iter().position(|k| k == &key) {
+            Some(index) => Entry::Occupied(OccupiedEntry {
+                key: &self.keys[index],
+                value: &mut self.values[index],
+            }),
+            None => Entry::Vacant(VacantEntry { map: self, key }),
+        }
+    }
+}
+
+pub struct Iter<'a, K, V> {
+    iter: std::iter::Zip<std::slice::Iter<'a, K>, std::slice::Iter<'a, V>>,
+}
+
+impl<'a, K, V> Iterator for Iter<'a, K, V> {
+    type Item = (&'a K, &'a V);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next()
+    }
+}
+
+pub enum Entry<'a, K, V> {
+    Occupied(OccupiedEntry<'a, K, V>),
+    Vacant(VacantEntry<'a, K, V>),
+}
+
+impl<'a, K, V> Entry<'a, K, V> {
+    pub fn key(&self) -> &K {
+        match self {
+            Entry::Occupied(entry) => entry.key,
+            Entry::Vacant(entry) => &entry.key,
+        }
+    }
+
+    pub fn or_insert_with_key<F>(self, default: F) -> &'a mut V
+    where
+        F: FnOnce(&K) -> V,
+    {
+        match self {
+            Entry::Occupied(entry) => entry.value,
+            Entry::Vacant(entry) => {
+                entry.map.values.push(default(&entry.key));
+                entry.map.keys.push(entry.key);
+                match entry.map.values.last_mut() {
+                    Some(value) => value,
+                    None => unreachable!("vec empty after pushing to it"),
+                }
+            }
+        }
+    }
+
+    pub fn or_insert_with<F>(self, default: F) -> &'a mut V
+    where
+        F: FnOnce() -> V,
+    {
+        self.or_insert_with_key(|_| default())
+    }
+
+    pub fn or_insert(self, value: V) -> &'a mut V {
+        self.or_insert_with_key(|_| value)
+    }
+
+    pub fn or_insert_default(self) -> &'a mut V
+    where
+        V: Default,
+    {
+        self.or_insert_with_key(|_| Default::default())
+    }
+}
+
+pub struct OccupiedEntry<'a, K, V> {
+    key: &'a K,
+    value: &'a mut V,
+}
+
+pub struct VacantEntry<'a, K, V> {
+    map: &'a mut VecMap<K, V>,
+    key: K,
+}

crates/collections/src/vecmap_tests.rs 🔗

@@ -0,0 +1,96 @@
+//! Tests for the VecMap collection.
+//!
+//! This is in a sibling module so that the tests are guaranteed to only cover
+//! states that can be created by the public API.
+
+use crate::vecmap::*;
+
+#[test]
+fn test_entry_vacant_or_insert() {
+    let mut map: VecMap<&str, i32> = VecMap::new();
+    let value = map.entry("a").or_insert(1);
+    assert_eq!(*value, 1);
+    assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &1)]);
+}
+
+#[test]
+fn test_entry_occupied_or_insert_keeps_existing() {
+    let mut map: VecMap<&str, i32> = VecMap::new();
+    map.entry("a").or_insert(1);
+    let value = map.entry("a").or_insert(99);
+    assert_eq!(*value, 1);
+    assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &1)]);
+}
+
+#[test]
+fn test_entry_or_insert_with() {
+    let mut map: VecMap<&str, i32> = VecMap::new();
+    map.entry("a").or_insert_with(|| 42);
+    assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &42)]);
+}
+
+#[test]
+fn test_entry_or_insert_with_not_called_when_occupied() {
+    let mut map: VecMap<&str, i32> = VecMap::new();
+    map.entry("a").or_insert(1);
+    map.entry("a")
+        .or_insert_with(|| panic!("should not be called"));
+    assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &1)]);
+}
+
+#[test]
+fn test_entry_or_insert_with_key() {
+    let mut map: VecMap<&str, String> = VecMap::new();
+    map.entry("hello").or_insert_with_key(|k| k.to_uppercase());
+    assert_eq!(
+        map.iter().collect::<Vec<_>>(),
+        vec![(&"hello", &"HELLO".to_string())]
+    );
+}
+
+#[test]
+fn test_entry_or_insert_default() {
+    let mut map: VecMap<&str, i32> = VecMap::new();
+    map.entry("a").or_insert_default();
+    assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &0)]);
+}
+
+#[test]
+fn test_entry_key() {
+    let mut map: VecMap<&str, i32> = VecMap::new();
+    assert_eq!(*map.entry("a").key(), "a");
+    map.entry("a").or_insert(1);
+    assert_eq!(*map.entry("a").key(), "a");
+}
+
+#[test]
+fn test_entry_mut_ref_can_be_updated() {
+    let mut map: VecMap<&str, i32> = VecMap::new();
+    let value = map.entry("a").or_insert(0);
+    *value = 5;
+    assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &5)]);
+}
+
+#[test]
+fn test_insertion_order_preserved() {
+    let mut map: VecMap<&str, i32> = VecMap::new();
+    map.entry("b").or_insert(2);
+    map.entry("a").or_insert(1);
+    map.entry("c").or_insert(3);
+    assert_eq!(
+        map.iter().collect::<Vec<_>>(),
+        vec![(&"b", &2), (&"a", &1), (&"c", &3)]
+    );
+}
+
+#[test]
+fn test_multiple_entries_independent() {
+    let mut map: VecMap<i32, i32> = VecMap::new();
+    map.entry(1).or_insert(10);
+    map.entry(2).or_insert(20);
+    map.entry(3).or_insert(30);
+    assert_eq!(map.iter().count(), 3);
+    // Re-inserting does not duplicate keys
+    map.entry(1).or_insert(99);
+    assert_eq!(map.iter().count(), 3);
+}

crates/sidebar/Cargo.toml 🔗

@@ -23,6 +23,7 @@ agent_settings.workspace = true
 agent_ui.workspace = true
 anyhow.workspace = true
 chrono.workspace = true
+collections.workspace = true
 editor.workspace = true
 feature_flags.workspace = true
 fs.workspace = true

crates/sidebar/src/project_group_builder.rs 🔗

@@ -8,8 +8,8 @@
 //! This module is provides the functions and structures necessary to do this
 //! lookup and mapping.
 
+use collections::{HashMap, HashSet, vecmap::VecMap};
 use std::{
-    collections::{HashMap, HashSet},
     path::{Path, PathBuf},
     sync::Arc,
 };
@@ -78,16 +78,14 @@ impl ProjectGroup {
 pub struct ProjectGroupBuilder {
     /// Maps git repositories' work_directory_abs_path to their original_repo_abs_path
     directory_mappings: HashMap<PathBuf, PathBuf>,
-    project_group_names: Vec<ProjectGroupName>,
-    project_groups: Vec<ProjectGroup>,
+    project_groups: VecMap<ProjectGroupName, ProjectGroup>,
 }
 
 impl ProjectGroupBuilder {
     fn new() -> Self {
         Self {
-            directory_mappings: HashMap::new(),
-            project_group_names: Vec::new(),
-            project_groups: Vec::new(),
+            directory_mappings: HashMap::default(),
+            project_groups: VecMap::new(),
         }
     }
 
@@ -113,15 +111,7 @@ impl ProjectGroupBuilder {
     }
 
     fn project_group_entry(&mut self, name: &ProjectGroupName) -> &mut ProjectGroup {
-        match self.project_group_names.iter().position(|n| n == name) {
-            Some(idx) => &mut self.project_groups[idx],
-            None => {
-                let idx = self.project_group_names.len();
-                self.project_group_names.push(name.clone());
-                self.project_groups.push(ProjectGroup::default());
-                &mut self.project_groups[idx]
-            }
-        }
+        self.project_groups.entry(name.clone()).or_insert_default()
     }
 
     fn add_mapping(&mut self, work_directory: &Path, original_repo: &Path) {
@@ -204,9 +194,7 @@ impl ProjectGroupBuilder {
     }
 
     pub fn groups(&self) -> impl Iterator<Item = (&ProjectGroupName, &ProjectGroup)> {
-        self.project_group_names
-            .iter()
-            .zip(self.project_groups.iter())
+        self.project_groups.iter()
     }
 }