Cargo.lock 🔗
@@ -16022,6 +16022,7 @@ dependencies = [
"anyhow",
"assistant_text_thread",
"chrono",
+ "collections",
"editor",
"feature_flags",
"fs",
Eric Holk created
This adds a collection that has a map-like API but is backed by vectors.
For small collections, this often outperforms a HashMap because you
don't have the overhead of hashing things and everything is guaranteed
to be contiguous in memory.
I hand-rolled one of these in `ProjectGroupBuilder` but this factors it
into its own collection. This implements the API that
`ProjectGroupBuilder` needed, but if this becomes more widely used we
can expand to include more of the `HashMap` API.
Self-Review Checklist:
- [x] I've reviewed my own diff for quality, security, and reliability
- [x] Unsafe blocks (if any) have justifying comments
- [x] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [x] Tests cover the new/changed behavior
- [x] Performance impact has been considered and is acceptable
Release Notes:
- N/A
Cargo.lock | 1
crates/collections/Cargo.toml | 2
crates/collections/src/collections.rs | 4
crates/collections/src/vecmap.rs | 119 +++++++++++++++++++++++
crates/collections/src/vecmap_tests.rs | 96 ++++++++++++++++++
crates/sidebar/Cargo.toml | 1
crates/sidebar/src/project_group_builder.rs | 24 +---
7 files changed, 228 insertions(+), 19 deletions(-)
@@ -16022,6 +16022,7 @@ dependencies = [
"anyhow",
"assistant_text_thread",
"chrono",
+ "collections",
"editor",
"feature_flags",
"fs",
@@ -4,7 +4,7 @@ version = "0.1.0"
edition.workspace = true
publish = false
license = "Apache-2.0"
-description = "Standard collection type re-exports used by Zed and GPUI"
+description = "Standard collection types used by Zed and GPUI"
[lints]
workspace = true
@@ -7,3 +7,7 @@ pub use indexmap::Equivalent;
pub use rustc_hash::FxHasher;
pub use rustc_hash::{FxHashMap, FxHashSet};
pub use std::collections::*;
+
+pub mod vecmap;
+#[cfg(test)]
+mod vecmap_tests;
@@ -0,0 +1,119 @@
+/// A collection that provides a map interface but is backed by vectors.
+///
+/// This is suitable for small key-value stores where the item count is not
+/// large enough to overcome the overhead of a more complex algorithm.
+///
+/// If this meets your use cases, then [`VecMap`] should be a drop-in
+/// replacement for [`std::collections::HashMap`] or [`crate::HashMap`]. Note
+/// that we are adding APIs on an as-needed basis. If the API you need is not
+/// present yet, please add it!
+///
+/// Because it uses vectors as a backing store, the map also iterates over items
+/// in insertion order, like [`crate::IndexMap`].
+///
+/// This struct uses a struct-of-arrays (SoA) representation which tends to be
+/// more cache efficient and promotes autovectorization when using simple key or
+/// value types.
+#[derive(Default)]
+pub struct VecMap<K, V> {
+ keys: Vec<K>,
+ values: Vec<V>,
+}
+
+impl<K, V> VecMap<K, V> {
+ pub fn new() -> Self {
+ Self {
+ keys: Vec::new(),
+ values: Vec::new(),
+ }
+ }
+
+ pub fn iter(&self) -> Iter<'_, K, V> {
+ Iter {
+ iter: self.keys.iter().zip(self.values.iter()),
+ }
+ }
+}
+
+impl<K: Eq, V> VecMap<K, V> {
+ pub fn entry(&mut self, key: K) -> Entry<'_, K, V> {
+ match self.keys.iter().position(|k| k == &key) {
+ Some(index) => Entry::Occupied(OccupiedEntry {
+ key: &self.keys[index],
+ value: &mut self.values[index],
+ }),
+ None => Entry::Vacant(VacantEntry { map: self, key }),
+ }
+ }
+}
+
+pub struct Iter<'a, K, V> {
+ iter: std::iter::Zip<std::slice::Iter<'a, K>, std::slice::Iter<'a, V>>,
+}
+
+impl<'a, K, V> Iterator for Iter<'a, K, V> {
+ type Item = (&'a K, &'a V);
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.iter.next()
+ }
+}
+
+pub enum Entry<'a, K, V> {
+ Occupied(OccupiedEntry<'a, K, V>),
+ Vacant(VacantEntry<'a, K, V>),
+}
+
+impl<'a, K, V> Entry<'a, K, V> {
+ pub fn key(&self) -> &K {
+ match self {
+ Entry::Occupied(entry) => entry.key,
+ Entry::Vacant(entry) => &entry.key,
+ }
+ }
+
+ pub fn or_insert_with_key<F>(self, default: F) -> &'a mut V
+ where
+ F: FnOnce(&K) -> V,
+ {
+ match self {
+ Entry::Occupied(entry) => entry.value,
+ Entry::Vacant(entry) => {
+ entry.map.values.push(default(&entry.key));
+ entry.map.keys.push(entry.key);
+ match entry.map.values.last_mut() {
+ Some(value) => value,
+ None => unreachable!("vec empty after pushing to it"),
+ }
+ }
+ }
+ }
+
+ pub fn or_insert_with<F>(self, default: F) -> &'a mut V
+ where
+ F: FnOnce() -> V,
+ {
+ self.or_insert_with_key(|_| default())
+ }
+
+ pub fn or_insert(self, value: V) -> &'a mut V {
+ self.or_insert_with_key(|_| value)
+ }
+
+ pub fn or_insert_default(self) -> &'a mut V
+ where
+ V: Default,
+ {
+ self.or_insert_with_key(|_| Default::default())
+ }
+}
+
+pub struct OccupiedEntry<'a, K, V> {
+ key: &'a K,
+ value: &'a mut V,
+}
+
+pub struct VacantEntry<'a, K, V> {
+ map: &'a mut VecMap<K, V>,
+ key: K,
+}
@@ -0,0 +1,96 @@
+//! Tests for the VecMap collection.
+//!
+//! This is in a sibling module so that the tests are guaranteed to only cover
+//! states that can be created by the public API.
+
+use crate::vecmap::*;
+
+#[test]
+fn test_entry_vacant_or_insert() {
+ let mut map: VecMap<&str, i32> = VecMap::new();
+ let value = map.entry("a").or_insert(1);
+ assert_eq!(*value, 1);
+ assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &1)]);
+}
+
+#[test]
+fn test_entry_occupied_or_insert_keeps_existing() {
+ let mut map: VecMap<&str, i32> = VecMap::new();
+ map.entry("a").or_insert(1);
+ let value = map.entry("a").or_insert(99);
+ assert_eq!(*value, 1);
+ assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &1)]);
+}
+
+#[test]
+fn test_entry_or_insert_with() {
+ let mut map: VecMap<&str, i32> = VecMap::new();
+ map.entry("a").or_insert_with(|| 42);
+ assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &42)]);
+}
+
+#[test]
+fn test_entry_or_insert_with_not_called_when_occupied() {
+ let mut map: VecMap<&str, i32> = VecMap::new();
+ map.entry("a").or_insert(1);
+ map.entry("a")
+ .or_insert_with(|| panic!("should not be called"));
+ assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &1)]);
+}
+
+#[test]
+fn test_entry_or_insert_with_key() {
+ let mut map: VecMap<&str, String> = VecMap::new();
+ map.entry("hello").or_insert_with_key(|k| k.to_uppercase());
+ assert_eq!(
+ map.iter().collect::<Vec<_>>(),
+ vec![(&"hello", &"HELLO".to_string())]
+ );
+}
+
+#[test]
+fn test_entry_or_insert_default() {
+ let mut map: VecMap<&str, i32> = VecMap::new();
+ map.entry("a").or_insert_default();
+ assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &0)]);
+}
+
+#[test]
+fn test_entry_key() {
+ let mut map: VecMap<&str, i32> = VecMap::new();
+ assert_eq!(*map.entry("a").key(), "a");
+ map.entry("a").or_insert(1);
+ assert_eq!(*map.entry("a").key(), "a");
+}
+
+#[test]
+fn test_entry_mut_ref_can_be_updated() {
+ let mut map: VecMap<&str, i32> = VecMap::new();
+ let value = map.entry("a").or_insert(0);
+ *value = 5;
+ assert_eq!(map.iter().collect::<Vec<_>>(), vec![(&"a", &5)]);
+}
+
+#[test]
+fn test_insertion_order_preserved() {
+ let mut map: VecMap<&str, i32> = VecMap::new();
+ map.entry("b").or_insert(2);
+ map.entry("a").or_insert(1);
+ map.entry("c").or_insert(3);
+ assert_eq!(
+ map.iter().collect::<Vec<_>>(),
+ vec![(&"b", &2), (&"a", &1), (&"c", &3)]
+ );
+}
+
+#[test]
+fn test_multiple_entries_independent() {
+ let mut map: VecMap<i32, i32> = VecMap::new();
+ map.entry(1).or_insert(10);
+ map.entry(2).or_insert(20);
+ map.entry(3).or_insert(30);
+ assert_eq!(map.iter().count(), 3);
+ // Re-inserting does not duplicate keys
+ map.entry(1).or_insert(99);
+ assert_eq!(map.iter().count(), 3);
+}
@@ -23,6 +23,7 @@ agent_settings.workspace = true
agent_ui.workspace = true
anyhow.workspace = true
chrono.workspace = true
+collections.workspace = true
editor.workspace = true
feature_flags.workspace = true
fs.workspace = true
@@ -8,8 +8,8 @@
//! This module is provides the functions and structures necessary to do this
//! lookup and mapping.
+use collections::{HashMap, HashSet, vecmap::VecMap};
use std::{
- collections::{HashMap, HashSet},
path::{Path, PathBuf},
sync::Arc,
};
@@ -78,16 +78,14 @@ impl ProjectGroup {
pub struct ProjectGroupBuilder {
/// Maps git repositories' work_directory_abs_path to their original_repo_abs_path
directory_mappings: HashMap<PathBuf, PathBuf>,
- project_group_names: Vec<ProjectGroupName>,
- project_groups: Vec<ProjectGroup>,
+ project_groups: VecMap<ProjectGroupName, ProjectGroup>,
}
impl ProjectGroupBuilder {
fn new() -> Self {
Self {
- directory_mappings: HashMap::new(),
- project_group_names: Vec::new(),
- project_groups: Vec::new(),
+ directory_mappings: HashMap::default(),
+ project_groups: VecMap::new(),
}
}
@@ -113,15 +111,7 @@ impl ProjectGroupBuilder {
}
fn project_group_entry(&mut self, name: &ProjectGroupName) -> &mut ProjectGroup {
- match self.project_group_names.iter().position(|n| n == name) {
- Some(idx) => &mut self.project_groups[idx],
- None => {
- let idx = self.project_group_names.len();
- self.project_group_names.push(name.clone());
- self.project_groups.push(ProjectGroup::default());
- &mut self.project_groups[idx]
- }
- }
+ self.project_groups.entry(name.clone()).or_insert_default()
}
fn add_mapping(&mut self, work_directory: &Path, original_repo: &Path) {
@@ -204,9 +194,7 @@ impl ProjectGroupBuilder {
}
pub fn groups(&self) -> impl Iterator<Item = (&ProjectGroupName, &ProjectGroup)> {
- self.project_group_names
- .iter()
- .zip(self.project_groups.iter())
+ self.project_groups.iter()
}
}