Built first draft of workspace serialization schemas, started writing DB tests

Mikayla Maki and kay@zed.dev created

Co-Authored-By: kay@zed.dev

Change summary

crates/db/src/db.rs         |   1 
crates/db/src/items.rs      | 341 +++++---------------------------------
crates/db/src/kvp.rs        |   2 
crates/db/src/migrations.rs |   5 
crates/db/src/workspace.rs  | 180 ++++++++++++++++++++
5 files changed, 231 insertions(+), 298 deletions(-)

Detailed changes

crates/db/src/db.rs 🔗

@@ -1,6 +1,7 @@
 mod items;
 mod kvp;
 mod migrations;
+mod workspace;
 
 use std::fs;
 use std::path::{Path, PathBuf};

crates/db/src/items.rs 🔗

@@ -6,306 +6,59 @@ use rusqlite::{named_params, params};
 
 use super::Db;
 
-pub(crate) const ITEMS_M_1: &str = "
-CREATE TABLE items(
-    id INTEGER PRIMARY KEY,
-    kind TEXT
-) STRICT;
-CREATE TABLE item_path(
-    item_id INTEGER PRIMARY KEY,
-    path BLOB
-) STRICT;
-CREATE TABLE item_query(
-    item_id INTEGER PRIMARY KEY,
-    query TEXT
-) STRICT;
-";
-
-#[derive(PartialEq, Eq, Hash, Debug)]
-pub enum SerializedItemKind {
-    Editor,
-    Terminal,
-    ProjectSearch,
-    Diagnostics,
-}
-
-impl Display for SerializedItemKind {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(&format!("{:?}", self))
-    }
-}
-
-#[derive(Clone, Debug, PartialEq, Eq, Hash)]
-pub enum SerializedItem {
-    Editor(usize, PathBuf),
-    Terminal(usize),
-    ProjectSearch(usize, String),
-    Diagnostics(usize),
-}
-
-impl SerializedItem {
-    fn kind(&self) -> SerializedItemKind {
-        match self {
-            SerializedItem::Editor(_, _) => SerializedItemKind::Editor,
-            SerializedItem::Terminal(_) => SerializedItemKind::Terminal,
-            SerializedItem::ProjectSearch(_, _) => SerializedItemKind::ProjectSearch,
-            SerializedItem::Diagnostics(_) => SerializedItemKind::Diagnostics,
-        }
-    }
-
-    fn id(&self) -> usize {
-        match self {
-            SerializedItem::Editor(id, _)
-            | SerializedItem::Terminal(id)
-            | SerializedItem::ProjectSearch(id, _)
-            | SerializedItem::Diagnostics(id) => *id,
-        }
-    }
-}
-
-impl Db {
-    fn write_item(&self, serialized_item: SerializedItem) -> Result<()> {
-        self.real()
-            .map(|db| {
-                let mut lock = db.connection.lock();
-                let tx = lock.transaction()?;
-
-                // Serialize the item
-                let id = serialized_item.id();
-                {
-                    let mut stmt = tx.prepare_cached(
-                        "INSERT OR REPLACE INTO items(id, kind) VALUES ((?), (?))",
-                    )?;
-
-                    dbg!("inserting item");
-                    stmt.execute(params![id, serialized_item.kind().to_string()])?;
-                }
-
-                // Serialize item data
-                match &serialized_item {
-                    SerializedItem::Editor(_, path) => {
-                        dbg!("inserting path");
-                        let mut stmt = tx.prepare_cached(
-                            "INSERT OR REPLACE INTO item_path(item_id, path) VALUES ((?), (?))",
-                        )?;
-
-                        let path_bytes = path.as_os_str().as_bytes();
-                        stmt.execute(params![id, path_bytes])?;
-                    }
-                    SerializedItem::ProjectSearch(_, query) => {
-                        dbg!("inserting query");
-                        let mut stmt = tx.prepare_cached(
-                            "INSERT OR REPLACE INTO item_query(item_id, query) VALUES ((?), (?))",
-                        )?;
-
-                        stmt.execute(params![id, query])?;
-                    }
-                    _ => {}
-                }
-
-                tx.commit()?;
-
-                let mut stmt = lock.prepare_cached("SELECT id, kind FROM items")?;
-                let _ = stmt
-                    .query_map([], |row| {
-                        let zero: usize = row.get(0)?;
-                        let one: String = row.get(1)?;
-
-                        dbg!(zero, one);
-                        Ok(())
-                    })?
-                    .collect::<Vec<Result<(), _>>>();
-
-                Ok(())
-            })
-            .unwrap_or(Ok(()))
-    }
-
-    fn delete_item(&self, item_id: usize) -> Result<()> {
-        self.real()
-            .map(|db| {
-                let lock = db.connection.lock();
-
-                let mut stmt = lock.prepare_cached(
-                    r#"
-                    DELETE FROM items WHERE id = (:id);
-                    DELETE FROM item_path WHERE id = (:id);
-                    DELETE FROM item_query WHERE id = (:id);
-                    "#,
-                )?;
-
-                stmt.execute(named_params! {":id": item_id})?;
-
-                Ok(())
-            })
-            .unwrap_or(Ok(()))
-    }
-
-    fn take_items(&self) -> Result<HashSet<SerializedItem>> {
-        self.real()
-            .map(|db| {
-                let mut lock = db.connection.lock();
-
-                let tx = lock.transaction()?;
+/// Current design makes the cut at the item level,
+///   - Maybe a little more bottom up: serialize 'Terminals' and 'Editors' directly, and then make a separate
+///   - items table, with a kind, and an integer that acts as a key to one of these other tables
+/// This column is a foreign key to ONE OF: editors, terminals, searches
+///   -
 
-                // When working with transactions in rusqlite, need to make this kind of scope
-                // To make the borrow stuff work correctly. Don't know why, rust is wild.
-                let result = {
-                    let mut editors_stmt = tx.prepare_cached(
-                        r#"
-                        SELECT items.id, item_path.path
-                        FROM items
-                        LEFT JOIN item_path
-                            ON items.id = item_path.item_id
-                        WHERE items.kind = ?;
-                        "#,
-                    )?;
+// (workspace_id, item_id)
+// kind -> ::Editor::
 
-                    let editors_iter = editors_stmt.query_map(
-                        [SerializedItemKind::Editor.to_string()],
-                        |row| {
-                            let id: usize = row.get(0)?;
+// ->
+// At the workspace level
+// -> (Workspace_ID, item_id)
+// -> One shot, big query, load everything up:
 
-                            let buf: Vec<u8> = row.get(1)?;
-                            let path: PathBuf = OsStr::from_bytes(&buf).into();
+// -> SerializedWorkspace::deserialize(tx, itemKey)
+//     -> SerializedEditor::deserialize(tx, itemKey)
 
-                            Ok(SerializedItem::Editor(id, path))
-                        },
-                    )?;
+//         ->
+// -> Workspace::new(SerializedWorkspace)
+//     -> Editor::new(serialized_workspace[???]serializedEditor)
 
-                    let mut terminals_stmt = tx.prepare_cached(
-                        r#"
-                        SELECT items.id
-                        FROM items
-                        WHERE items.kind = ?;
-                        "#,
-                    )?;
-                    let terminals_iter = terminals_stmt.query_map(
-                        [SerializedItemKind::Terminal.to_string()],
-                        |row| {
-                            let id: usize = row.get(0)?;
+// //Pros: Keeps SQL out of everybody else's code, makes changing it easier (e.g. for loading from a network or RocksDB)
+// //Cons: DB has to know the internals of the entire rest of the app
 
-                            Ok(SerializedItem::Terminal(id))
-                        },
-                    )?;
+// Workspace
+// Worktree roots
+// Pane groups
+// Dock
+// Items
+// Sidebars
 
-                    let mut search_stmt = tx.prepare_cached(
-                        r#"
-                        SELECT items.id, item_query.query
-                        FROM items
-                        LEFT JOIN item_query
-                            ON items.id = item_query.item_id
-                        WHERE items.kind = ?;
-                        "#,
-                    )?;
-                    let searches_iter = search_stmt.query_map(
-                        [SerializedItemKind::ProjectSearch.to_string()],
-                        |row| {
-                            let id: usize = row.get(0)?;
-                            let query = row.get(1)?;
-
-                            Ok(SerializedItem::ProjectSearch(id, query))
-                        },
-                    )?;
-
-                    #[cfg(debug_assertions)]
-                    let tmp =
-                        searches_iter.collect::<Vec<Result<SerializedItem, rusqlite::Error>>>();
-                    #[cfg(debug_assertions)]
-                    debug_assert!(tmp.len() == 0 || tmp.len() == 1);
-                    #[cfg(debug_assertions)]
-                    let searches_iter = tmp.into_iter();
-
-                    let mut diagnostic_stmt = tx.prepare_cached(
-                        r#"
-                        SELECT items.id
-                        FROM items
-                        WHERE items.kind = ?;
-                        "#,
-                    )?;
-
-                    let diagnostics_iter = diagnostic_stmt.query_map(
-                        [SerializedItemKind::Diagnostics.to_string()],
-                        |row| {
-                            let id: usize = row.get(0)?;
-
-                            Ok(SerializedItem::Diagnostics(id))
-                        },
-                    )?;
-
-                    #[cfg(debug_assertions)]
-                    let tmp =
-                        diagnostics_iter.collect::<Vec<Result<SerializedItem, rusqlite::Error>>>();
-                    #[cfg(debug_assertions)]
-                    debug_assert!(tmp.len() == 0 || tmp.len() == 1);
-                    #[cfg(debug_assertions)]
-                    let diagnostics_iter = tmp.into_iter();
-
-                    let res = editors_iter
-                        .chain(terminals_iter)
-                        .chain(diagnostics_iter)
-                        .chain(searches_iter)
-                        .collect::<Result<HashSet<SerializedItem>, rusqlite::Error>>()?;
-
-                    let mut delete_stmt = tx.prepare_cached(
-                        r#"
-                        DELETE FROM items;
-                        DELETE FROM item_path;
-                        DELETE FROM item_query;
-                        "#,
-                    )?;
-
-                    delete_stmt.execute([])?;
-
-                    res
-                };
-
-                tx.commit()?;
-
-                Ok(result)
-            })
-            .unwrap_or(Ok(HashSet::default()))
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use anyhow::Result;
-
-    use super::*;
-
-    #[test]
-    fn test_items_round_trip() -> Result<()> {
-        let db = Db::open_in_memory();
-
-        let mut items = vec![
-            SerializedItem::Editor(0, PathBuf::from("/tmp/test.txt")),
-            SerializedItem::Terminal(1),
-            SerializedItem::ProjectSearch(2, "Test query!".to_string()),
-            SerializedItem::Diagnostics(3),
-        ]
-        .into_iter()
-        .collect::<HashSet<_>>();
-
-        for item in items.iter() {
-            dbg!("Inserting... ");
-            db.write_item(item.clone())?;
-        }
-
-        assert_eq!(items, db.take_items()?);
-
-        // Check that it's empty, as expected
-        assert_eq!(HashSet::default(), db.take_items()?);
-
-        for item in items.iter() {
-            db.write_item(item.clone())?;
-        }
-
-        items.remove(&SerializedItem::ProjectSearch(2, "Test query!".to_string()));
-        db.delete_item(2)?;
+pub(crate) const ITEMS_M_1: &str = "
+CREATE TABLE items(
+    workspace_id INTEGER,
+    item_id INTEGER,
+    kind TEXT NOT NULL,
+    PRIMARY KEY (workspace_id, item_id)
+    FOREIGN KEY(workspace_id) REFERENCES workspace_ids(workspace_id)
+) STRICT;
 
-        assert_eq!(items, db.take_items()?);
+CREATE TABLE project_searches(
+    workspace_id INTEGER,
+    item_id INTEGER,
+    query TEXT,
+    PRIMARY KEY (workspace_id, item_id)
+    FOREIGN KEY(workspace_id) REFERENCES workspace_ids(workspace_id)
+) STRICT;
 
-        Ok(())
-    }
-}
+CREATE TABLE editors(
+    workspace_id INTEGER,
+    item_id INTEGER,
+    path BLOB NOT NULL,
+    PRIMARY KEY (workspace_id, item_id)
+    FOREIGN KEY(workspace_id) REFERENCES workspace_ids(workspace_id)
+) STRICT;
+";

crates/db/src/kvp.rs 🔗

@@ -3,7 +3,7 @@ use rusqlite::OptionalExtension;
 
 use super::Db;
 
-pub(crate) const KVP_M_1_UP: &str = "
+pub(crate) const KVP_M_1: &str = "
 CREATE TABLE kv_store(
     key TEXT PRIMARY KEY,
     value TEXT NOT NULL

crates/db/src/migrations.rs 🔗

@@ -1,7 +1,7 @@
 use rusqlite_migration::{Migrations, M};
 
 // use crate::items::ITEMS_M_1;
-use crate::{items::ITEMS_M_1, kvp::KVP_M_1_UP};
+use crate::kvp::KVP_M_1;
 
 // This must be ordered by development time! Only ever add new migrations to the end!!
 // Bad things will probably happen if you don't monotonically edit this vec!!!!
@@ -9,7 +9,6 @@ use crate::{items::ITEMS_M_1, kvp::KVP_M_1_UP};
 // file system and so everything we do here is locked in _f_o_r_e_v_e_r_.
 lazy_static::lazy_static! {
     pub static ref MIGRATIONS: Migrations<'static> = Migrations::new(vec![
-        M::up(KVP_M_1_UP),
-        M::up(ITEMS_M_1),
+        M::up(KVP_M_1),
     ]);
 }

crates/db/src/workspace.rs 🔗

@@ -0,0 +1,180 @@
+use std::{path::Path, sync::Arc};
+
+use super::Db;
+
+pub(crate) const WORKSPACE_M_1: &str = "
+CREATE TABLE workspaces(
+    workspace_id INTEGER PRIMARY KEY,
+    center_group INTEGER NOT NULL,
+    dock_pane INTEGER NOT NULL,
+    timestamp INTEGER,
+    FOREIGN KEY(center_group) REFERENCES pane_groups(group_id)
+    FOREIGN KEY(dock_pane) REFERENCES pane_items(pane_id)
+) STRICT;
+
+CREATE TABLE worktree_roots(
+    worktree_root BLOB NOT NULL,
+    workspace_id INTEGER NOT NULL,
+    FOREIGN KEY(workspace_id) REFERENCES workspace_ids(workspace_id)
+) STRICT;
+
+CREATE TABLE pane_groups(
+    workspace_id INTEGER,
+    group_id INTEGER,
+    split_direction STRING, -- 'Vertical' / 'Horizontal' /
+    PRIMARY KEY (workspace_id, group_id)
+) STRICT;
+
+CREATE TABLE pane_group_children(
+    workspace_id INTEGER,
+    group_id INTEGER,
+    child_pane_id INTEGER,  -- Nullable
+    child_group_id INTEGER, -- Nullable
+    PRIMARY KEY (workspace_id, group_id)
+) STRICT;
+
+CREATE TABLE pane_items(
+    workspace_id INTEGER,
+    pane_id INTEGER,
+    item_id INTEGER, -- Array
+    PRIMARY KEY (workspace_id, pane_id)
+) STRICT;
+";
+
+// Zed stores items with ids which are a combination of a view id during a given run and a workspace id. This
+
+//      Case 1: Starting Zed Contextless
+//          > Zed -> Reopen the last
+//      Case 2: Starting Zed with a project folder
+//          > Zed ~/projects/Zed
+//      Case 3: Starting Zed with a file
+//          > Zed ~/projects/Zed/cargo.toml
+//      Case 4: Starting Zed with multiple project folders
+//          > Zed ~/projects/Zed ~/projects/Zed.dev
+
+#[derive(Debug, PartialEq, Eq)]
+pub struct WorkspaceId(usize);
+
+impl Db {
+    /// Finds or creates a workspace id for the given set of worktree roots. If the passed worktree roots is empty, return the
+    /// the last workspace id
+    pub fn workspace_id(&self, worktree_roots: &[Arc<Path>]) -> WorkspaceId {
+        // Find the workspace id which is uniquely identified by this set of paths return it if found
+        // Otherwise:
+        //   Find the max workspace_id and increment it as our new workspace id
+        //   Store in the worktrees table the mapping from this new id to the set of worktree roots
+        unimplemented!();
+    }
+
+    /// Updates the open paths for the given workspace id. Will garbage collect items from
+    /// any workspace ids which are now replaced by the new workspace id. Updates the timestamps
+    /// in the workspace id table
+    pub fn update_worktree_roots(&self, workspace_id: &WorkspaceId, worktree_roots: &[Arc<Path>]) {
+        // Lookup any WorkspaceIds which have the same set of roots, and delete them. (NOTE: this should garbage collect other tables)
+        // Remove the old rows which contain workspace_id
+        // Add rows for the new worktree_roots
+
+        // zed /tree
+        // -> add tree2
+        //   -> update_worktree_roots() -> ADDs entries for /tree and /tree2, LEAVING BEHIND the initial entry for /tree
+        unimplemented!();
+    }
+
+    /// Returns the previous workspace ids sorted by last modified
+    pub fn recent_workspaces(&self) -> Vec<(WorkspaceId, Vec<Arc<Path>>)> {
+        // Return all the workspace ids and their associated paths ordered by the access timestamp
+        //ORDER BY timestamps
+        unimplemented!();
+    }
+
+    pub fn center_pane(&self, workspace: WorkspaceId) -> SerializedPaneGroup {}
+
+    pub fn dock_pane(&self, workspace: WorkspaceId) -> SerializedPane {}
+}
+
+#[cfg(test)]
+mod tests {
+
+    use std::{
+        path::{Path, PathBuf},
+        sync::Arc,
+    };
+
+    use crate::Db;
+
+    use super::WorkspaceId;
+
+    fn test_tricky_overlapping_updates() {
+        // DB state:
+        // (/tree) -> ID: 1
+        // (/tree, /tree2) -> ID: 2
+        // (/tree2, /tree3) -> ID: 3
+
+        // -> User updates 2 to: (/tree2, /tree3)
+
+        // DB state:
+        // (/tree) -> ID: 1
+        // (/tree2, /tree3) -> ID: 2
+        // Get rid of 3 for garbage collection
+
+        fn arc_path(path: &'static str) -> Arc<Path> {
+            PathBuf::from(path).into()
+        }
+
+        let data = &[
+            (WorkspaceId(1), vec![arc_path("/tmp")]),
+            (WorkspaceId(2), vec![arc_path("/tmp"), arc_path("/tmp2")]),
+            (WorkspaceId(3), vec![arc_path("/tmp2"), arc_path("/tmp3")]),
+        ];
+
+        let db = Db::open_in_memory();
+
+        for (workspace_id, entries) in data {
+            db.update_worktree_roots(workspace_id, entries); //??
+            assert_eq!(&db.workspace_id(&[]), workspace_id)
+        }
+
+        for (workspace_id, entries) in data {
+            assert_eq!(&db.workspace_id(entries.as_slice()), workspace_id);
+        }
+
+        db.update_worktree_roots(&WorkspaceId(2), &[arc_path("/tmp2")]);
+        // todo!(); // make sure that 3 got garbage collected
+
+        assert_eq!(db.workspace_id(&[arc_path("/tmp2")]), WorkspaceId(2));
+        assert_eq!(db.workspace_id(&[arc_path("/tmp")]), WorkspaceId(1));
+
+        let recent_workspaces = db.recent_workspaces();
+        assert_eq!(recent_workspaces.get(0).unwrap().0, WorkspaceId(2));
+        assert_eq!(recent_workspaces.get(1).unwrap().0, WorkspaceId(3));
+        assert_eq!(recent_workspaces.get(2).unwrap().0, WorkspaceId(1));
+    }
+}
+
+// [/tmp, /tmp2] -> ID1?
+// [/tmp] -> ID2?
+
+/*
+path | id
+/tmp   ID1
+/tmp   ID2
+/tmp2  ID1
+
+
+SELECT id
+FROM workspace_ids
+WHERE path IN (path1, path2)
+INTERSECT
+SELECT id
+FROM workspace_ids
+WHERE path = path_2
+... and etc. for each element in path array
+
+If contains row, yay! If not,
+SELECT max(id) FROM workspace_ids
+
+Select id WHERE path IN paths
+
+SELECT MAX(id)
+
+*/