From e04d044271dd4471f4a71ec4cc25077b02a1e0fa Mon Sep 17 00:00:00 2001 From: Brandt Weary Date: Sat, 10 Jan 2026 00:36:33 -0800 Subject: [PATCH] Fix fold persistence corruption from external file changes (#46011) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Fixes #33633 - folds corrupting after external file modifications. The existing fold persistence stored raw byte offsets, which become stale when files are modified externally (git operations, other editors, sync tools). This caused folds to capture wrong content on restore - users reported "wrong lines getting folded" with systematic offsets. **Solution: Content fingerprinting** - Store 32-byte content samples at fold boundaries - On restore, validate fingerprints match; if not, search buffer for new positions - Handles edge cases: short folds (< 32 bytes), duplicate content, boundary positioning **Bonus fix: Ungraceful exit survival** - Entity IDs change between sessions, but workspace cleanup's CASCADE DELETE was wiping folds before new editors could save - Now migrates folds to new entity_id immediately after restore ## Test plan - [x] Unit test for fingerprint storage/retrieval (`test_save_and_get_editor_folds_with_fingerprints`) - [x] Manual test: fold sections, add content at file start externally, reopen → folds restore at correct positions - [x] Manual test: fold sections, Ctrl+C, reopen → folds survive - [x] Existing fold tests pass (`cargo test -p editor`) ## Release Notes - N/A --------- Co-authored-by: Hector --- crates/editor/src/editor.rs | 129 ++++++++++++++++++++++++---- crates/editor/src/persistence.rs | 139 +++++++++++++++++++++++++++++-- 2 files changed, 245 insertions(+), 23 deletions(-) diff --git a/crates/editor/src/editor.rs b/crates/editor/src/editor.rs index ed25d51f1ddeee8b3f245ee306d2be49054f5cc8..81d13d345a87e85d6b8d972c8476233686846a2d 100644 --- a/crates/editor/src/editor.rs +++ b/crates/editor/src/editor.rs @@ -3505,15 +3505,30 @@ impl Editor { }; let background_executor = cx.background_executor().clone(); let editor_id = cx.entity().entity_id().as_u64() as ItemId; + const FINGERPRINT_LEN: usize = 32; let db_folds = display_snapshot .folds_in_range(MultiBufferOffset(0)..display_snapshot.buffer_snapshot().len()) .map(|fold| { - ( - fold.range.start.text_anchor.to_offset(&snapshot), - fold.range.end.text_anchor.to_offset(&snapshot), - ) + let start = fold.range.start.text_anchor.to_offset(&snapshot); + let end = fold.range.end.text_anchor.to_offset(&snapshot); + + // Extract fingerprints - content at fold boundaries for validation on restore + // Both fingerprints must be INSIDE the fold to avoid capturing surrounding + // content that might change independently. + // start_fp: first min(32, fold_len) bytes of fold content + // end_fp: last min(32, fold_len) bytes of fold content + // Clip to character boundaries to handle multibyte UTF-8 characters. + let fold_len = end - start; + let start_fp_end = snapshot + .clip_offset(start + std::cmp::min(FINGERPRINT_LEN, fold_len), Bias::Left); + let start_fp: String = snapshot.text_for_range(start..start_fp_end).collect(); + let end_fp_start = snapshot + .clip_offset(end.saturating_sub(FINGERPRINT_LEN).max(start), Bias::Right); + let end_fp: String = snapshot.text_for_range(end_fp_start..end).collect(); + + (start, end, start_fp, end_fp) }) - .collect(); + .collect::>(); self.serialize_folds = cx.background_spawn(async move { background_executor.timer(SERIALIZATION_THROTTLE_TIME).await; DB.save_editor_folds(editor_id, workspace_id, db_folds) @@ -23179,18 +23194,100 @@ impl Editor { && !folds.is_empty() { let snapshot = buffer_snapshot.get_or_init(|| self.buffer.read(cx).snapshot(cx)); - self.fold_ranges( - folds - .into_iter() - .map(|(start, end)| { - snapshot.clip_offset(MultiBufferOffset(start), Bias::Left) - ..snapshot.clip_offset(MultiBufferOffset(end), Bias::Right) + let snapshot_len = snapshot.len().0; + + // Helper: search for fingerprint in buffer, return offset if found + let find_fingerprint = |fingerprint: &str, search_start: usize| -> Option { + // Ensure we start at a character boundary (defensive) + let search_start = snapshot + .clip_offset(MultiBufferOffset(search_start), Bias::Left) + .0; + let search_end = snapshot_len.saturating_sub(fingerprint.len()); + + let mut byte_offset = search_start; + for ch in snapshot.chars_at(MultiBufferOffset(search_start)) { + if byte_offset > search_end { + break; + } + if snapshot.contains_str_at(MultiBufferOffset(byte_offset), fingerprint) { + return Some(byte_offset); + } + byte_offset += ch.len_utf8(); + } + None + }; + + // Track search position to handle duplicate fingerprints correctly. + // Folds are stored in document order, so we advance after each match. + let mut search_start = 0usize; + + let valid_folds: Vec<_> = folds + .into_iter() + .filter_map(|(stored_start, stored_end, start_fp, end_fp)| { + // Skip folds without fingerprints (old data before migration) + let sfp = start_fp?; + let efp = end_fp?; + let efp_len = efp.len(); + + // Fast path: check if fingerprints match at stored offsets + // Note: end_fp is content BEFORE fold end, so check at (stored_end - efp_len) + let start_matches = stored_start < snapshot_len + && snapshot.contains_str_at(MultiBufferOffset(stored_start), &sfp); + let efp_check_pos = stored_end.saturating_sub(efp_len); + let end_matches = efp_check_pos >= stored_start + && stored_end <= snapshot_len + && snapshot.contains_str_at(MultiBufferOffset(efp_check_pos), &efp); + + let (new_start, new_end) = if start_matches && end_matches { + // Offsets unchanged, use stored values + (stored_start, stored_end) + } else if sfp == efp { + // Short fold: identical fingerprints can only match once per search + // Use stored fold length to compute new_end + let new_start = find_fingerprint(&sfp, search_start)?; + let fold_len = stored_end - stored_start; + let new_end = new_start + fold_len; + (new_start, new_end) + } else { + // Slow path: search for fingerprints in buffer + let new_start = find_fingerprint(&sfp, search_start)?; + // Search for end_fp after start, then add efp_len to get actual fold end + let efp_pos = find_fingerprint(&efp, new_start + sfp.len())?; + let new_end = efp_pos + efp_len; + (new_start, new_end) + }; + + // Advance search position for next fold + search_start = new_end; + + // Validate fold makes sense (end must be after start) + if new_end <= new_start { + return None; + } + + Some( + snapshot.clip_offset(MultiBufferOffset(new_start), Bias::Left) + ..snapshot.clip_offset(MultiBufferOffset(new_end), Bias::Right), + ) + }) + .collect(); + + if !valid_folds.is_empty() { + self.fold_ranges(valid_folds, false, window, cx); + + // Migrate folds to current entity_id before workspace cleanup runs. + // Entity IDs change between sessions, but workspace cleanup deletes + // old editor rows (cascading to folds) based on current entity IDs. + let new_editor_id = cx.entity().entity_id().as_u64() as ItemId; + if new_editor_id != item_id { + cx.spawn(async move |_, _| { + DB.migrate_editor_folds(item_id, new_editor_id, workspace_id) + .await + .log_err(); }) - .collect(), - false, - window, - cx, - ); + .detach(); + } + } } if let Some(selections) = DB.get_editor_selections(item_id, workspace_id).log_err() diff --git a/crates/editor/src/persistence.rs b/crates/editor/src/persistence.rs index 840398474bb02542e452c79864b722cc91b111b3..02f867002d5562607c16f3180d7554c042494319 100644 --- a/crates/editor/src/persistence.rs +++ b/crates/editor/src/persistence.rs @@ -120,6 +120,8 @@ impl Domain for EditorDb { // workspace_id: usize, // start: usize, // end: usize, + // start_fingerprint: Option, + // end_fingerprint: Option, // ) const MIGRATIONS: &[&str] = &[ @@ -197,6 +199,10 @@ impl Domain for EditorDb { ON DELETE CASCADE ) STRICT; ), + sql! ( + ALTER TABLE editor_folds ADD COLUMN start_fingerprint TEXT; + ALTER TABLE editor_folds ADD COLUMN end_fingerprint TEXT; + ), ]; } @@ -274,13 +280,42 @@ impl EditorDb { pub fn get_editor_folds( editor_id: ItemId, workspace_id: WorkspaceId - ) -> Result> { - SELECT start, end + ) -> Result, Option)>> { + SELECT start, end, start_fingerprint, end_fingerprint FROM editor_folds WHERE editor_id = ?1 AND workspace_id = ?2 } } + // Migrate folds from an old editor_id to a new one. + // This is needed because entity IDs change between sessions, but workspace + // cleanup deletes old editor rows (cascading to folds) before the new + // editor has a chance to re-save its folds. + // + // We temporarily disable FK checks because the new editor row doesn't exist + // yet (it gets created during workspace serialization, which runs later). + pub async fn migrate_editor_folds( + &self, + old_editor_id: ItemId, + new_editor_id: ItemId, + workspace_id: WorkspaceId, + ) -> Result<()> { + self.write(move |conn| { + let _ = conn.exec("PRAGMA foreign_keys = OFF"); + let mut statement = Statement::prepare( + conn, + "UPDATE editor_folds SET editor_id = ?2 WHERE editor_id = ?1 AND workspace_id = ?3", + )?; + statement.bind(&old_editor_id, 1)?; + statement.bind(&new_editor_id, 2)?; + statement.bind(&workspace_id, 3)?; + let result = statement.exec(); + let _ = conn.exec("PRAGMA foreign_keys = ON"); + result + }) + .await + } + pub async fn save_editor_selections( &self, editor_id: ItemId, @@ -337,15 +372,15 @@ VALUES {placeholders}; &self, editor_id: ItemId, workspace_id: WorkspaceId, - folds: Vec<(usize, usize)>, + folds: Vec<(usize, usize, String, String)>, ) -> Result<()> { log::debug!("Saving folds for editor {editor_id} in workspace {workspace_id:?}"); let mut first_fold; let mut last_fold = 0_usize; - for (count, placeholders) in std::iter::once("(?1, ?2, ?, ?)") + for (count, placeholders) in std::iter::once("(?1, ?2, ?, ?, ?, ?)") .cycle() .take(folds.len()) - .chunks(MAX_QUERY_PLACEHOLDERS / 4) + .chunks(MAX_QUERY_PLACEHOLDERS / 6) .into_iter() .map(|chunk| { let mut count = 0; @@ -364,7 +399,7 @@ VALUES {placeholders}; r#" DELETE FROM editor_folds WHERE editor_id = ?1 AND workspace_id = ?2; -INSERT OR IGNORE INTO editor_folds (editor_id, workspace_id, start, end) +INSERT OR IGNORE INTO editor_folds (editor_id, workspace_id, start, end, start_fingerprint, end_fingerprint) VALUES {placeholders}; "# ); @@ -374,9 +409,11 @@ VALUES {placeholders}; let mut statement = Statement::prepare(conn, query)?; statement.bind(&editor_id, 1)?; let mut next_index = statement.bind(&workspace_id, 2)?; - for (start, end) in folds { + for (start, end, start_fp, end_fp) in folds { next_index = statement.bind(&start, next_index)?; next_index = statement.bind(&end, next_index)?; + next_index = statement.bind(&start_fp, next_index)?; + next_index = statement.bind(&end_fp, next_index)?; } statement.exec() }) @@ -465,4 +502,92 @@ mod tests { .unwrap(); assert_eq!(have, serialized_editor); } + + #[gpui::test] + async fn test_save_and_get_editor_folds_with_fingerprints() { + let workspace_id = workspace::WORKSPACE_DB.next_id().await.unwrap(); + + // First create an editor entry (folds have FK to editors) + let serialized_editor = SerializedEditor { + abs_path: Some(PathBuf::from("test_folds.txt")), + contents: None, + language: None, + mtime: None, + }; + DB.save_serialized_editor(5678, workspace_id, serialized_editor) + .await + .unwrap(); + + // Save folds with fingerprints (32-byte content samples at fold boundaries) + let folds = vec![ + ( + 100, + 200, + "fn main() {".to_string(), + "} // end main".to_string(), + ), + ( + 300, + 400, + "struct Foo {".to_string(), + "} // end Foo".to_string(), + ), + ]; + DB.save_editor_folds(5678, workspace_id, folds.clone()) + .await + .unwrap(); + + // Retrieve and verify fingerprints are preserved + let retrieved = DB.get_editor_folds(5678, workspace_id).unwrap(); + assert_eq!(retrieved.len(), 2); + assert_eq!( + retrieved[0], + ( + 100, + 200, + Some("fn main() {".to_string()), + Some("} // end main".to_string()) + ) + ); + assert_eq!( + retrieved[1], + ( + 300, + 400, + Some("struct Foo {".to_string()), + Some("} // end Foo".to_string()) + ) + ); + + // Test overwrite: saving new folds replaces old ones + let new_folds = vec![( + 500, + 600, + "impl Bar {".to_string(), + "} // end impl".to_string(), + )]; + DB.save_editor_folds(5678, workspace_id, new_folds) + .await + .unwrap(); + + let retrieved = DB.get_editor_folds(5678, workspace_id).unwrap(); + assert_eq!(retrieved.len(), 1); + assert_eq!( + retrieved[0], + ( + 500, + 600, + Some("impl Bar {".to_string()), + Some("} // end impl".to_string()) + ) + ); + } + + // NOTE: The fingerprint search logic (finding content at new offsets when file + // is modified externally) is in editor.rs:restore_from_db and requires a full + // Editor context to test. Manual testing procedure: + // 1. Open a file, fold some sections, close Zed + // 2. Add text at the START of the file externally (shifts all offsets) + // 3. Reopen Zed - folds should be restored at their NEW correct positions + // The search uses contains_str_at() to find fingerprints in the buffer. }