From 44abaed85783a1ea872e70e0064e8eb6649fe93a Mon Sep 17 00:00:00 2001 From: R Aadarsh Date: Sun, 5 Oct 2025 14:52:00 +0530 Subject: [PATCH] - Return an error if the file contains invalid bytes for the specified encoding instead of replacing the invalid bytes with replacement characters - Add `encoding` field in `Workspace` --- crates/encodings/src/selectors.rs | 2 ++ crates/fs/src/encodings.rs | 19 +++++++++++++++---- crates/workspace/src/workspace.rs | 4 +++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/crates/encodings/src/selectors.rs b/crates/encodings/src/selectors.rs index 893842723c60761742fbbb91ab1ee5c2b334bcdf..b88f882e31955cccf41ef26f1da8696238f6b6fd 100644 --- a/crates/encodings/src/selectors.rs +++ b/crates/encodings/src/selectors.rs @@ -460,6 +460,8 @@ pub mod encoding { }); } else { workspace.update(cx, |workspace, cx| { + *workspace.encoding.lock().unwrap() = + encoding_from_name(self.matches[self.current_selection].string.as_str()); workspace .open_abs_path( self.selector diff --git a/crates/fs/src/encodings.rs b/crates/fs/src/encodings.rs index c6c13f64d2e6b2b5db7897eff87fe685880c95a5..1759f8917c643c6697c198a32ed94fd1b25229c0 100644 --- a/crates/fs/src/encodings.rs +++ b/crates/fs/src/encodings.rs @@ -19,6 +19,12 @@ impl Debug for EncodingWrapper { } } +impl Default for EncodingWrapper { + fn default() -> Self { + EncodingWrapper(encoding_rs::UTF_8) + } +} + pub struct EncodingWrapperVisitor; impl PartialEq for EncodingWrapper { @@ -71,10 +77,15 @@ impl EncodingWrapper { let (cow, _had_errors) = self.0.decode_with_bom_removal(&input); - // `encoding_rs` handles invalid bytes by replacing them with replacement characters - // in the output string, so we return the result even if there were errors. - // This preserves the original behaviour where files with invalid bytes could still be opened. - Ok(cow.into_owned()) + if !_had_errors { + Ok(cow.to_string()) + } else { + // If there were decoding errors, return an error. 
+ Err(anyhow::anyhow!( + "The file contains invalid bytes for the specified encoding: {}. This usually means that the file is not a regular text file, or is encoded in a different encoding. Continuing to open it may result in data loss if saved.", + self.0.name() + )) + } } pub async fn encode(&self, input: String) -> Result> { diff --git a/crates/workspace/src/workspace.rs b/crates/workspace/src/workspace.rs index f561e7f30a15f7cc739cbe0046f938622cf30a00..bf21e72fb38ebe509a2cff7da087c7b31c155029 100644 --- a/crates/workspace/src/workspace.rs +++ b/crates/workspace/src/workspace.rs @@ -19,6 +19,7 @@ mod workspace_settings; pub use crate::notifications::NotificationFrame; pub use dock::Panel; +use encoding_rs::Encoding; use encoding_rs::UTF_8; use fs::encodings::EncodingWrapper; pub use path_list::PathList; @@ -1181,6 +1182,7 @@ pub struct Workspace { session_id: Option, scheduled_tasks: Vec>, last_open_dock_positions: Vec, + pub encoding: Arc>, } impl EventEmitter for Workspace {} @@ -1521,9 +1523,9 @@ impl Workspace { serializable_items_tx, _items_serializer, session_id: Some(session_id), - scheduled_tasks: Vec::new(), last_open_dock_positions: Vec::new(), + encoding: Arc::new(std::sync::Mutex::new(encoding_rs::UTF_8)), } }