- Return an error if the file contains invalid bytes for the specified

R Aadarsh created

encoding instead of replacing the invalid bytes with replacement characters

 - Add `encoding` field in `Workspace`

Change summary

crates/encodings/src/selectors.rs |  2 ++
crates/fs/src/encodings.rs        | 19 +++++++++++++++----
crates/workspace/src/workspace.rs |  4 +++-
3 files changed, 20 insertions(+), 5 deletions(-)

Detailed changes

crates/encodings/src/selectors.rs 🔗

@@ -460,6 +460,8 @@ pub mod encoding {
                 });
             } else {
                 workspace.update(cx, |workspace, cx| {
+                    *workspace.encoding.lock().unwrap() =
+                        encoding_from_name(self.matches[self.current_selection].string.as_str());
                     workspace
                         .open_abs_path(
                             self.selector

crates/fs/src/encodings.rs 🔗

@@ -19,6 +19,12 @@ impl Debug for EncodingWrapper {
     }
 }
 
+impl Default for EncodingWrapper {
+    fn default() -> Self {
+        EncodingWrapper(encoding_rs::UTF_8)
+    }
+}
+
 pub struct EncodingWrapperVisitor;
 
 impl PartialEq for EncodingWrapper {
@@ -71,10 +77,15 @@ impl EncodingWrapper {
 
         let (cow, _had_errors) = self.0.decode_with_bom_removal(&input);
 
-        // `encoding_rs` handles invalid bytes by replacing them with replacement characters
-        // in the output string, so we return the result even if there were errors.
-        // This preserves the original behaviour where files with invalid bytes could still be opened.
-        Ok(cow.into_owned())
+        if !_had_errors {
+            Ok(cow.to_string())
+        } else {
+            // If there were decoding errors, return an error.
+            Err(anyhow::anyhow!(
+                "The file contains invalid bytes for the specified encoding: {}. This usually menas that the file is not a regular text file, or is encoded in a different encoding. Continuing to open it may result in data loss if saved.",
+                self.0.name()
+            ))
+        }
     }
 
     pub async fn encode(&self, input: String) -> Result<Vec<u8>> {

crates/workspace/src/workspace.rs 🔗

@@ -19,6 +19,7 @@ mod workspace_settings;
 
 pub use crate::notifications::NotificationFrame;
 pub use dock::Panel;
+use encoding_rs::Encoding;
 use encoding_rs::UTF_8;
 use fs::encodings::EncodingWrapper;
 pub use path_list::PathList;
@@ -1181,6 +1182,7 @@ pub struct Workspace {
     session_id: Option<String>,
     scheduled_tasks: Vec<Task<()>>,
     last_open_dock_positions: Vec<DockPosition>,
+    pub encoding: Arc<std::sync::Mutex<&'static Encoding>>,
 }
 
 impl EventEmitter<Event> for Workspace {}
@@ -1521,9 +1523,9 @@ impl Workspace {
             serializable_items_tx,
             _items_serializer,
             session_id: Some(session_id),
-
             scheduled_tasks: Vec::new(),
             last_open_dock_positions: Vec::new(),
+            encoding: Arc::new(std::sync::Mutex::new(encoding_rs::UTF_8)),
         }
     }