diff --git a/crates/copilot/src/copilot.rs b/crates/copilot/src/copilot.rs index cf136962b9c0041487c58538c39cb08b61068b0a..f72e1f392d0a7af081dc3f7350032deae71fbe57 100644 --- a/crates/copilot/src/copilot.rs +++ b/crates/copilot/src/copilot.rs @@ -1460,7 +1460,13 @@ mod tests { unimplemented!() } - fn load_with_encoding(&self, _: &App, _: &'static Encoding) -> Task> { + fn load_with_encoding( + &self, + _: &App, + _: &'static Encoding, + _: bool, + _: Arc>, + ) -> Task> { unimplemented!() } } diff --git a/crates/encodings/src/lib.rs b/crates/encodings/src/lib.rs index c7072e7552368e4983ca9d87c53ac4c565ffc760..318f4660eeb854bf701113c28db04e9e0bc30aa1 100644 --- a/crates/encodings/src/lib.rs +++ b/crates/encodings/src/lib.rs @@ -1,7 +1,9 @@ //! A crate for handling file encodings in the text editor. + use editor::{Editor, EditorSettings}; use encoding_rs::Encoding; use gpui::{ClickEvent, Entity, Subscription, WeakEntity}; +use language::Buffer; use settings::Settings; use ui::{Button, ButtonCommon, Context, LabelSize, Render, Tooltip, Window, div}; use ui::{Clickable, ParentElement}; @@ -13,7 +15,13 @@ use crate::selectors::save_or_reopen::EncodingSaveOrReopenSelector; pub struct EncodingIndicator { pub encoding: Option<&'static Encoding>, pub workspace: WeakEntity, - observe: Option, // Subscription to observe changes in the active editor + + /// Subscription to observe changes in the active editor + observe_editor: Option, + + /// Subscription to observe changes in the `encoding` field of the `Buffer` struct + observe_buffer_encoding: Option, + show: bool, // Whether to show the indicator or not, based on whether an editor is active } @@ -50,17 +58,20 @@ impl EncodingIndicator { pub fn new( encoding: Option<&'static Encoding>, workspace: WeakEntity, - observe: Option, + observe_editor: Option, + observe_buffer_encoding: Option, ) -> EncodingIndicator { EncodingIndicator { encoding, workspace, - observe, + observe_editor, show: true, + observe_buffer_encoding, } } - 
pub fn update( + /// Update the encoding when the active editor is switched. + pub fn update_when_editor_is_switched( &mut self, editor: Entity, _: &mut Window, @@ -68,12 +79,24 @@ impl EncodingIndicator { ) { let editor = editor.read(cx); if let Some((_, buffer, _)) = editor.active_excerpt(cx) { - let encoding = buffer.read(cx).encoding; - self.encoding = Some(encoding); + let encoding = buffer.read(cx).encoding.clone(); + self.encoding = Some(&*encoding.lock().unwrap()); } cx.notify(); } + + /// Update the encoding when the `encoding` field of the `Buffer` struct changes. + pub fn update_when_buffer_encoding_changes( + &mut self, + buffer: Entity, + _: &mut Window, + cx: &mut Context, + ) { + let encoding = buffer.read(cx).encoding.clone(); + self.encoding = Some(&*encoding.lock().unwrap()); + cx.notify(); + } } impl StatusItemView for EncodingIndicator { @@ -85,13 +108,23 @@ impl StatusItemView for EncodingIndicator { ) { match active_pane_item.and_then(|item| item.downcast::()) { Some(editor) => { - self.observe = Some(cx.observe_in(&editor, window, Self::update)); - self.update(editor, window, cx); + self.observe_editor = + Some(cx.observe_in(&editor, window, Self::update_when_editor_is_switched)); + if let Some((_, buffer, _)) = &editor.read(cx).active_excerpt(cx) { + self.observe_buffer_encoding = Some(cx.observe_in( + buffer, + window, + Self::update_when_buffer_encoding_changes, + )); + } + self.update_when_editor_is_switched(editor, window, cx); self.show = true; } None => { self.encoding = None; - self.observe = None; + self.observe_editor = None; + // Drop the buffer subscription too, so a hidden indicator stops reacting to the previous buffer. + self.observe_buffer_encoding = None; self.show = false; } } diff --git a/crates/encodings/src/selectors.rs b/crates/encodings/src/selectors.rs index 75c1045059dcd26c7e1d8054e6cf5c4874c1cbfd..bd3ad321a21fe7883d7831670c311a5e10a9e1b3 100644 --- a/crates/encodings/src/selectors.rs +++ b/crates/encodings/src/selectors.rs @@ -409,7 +409,6 @@ pub mod encoding { ) .await } - picker .update(cx, |picker, cx| { let delegate = &mut picker.delegate; @@ 
-426,11 +425,13 @@ pub mod encoding { fn confirm(&mut self, _: bool, window: &mut Window, cx: &mut Context>) { if let Some(buffer) = self.buffer.upgrade() { buffer.update(cx, |buffer, cx| { - buffer.encoding = + let buffer_encoding = buffer.encoding.clone(); + let buffer_encoding = &mut *buffer_encoding.lock().unwrap(); + *buffer_encoding = encoding_from_name(self.matches[self.current_selection].string.as_str()); if self.action == Action::Reopen { let executor = cx.background_executor().clone(); - executor.spawn(buffer.reload(cx)).detach(); + executor.spawn(buffer.reload(cx, true)).detach(); } else if self.action == Action::Save { let executor = cx.background_executor().clone(); diff --git a/crates/fs/src/encodings.rs b/crates/fs/src/encodings.rs index db75dcadad807874d7700a705d894ec31a24247a..882f6c0adee22f0a2c196db6db5c726a1112d036 100644 --- a/crates/fs/src/encodings.rs +++ b/crates/fs/src/encodings.rs @@ -1,5 +1,8 @@ //! Encoding and decoding utilities using the `encoding_rs` crate. -use std::fmt::Debug; +use std::{ + fmt::Debug, + sync::{Arc, Mutex}, +}; use anyhow::Result; use encoding_rs::Encoding; @@ -42,7 +45,34 @@ impl EncodingWrapper { self.0 } - pub async fn decode(&self, input: Vec) -> Result { + pub async fn decode( + &mut self, + input: Vec, + force: bool, + buffer_encoding: Option>>, + ) -> Result { + // Check if the input starts with a BOM for UTF-16 encodings only if not forced to + // use the encoding specified. 
+ if !force { + if input.starts_with(&[0xFF, 0xFE]) { + self.0 = encoding_rs::UTF_16LE; + + if let Some(v) = buffer_encoding { + if let Ok(mut v) = v.lock() { + *v = encoding_rs::UTF_16LE; + } + } + } else if input.starts_with(&[0xFE, 0xFF]) { + self.0 = encoding_rs::UTF_16BE; + + if let Some(v) = buffer_encoding { + if let Ok(mut v) = v.lock() { + *v = encoding_rs::UTF_16BE; + } + } + } + } + let (cow, _had_errors) = self.0.decode_with_bom_removal(&input); // `encoding_rs` handles invalid bytes by replacing them with replacement characters @@ -53,8 +83,7 @@ impl EncodingWrapper { pub async fn encode(&self, input: String) -> Result> { if self.0 == encoding_rs::UTF_16BE { - let mut data = Vec::::new(); - data.reserve(input.len() * 2); // Reserve space for UTF-16BE bytes + let mut data = Vec::::with_capacity(input.len() * 2); // Convert the input string to UTF-16BE bytes let utf16be_bytes: Vec = @@ -63,8 +92,7 @@ impl EncodingWrapper { data.extend(utf16be_bytes); return Ok(data); } else if self.0 == encoding_rs::UTF_16LE { - let mut data = Vec::::new(); - data.reserve(input.len() * 2); // Reserve space for UTF-16LE bytes + let mut data = Vec::::with_capacity(input.len() * 2); // Convert the input string to UTF-16LE bytes let utf16le_bytes: Vec = @@ -83,8 +111,13 @@ impl EncodingWrapper { } /// Convert a byte vector from a specified encoding to a UTF-8 string. -pub async fn to_utf8(input: Vec, encoding: EncodingWrapper) -> Result { - encoding.decode(input).await +pub async fn to_utf8( + input: Vec, + mut encoding: EncodingWrapper, + force: bool, + buffer_encoding: Option>>, +) -> Result { + encoding.decode(input, force, buffer_encoding).await } /// Convert a UTF-8 string to a byte vector in a specified encoding. 
diff --git a/crates/fs/src/fs.rs b/crates/fs/src/fs.rs index 0857b049d07235736df2b1c708588134d7e978a8..3f0be2fa07898815031e44415cc446d41e22dc07 100644 --- a/crates/fs/src/fs.rs +++ b/crates/fs/src/fs.rs @@ -9,6 +9,7 @@ use anyhow::{Context as _, Result, anyhow}; #[cfg(any(target_os = "linux", target_os = "freebsd"))] use ashpd::desktop::trash; use futures::stream::iter; +use encoding_rs::Encoding; use gpui::App; use gpui::BackgroundExecutor; use gpui::Global; @@ -124,8 +125,20 @@ pub trait Fs: Send + Sync { &self, path: PathBuf, encoding: EncodingWrapper, + force: bool, // if true, skip BOM detection and always decode with `encoding` + + // The current encoding of the buffer. BOM (if it exists) is checked + // to find if encoding is UTF-16, and if so, the encoding is updated to UTF-16 + // regardless of the value of `encoding`. + buffer_encoding: Arc>, ) -> anyhow::Result { - Ok(encodings::to_utf8(self.load_bytes(path.as_path()).await?, encoding).await?) + Ok(encodings::to_utf8( + self.load_bytes(path.as_path()).await?, + encoding, + force, + Some(buffer_encoding), + ) + .await?) } async fn load_bytes(&self, path: &Path) -> Result>; @@ -619,10 +632,11 @@ impl Fs for RealFs { async fn load(&self, path: &Path) -> Result { let path = path.to_path_buf(); let encoding = EncodingWrapper::new(encoding_rs::UTF_8); - let text = - smol::unblock(async || Ok(encodings::to_utf8(std::fs::read(path)?, encoding).await?)) - .await - .await; + let text = smol::unblock(async || { + Ok(encodings::to_utf8(std::fs::read(path)?, encoding, false, None).await?) 
+ }) + .await + .await; text } diff --git a/crates/git_ui/src/git_panel.rs b/crates/git_ui/src/git_panel.rs index 0c43058c067aa9b6abcc333e58f7e4933d783b73..77b4abace49e2a9a25f0214b029b9c4cb044db0b 100644 --- a/crates/git_ui/src/git_panel.rs +++ b/crates/git_ui/src/git_panel.rs @@ -1056,7 +1056,7 @@ impl GitPanel { .iter() .filter_map(|buffer| { buffer.as_ref().ok()?.update(cx, |buffer, cx| { - buffer.is_dirty().then(|| buffer.reload(cx)) + buffer.is_dirty().then(|| buffer.reload(cx, false)) }) }) .collect() diff --git a/crates/language/src/buffer.rs b/crates/language/src/buffer.rs index e22f86be5be3714eb21b9dccf24c4890a8cf2158..a90fa3411d4b3224ca17791d0bd488e42afd2ed9 100644 --- a/crates/language/src/buffer.rs +++ b/crates/language/src/buffer.rs @@ -127,7 +127,7 @@ pub struct Buffer { has_unsaved_edits: Cell<(clock::Global, bool)>, change_bits: Vec>>, _subscriptions: Vec, - pub encoding: &'static Encoding, + pub encoding: Arc>, } #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -420,7 +420,13 @@ pub trait LocalFile: File { fn load_bytes(&self, cx: &App) -> Task>>; /// Loads the file contents from disk, decoding them with the given encoding. - fn load_with_encoding(&self, cx: &App, encoding: &'static Encoding) -> Task>; + fn load_with_encoding( + &self, + cx: &App, + encoding: &'static Encoding, + force: bool, // whether to force the encoding even if a BOM is present + buffer_encoding: Arc>, + ) -> Task>; } /// The auto-indent behavior associated with an editing operation. @@ -1011,7 +1017,7 @@ impl Buffer { has_conflict: false, change_bits: Default::default(), _subscriptions: Vec::new(), - encoding: encoding_rs::UTF_8, + encoding: Arc::new(std::sync::Mutex::new(encoding_rs::UTF_8)), } } @@ -1345,17 +1351,21 @@ impl Buffer { } /// Reloads the contents of the buffer from disk. 
- pub fn reload(&mut self, cx: &Context) -> oneshot::Receiver> { + pub fn reload( + &mut self, + cx: &Context, + force: bool, // whether to force the encoding even if a BOM is present + ) -> oneshot::Receiver> { let (tx, rx) = futures::channel::oneshot::channel(); - let encoding = self.encoding; + let encoding = self.encoding.clone(); + let prev_version = self.text.version(); self.reload_task = Some(cx.spawn(async move |this, cx| { let Some((new_mtime, new_text)) = this.update(cx, |this, cx| { let file = this.file.as_ref()?.as_local()?; - Some(( - file.disk_state().mtime(), - file.load_with_encoding(cx, encoding), - )) + Some((file.disk_state().mtime(), { + file.load_with_encoding(cx, &*encoding.lock().unwrap(), force, encoding.clone()) + })) })? else { return Ok(()); @@ -5237,7 +5247,13 @@ impl LocalFile for TestFile { unimplemented!() } - fn load_with_encoding(&self, _: &App, _: &'static Encoding) -> Task> { + fn load_with_encoding( + &self, + _: &App, + _: &'static Encoding, + _: bool, // whether to force the encoding even if a BOM is present + _: Arc>, + ) -> Task> { unimplemented!() } } diff --git a/crates/project/src/buffer_store.rs b/crates/project/src/buffer_store.rs index d8dc6f4e0ffba78489e5f00de191b5c2709d3ed1..9ae1ed0310e53dc20cc95a03a7c55cf887af0697 100644 --- a/crates/project/src/buffer_store.rs +++ b/crates/project/src/buffer_store.rs @@ -387,7 +387,7 @@ impl LocalBufferStore { let version = buffer.version(); let buffer_id = buffer.remote_id(); let file = buffer.file().cloned(); - let encoding = buffer.encoding; + let encoding = buffer.encoding.clone(); if file .as_ref() @@ -397,7 +397,13 @@ impl LocalBufferStore { } let save = worktree.update(cx, |worktree, cx| { - worktree.write_file(path.as_ref(), text, line_ending, cx, encoding) + worktree.write_file( + path.as_ref(), + text, + line_ending, + cx, + &*encoding.lock().unwrap(), + ) }); cx.spawn(async move |this, cx| { @@ -629,22 +635,13 @@ impl LocalBufferStore { ) -> Task>> { let load_file = 
worktree.update(cx, |worktree, cx| worktree.load_file(path.as_ref(), cx)); cx.spawn(async move |this, cx| { - let path = path.clone(); - let buffer = match load_file.await.with_context(|| { - format!("Could not open path: {}", path.display(PathStyle::local())) - }) { - Ok(loaded) => { - let reservation = cx.reserve_entity::()?; - let buffer_id = BufferId::from(reservation.entity_id().as_non_zero_u64()); - let executor = cx.background_executor().clone(); - let text_buffer = cx - .background_spawn(async move { - text::Buffer::new(ReplicaId::LOCAL, buffer_id, loaded.text, &executor) - }) - .await; - cx.insert_entity(reservation, |_| { - Buffer::build(text_buffer, Some(loaded.file), Capability::ReadWrite) - })? + let buffer = match load_buffer.await { + Ok(buffer) => { + // Reload the buffer to trigger UTF-16 detection + buffer + .update(cx, |buffer, cx| buffer.reload(cx, false))? + .await?; + Ok(buffer) } Err(error) if is_not_found_error(&error) => cx.new(|cx| { let buffer_id = BufferId::from(cx.entity_id().as_non_zero_u64()); @@ -723,7 +720,9 @@ impl LocalBufferStore { cx.spawn(async move |_, cx| { let mut project_transaction = ProjectTransaction::default(); for buffer in buffers { - let transaction = buffer.update(cx, |buffer, cx| buffer.reload(cx))?.await?; + let transaction = buffer + .update(cx, |buffer, cx| buffer.reload(cx, false))? 
+ .await?; buffer.update(cx, |buffer, cx| { if let Some(transaction) = transaction { if !push_to_history { diff --git a/crates/worktree/src/worktree.rs b/crates/worktree/src/worktree.rs index a20f4035ec63cc420ce998a25fe1a0ac0d4fb563..f087e9b651926816a56ca92ae3111bb39e71da4f 100644 --- a/crates/worktree/src/worktree.rs +++ b/crates/worktree/src/worktree.rs @@ -3129,13 +3129,22 @@ impl language::LocalFile for File { cx.background_spawn(async move { fs.load_bytes(&abs_path).await }) } - fn load_with_encoding(&self, cx: &App, encoding: &'static Encoding) -> Task> { + fn load_with_encoding( + &self, + cx: &App, + encoding: &'static Encoding, + force: bool, // whether to force the encoding even if there's a BOM + buffer_encoding: Arc>, + ) -> Task> { let worktree = self.worktree.read(cx).as_local().unwrap(); let path = worktree.absolutize(&self.path); let fs = worktree.fs.clone(); let encoding = EncodingWrapper::new(encoding); - cx.background_spawn(async move { fs.load_with_encoding(path?, encoding).await }) + cx.background_spawn(async move { + fs.load_with_encoding(path?, encoding, force, buffer_encoding) + .await + }) } } diff --git a/crates/zed/src/zed.rs b/crates/zed/src/zed.rs index 60da5c5494cedd135b1117a7d84958a3a4d3b3cb..2bbdb428ee12c7c9ac95f62bfa06cb26f32a934c 100644 --- a/crates/zed/src/zed.rs +++ b/crates/zed/src/zed.rs @@ -443,8 +443,9 @@ pub fn initialize_workspace( } }); - let encoding_indicator = - cx.new(|_cx| encodings::EncodingIndicator::new(None, workspace.weak_handle(), None)); + let encoding_indicator = cx.new(|_cx| { + encodings::EncodingIndicator::new(None, workspace.weak_handle(), None, None) + }); let cursor_position = cx.new(|_| go_to_line::cursor_position::CursorPosition::new(workspace));