diff --git a/Cargo.lock b/Cargo.lock
index d33d31d9fdc5ab0e7819cbaf1d15c0a149d56627..152dbd8e603ad18b28add0aee4ad1e652f7daaf3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -14226,6 +14226,7 @@ dependencies = [
  "log",
  "rand 0.9.2",
  "rayon",
+ "regex",
  "smallvec",
  "sum_tree",
  "unicode-segmentation",
@@ -17032,7 +17033,6 @@ dependencies = [
  "parking_lot",
  "postage",
  "rand 0.9.2",
- "regex",
  "rope",
  "smallvec",
  "sum_tree",
diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs
index 2a3909f069ac491adb8f5675807b647b77d23ac9..4ccf37b021244b87bccd090ec691b09903d7b0f6 100644
--- a/crates/editor/src/editor_tests.rs
+++ b/crates/editor/src/editor_tests.rs
@@ -12629,6 +12629,12 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) {
             );
         }
     });
+
+    #[cfg(target_os = "windows")]
+    let line_ending = "\r\n";
+    #[cfg(not(target_os = "windows"))]
+    let line_ending = "\n";
+
     // Handle formatting requests to the language server.
     cx.lsp
         .set_request_handler::<lsp::request::Formatting, _, _>({
@@ -12652,7 +12658,7 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) {
                     ),
                     (
                         lsp::Range::new(lsp::Position::new(3, 4), lsp::Position::new(3, 4)),
-                        "\n".into()
+                        line_ending.into()
                     ),
                 ]
             );
@@ -12663,14 +12669,14 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) {
                         lsp::Position::new(1, 0),
                         lsp::Position::new(1, 0),
                     ),
-                    new_text: "\n".into(),
+                    new_text: line_ending.into(),
                 },
                 lsp::TextEdit {
                     range: lsp::Range::new(
                         lsp::Position::new(2, 0),
                         lsp::Position::new(2, 0),
                     ),
-                    new_text: "\n".into(),
+                    new_text: line_ending.into(),
                 },
             ]))
         }
@@ -26656,6 +26662,83 @@ async fn test_paste_url_from_other_app_creates_markdown_link_selectively_in_mult
     ));
 }
 
+#[gpui::test]
+async fn test_non_linux_line_endings_registration(cx: &mut TestAppContext) {
+    init_test(cx, |_| {});
+
+    let unix_newlines_file_text = "fn main() {
+        let a = 5;
+    }";
+    let crlf_file_text = unix_newlines_file_text.lines().join("\r\n");
+
+    let fs = FakeFs::new(cx.executor());
+    fs.insert_tree(
+        path!("/a"),
+        json!({
+            "first.rs": &crlf_file_text,
+        }),
+    )
+    .await;
+
+    let project = Project::test(fs, [path!("/a").as_ref()], cx).await;
+    let workspace = cx.add_window(|window, cx| Workspace::test_new(project.clone(), window, cx));
+    let cx = &mut VisualTestContext::from_window(*workspace, cx);
+
+    let registered_text = Arc::new(Mutex::new(Vec::new()));
+    let language_registry = project.read_with(cx, |project, _| project.languages().clone());
+    language_registry.add(rust_lang());
+    let mut fake_servers = language_registry.register_fake_lsp(
+        "Rust",
+        FakeLspAdapter {
+            capabilities: lsp::ServerCapabilities {
+                color_provider: Some(lsp::ColorProviderCapability::Simple(true)),
+                ..lsp::ServerCapabilities::default()
+            },
+            name: "rust-analyzer",
+            initializer: Some({
+                let registered_text = registered_text.clone();
+                Box::new(move |fake_server| {
+                    fake_server.handle_notification::<lsp::notification::DidOpenTextDocument, _>({
+                        let registered_text = registered_text.clone();
+                        move |params, _| {
+                            registered_text.lock().push(params.text_document.text);
+                        }
+                    });
+                })
+            }),
+            ..FakeLspAdapter::default()
+        },
+    );
+
+    let editor = workspace
+        .update(cx, |workspace, window, cx| {
+            workspace.open_abs_path(
+                PathBuf::from(path!("/a/first.rs")),
+                OpenOptions::default(),
+                window,
+                cx,
+            )
+        })
+        .unwrap()
+        .await
+        .unwrap()
+        .downcast::<Editor>()
+        .unwrap();
+    let _fake_language_server = fake_servers.next().await.unwrap();
+    cx.executor().run_until_parked();
+
+    assert_eq!(
+        editor.update(cx, |editor, cx| editor.text(cx)),
+        unix_newlines_file_text,
+        "Default text API returns \n-separated text",
+    );
+    assert_eq!(
+        vec![crlf_file_text],
+        registered_text.lock().drain(..).collect::<Vec<_>>(),
+        "Expected the language server to receive the exact same text from the FS",
+    );
+}
+
 #[gpui::test]
 async fn test_race_in_multibuffer_save(cx: &mut TestAppContext) {
     init_test(cx, |_| {});
diff --git a/crates/project/src/lsp_store.rs b/crates/project/src/lsp_store.rs
index 2350c110eee4c8e3f2e838f04ee9fc04292209da..1d6d4240de0ae8a6781b49f78341d10b5127cdc1 100644
--- a/crates/project/src/lsp_store.rs
+++ b/crates/project/src/lsp_store.rs
@@ -2487,7 +2487,7 @@ impl LocalLspStore {
                 uri.clone(),
                 adapter.language_id(&language.name()),
                 0,
-                initial_snapshot.text(),
+                initial_snapshot.text_with_original_line_endings(),
             );
 
             vec![snapshot]
@@ -7522,6 +7522,7 @@ impl LspStore {
         let previous_snapshot = buffer_snapshots.last()?;
         let build_incremental_change = || {
+            let line_ending = next_snapshot.line_ending();
             buffer
                 .edits_since::<Dimensions<PointUtf16, usize>>(
                     previous_snapshot.snapshot.version(),
                 )
@@ -7529,16 +7530,18 @@
                 .map(|edit| {
                     let edit_start = edit.new.start.0;
                     let edit_end = edit_start + (edit.old.end.0 - edit.old.start.0);
-                    let new_text = next_snapshot
-                        .text_for_range(edit.new.start.1..edit.new.end.1)
-                        .collect();
                     lsp::TextDocumentContentChangeEvent {
                         range: Some(lsp::Range::new(
                             point_to_lsp(edit_start),
                             point_to_lsp(edit_end),
                         )),
                         range_length: None,
-                        text: new_text,
+                        // Collect changed text and preserve line endings.
+                        // text_for_range returns chunks with normalized \n, so we need to
+                        // convert to the buffer's actual line ending for LSP.
+                        text: line_ending.into_string(
+                            next_snapshot.text_for_range(edit.new.start.1..edit.new.end.1),
+                        ),
                     }
                 })
                 .collect()
@@ -7558,7 +7561,7 @@ impl LspStore {
                 vec![lsp::TextDocumentContentChangeEvent {
                     range: None,
                     range_length: None,
-                    text: next_snapshot.text(),
+                    text: next_snapshot.text_with_original_line_endings(),
                 }]
             }
             Some(lsp::TextDocumentSyncKind::INCREMENTAL) => build_incremental_change(),
@@ -10922,13 +10925,12 @@ impl LspStore {
                     let snapshot = versions.last().unwrap();
                     let version = snapshot.version;
-                    let initial_snapshot = &snapshot.snapshot;
                     let uri = lsp::Uri::from_file_path(file.abs_path(cx)).unwrap();
                     language_server.register_buffer(
                         uri,
                         adapter.language_id(&language.name()),
                         version,
-                        initial_snapshot.text(),
+                        buffer_handle.read(cx).text_with_original_line_endings(),
                     );
                     buffer_paths_registered.push((buffer_id, file.abs_path(cx)));
                     local
diff --git a/crates/rope/Cargo.toml b/crates/rope/Cargo.toml
index 9dfe4cb333a2311982f1c49206214e270fd288c0..f099248a5db49ac1e857900b7d00294a11cfbff2 100644
--- a/crates/rope/Cargo.toml
+++ b/crates/rope/Cargo.toml
@@ -15,6 +15,7 @@ path = "src/rope.rs"
 arrayvec = "0.7.1"
 log.workspace = true
 rayon.workspace = true
+regex.workspace = true
 smallvec.workspace = true
 sum_tree.workspace = true
 unicode-segmentation.workspace = true
diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs
index 0195f61dcb30bdc85ae3dbe541fa5fba5f76a2c9..b5c5cd069e07a0957b01130eec2b4ecdf7f7120e 100644
--- a/crates/rope/src/rope.rs
+++ b/crates/rope/src/rope.rs
@@ -5,11 +5,14 @@
 mod point_utf16;
 mod unclipped;
 
 use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
+use regex::Regex;
 use smallvec::SmallVec;
 use std::{
+    borrow::Cow,
     cmp, fmt, io, mem,
     ops::{self, AddAssign, Range},
     str,
+    sync::{Arc, LazyLock},
 };
 use sum_tree::{Bias, Dimension, Dimensions, SumTree};
@@ -21,6 +24,95 @@ pub use unclipped::Unclipped;
 
 use crate::chunk::Bitmap;
 
+static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum LineEnding {
+    Unix,
+    Windows,
+}
+
+impl Default for LineEnding {
+    fn default() -> Self {
+        #[cfg(unix)]
+        return Self::Unix;
+
+        #[cfg(not(unix))]
+        return Self::Windows;
+    }
+}
+
+impl LineEnding {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            LineEnding::Unix => "\n",
+            LineEnding::Windows => "\r\n",
+        }
+    }
+
+    pub fn label(&self) -> &'static str {
+        match self {
+            LineEnding::Unix => "LF",
+            LineEnding::Windows => "CRLF",
+        }
+    }
+
+    pub fn detect(text: &str) -> Self {
+        let mut max_ix = cmp::min(text.len(), 1000);
+        while !text.is_char_boundary(max_ix) {
+            max_ix -= 1;
+        }
+
+        if let Some(ix) = text[..max_ix].find(['\n']) {
+            if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
+                Self::Windows
+            } else {
+                Self::Unix
+            }
+        } else {
+            Self::default()
+        }
+    }
+
+    pub fn normalize(text: &mut String) {
+        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") {
+            *text = replaced;
+        }
+    }
+
+    pub fn normalize_arc(text: Arc<str>) -> Arc<str> {
+        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
+            replaced.into()
+        } else {
+            text
+        }
+    }
+
+    pub fn normalize_cow(text: Cow<'_, str>) -> Cow<'_, str> {
+        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
+            replaced.into()
+        } else {
+            text
+        }
+    }
+
+    /// Converts text chunks into a [`String`] using the current line ending.
+    pub fn into_string(&self, chunks: Chunks<'_>) -> String {
+        match self {
+            LineEnding::Unix => chunks.collect(),
+            LineEnding::Windows => {
+                let line_ending = self.as_str();
+                let mut result = String::new();
+                for chunk in chunks {
+                    result.push_str(&chunk.replace('\n', line_ending));
+                }
+                result
+            }
+        }
+    }
+}
+
 #[derive(Clone, Default)]
 pub struct Rope {
     chunks: SumTree<Chunk>,
 }
@@ -370,6 +462,16 @@
         Chunks::new(self, range, true)
     }
 
+    /// Formats the rope's text with the specified line ending string.
+    /// This replaces all `\n` characters with the provided line ending.
+    ///
+    /// The rope internally stores all line breaks as `\n` (see `Display` impl).
+    /// Use this method to convert to different line endings for file operations,
+    /// LSP communication, or other scenarios requiring specific line ending formats.
+    pub fn to_string_with_line_ending(&self, line_ending: LineEnding) -> String {
+        line_ending.into_string(self.chunks())
+    }
+
     pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
         if offset >= self.summary().len {
             return self.summary().len_utf16;
         }
@@ -611,10 +713,16 @@ impl From<&String> for Rope {
     }
 }
 
+/// Display implementation for Rope.
+///
+/// Note: This always uses `\n` as the line separator, regardless of the original
+/// file's line endings. The rope internally normalizes all line breaks to `\n`.
+/// If you need to preserve original line endings (e.g., for LSP communication),
+/// use `to_string_with_line_ending` instead.
 impl fmt::Display for Rope {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         for chunk in self.chunks() {
-            write!(f, "{}", chunk)?;
+            write!(f, "{chunk}")?;
         }
         Ok(())
     }
 }
@@ -2264,6 +2372,53 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_to_string_with_line_ending() {
+        // Test Unix line endings (no conversion)
+        let rope = Rope::from("line1\nline2\nline3");
+        assert_eq!(
+            rope.to_string_with_line_ending(LineEnding::Unix),
+            "line1\nline2\nline3"
+        );
+
+        // Test Windows line endings
+        assert_eq!(
+            rope.to_string_with_line_ending(LineEnding::Windows),
+            "line1\r\nline2\r\nline3"
+        );
+
+        // Test empty rope
+        let empty_rope = Rope::from("");
+        assert_eq!(
+            empty_rope.to_string_with_line_ending(LineEnding::Windows),
+            ""
+        );
+
+        // Test single line (no newlines)
+        let single_line = Rope::from("single line");
+        assert_eq!(
+            single_line.to_string_with_line_ending(LineEnding::Windows),
+            "single line"
+        );
+
+        // Test rope ending with newline
+        let ending_newline = Rope::from("line1\nline2\n");
+        assert_eq!(
+            ending_newline.to_string_with_line_ending(LineEnding::Windows),
+            "line1\r\nline2\r\n"
+        );
+
+        // Test large rope with multiple chunks
+        let mut large_rope = Rope::new();
+        for i in 0..100 {
+            large_rope.push(&format!("line{}\n", i));
+        }
+        let result = large_rope.to_string_with_line_ending(LineEnding::Windows);
+        assert!(result.contains("\r\n"));
+        assert!(!result.contains("\n\n"));
+        assert_eq!(result.matches("\r\n").count(), 100);
+    }
+
     fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
         while !text.is_char_boundary(offset) {
             match bias {
diff --git a/crates/text/Cargo.toml b/crates/text/Cargo.toml
index ed02381eb83db5daececd159171a90072244a340..a58f2e20cc781f5d688b9fb1ceef8a17c48e6cb8 100644
--- a/crates/text/Cargo.toml
+++ b/crates/text/Cargo.toml
@@ -23,7 +23,6 @@ log.workspace = true
 parking_lot.workspace = true
 postage.workspace = true
 rand = { workspace = true, optional = true }
-regex.workspace = true
 rope.workspace = true
 smallvec.workspace = true
 sum_tree.workspace = true
diff --git a/crates/text/src/text.rs b/crates/text/src/text.rs
index 0634212c8ca41de539c9791193321cce77c9263e..9a81fc8e941ab4d3a0e16f817fc90fbb608ea84a 100644
--- a/crates/text/src/text.rs
+++ b/crates/text/src/text.rs
@@ -20,11 +20,9 @@ use operation_queue::OperationQueue;
 pub use patch::Patch;
 use postage::{oneshot, prelude::*};
-use regex::Regex;
 pub use rope::*;
 pub use selection::*;
 use std::{
-    borrow::Cow,
     cmp::{self, Ordering, Reverse},
     fmt::Display,
     future::Future,
@@ -32,7 +30,7 @@ use std::{
     num::NonZeroU64,
     ops::{self, Deref, Range, Sub},
     str,
-    sync::{Arc, LazyLock},
+    sync::Arc,
     time::{Duration, Instant},
 };
 pub use subscription::*;
@@ -43,9 +41,6 @@ use undo_map::UndoMap;
 #[cfg(any(test, feature = "test-support"))]
 use util::RandomCharIter;
 
-static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
-    LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
-
 pub type TransactionId = clock::Lamport;
 
 pub struct Buffer {
@@ -2019,10 +2014,24 @@ impl BufferSnapshot {
         start..position
     }
 
+    /// Returns the buffer's text as a String.
+    ///
+    /// Note: This always uses `\n` as the line separator, regardless of the buffer's
+    /// actual line ending setting. For LSP communication or other cases where you need
+    /// to preserve the original line endings, use [`Self::text_with_original_line_endings`] instead.
     pub fn text(&self) -> String {
         self.visible_text.to_string()
     }
 
+    /// Returns the buffer's text with the same line endings as the buffer's file.
+    ///
+    /// Unlike [`Self::text`] which always uses `\n`, this method formats the text using
+    /// the buffer's actual line ending setting (Unix `\n` or Windows `\r\n`).
+    pub fn text_with_original_line_endings(&self) -> String {
+        self.visible_text
+            .to_string_with_line_ending(self.line_ending)
+    }
+
     pub fn line_ending(&self) -> LineEnding {
         self.line_ending
     }
@@ -2126,6 +2135,10 @@
         self.visible_text.reversed_bytes_in_range(start..end)
     }
 
+    /// Returns the text in the given range.
+    ///
+    /// Note: This always uses `\n` as the line separator, regardless of the buffer's
+    /// actual line ending setting.
     pub fn text_for_range<T: ToOffset>(&self, range: Range<T>) -> Chunks<'_> {
         let start = range.start.to_offset(self);
         let end = range.end.to_offset(self);
@@ -3246,77 +3259,6 @@ impl FromAnchor for usize {
     }
 }
 
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub enum LineEnding {
-    Unix,
-    Windows,
-}
-
-impl Default for LineEnding {
-    fn default() -> Self {
-        #[cfg(unix)]
-        return Self::Unix;
-
-        #[cfg(not(unix))]
-        return Self::Windows;
-    }
-}
-
-impl LineEnding {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            LineEnding::Unix => "\n",
-            LineEnding::Windows => "\r\n",
-        }
-    }
-
-    pub fn label(&self) -> &'static str {
-        match self {
-            LineEnding::Unix => "LF",
-            LineEnding::Windows => "CRLF",
-        }
-    }
-
-    pub fn detect(text: &str) -> Self {
-        let mut max_ix = cmp::min(text.len(), 1000);
-        while !text.is_char_boundary(max_ix) {
-            max_ix -= 1;
-        }
-
-        if let Some(ix) = text[..max_ix].find(['\n']) {
-            if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
-                Self::Windows
-            } else {
-                Self::Unix
-            }
-        } else {
-            Self::default()
-        }
-    }
-
-    pub fn normalize(text: &mut String) {
-        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") {
-            *text = replaced;
-        }
-    }
-
-    pub fn normalize_arc(text: Arc<str>) -> Arc<str> {
-        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
-            replaced.into()
-        } else {
-            text
-        }
-    }
-
-    pub fn normalize_cow(text: Cow<'_, str>) -> Cow<'_, str> {
-        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
-            replaced.into()
-        } else {
-            text
-        }
-    }
-}
-
 #[cfg(debug_assertions)]
 pub mod debug {
     use super::*;
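Usage sketch (not part of the patch): the snippet below illustrates the intended contract of the rope API added above, assuming `LineEnding` and `Rope` are exported from the `rope` crate as shown in the diff. Ropes and buffers always store `\n` internally; the new helper re-applies the file's original line ending when producing output such as LSP payloads.

use rope::{LineEnding, Rope};

fn main() {
    // CRLF text as read from disk.
    let mut text = String::from("fn main() {\r\n    let a = 5;\r\n}");
    // Detect and remember the file's line ending, then normalize to "\n" for storage.
    let ending = LineEnding::detect(&text); // LineEnding::Windows for this input
    LineEnding::normalize(&mut text);
    let rope = Rope::from(&text);

    // Display/to_string() stays "\n"-separated...
    assert_eq!(rope.to_string(), "fn main() {\n    let a = 5;\n}");
    // ...while the new helper restores the original "\r\n" for consumers like the LSP.
    assert_eq!(
        rope.to_string_with_line_ending(ending),
        "fn main() {\r\n    let a = 5;\r\n}"
    );
}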