From 435eab68968fa11f8a476c78845b13df45498f25 Mon Sep 17 00:00:00 2001 From: Devdatta Talele <50290838+devdattatalele@users.noreply.github.com> Date: Fri, 24 Oct 2025 00:41:48 +0530 Subject: [PATCH] Fix ESLint linebreak-style errors by preserving line endings in LSP communication (#38773) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes https://github.com/zed-industries/zed/issues/38453 Current `Buffer` API only allows getting buffer text with `\n` line breaks — even if the `\r\n` was used in the original file's text. This it not correct in certain cases like LSP formatting, where language servers need to have original document context for e.g. formatting purposes. Added new `Buffer` API, replaced all buffer LSP registration places with the new one and added more tests. Release Notes: - Fixed ESLint linebreak-style errors by preserving line endings in LSP communication --------- Co-authored-by: Claude Co-authored-by: Kirill Bulatov --- Cargo.lock | 2 +- crates/editor/src/editor_tests.rs | 89 ++++++++++++++++- crates/project/src/lsp_store.rs | 18 ++-- crates/rope/Cargo.toml | 1 + crates/rope/src/rope.rs | 157 +++++++++++++++++++++++++++++- crates/text/Cargo.toml | 1 - crates/text/src/text.rs | 96 ++++-------------- 7 files changed, 273 insertions(+), 91 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d33d31d9fdc5ab0e7819cbaf1d15c0a149d56627..152dbd8e603ad18b28add0aee4ad1e652f7daaf3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14226,6 +14226,7 @@ dependencies = [ "log", "rand 0.9.2", "rayon", + "regex", "smallvec", "sum_tree", "unicode-segmentation", @@ -17032,7 +17033,6 @@ dependencies = [ "parking_lot", "postage", "rand 0.9.2", - "regex", "rope", "smallvec", "sum_tree", diff --git a/crates/editor/src/editor_tests.rs b/crates/editor/src/editor_tests.rs index 2a3909f069ac491adb8f5675807b647b77d23ac9..4ccf37b021244b87bccd090ec691b09903d7b0f6 100644 --- a/crates/editor/src/editor_tests.rs +++ b/crates/editor/src/editor_tests.rs @@ -12629,6 +12629,12 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) { ); } }); + + #[cfg(target_os = "windows")] + let line_ending = "\r\n"; + #[cfg(not(target_os = "windows"))] + let line_ending = "\n"; + // Handle formatting requests to the language server. cx.lsp .set_request_handler::({ @@ -12652,7 +12658,7 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) { ), ( lsp::Range::new(lsp::Position::new(3, 4), lsp::Position::new(3, 4)), - "\n".into() + line_ending.into() ), ] ); @@ -12663,14 +12669,14 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) { lsp::Position::new(1, 0), lsp::Position::new(1, 0), ), - new_text: "\n".into(), + new_text: line_ending.into(), }, lsp::TextEdit { range: lsp::Range::new( lsp::Position::new(2, 0), lsp::Position::new(2, 0), ), - new_text: "\n".into(), + new_text: line_ending.into(), }, ])) } @@ -26656,6 +26662,83 @@ async fn test_paste_url_from_other_app_creates_markdown_link_selectively_in_mult )); } +#[gpui::test] +async fn test_non_linux_line_endings_registration(cx: &mut TestAppContext) { + init_test(cx, |_| {}); + + let unix_newlines_file_text = "fn main() { + let a = 5; + }"; + let clrf_file_text = unix_newlines_file_text.lines().join("\r\n"); + + let fs = FakeFs::new(cx.executor()); + fs.insert_tree( + path!("/a"), + json!({ + "first.rs": &clrf_file_text, + }), + ) + .await; + + let project = Project::test(fs, [path!("/a").as_ref()], cx).await; + let workspace = cx.add_window(|window, cx| Workspace::test_new(project.clone(), window, cx)); + let cx = &mut VisualTestContext::from_window(*workspace, cx); + + let registered_text = Arc::new(Mutex::new(Vec::new())); + let language_registry = project.read_with(cx, |project, _| project.languages().clone()); + language_registry.add(rust_lang()); + let mut fake_servers = language_registry.register_fake_lsp( + "Rust", + FakeLspAdapter { + capabilities: lsp::ServerCapabilities { + color_provider: Some(lsp::ColorProviderCapability::Simple(true)), + ..lsp::ServerCapabilities::default() + }, + name: "rust-analyzer", + initializer: Some({ + let registered_text = registered_text.clone(); + Box::new(move |fake_server| { + fake_server.handle_notification::({ + let registered_text = registered_text.clone(); + move |params, _| { + registered_text.lock().push(params.text_document.text); + } + }); + }) + }), + ..FakeLspAdapter::default() + }, + ); + + let editor = workspace + .update(cx, |workspace, window, cx| { + workspace.open_abs_path( + PathBuf::from(path!("/a/first.rs")), + OpenOptions::default(), + window, + cx, + ) + }) + .unwrap() + .await + .unwrap() + .downcast::() + .unwrap(); + let _fake_language_server = fake_servers.next().await.unwrap(); + cx.executor().run_until_parked(); + + assert_eq!( + editor.update(cx, |editor, cx| editor.text(cx)), + unix_newlines_file_text, + "Default text API returns \n-separated text", + ); + assert_eq!( + vec![clrf_file_text], + registered_text.lock().drain(..).collect::>(), + "Expected the language server to receive the exact same text from the FS", + ); +} + #[gpui::test] async fn test_race_in_multibuffer_save(cx: &mut TestAppContext) { init_test(cx, |_| {}); diff --git a/crates/project/src/lsp_store.rs b/crates/project/src/lsp_store.rs index 2350c110eee4c8e3f2e838f04ee9fc04292209da..1d6d4240de0ae8a6781b49f78341d10b5127cdc1 100644 --- a/crates/project/src/lsp_store.rs +++ b/crates/project/src/lsp_store.rs @@ -2487,7 +2487,7 @@ impl LocalLspStore { uri.clone(), adapter.language_id(&language.name()), 0, - initial_snapshot.text(), + initial_snapshot.text_with_original_line_endings(), ); vec![snapshot] @@ -7522,6 +7522,7 @@ impl LspStore { let previous_snapshot = buffer_snapshots.last()?; let build_incremental_change = || { + let line_ending = next_snapshot.line_ending(); buffer .edits_since::>( previous_snapshot.snapshot.version(), @@ -7529,16 +7530,18 @@ impl LspStore { .map(|edit| { let edit_start = edit.new.start.0; let edit_end = edit_start + (edit.old.end.0 - edit.old.start.0); - let new_text = next_snapshot - .text_for_range(edit.new.start.1..edit.new.end.1) - .collect(); lsp::TextDocumentContentChangeEvent { range: Some(lsp::Range::new( point_to_lsp(edit_start), point_to_lsp(edit_end), )), range_length: None, - text: new_text, + // Collect changed text and preserve line endings. + // text_for_range returns chunks with normalized \n, so we need to + // convert to the buffer's actual line ending for LSP. + text: line_ending.into_string( + next_snapshot.text_for_range(edit.new.start.1..edit.new.end.1), + ), } }) .collect() @@ -7558,7 +7561,7 @@ impl LspStore { vec![lsp::TextDocumentContentChangeEvent { range: None, range_length: None, - text: next_snapshot.text(), + text: next_snapshot.text_with_original_line_endings(), }] } Some(lsp::TextDocumentSyncKind::INCREMENTAL) => build_incremental_change(), @@ -10922,13 +10925,12 @@ impl LspStore { let snapshot = versions.last().unwrap(); let version = snapshot.version; - let initial_snapshot = &snapshot.snapshot; let uri = lsp::Uri::from_file_path(file.abs_path(cx)).unwrap(); language_server.register_buffer( uri, adapter.language_id(&language.name()), version, - initial_snapshot.text(), + buffer_handle.read(cx).text_with_original_line_endings(), ); buffer_paths_registered.push((buffer_id, file.abs_path(cx))); local diff --git a/crates/rope/Cargo.toml b/crates/rope/Cargo.toml index 9dfe4cb333a2311982f1c49206214e270fd288c0..f099248a5db49ac1e857900b7d00294a11cfbff2 100644 --- a/crates/rope/Cargo.toml +++ b/crates/rope/Cargo.toml @@ -15,6 +15,7 @@ path = "src/rope.rs" arrayvec = "0.7.1" log.workspace = true rayon.workspace = true +regex.workspace = true smallvec.workspace = true sum_tree.workspace = true unicode-segmentation.workspace = true diff --git a/crates/rope/src/rope.rs b/crates/rope/src/rope.rs index 0195f61dcb30bdc85ae3dbe541fa5fba5f76a2c9..b5c5cd069e07a0957b01130eec2b4ecdf7f7120e 100644 --- a/crates/rope/src/rope.rs +++ b/crates/rope/src/rope.rs @@ -5,11 +5,14 @@ mod point_utf16; mod unclipped; use rayon::iter::{IntoParallelIterator, ParallelIterator as _}; +use regex::Regex; use smallvec::SmallVec; use std::{ + borrow::Cow, cmp, fmt, io, mem, ops::{self, AddAssign, Range}, str, + sync::{Arc, LazyLock}, }; use sum_tree::{Bias, Dimension, Dimensions, SumTree}; @@ -21,6 +24,95 @@ pub use unclipped::Unclipped; use crate::chunk::Bitmap; +static LINE_SEPARATORS_REGEX: LazyLock = + LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX")); + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum LineEnding { + Unix, + Windows, +} + +impl Default for LineEnding { + fn default() -> Self { + #[cfg(unix)] + return Self::Unix; + + #[cfg(not(unix))] + return Self::Windows; + } +} + +impl LineEnding { + pub fn as_str(&self) -> &'static str { + match self { + LineEnding::Unix => "\n", + LineEnding::Windows => "\r\n", + } + } + + pub fn label(&self) -> &'static str { + match self { + LineEnding::Unix => "LF", + LineEnding::Windows => "CRLF", + } + } + + pub fn detect(text: &str) -> Self { + let mut max_ix = cmp::min(text.len(), 1000); + while !text.is_char_boundary(max_ix) { + max_ix -= 1; + } + + if let Some(ix) = text[..max_ix].find(['\n']) { + if ix > 0 && text.as_bytes()[ix - 1] == b'\r' { + Self::Windows + } else { + Self::Unix + } + } else { + Self::default() + } + } + + pub fn normalize(text: &mut String) { + if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") { + *text = replaced; + } + } + + pub fn normalize_arc(text: Arc) -> Arc { + if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") { + replaced.into() + } else { + text + } + } + + pub fn normalize_cow(text: Cow) -> Cow { + if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") { + replaced.into() + } else { + text + } + } + + /// Converts text chunks into a [`String`] using the current line ending. + pub fn into_string(&self, chunks: Chunks<'_>) -> String { + match self { + LineEnding::Unix => chunks.collect(), + LineEnding::Windows => { + let line_ending = self.as_str(); + let mut result = String::new(); + for chunk in chunks { + result.push_str(&chunk.replace('\n', line_ending)); + } + result + } + } + } +} + #[derive(Clone, Default)] pub struct Rope { chunks: SumTree, @@ -370,6 +462,16 @@ impl Rope { Chunks::new(self, range, true) } + /// Formats the rope's text with the specified line ending string. + /// This replaces all `\n` characters with the provided line ending. + /// + /// The rope internally stores all line breaks as `\n` (see `Display` impl). + /// Use this method to convert to different line endings for file operations, + /// LSP communication, or other scenarios requiring specific line ending formats. + pub fn to_string_with_line_ending(&self, line_ending: LineEnding) -> String { + line_ending.into_string(self.chunks()) + } + pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 { if offset >= self.summary().len { return self.summary().len_utf16; @@ -611,10 +713,16 @@ impl From<&String> for Rope { } } +/// Display implementation for Rope. +/// +/// Note: This always uses `\n` as the line separator, regardless of the original +/// file's line endings. The rope internally normalizes all line breaks to `\n`. +/// If you need to preserve original line endings (e.g., for LSP communication), +/// use `to_string_with_line_ending` instead. impl fmt::Display for Rope { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { for chunk in self.chunks() { - write!(f, "{}", chunk)?; + write!(f, "{chunk}")?; } Ok(()) } @@ -2264,6 +2372,53 @@ mod tests { } } + #[test] + fn test_to_string_with_line_ending() { + // Test Unix line endings (no conversion) + let rope = Rope::from("line1\nline2\nline3"); + assert_eq!( + rope.to_string_with_line_ending(LineEnding::Unix), + "line1\nline2\nline3" + ); + + // Test Windows line endings + assert_eq!( + rope.to_string_with_line_ending(LineEnding::Windows), + "line1\r\nline2\r\nline3" + ); + + // Test empty rope + let empty_rope = Rope::from(""); + assert_eq!( + empty_rope.to_string_with_line_ending(LineEnding::Windows), + "" + ); + + // Test single line (no newlines) + let single_line = Rope::from("single line"); + assert_eq!( + single_line.to_string_with_line_ending(LineEnding::Windows), + "single line" + ); + + // Test rope ending with newline + let ending_newline = Rope::from("line1\nline2\n"); + assert_eq!( + ending_newline.to_string_with_line_ending(LineEnding::Windows), + "line1\r\nline2\r\n" + ); + + // Test large rope with multiple chunks + let mut large_rope = Rope::new(); + for i in 0..100 { + large_rope.push(&format!("line{}\n", i)); + } + let result = large_rope.to_string_with_line_ending(LineEnding::Windows); + assert!(result.contains("\r\n")); + assert!(!result.contains("\n\n")); + assert_eq!(result.matches("\r\n").count(), 100); + } + fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize { while !text.is_char_boundary(offset) { match bias { diff --git a/crates/text/Cargo.toml b/crates/text/Cargo.toml index ed02381eb83db5daececd159171a90072244a340..a58f2e20cc781f5d688b9fb1ceef8a17c48e6cb8 100644 --- a/crates/text/Cargo.toml +++ b/crates/text/Cargo.toml @@ -23,7 +23,6 @@ log.workspace = true parking_lot.workspace = true postage.workspace = true rand = { workspace = true, optional = true } -regex.workspace = true rope.workspace = true smallvec.workspace = true sum_tree.workspace = true diff --git a/crates/text/src/text.rs b/crates/text/src/text.rs index 0634212c8ca41de539c9791193321cce77c9263e..9a81fc8e941ab4d3a0e16f817fc90fbb608ea84a 100644 --- a/crates/text/src/text.rs +++ b/crates/text/src/text.rs @@ -20,11 +20,9 @@ use operation_queue::OperationQueue; pub use patch::Patch; use postage::{oneshot, prelude::*}; -use regex::Regex; pub use rope::*; pub use selection::*; use std::{ - borrow::Cow, cmp::{self, Ordering, Reverse}, fmt::Display, future::Future, @@ -32,7 +30,7 @@ use std::{ num::NonZeroU64, ops::{self, Deref, Range, Sub}, str, - sync::{Arc, LazyLock}, + sync::Arc, time::{Duration, Instant}, }; pub use subscription::*; @@ -43,9 +41,6 @@ use undo_map::UndoMap; #[cfg(any(test, feature = "test-support"))] use util::RandomCharIter; -static LINE_SEPARATORS_REGEX: LazyLock = - LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX")); - pub type TransactionId = clock::Lamport; pub struct Buffer { @@ -2019,10 +2014,24 @@ impl BufferSnapshot { start..position } + /// Returns the buffer's text as a String. + /// + /// Note: This always uses `\n` as the line separator, regardless of the buffer's + /// actual line ending setting. For LSP communication or other cases where you need + /// to preserve the original line endings, use [`Self::text_with_original_line_endings`] instead. pub fn text(&self) -> String { self.visible_text.to_string() } + /// Returns the buffer's text with line same endings as in buffer's file. + /// + /// Unlike [`Self::text`] which always uses `\n`, this method formats the text using + /// the buffer's actual line ending setting (Unix `\n` or Windows `\r\n`). + pub fn text_with_original_line_endings(&self) -> String { + self.visible_text + .to_string_with_line_ending(self.line_ending) + } + pub fn line_ending(&self) -> LineEnding { self.line_ending } @@ -2126,6 +2135,10 @@ impl BufferSnapshot { self.visible_text.reversed_bytes_in_range(start..end) } + /// Returns the text in the given range. + /// + /// Note: This always uses `\n` as the line separator, regardless of the buffer's + /// actual line ending setting. pub fn text_for_range(&self, range: Range) -> Chunks<'_> { let start = range.start.to_offset(self); let end = range.end.to_offset(self); @@ -3246,77 +3259,6 @@ impl FromAnchor for usize { } } -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum LineEnding { - Unix, - Windows, -} - -impl Default for LineEnding { - fn default() -> Self { - #[cfg(unix)] - return Self::Unix; - - #[cfg(not(unix))] - return Self::Windows; - } -} - -impl LineEnding { - pub fn as_str(&self) -> &'static str { - match self { - LineEnding::Unix => "\n", - LineEnding::Windows => "\r\n", - } - } - - pub fn label(&self) -> &'static str { - match self { - LineEnding::Unix => "LF", - LineEnding::Windows => "CRLF", - } - } - - pub fn detect(text: &str) -> Self { - let mut max_ix = cmp::min(text.len(), 1000); - while !text.is_char_boundary(max_ix) { - max_ix -= 1; - } - - if let Some(ix) = text[..max_ix].find(['\n']) { - if ix > 0 && text.as_bytes()[ix - 1] == b'\r' { - Self::Windows - } else { - Self::Unix - } - } else { - Self::default() - } - } - - pub fn normalize(text: &mut String) { - if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") { - *text = replaced; - } - } - - pub fn normalize_arc(text: Arc) -> Arc { - if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") { - replaced.into() - } else { - text - } - } - - pub fn normalize_cow(text: Cow) -> Cow { - if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") { - replaced.into() - } else { - text - } - } -} - #[cfg(debug_assertions)] pub mod debug { use super::*;