Fix ESLint linebreak-style errors by preserving line endings in LSP communication (#38773)

Devdatta Talele , Claude , and Kirill Bulatov created 3 months ago

Closes https://github.com/zed-industries/zed/issues/38453

Current `Buffer` API only allows getting buffer text with `\n` line
breaks — even if the `\r\n` was used in the original file's text.

This it not correct in certain cases like LSP formatting, where language
servers need to have original document context for e.g. formatting
purposes.

Added new `Buffer` API, replaced all buffer LSP registration places with
the new one and added more tests.

Release Notes: 

- Fixed ESLint linebreak-style errors by preserving line endings in LSP
communication

---------

Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Kirill Bulatov <kirill@zed.dev>

Change summary

Cargo.lock                        |   2 
crates/editor/src/editor_tests.rs |  89 ++++++++++++++++++
crates/project/src/lsp_store.rs   |  18 ++-
crates/rope/Cargo.toml            |   1 
crates/rope/src/rope.rs           | 157 ++++++++++++++++++++++++++++++++
crates/text/Cargo.toml            |   1 
crates/text/src/text.rs           |  96 +++----------------
7 files changed, 273 insertions(+), 91 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -14226,6 +14226,7 @@ dependencies = [
  "log",
  "rand 0.9.2",
  "rayon",
+ "regex",
  "smallvec",
  "sum_tree",
  "unicode-segmentation",
@@ -17032,7 +17033,6 @@ dependencies = [
  "parking_lot",
  "postage",
  "rand 0.9.2",
- "regex",
  "rope",
  "smallvec",
  "sum_tree",

crates/editor/src/editor_tests.rs 🔗

@@ -12629,6 +12629,12 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) {
                 );
             }
         });
+
+    #[cfg(target_os = "windows")]
+    let line_ending = "\r\n";
+    #[cfg(not(target_os = "windows"))]
+    let line_ending = "\n";
+
     // Handle formatting requests to the language server.
     cx.lsp
         .set_request_handler::<lsp::request::Formatting, _, _>({
@@ -12652,7 +12658,7 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) {
                             ),
                             (
                                 lsp::Range::new(lsp::Position::new(3, 4), lsp::Position::new(3, 4)),
-                                "\n".into()
+                                line_ending.into()
                             ),
                         ]
                     );
@@ -12663,14 +12669,14 @@ async fn test_strip_whitespace_and_format_via_lsp(cx: &mut TestAppContext) {
                                 lsp::Position::new(1, 0),
                                 lsp::Position::new(1, 0),
                             ),
-                            new_text: "\n".into(),
+                            new_text: line_ending.into(),
                         },
                         lsp::TextEdit {
                             range: lsp::Range::new(
                                 lsp::Position::new(2, 0),
                                 lsp::Position::new(2, 0),
                             ),
-                            new_text: "\n".into(),
+                            new_text: line_ending.into(),
                         },
                     ]))
                 }
@@ -26656,6 +26662,83 @@ async fn test_paste_url_from_other_app_creates_markdown_link_selectively_in_mult
     ));
 }
 
+#[gpui::test]
+async fn test_non_linux_line_endings_registration(cx: &mut TestAppContext) {
+    init_test(cx, |_| {});
+
+    let unix_newlines_file_text = "fn main() {
+        let a = 5;
+    }";
+    let clrf_file_text = unix_newlines_file_text.lines().join("\r\n");
+
+    let fs = FakeFs::new(cx.executor());
+    fs.insert_tree(
+        path!("/a"),
+        json!({
+            "first.rs": &clrf_file_text,
+        }),
+    )
+    .await;
+
+    let project = Project::test(fs, [path!("/a").as_ref()], cx).await;
+    let workspace = cx.add_window(|window, cx| Workspace::test_new(project.clone(), window, cx));
+    let cx = &mut VisualTestContext::from_window(*workspace, cx);
+
+    let registered_text = Arc::new(Mutex::new(Vec::new()));
+    let language_registry = project.read_with(cx, |project, _| project.languages().clone());
+    language_registry.add(rust_lang());
+    let mut fake_servers = language_registry.register_fake_lsp(
+        "Rust",
+        FakeLspAdapter {
+            capabilities: lsp::ServerCapabilities {
+                color_provider: Some(lsp::ColorProviderCapability::Simple(true)),
+                ..lsp::ServerCapabilities::default()
+            },
+            name: "rust-analyzer",
+            initializer: Some({
+                let registered_text = registered_text.clone();
+                Box::new(move |fake_server| {
+                    fake_server.handle_notification::<lsp::notification::DidOpenTextDocument, _>({
+                        let registered_text = registered_text.clone();
+                        move |params, _| {
+                            registered_text.lock().push(params.text_document.text);
+                        }
+                    });
+                })
+            }),
+            ..FakeLspAdapter::default()
+        },
+    );
+
+    let editor = workspace
+        .update(cx, |workspace, window, cx| {
+            workspace.open_abs_path(
+                PathBuf::from(path!("/a/first.rs")),
+                OpenOptions::default(),
+                window,
+                cx,
+            )
+        })
+        .unwrap()
+        .await
+        .unwrap()
+        .downcast::<Editor>()
+        .unwrap();
+    let _fake_language_server = fake_servers.next().await.unwrap();
+    cx.executor().run_until_parked();
+
+    assert_eq!(
+        editor.update(cx, |editor, cx| editor.text(cx)),
+        unix_newlines_file_text,
+        "Default text API returns \n-separated text",
+    );
+    assert_eq!(
+        vec![clrf_file_text],
+        registered_text.lock().drain(..).collect::<Vec<_>>(),
+        "Expected the language server to receive the exact same text from the FS",
+    );
+}
+
 #[gpui::test]
 async fn test_race_in_multibuffer_save(cx: &mut TestAppContext) {
     init_test(cx, |_| {});

crates/project/src/lsp_store.rs 🔗

@@ -2487,7 +2487,7 @@ impl LocalLspStore {
                         uri.clone(),
                         adapter.language_id(&language.name()),
                         0,
-                        initial_snapshot.text(),
+                        initial_snapshot.text_with_original_line_endings(),
                     );
 
                     vec![snapshot]
@@ -7522,6 +7522,7 @@ impl LspStore {
             let previous_snapshot = buffer_snapshots.last()?;
 
             let build_incremental_change = || {
+                let line_ending = next_snapshot.line_ending();
                 buffer
                     .edits_since::<Dimensions<PointUtf16, usize>>(
                         previous_snapshot.snapshot.version(),
@@ -7529,16 +7530,18 @@ impl LspStore {
                     .map(|edit| {
                         let edit_start = edit.new.start.0;
                         let edit_end = edit_start + (edit.old.end.0 - edit.old.start.0);
-                        let new_text = next_snapshot
-                            .text_for_range(edit.new.start.1..edit.new.end.1)
-                            .collect();
                         lsp::TextDocumentContentChangeEvent {
                             range: Some(lsp::Range::new(
                                 point_to_lsp(edit_start),
                                 point_to_lsp(edit_end),
                             )),
                             range_length: None,
-                            text: new_text,
+                            // Collect changed text and preserve line endings.
+                            // text_for_range returns chunks with normalized \n, so we need to
+                            // convert to the buffer's actual line ending for LSP.
+                            text: line_ending.into_string(
+                                next_snapshot.text_for_range(edit.new.start.1..edit.new.end.1),
+                            ),
                         }
                     })
                     .collect()
@@ -7558,7 +7561,7 @@ impl LspStore {
                     vec![lsp::TextDocumentContentChangeEvent {
                         range: None,
                         range_length: None,
-                        text: next_snapshot.text(),
+                        text: next_snapshot.text_with_original_line_endings(),
                     }]
                 }
                 Some(lsp::TextDocumentSyncKind::INCREMENTAL) => build_incremental_change(),
@@ -10922,13 +10925,12 @@ impl LspStore {
 
                     let snapshot = versions.last().unwrap();
                     let version = snapshot.version;
-                    let initial_snapshot = &snapshot.snapshot;
                     let uri = lsp::Uri::from_file_path(file.abs_path(cx)).unwrap();
                     language_server.register_buffer(
                         uri,
                         adapter.language_id(&language.name()),
                         version,
-                        initial_snapshot.text(),
+                        buffer_handle.read(cx).text_with_original_line_endings(),
                     );
                     buffer_paths_registered.push((buffer_id, file.abs_path(cx)));
                     local

crates/rope/Cargo.toml 🔗

@@ -15,6 +15,7 @@ path = "src/rope.rs"
 arrayvec = "0.7.1"
 log.workspace = true
 rayon.workspace = true
+regex.workspace = true
 smallvec.workspace = true
 sum_tree.workspace = true
 unicode-segmentation.workspace = true

crates/rope/src/rope.rs 🔗

@@ -5,11 +5,14 @@ mod point_utf16;
 mod unclipped;
 
 use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
+use regex::Regex;
 use smallvec::SmallVec;
 use std::{
+    borrow::Cow,
     cmp, fmt, io, mem,
     ops::{self, AddAssign, Range},
     str,
+    sync::{Arc, LazyLock},
 };
 use sum_tree::{Bias, Dimension, Dimensions, SumTree};
 
@@ -21,6 +24,95 @@ pub use unclipped::Unclipped;
 
 use crate::chunk::Bitmap;
 
+static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
+    LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub enum LineEnding {
+    Unix,
+    Windows,
+}
+
+impl Default for LineEnding {
+    fn default() -> Self {
+        #[cfg(unix)]
+        return Self::Unix;
+
+        #[cfg(not(unix))]
+        return Self::Windows;
+    }
+}
+
+impl LineEnding {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            LineEnding::Unix => "\n",
+            LineEnding::Windows => "\r\n",
+        }
+    }
+
+    pub fn label(&self) -> &'static str {
+        match self {
+            LineEnding::Unix => "LF",
+            LineEnding::Windows => "CRLF",
+        }
+    }
+
+    pub fn detect(text: &str) -> Self {
+        let mut max_ix = cmp::min(text.len(), 1000);
+        while !text.is_char_boundary(max_ix) {
+            max_ix -= 1;
+        }
+
+        if let Some(ix) = text[..max_ix].find(['\n']) {
+            if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
+                Self::Windows
+            } else {
+                Self::Unix
+            }
+        } else {
+            Self::default()
+        }
+    }
+
+    pub fn normalize(text: &mut String) {
+        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") {
+            *text = replaced;
+        }
+    }
+
+    pub fn normalize_arc(text: Arc<str>) -> Arc<str> {
+        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
+            replaced.into()
+        } else {
+            text
+        }
+    }
+
+    pub fn normalize_cow(text: Cow<str>) -> Cow<str> {
+        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
+            replaced.into()
+        } else {
+            text
+        }
+    }
+
+    /// Converts text chunks into a [`String`] using the current line ending.
+    pub fn into_string(&self, chunks: Chunks<'_>) -> String {
+        match self {
+            LineEnding::Unix => chunks.collect(),
+            LineEnding::Windows => {
+                let line_ending = self.as_str();
+                let mut result = String::new();
+                for chunk in chunks {
+                    result.push_str(&chunk.replace('\n', line_ending));
+                }
+                result
+            }
+        }
+    }
+}
+
 #[derive(Clone, Default)]
 pub struct Rope {
     chunks: SumTree<Chunk>,
@@ -370,6 +462,16 @@ impl Rope {
         Chunks::new(self, range, true)
     }
 
+    /// Formats the rope's text with the specified line ending string.
+    /// This replaces all `\n` characters with the provided line ending.
+    ///
+    /// The rope internally stores all line breaks as `\n` (see `Display` impl).
+    /// Use this method to convert to different line endings for file operations,
+    /// LSP communication, or other scenarios requiring specific line ending formats.
+    pub fn to_string_with_line_ending(&self, line_ending: LineEnding) -> String {
+        line_ending.into_string(self.chunks())
+    }
+
     pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
         if offset >= self.summary().len {
             return self.summary().len_utf16;
@@ -611,10 +713,16 @@ impl From<&String> for Rope {
     }
 }
 
+/// Display implementation for Rope.
+///
+/// Note: This always uses `\n` as the line separator, regardless of the original
+/// file's line endings. The rope internally normalizes all line breaks to `\n`.
+/// If you need to preserve original line endings (e.g., for LSP communication),
+/// use `to_string_with_line_ending` instead.
 impl fmt::Display for Rope {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         for chunk in self.chunks() {
-            write!(f, "{}", chunk)?;
+            write!(f, "{chunk}")?;
         }
         Ok(())
     }
@@ -2264,6 +2372,53 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_to_string_with_line_ending() {
+        // Test Unix line endings (no conversion)
+        let rope = Rope::from("line1\nline2\nline3");
+        assert_eq!(
+            rope.to_string_with_line_ending(LineEnding::Unix),
+            "line1\nline2\nline3"
+        );
+
+        // Test Windows line endings
+        assert_eq!(
+            rope.to_string_with_line_ending(LineEnding::Windows),
+            "line1\r\nline2\r\nline3"
+        );
+
+        // Test empty rope
+        let empty_rope = Rope::from("");
+        assert_eq!(
+            empty_rope.to_string_with_line_ending(LineEnding::Windows),
+            ""
+        );
+
+        // Test single line (no newlines)
+        let single_line = Rope::from("single line");
+        assert_eq!(
+            single_line.to_string_with_line_ending(LineEnding::Windows),
+            "single line"
+        );
+
+        // Test rope ending with newline
+        let ending_newline = Rope::from("line1\nline2\n");
+        assert_eq!(
+            ending_newline.to_string_with_line_ending(LineEnding::Windows),
+            "line1\r\nline2\r\n"
+        );
+
+        // Test large rope with multiple chunks
+        let mut large_rope = Rope::new();
+        for i in 0..100 {
+            large_rope.push(&format!("line{}\n", i));
+        }
+        let result = large_rope.to_string_with_line_ending(LineEnding::Windows);
+        assert!(result.contains("\r\n"));
+        assert!(!result.contains("\n\n"));
+        assert_eq!(result.matches("\r\n").count(), 100);
+    }
+
     fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
         while !text.is_char_boundary(offset) {
             match bias {

crates/text/Cargo.toml 🔗

@@ -23,7 +23,6 @@ log.workspace = true
 parking_lot.workspace = true
 postage.workspace = true
 rand = { workspace = true, optional = true }
-regex.workspace = true
 rope.workspace = true
 smallvec.workspace = true
 sum_tree.workspace = true

crates/text/src/text.rs 🔗

@@ -20,11 +20,9 @@ use operation_queue::OperationQueue;
 pub use patch::Patch;
 use postage::{oneshot, prelude::*};
 
-use regex::Regex;
 pub use rope::*;
 pub use selection::*;
 use std::{
-    borrow::Cow,
     cmp::{self, Ordering, Reverse},
     fmt::Display,
     future::Future,
@@ -32,7 +30,7 @@ use std::{
     num::NonZeroU64,
     ops::{self, Deref, Range, Sub},
     str,
-    sync::{Arc, LazyLock},
+    sync::Arc,
     time::{Duration, Instant},
 };
 pub use subscription::*;
@@ -43,9 +41,6 @@ use undo_map::UndoMap;
 #[cfg(any(test, feature = "test-support"))]
 use util::RandomCharIter;
 
-static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
-    LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
-
 pub type TransactionId = clock::Lamport;
 
 pub struct Buffer {
@@ -2019,10 +2014,24 @@ impl BufferSnapshot {
         start..position
     }
 
+    /// Returns the buffer's text as a String.
+    ///
+    /// Note: This always uses `\n` as the line separator, regardless of the buffer's
+    /// actual line ending setting. For LSP communication or other cases where you need
+    /// to preserve the original line endings, use [`Self::text_with_original_line_endings`] instead.
     pub fn text(&self) -> String {
         self.visible_text.to_string()
     }
 
+    /// Returns the buffer's text with line same endings as in buffer's file.
+    ///
+    /// Unlike [`Self::text`] which always uses `\n`, this method formats the text using
+    /// the buffer's actual line ending setting (Unix `\n` or Windows `\r\n`).
+    pub fn text_with_original_line_endings(&self) -> String {
+        self.visible_text
+            .to_string_with_line_ending(self.line_ending)
+    }
+
     pub fn line_ending(&self) -> LineEnding {
         self.line_ending
     }
@@ -2126,6 +2135,10 @@ impl BufferSnapshot {
         self.visible_text.reversed_bytes_in_range(start..end)
     }
 
+    /// Returns the text in the given range.
+    ///
+    /// Note: This always uses `\n` as the line separator, regardless of the buffer's
+    /// actual line ending setting.
     pub fn text_for_range<T: ToOffset>(&self, range: Range<T>) -> Chunks<'_> {
         let start = range.start.to_offset(self);
         let end = range.end.to_offset(self);
@@ -3246,77 +3259,6 @@ impl FromAnchor for usize {
     }
 }
 
-#[derive(Clone, Copy, Debug, PartialEq)]
-pub enum LineEnding {
-    Unix,
-    Windows,
-}
-
-impl Default for LineEnding {
-    fn default() -> Self {
-        #[cfg(unix)]
-        return Self::Unix;
-
-        #[cfg(not(unix))]
-        return Self::Windows;
-    }
-}
-
-impl LineEnding {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            LineEnding::Unix => "\n",
-            LineEnding::Windows => "\r\n",
-        }
-    }
-
-    pub fn label(&self) -> &'static str {
-        match self {
-            LineEnding::Unix => "LF",
-            LineEnding::Windows => "CRLF",
-        }
-    }
-
-    pub fn detect(text: &str) -> Self {
-        let mut max_ix = cmp::min(text.len(), 1000);
-        while !text.is_char_boundary(max_ix) {
-            max_ix -= 1;
-        }
-
-        if let Some(ix) = text[..max_ix].find(['\n']) {
-            if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
-                Self::Windows
-            } else {
-                Self::Unix
-            }
-        } else {
-            Self::default()
-        }
-    }
-
-    pub fn normalize(text: &mut String) {
-        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") {
-            *text = replaced;
-        }
-    }
-
-    pub fn normalize_arc(text: Arc<str>) -> Arc<str> {
-        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
-            replaced.into()
-        } else {
-            text
-        }
-    }
-
-    pub fn normalize_cow(text: Cow<str>) -> Cow<str> {
-        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
-            replaced.into()
-        } else {
-            text
-        }
-    }
-}
-
 #[cfg(debug_assertions)]
 pub mod debug {
     use super::*;