From 6212ebad9b980f46fc38b688dcea38fb308d1378 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Fri, 29 Oct 2021 18:31:21 +0200 Subject: [PATCH] Communicate with language servers in terms of UTF-16 coordinates This required indexing UTF-16 positions in `Rope`. We tried opting into the UTF-8 experimental support but it didn't seem to work correctly and the standard is UTF-16 anyway. Co-Authored-By: Nathan Sobo --- crates/buffer/Cargo.toml | 2 +- crates/buffer/src/lib.rs | 184 +++++++-------- crates/buffer/src/point.rs | 28 +-- crates/buffer/src/point_utf16.rs | 111 +++++++++ crates/buffer/src/rope.rs | 261 ++++++++++++++++++++-- crates/buffer/src/tests.rs | 17 +- crates/editor/src/display_map/fold_map.rs | 104 ++++----- crates/language/src/lib.rs | 67 +++--- crates/lsp/Cargo.toml | 2 +- crates/lsp/src/lib.rs | 1 - crates/sum_tree/src/cursor.rs | 4 + 11 files changed, 562 insertions(+), 219 deletions(-) create mode 100644 crates/buffer/src/point_utf16.rs diff --git a/crates/buffer/Cargo.toml b/crates/buffer/Cargo.toml index e4112c20d5a4c8ecf95d697ecdc2412a92d4b5d6..f6d949c05f47fbc0305d1ab8790f311b06f253ca 100644 --- a/crates/buffer/Cargo.toml +++ b/crates/buffer/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "buffer" version = "0.1.0" -edition = "2018" +edition = "2021" [features] test-support = ["rand", "seahash"] diff --git a/crates/buffer/src/lib.rs b/crates/buffer/src/lib.rs index a8d4981ed755ce64108e22d54889bb244b02addd..301cc1478c649401c761f990e743024a6387100f 100644 --- a/crates/buffer/src/lib.rs +++ b/crates/buffer/src/lib.rs @@ -1,6 +1,7 @@ mod anchor; mod operation_queue; mod point; +mod point_utf16; #[cfg(any(test, feature = "test-support"))] pub mod random_char_iter; pub mod rope; @@ -13,8 +14,10 @@ use anyhow::{anyhow, Result}; use clock::ReplicaId; use operation_queue::OperationQueue; pub use point::*; +pub use point_utf16::*; #[cfg(any(test, feature = "test-support"))] pub use random_char_iter::*; +use rope::TextDimension; pub use rope::{Chunks, Rope, TextSummary}; use rpc::proto; pub use selection::*; @@ -309,41 +312,34 @@ impl UndoMap { } } -struct Edits<'a, F: FnMut(&FragmentSummary) -> bool> { - visible_text: &'a Rope, - deleted_text: &'a Rope, - cursor: Option>, +struct Edits<'a, D: TextDimension<'a>, F: FnMut(&FragmentSummary) -> bool> { + visible_cursor: rope::Cursor<'a>, + deleted_cursor: rope::Cursor<'a>, + fragments_cursor: Option>, undos: &'a UndoMap, since: clock::Global, - old_offset: usize, - new_offset: usize, - old_point: Point, - new_point: Point, + old_end: D, + new_end: D, } #[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct Edit { - pub old_bytes: Range, - pub new_bytes: Range, - pub old_lines: Range, - pub new_lines: Range, +pub struct Edit { + pub old: Range, + pub new: Range, } -impl Edit { - pub fn delta(&self) -> isize { - self.inserted_bytes() as isize - self.deleted_bytes() as isize - } - - pub fn deleted_bytes(&self) -> usize { - self.old_bytes.end - self.old_bytes.start - } - - pub fn inserted_bytes(&self) -> usize { - self.new_bytes.end - self.new_bytes.start - } - - pub fn deleted_lines(&self) -> Point { - self.old_lines.end - self.old_lines.start +impl Edit<(D1, D2)> { + pub fn flatten(self) -> (Edit, Edit) { + ( + Edit { + old: self.old.start.0..self.old.end.0, + new: self.new.start.0..self.new.end.0, + }, + Edit { + old: self.old.start.1..self.old.end.1, + new: self.new.start.1..self.new.end.1, + }, + ) } } @@ -1369,7 +1365,10 @@ impl Buffer { }) } - pub fn edits_since<'a>(&'a self, since: clock::Global) -> impl 'a + Iterator { + pub fn edits_since<'a, D>(&'a self, since: clock::Global) -> impl 'a + Iterator> + where + D: 'a + TextDimension<'a> + Ord, + { self.content().edits_since(since) } } @@ -1589,11 +1588,11 @@ impl Snapshot { } pub fn to_offset(&self, point: Point) -> usize { - self.visible_text.to_offset(point) + self.visible_text.point_to_offset(point) } pub fn to_point(&self, offset: usize) -> Point { - self.visible_text.to_point(offset) + self.visible_text.offset_to_point(offset) } pub fn anchor_before(&self, position: T) -> Anchor { @@ -1604,7 +1603,10 @@ impl Snapshot { self.content().anchor_at(position, Bias::Right) } - pub fn edits_since<'a>(&'a self, since: clock::Global) -> impl 'a + Iterator { + pub fn edits_since<'a, D>(&'a self, since: clock::Global) -> impl 'a + Iterator> + where + D: 'a + TextDimension<'a> + Ord, + { self.content().edits_since(since) } @@ -1756,7 +1758,7 @@ impl<'a> Content<'a> { } else { 0 }; - summary += rope_cursor.summary(cursor.start().1 + overshoot); + summary += rope_cursor.summary::(cursor.start().1 + overshoot); (summary.clone(), value) }) } @@ -1785,7 +1787,7 @@ impl<'a> Content<'a> { } else { 0 }; - summary += rope_cursor.summary(cursor.start().1 + overshoot); + summary += rope_cursor.summary::(cursor.start().1 + overshoot); let start_summary = summary.clone(); cursor.seek_forward(&VersionedFullOffset::Offset(*end_offset), *end_bias, &cx); @@ -1794,7 +1796,7 @@ impl<'a> Content<'a> { } else { 0 }; - summary += rope_cursor.summary(cursor.start().1 + overshoot); + summary += rope_cursor.summary::(cursor.start().1 + overshoot); let end_summary = summary.clone(); (start_summary..end_summary, value) @@ -1921,6 +1923,10 @@ impl<'a> Content<'a> { self.visible_text.clip_point(point, bias) } + pub fn clip_point_utf16(&self, point: PointUtf16, bias: Bias) -> PointUtf16 { + self.visible_text.clip_point_utf16(point, bias) + } + fn point_for_offset(&self, offset: usize) -> Result { if offset <= self.len() { Ok(self.text_summary_for_range(0..offset).lines) @@ -1930,9 +1936,12 @@ impl<'a> Content<'a> { } // TODO: take a reference to clock::Global. - pub fn edits_since(&self, since: clock::Global) -> impl 'a + Iterator { + pub fn edits_since(&self, since: clock::Global) -> impl 'a + Iterator> + where + D: 'a + TextDimension<'a> + Ord, + { let since_2 = since.clone(); - let cursor = if since == *self.version { + let fragments_cursor = if since == *self.version { None } else { Some(self.fragments.filter( @@ -1942,15 +1951,13 @@ impl<'a> Content<'a> { }; Edits { - visible_text: &self.visible_text, - deleted_text: &self.deleted_text, - cursor, + visible_cursor: self.visible_text.cursor(0), + deleted_cursor: self.deleted_text.cursor(0), + fragments_cursor, undos: &self.undo_map, since, - old_offset: 0, - new_offset: 0, - old_point: Point::zero(), - new_point: Point::zero(), + old_end: Default::default(), + new_end: Default::default(), } } } @@ -2008,70 +2015,61 @@ impl<'a> RopeBuilder<'a> { } } -impl<'a, F: FnMut(&FragmentSummary) -> bool> Iterator for Edits<'a, F> { - type Item = Edit; +impl<'a, D: TextDimension<'a> + Ord, F: FnMut(&FragmentSummary) -> bool> Iterator + for Edits<'a, D, F> +{ + type Item = Edit; fn next(&mut self) -> Option { - let mut change: Option = None; - let cursor = self.cursor.as_mut()?; + let mut pending_edit: Option> = None; + let cursor = self.fragments_cursor.as_mut()?; while let Some(fragment) = cursor.item() { - let bytes = cursor.start().visible - self.new_offset; - let lines = self.visible_text.to_point(cursor.start().visible) - self.new_point; - self.old_offset += bytes; - self.old_point += &lines; - self.new_offset += bytes; - self.new_point += &lines; + let summary = self.visible_cursor.summary(cursor.start().visible); + self.old_end.add_assign(&summary); + self.new_end.add_assign(&summary); + if pending_edit + .as_ref() + .map_or(false, |change| change.new.end < self.new_end) + { + break; + } if !fragment.was_visible(&self.since, &self.undos) && fragment.visible { - let fragment_lines = - self.visible_text.to_point(self.new_offset + fragment.len) - self.new_point; - if let Some(ref mut change) = change { - if change.new_bytes.end == self.new_offset { - change.new_bytes.end += fragment.len; - change.new_lines.end += fragment_lines; - } else { - break; - } + let fragment_summary = self.visible_cursor.summary(cursor.end(&None).visible); + let mut new_end = self.new_end.clone(); + new_end.add_assign(&fragment_summary); + if let Some(pending_edit) = pending_edit.as_mut() { + pending_edit.new.end = new_end.clone(); } else { - change = Some(Edit { - old_bytes: self.old_offset..self.old_offset, - new_bytes: self.new_offset..self.new_offset + fragment.len, - old_lines: self.old_point..self.old_point, - new_lines: self.new_point..self.new_point + fragment_lines, + pending_edit = Some(Edit { + old: self.old_end.clone()..self.old_end.clone(), + new: self.new_end.clone()..new_end.clone(), }); } - self.new_offset += fragment.len; - self.new_point += &fragment_lines; + self.new_end = new_end; } else if fragment.was_visible(&self.since, &self.undos) && !fragment.visible { - let deleted_start = cursor.start().deleted; - let fragment_lines = self.deleted_text.to_point(deleted_start + fragment.len) - - self.deleted_text.to_point(deleted_start); - if let Some(ref mut change) = change { - if change.new_bytes.end == self.new_offset { - change.old_bytes.end += fragment.len; - change.old_lines.end += &fragment_lines; - } else { - break; - } + self.deleted_cursor.seek_forward(cursor.start().deleted); + let fragment_summary = self.deleted_cursor.summary(cursor.end(&None).deleted); + let mut old_end = self.old_end.clone(); + old_end.add_assign(&fragment_summary); + if let Some(pending_edit) = pending_edit.as_mut() { + pending_edit.old.end = old_end.clone(); } else { - change = Some(Edit { - old_bytes: self.old_offset..self.old_offset + fragment.len, - new_bytes: self.new_offset..self.new_offset, - old_lines: self.old_point..self.old_point + &fragment_lines, - new_lines: self.new_point..self.new_point, + pending_edit = Some(Edit { + old: self.old_end.clone()..old_end.clone(), + new: self.new_end.clone()..self.new_end.clone(), }); } - self.old_offset += fragment.len; - self.old_point += &fragment_lines; + self.old_end = old_end; } cursor.next(&None); } - change + pending_edit } } @@ -2531,7 +2529,13 @@ pub trait ToOffset { impl ToOffset for Point { fn to_offset<'a>(&self, content: impl Into>) -> usize { - content.into().visible_text.to_offset(*self) + content.into().visible_text.point_to_offset(*self) + } +} + +impl ToOffset for PointUtf16 { + fn to_offset<'a>(&self, content: impl Into>) -> usize { + content.into().visible_text.point_utf16_to_offset(*self) } } @@ -2566,7 +2570,7 @@ impl ToPoint for Anchor { impl ToPoint for usize { fn to_point<'a>(&self, content: impl Into>) -> Point { - content.into().visible_text.to_point(*self) + content.into().visible_text.offset_to_point(*self) } } diff --git a/crates/buffer/src/point.rs b/crates/buffer/src/point.rs index a2da4e4f6ce245a1cf7198f7fa1bae0f1d622fe6..5e62176956cfb378089b465e6778425cc40ec183 100644 --- a/crates/buffer/src/point.rs +++ b/crates/buffer/src/point.rs @@ -32,11 +32,7 @@ impl<'a> Add<&'a Self> for Point { type Output = Point; fn add(self, other: &'a Self) -> Self::Output { - if other.row == 0 { - Point::new(self.row, self.column + other.column) - } else { - Point::new(self.row + other.row, other.column) - } + self + *other } } @@ -44,7 +40,11 @@ impl Add for Point { type Output = Point; fn add(self, other: Self) -> Self::Output { - self + &other + if other.row == 0 { + Point::new(self.row, self.column + other.column) + } else { + Point::new(self.row + other.row, other.column) + } } } @@ -52,13 +52,7 @@ impl<'a> Sub<&'a Self> for Point { type Output = Point; fn sub(self, other: &'a Self) -> Self::Output { - debug_assert!(*other <= self); - - if self.row == other.row { - Point::new(0, self.column - other.column) - } else { - Point::new(self.row - other.row, self.column) - } + self - *other } } @@ -66,7 +60,13 @@ impl Sub for Point { type Output = Point; fn sub(self, other: Self) -> Self::Output { - self - &other + debug_assert!(other <= self); + + if self.row == other.row { + Point::new(0, self.column - other.column) + } else { + Point::new(self.row - other.row, self.column) + } } } diff --git a/crates/buffer/src/point_utf16.rs b/crates/buffer/src/point_utf16.rs new file mode 100644 index 0000000000000000000000000000000000000000..22b895a2c009b0d38ee8b82c9d1e5f1401578b8d --- /dev/null +++ b/crates/buffer/src/point_utf16.rs @@ -0,0 +1,111 @@ +use std::{ + cmp::Ordering, + ops::{Add, AddAssign, Sub}, +}; + +#[derive(Clone, Copy, Default, Eq, PartialEq, Debug, Hash)] +pub struct PointUtf16 { + pub row: u32, + pub column: u32, +} + +impl PointUtf16 { + pub const MAX: Self = Self { + row: u32::MAX, + column: u32::MAX, + }; + + pub fn new(row: u32, column: u32) -> Self { + PointUtf16 { row, column } + } + + pub fn zero() -> Self { + PointUtf16::new(0, 0) + } + + pub fn is_zero(&self) -> bool { + self.row == 0 && self.column == 0 + } +} + +impl<'a> Add<&'a Self> for PointUtf16 { + type Output = PointUtf16; + + fn add(self, other: &'a Self) -> Self::Output { + self + *other + } +} + +impl Add for PointUtf16 { + type Output = PointUtf16; + + fn add(self, other: Self) -> Self::Output { + if other.row == 0 { + PointUtf16::new(self.row, self.column + other.column) + } else { + PointUtf16::new(self.row + other.row, other.column) + } + } +} + +impl<'a> Sub<&'a Self> for PointUtf16 { + type Output = PointUtf16; + + fn sub(self, other: &'a Self) -> Self::Output { + self - *other + } +} + +impl Sub for PointUtf16 { + type Output = PointUtf16; + + fn sub(self, other: Self) -> Self::Output { + debug_assert!(other <= self); + + if self.row == other.row { + PointUtf16::new(0, self.column - other.column) + } else { + PointUtf16::new(self.row - other.row, self.column) + } + } +} + +impl<'a> AddAssign<&'a Self> for PointUtf16 { + fn add_assign(&mut self, other: &'a Self) { + *self += *other; + } +} + +impl AddAssign for PointUtf16 { + fn add_assign(&mut self, other: Self) { + if other.row == 0 { + self.column += other.column; + } else { + self.row += other.row; + self.column = other.column; + } + } +} + +impl PartialOrd for PointUtf16 { + fn partial_cmp(&self, other: &PointUtf16) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for PointUtf16 { + #[cfg(target_pointer_width = "64")] + fn cmp(&self, other: &PointUtf16) -> Ordering { + let a = (self.row as usize) << 32 | self.column as usize; + let b = (other.row as usize) << 32 | other.column as usize; + a.cmp(&b) + } + + #[cfg(target_pointer_width = "32")] + fn cmp(&self, other: &PointUtf16) -> Ordering { + match self.row.cmp(&other.row) { + Ordering::Equal => self.column.cmp(&other.column), + comparison @ _ => comparison, + } + } +} diff --git a/crates/buffer/src/rope.rs b/crates/buffer/src/rope.rs index a1c57140025c0d8465a908118f8be0c168d85100..3cf43bd16025f408ad16dfc79181ad64dbc49a89 100644 --- a/crates/buffer/src/rope.rs +++ b/crates/buffer/src/rope.rs @@ -1,8 +1,10 @@ +use crate::PointUtf16; + use super::Point; use arrayvec::ArrayString; use smallvec::SmallVec; use std::{cmp, ops::Range, str}; -use sum_tree::{Bias, SumTree}; +use sum_tree::{Bias, Dimension, SumTree}; #[cfg(test)] const CHUNK_BASE: usize = 6; @@ -136,7 +138,7 @@ impl Rope { Chunks::new(self, range, true) } - pub fn to_point(&self, offset: usize) -> Point { + pub fn offset_to_point(&self, offset: usize) -> Point { assert!(offset <= self.summary().bytes); let mut cursor = self.chunks.cursor::<(usize, Point)>(); cursor.seek(&offset, Bias::Left, &()); @@ -144,15 +146,40 @@ impl Rope { cursor.start().1 + cursor .item() - .map_or(Point::zero(), |chunk| chunk.to_point(overshoot)) + .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot)) + } + + pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 { + assert!(offset <= self.summary().bytes); + let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>(); + cursor.seek(&offset, Bias::Left, &()); + let overshoot = offset - cursor.start().0; + cursor.start().1 + + cursor.item().map_or(PointUtf16::zero(), |chunk| { + chunk.offset_to_point_utf16(overshoot) + }) } - pub fn to_offset(&self, point: Point) -> usize { + pub fn point_to_offset(&self, point: Point) -> usize { assert!(point <= self.summary().lines); let mut cursor = self.chunks.cursor::<(Point, usize)>(); cursor.seek(&point, Bias::Left, &()); let overshoot = point - cursor.start().0; - cursor.start().1 + cursor.item().map_or(0, |chunk| chunk.to_offset(overshoot)) + cursor.start().1 + + cursor + .item() + .map_or(0, |chunk| chunk.point_to_offset(overshoot)) + } + + pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize { + assert!(point <= self.summary().lines_utf16); + let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>(); + cursor.seek(&point, Bias::Left, &()); + let overshoot = point - cursor.start().0; + cursor.start().1 + + cursor + .item() + .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot)) } pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize { @@ -188,6 +215,17 @@ impl Rope { self.summary().lines } } + + pub fn clip_point_utf16(&self, point: PointUtf16, bias: Bias) -> PointUtf16 { + let mut cursor = self.chunks.cursor::(); + cursor.seek(&point, Bias::Right, &()); + if let Some(chunk) = cursor.item() { + let overshoot = point - cursor.start(); + *cursor.start() + chunk.clip_point_utf16(overshoot, bias) + } else { + self.summary().lines_utf16 + } + } } impl<'a> From<&'a str> for Rope { @@ -258,22 +296,24 @@ impl<'a> Cursor<'a> { slice } - pub fn summary(&mut self, end_offset: usize) -> TextSummary { + pub fn summary>(&mut self, end_offset: usize) -> D { debug_assert!(end_offset >= self.offset); - let mut summary = TextSummary::default(); + let mut summary = D::default(); if let Some(start_chunk) = self.chunks.item() { let start_ix = self.offset - self.chunks.start(); let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start(); - summary = TextSummary::from(&start_chunk.0[start_ix..end_ix]); + summary.add_assign(&D::from_summary(&TextSummary::from( + &start_chunk.0[start_ix..end_ix], + ))); } if end_offset > self.chunks.end(&()) { self.chunks.next(&()); - summary += &self.chunks.summary(&end_offset, Bias::Right, &()); + summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &())); if let Some(end_chunk) = self.chunks.item() { let end_ix = end_offset - self.chunks.start(); - summary += TextSummary::from(&end_chunk.0[..end_ix]); + summary.add_assign(&D::from_summary(&TextSummary::from(&end_chunk.0[..end_ix]))); } } @@ -375,7 +415,7 @@ impl<'a> Iterator for Chunks<'a> { struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>); impl Chunk { - fn to_point(&self, target: usize) -> Point { + fn offset_to_point(&self, target: usize) -> Point { let mut offset = 0; let mut point = Point::new(0, 0); for ch in self.0.chars() { @@ -394,7 +434,26 @@ impl Chunk { point } - fn to_offset(&self, target: Point) -> usize { + fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 { + let mut offset = 0; + let mut point = PointUtf16::new(0, 0); + for ch in self.0.chars() { + if offset >= target { + break; + } + + if ch == '\n' { + point.row += 1; + point.column = 0; + } else { + point.column += ch.len_utf16() as u32; + } + offset += ch.len_utf8(); + } + point + } + + fn point_to_offset(&self, target: Point) -> usize { let mut offset = 0; let mut point = Point::new(0, 0); for ch in self.0.chars() { @@ -416,6 +475,28 @@ impl Chunk { offset } + fn point_utf16_to_offset(&self, target: PointUtf16) -> usize { + let mut offset = 0; + let mut point = PointUtf16::new(0, 0); + for ch in self.0.chars() { + if point >= target { + if point > target { + panic!("point {:?} is inside of character {:?}", target, ch); + } + break; + } + + if ch == '\n' { + point.row += 1; + point.column = 0; + } else { + point.column += ch.len_utf16() as u32; + } + offset += ch.len_utf8(); + } + offset + } + fn clip_point(&self, target: Point, bias: Bias) -> Point { for (row, line) in self.0.split('\n').enumerate() { if row == target.row as usize { @@ -431,6 +512,23 @@ impl Chunk { } unreachable!() } + + fn clip_point_utf16(&self, target: PointUtf16, bias: Bias) -> PointUtf16 { + for (row, line) in self.0.split('\n').enumerate() { + if row == target.row as usize { + let mut code_units = line.encode_utf16(); + let mut column = code_units.by_ref().take(target.column as usize).count(); + if char::decode_utf16(code_units).next().transpose().is_err() { + match bias { + Bias::Left => column -= 1, + Bias::Right => column += 1, + } + } + return PointUtf16::new(row as u32, column as u32); + } + } + unreachable!() + } } impl sum_tree::Item for Chunk { @@ -445,6 +543,7 @@ impl sum_tree::Item for Chunk { pub struct TextSummary { pub bytes: usize, pub lines: Point, + pub lines_utf16: PointUtf16, pub first_line_chars: u32, pub last_line_chars: u32, pub longest_row: u32, @@ -454,17 +553,19 @@ pub struct TextSummary { impl<'a> From<&'a str> for TextSummary { fn from(text: &'a str) -> Self { let mut lines = Point::new(0, 0); + let mut lines_utf16 = PointUtf16::new(0, 0); let mut first_line_chars = 0; let mut last_line_chars = 0; let mut longest_row = 0; let mut longest_row_chars = 0; for c in text.chars() { if c == '\n' { - lines.row += 1; - lines.column = 0; + lines += Point::new(1, 0); + lines_utf16 += PointUtf16::new(1, 0); last_line_chars = 0; } else { lines.column += c.len_utf8() as u32; + lines_utf16.column += c.len_utf16() as u32; last_line_chars += 1; } @@ -481,6 +582,7 @@ impl<'a> From<&'a str> for TextSummary { TextSummary { bytes: text.len(), lines, + lines_utf16, first_line_chars, last_line_chars, longest_row, @@ -520,7 +622,8 @@ impl<'a> std::ops::AddAssign<&'a Self> for TextSummary { } self.bytes += other.bytes; - self.lines += &other.lines; + self.lines += other.lines; + self.lines_utf16 += other.lines_utf16; } } @@ -530,15 +633,77 @@ impl std::ops::AddAssign for TextSummary { } } +pub trait TextDimension<'a>: Dimension<'a, TextSummary> { + fn from_summary(summary: &TextSummary) -> Self; + fn add_assign(&mut self, other: &Self); +} + +impl<'a, D1: TextDimension<'a>, D2: TextDimension<'a>> TextDimension<'a> for (D1, D2) { + fn from_summary(summary: &TextSummary) -> Self { + (D1::from_summary(summary), D2::from_summary(summary)) + } + + fn add_assign(&mut self, other: &Self) { + self.0.add_assign(&other.0); + self.1.add_assign(&other.1); + } +} + +impl<'a> TextDimension<'a> for TextSummary { + fn from_summary(summary: &TextSummary) -> Self { + summary.clone() + } + + fn add_assign(&mut self, other: &Self) { + *self += other; + } +} + impl<'a> sum_tree::Dimension<'a, TextSummary> for usize { fn add_summary(&mut self, summary: &'a TextSummary, _: &()) { *self += summary.bytes; } } +impl<'a> TextDimension<'a> for usize { + fn from_summary(summary: &TextSummary) -> Self { + summary.bytes + } + + fn add_assign(&mut self, other: &Self) { + *self += other; + } +} + impl<'a> sum_tree::Dimension<'a, TextSummary> for Point { fn add_summary(&mut self, summary: &'a TextSummary, _: &()) { - *self += &summary.lines; + *self += summary.lines; + } +} + +impl<'a> TextDimension<'a> for Point { + fn from_summary(summary: &TextSummary) -> Self { + summary.lines + } + + fn add_assign(&mut self, other: &Self) { + *self += other; + } +} + +impl<'a> sum_tree::Dimension<'a, TextSummary> for PointUtf16 { + fn add_summary(&mut self, summary: &'a TextSummary, _: &()) { + *self += summary.lines_utf16; + } +} + +impl<'a> TextDimension<'a> for PointUtf16 { + fn from_summary(summary: &TextSummary) -> Self { + summary.lines_utf16 + } + + fn add_assign(&mut self, other: &Self) { + *self += other; } } @@ -577,6 +742,41 @@ mod tests { assert_eq!(rope.text(), text); } + #[test] + fn test_clip() { + let rope = Rope::from("🧘"); + + assert_eq!(rope.clip_offset(1, Bias::Left), 0); + assert_eq!(rope.clip_offset(1, Bias::Right), 4); + assert_eq!(rope.clip_offset(5, Bias::Right), 4); + + assert_eq!( + rope.clip_point(Point::new(0, 1), Bias::Left), + Point::new(0, 0) + ); + assert_eq!( + rope.clip_point(Point::new(0, 1), Bias::Right), + Point::new(0, 4) + ); + assert_eq!( + rope.clip_point(Point::new(0, 5), Bias::Right), + Point::new(0, 4) + ); + + assert_eq!( + rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Left), + PointUtf16::new(0, 0) + ); + assert_eq!( + rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Right), + PointUtf16::new(0, 2) + ); + assert_eq!( + rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right), + PointUtf16::new(0, 2) + ); + } + #[gpui::test(iterations = 100)] fn test_random(mut rng: StdRng) { let operations = env::var("OPERATIONS") @@ -624,14 +824,33 @@ mod tests { } let mut point = Point::new(0, 0); + let mut point_utf16 = PointUtf16::new(0, 0); for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) { - assert_eq!(actual.to_point(ix), point, "to_point({})", ix); - assert_eq!(actual.to_offset(point), ix, "to_offset({:?})", point); + assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix); + assert_eq!( + actual.offset_to_point_utf16(ix), + point_utf16, + "offset_to_point_utf16({})", + ix + ); + assert_eq!( + actual.point_to_offset(point), + ix, + "point_to_offset({:?})", + point + ); + assert_eq!( + actual.point_utf16_to_offset(point_utf16), + ix, + "point_utf16_to_offset({:?})", + point_utf16 + ); if ch == '\n' { - point.row += 1; - point.column = 0 + point += Point::new(1, 0); + point_utf16 += PointUtf16::new(1, 0); } else { point.column += ch.len_utf8() as u32; + point_utf16.column += ch.len_utf16() as u32; } } @@ -639,7 +858,7 @@ mod tests { let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right); let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left); assert_eq!( - actual.cursor(start_ix).summary(end_ix), + actual.cursor(start_ix).summary::(end_ix), TextSummary::from(&expected[start_ix..end_ix]) ); } diff --git a/crates/buffer/src/tests.rs b/crates/buffer/src/tests.rs index bce08ebf738925a31b19541186669cc0b6ac6f8f..5cbc36a8f51901a11be89c7514410d57eaad6a6c 100644 --- a/crates/buffer/src/tests.rs +++ b/crates/buffer/src/tests.rs @@ -78,7 +78,7 @@ fn test_random_edits(mut rng: StdRng) { for mut old_buffer in buffer_versions { let edits = buffer - .edits_since(old_buffer.version.clone()) + .edits_since::(old_buffer.version.clone()) .collect::>(); log::info!( @@ -88,12 +88,12 @@ fn test_random_edits(mut rng: StdRng) { edits, ); - let mut delta = 0_isize; for edit in edits { - let old_start = (edit.old_bytes.start as isize + delta) as usize; - let new_text: String = buffer.text_for_range(edit.new_bytes.clone()).collect(); - old_buffer.edit(Some(old_start..old_start + edit.deleted_bytes()), new_text); - delta += edit.delta(); + let new_text: String = buffer.text_for_range(edit.new.clone()).collect(); + old_buffer.edit( + Some(edit.new.start..edit.new.start + edit.old.len()), + new_text, + ); } assert_eq!(old_buffer.text(), buffer.text()); } @@ -123,6 +123,7 @@ fn test_text_summary_for_range() { TextSummary { bytes: 2, lines: Point::new(1, 0), + lines_utf16: PointUtf16::new(1, 0), first_line_chars: 1, last_line_chars: 0, longest_row: 0, @@ -134,6 +135,7 @@ fn test_text_summary_for_range() { TextSummary { bytes: 11, lines: Point::new(3, 0), + lines_utf16: PointUtf16::new(3, 0), first_line_chars: 1, last_line_chars: 0, longest_row: 2, @@ -145,6 +147,7 @@ fn test_text_summary_for_range() { TextSummary { bytes: 20, lines: Point::new(4, 1), + lines_utf16: PointUtf16::new(4, 1), first_line_chars: 2, last_line_chars: 1, longest_row: 3, @@ -156,6 +159,7 @@ fn test_text_summary_for_range() { TextSummary { bytes: 22, lines: Point::new(4, 3), + lines_utf16: PointUtf16::new(4, 3), first_line_chars: 2, last_line_chars: 3, longest_row: 3, @@ -167,6 +171,7 @@ fn test_text_summary_for_range() { TextSummary { bytes: 15, lines: Point::new(2, 3), + lines_utf16: PointUtf16::new(2, 3), first_line_chars: 4, last_line_chars: 3, longest_row: 1, diff --git a/crates/editor/src/display_map/fold_map.rs b/crates/editor/src/display_map/fold_map.rs index a2e6b150e7f81857860529f9ba19c241260aaf25..5ff2d3db6b35ea22943c44d12faf907624707bfd 100644 --- a/crates/editor/src/display_map/fold_map.rs +++ b/crates/editor/src/display_map/fold_map.rs @@ -1,6 +1,7 @@ use gpui::{AppContext, ModelHandle}; use language::{ - Anchor, AnchorRangeExt, Buffer, HighlightId, HighlightedChunk, Point, TextSummary, ToOffset, + Anchor, AnchorRangeExt, Buffer, HighlightId, HighlightedChunk, Point, PointUtf16, TextSummary, + ToOffset, }; use parking_lot::Mutex; use std::{ @@ -112,9 +113,8 @@ impl<'a> FoldMapWriter<'a> { let fold = Fold(buffer.anchor_after(range.start)..buffer.anchor_before(range.end)); folds.push(fold); edits.push(buffer::Edit { - old_bytes: range.clone(), - new_bytes: range.clone(), - ..Default::default() + old: range.clone(), + new: range, }); } } @@ -157,9 +157,8 @@ impl<'a> FoldMapWriter<'a> { while let Some(fold) = folds_cursor.item() { let offset_range = fold.0.start.to_offset(&buffer)..fold.0.end.to_offset(&buffer); edits.push(buffer::Edit { - old_bytes: offset_range.clone(), - new_bytes: offset_range, - ..Default::default() + old: offset_range.clone(), + new: offset_range, }); fold_ixs_to_delete.push(*folds_cursor.start()); folds_cursor.next(&buffer); @@ -288,7 +287,11 @@ impl FoldMap { } } - fn apply_edits(&self, buffer_edits: Vec, cx: &AppContext) -> Vec { + fn apply_edits( + &self, + buffer_edits: Vec>, + cx: &AppContext, + ) -> Vec { let buffer = self.buffer.read(cx).snapshot(); let mut buffer_edits_iter = buffer_edits.iter().cloned().peekable(); @@ -298,28 +301,28 @@ impl FoldMap { cursor.seek(&0, Bias::Right, &()); while let Some(mut edit) = buffer_edits_iter.next() { - new_transforms.push_tree(cursor.slice(&edit.old_bytes.start, Bias::Left, &()), &()); - edit.new_bytes.start -= edit.old_bytes.start - cursor.start(); - edit.old_bytes.start = *cursor.start(); + new_transforms.push_tree(cursor.slice(&edit.old.start, Bias::Left, &()), &()); + edit.new.start -= edit.old.start - cursor.start(); + edit.old.start = *cursor.start(); - cursor.seek(&edit.old_bytes.end, Bias::Right, &()); + cursor.seek(&edit.old.end, Bias::Right, &()); cursor.next(&()); - let mut delta = edit.delta(); + let mut delta = edit.new.len() as isize - edit.old.len() as isize; loop { - edit.old_bytes.end = *cursor.start(); + edit.old.end = *cursor.start(); if let Some(next_edit) = buffer_edits_iter.peek() { - if next_edit.old_bytes.start > edit.old_bytes.end { + if next_edit.old.start > edit.old.end { break; } let next_edit = buffer_edits_iter.next().unwrap(); - delta += next_edit.delta(); + delta += next_edit.new.len() as isize - next_edit.old.len() as isize; - if next_edit.old_bytes.end >= edit.old_bytes.end { - edit.old_bytes.end = next_edit.old_bytes.end; - cursor.seek(&edit.old_bytes.end, Bias::Right, &()); + if next_edit.old.end >= edit.old.end { + edit.old.end = next_edit.old.end; + cursor.seek(&edit.old.end, Bias::Right, &()); cursor.next(&()); } } else { @@ -327,10 +330,9 @@ impl FoldMap { } } - edit.new_bytes.end = - ((edit.new_bytes.start + edit.deleted_bytes()) as isize + delta) as usize; + edit.new.end = ((edit.new.start + edit.old.len()) as isize + delta) as usize; - let anchor = buffer.anchor_before(edit.new_bytes.start); + let anchor = buffer.anchor_before(edit.new.start); let mut folds_cursor = self.folds.cursor::(); folds_cursor.seek(&Fold(anchor..Anchor::max()), Bias::Left, &buffer); @@ -346,10 +348,7 @@ impl FoldMap { }) .peekable(); - while folds - .peek() - .map_or(false, |fold| fold.start < edit.new_bytes.end) - { + while folds.peek().map_or(false, |fold| fold.start < edit.new.end) { let mut fold = folds.next().unwrap(); let sum = new_transforms.summary(); @@ -382,13 +381,15 @@ impl FoldMap { if fold.end > fold.start { let output_text = "…"; let chars = output_text.chars().count() as u32; - let lines = super::Point::new(0, output_text.len() as u32); + let lines = Point::new(0, output_text.len() as u32); + let lines_utf16 = PointUtf16::new(0, output_text.encode_utf16().count() as u32); new_transforms.push( Transform { summary: TransformSummary { output: TextSummary { bytes: output_text.len(), lines, + lines_utf16, first_line_chars: chars, last_line_chars: chars, longest_row: 0, @@ -404,9 +405,8 @@ impl FoldMap { } let sum = new_transforms.summary(); - if sum.input.bytes < edit.new_bytes.end { - let text_summary = - buffer.text_summary_for_range(sum.input.bytes..edit.new_bytes.end); + if sum.input.bytes < edit.new.end { + let text_summary = buffer.text_summary_for_range(sum.input.bytes..edit.new.end); new_transforms.push( Transform { summary: TransformSummary { @@ -443,35 +443,35 @@ impl FoldMap { let mut new_transforms = new_transforms.cursor::<(usize, FoldOffset)>(); for mut edit in buffer_edits { - old_transforms.seek(&edit.old_bytes.start, Bias::Left, &()); + old_transforms.seek(&edit.old.start, Bias::Left, &()); if old_transforms.item().map_or(false, |t| t.is_fold()) { - edit.old_bytes.start = old_transforms.start().0; + edit.old.start = old_transforms.start().0; } let old_start = - old_transforms.start().1 .0 + (edit.old_bytes.start - old_transforms.start().0); + old_transforms.start().1 .0 + (edit.old.start - old_transforms.start().0); - old_transforms.seek_forward(&edit.old_bytes.end, Bias::Right, &()); + old_transforms.seek_forward(&edit.old.end, Bias::Right, &()); if old_transforms.item().map_or(false, |t| t.is_fold()) { old_transforms.next(&()); - edit.old_bytes.end = old_transforms.start().0; + edit.old.end = old_transforms.start().0; } let old_end = - old_transforms.start().1 .0 + (edit.old_bytes.end - old_transforms.start().0); + old_transforms.start().1 .0 + (edit.old.end - old_transforms.start().0); - new_transforms.seek(&edit.new_bytes.start, Bias::Left, &()); + new_transforms.seek(&edit.new.start, Bias::Left, &()); if new_transforms.item().map_or(false, |t| t.is_fold()) { - edit.new_bytes.start = new_transforms.start().0; + edit.new.start = new_transforms.start().0; } let new_start = - new_transforms.start().1 .0 + (edit.new_bytes.start - new_transforms.start().0); + new_transforms.start().1 .0 + (edit.new.start - new_transforms.start().0); - new_transforms.seek_forward(&edit.new_bytes.end, Bias::Right, &()); + new_transforms.seek_forward(&edit.new.end, Bias::Right, &()); if new_transforms.item().map_or(false, |t| t.is_fold()) { new_transforms.next(&()); - edit.new_bytes.end = new_transforms.start().0; + edit.new.end = new_transforms.start().0; } let new_end = - new_transforms.start().1 .0 + (edit.new_bytes.end - new_transforms.start().0); + new_transforms.start().1 .0 + (edit.new.end - new_transforms.start().0); fold_edits.push(FoldEdit { old_bytes: FoldOffset(old_start)..FoldOffset(old_end), @@ -748,22 +748,22 @@ where ) } -fn consolidate_buffer_edits(edits: &mut Vec) { +fn consolidate_buffer_edits(edits: &mut Vec>) { edits.sort_unstable_by(|a, b| { - a.old_bytes + a.old .start - .cmp(&b.old_bytes.start) - .then_with(|| b.old_bytes.end.cmp(&a.old_bytes.end)) + .cmp(&b.old.start) + .then_with(|| b.old.end.cmp(&a.old.end)) }); let mut i = 1; while i < edits.len() { let edit = edits[i].clone(); let prev_edit = &mut edits[i - 1]; - if prev_edit.old_bytes.end >= edit.old_bytes.start { - prev_edit.old_bytes.end = prev_edit.old_bytes.end.max(edit.old_bytes.end); - prev_edit.new_bytes.start = prev_edit.new_bytes.start.min(edit.new_bytes.start); - prev_edit.new_bytes.end = prev_edit.new_bytes.end.max(edit.new_bytes.end); + if prev_edit.old.end >= edit.old.start { + prev_edit.old.end = prev_edit.old.end.max(edit.old.end); + prev_edit.new.start = prev_edit.new.start.min(edit.new.start); + prev_edit.new.end = prev_edit.new.end.max(edit.new.end); edits.remove(i); continue; } @@ -1343,7 +1343,9 @@ mod tests { let start_version = buffer.version.clone(); let edit_count = rng.gen_range(1..=5); buffer.randomly_edit(&mut rng, edit_count); - buffer.edits_since(start_version).collect::>() + buffer + .edits_since::(start_version) + .collect::>() }); log::info!("editing {:?}", edits); } diff --git a/crates/language/src/lib.rs b/crates/language/src/lib.rs index 96ca701fb90083779a60481bf33fc7789037c582..0b48b698a4f6e18f722ecc822319abec95fafe97 100644 --- a/crates/language/src/lib.rs +++ b/crates/language/src/lib.rs @@ -359,13 +359,19 @@ impl Buffer { ), content_changes: snapshot .buffer_snapshot - .edits_since( + .edits_since::<(PointUtf16, usize)>( prev_snapshot.buffer_snapshot.version().clone(), ) .map(|edit| { - // TODO: Use UTF-16 positions. - let edit_start = edit.new_lines.start; - let edit_end = edit_start + edit.deleted_lines(); + let edit_start = edit.new.start.0; + let edit_end = edit_start + + (edit.old.end.0 - edit.old.start.0); + let new_text = snapshot + .buffer_snapshot + .text_for_range( + edit.new.start.1..edit.new.end.1, + ) + .collect(); lsp::TextDocumentContentChangeEvent { range: Some(lsp::Range::new( lsp::Position::new( @@ -378,10 +384,7 @@ impl Buffer { ), )), range_length: None, - text: snapshot - .buffer_snapshot - .text_for_range(edit.new_bytes) - .collect(), + text: new_text, } }) .collect(), @@ -613,22 +616,17 @@ impl Buffer { } fn interpolate_tree(&self, tree: &mut SyntaxTree) { - let mut delta = 0_isize; - for edit in self.edits_since(tree.version.clone()) { - let start_offset = (edit.old_bytes.start as isize + delta) as usize; - let start_point = self.as_rope().to_point(start_offset); + for edit in self.edits_since::<(usize, Point)>(tree.version.clone()) { + let (bytes, lines) = edit.flatten(); tree.tree.edit(&InputEdit { - start_byte: start_offset, - old_end_byte: start_offset + edit.deleted_bytes(), - new_end_byte: start_offset + edit.inserted_bytes(), - start_position: start_point.to_ts_point(), - old_end_position: (start_point + edit.deleted_lines()).to_ts_point(), - new_end_position: self - .as_rope() - .to_point(start_offset + edit.inserted_bytes()) + start_byte: bytes.new.start, + old_end_byte: bytes.new.start + bytes.old.len(), + new_end_byte: bytes.new.end, + start_position: lines.new.start.to_ts_point(), + old_end_position: (lines.new.start + (lines.old.end - lines.old.start)) .to_ts_point(), + new_end_position: lines.new.end.to_ts_point(), }); - delta += edit.inserted_bytes() as isize - edit.deleted_bytes() as isize; } tree.version = self.version(); } @@ -673,21 +671,22 @@ impl Buffer { diagnostics.sort_unstable_by_key(|d| (d.range.start, d.range.end)); self.diagnostics = { - let mut edits_since_save = content.edits_since(self.saved_version.clone()).peekable(); - let mut last_edit_old_end = Point::zero(); - let mut last_edit_new_end = Point::zero(); + let mut edits_since_save = content + .edits_since::(self.saved_version.clone()) + .peekable(); + let mut last_edit_old_end = PointUtf16::zero(); + let mut last_edit_new_end = PointUtf16::zero(); content.anchor_range_multimap( Bias::Left, Bias::Right, diagnostics.into_iter().filter_map(|diagnostic| { - // TODO: Use UTF-16 positions. - let mut start = Point::new( + let mut start = PointUtf16::new( diagnostic.range.start.line, diagnostic.range.start.character, ); let mut end = - Point::new(diagnostic.range.end.line, diagnostic.range.end.character); + PointUtf16::new(diagnostic.range.end.line, diagnostic.range.end.character); let severity = diagnostic.severity.unwrap_or(DiagnosticSeverity::ERROR); if diagnostic @@ -696,11 +695,11 @@ impl Buffer { .map_or(false, |source| disk_based_sources.contains(source)) { while let Some(edit) = edits_since_save.peek() { - if edit.old_lines.end <= start { - last_edit_old_end = edit.old_lines.end; - last_edit_new_end = edit.new_lines.end; + if edit.old.end <= start { + last_edit_old_end = edit.old.end; + last_edit_new_end = edit.new.end; edits_since_save.next(); - } else if edit.old_lines.start <= end && edit.old_lines.end >= start { + } else if edit.old.start <= end && edit.old.end >= start { return None; } else { break; @@ -711,8 +710,8 @@ impl Buffer { end = last_edit_new_end + (end - last_edit_old_end); } - let range = - content.clip_point(start, Bias::Left)..content.clip_point(end, Bias::Right); + let range = content.clip_point_utf16(start, Bias::Left) + ..content.clip_point_utf16(end, Bias::Right); Some((range, (severity, diagnostic.message))) }), ) @@ -1223,7 +1222,7 @@ impl Buffer { was_dirty: bool, cx: &mut ModelContext, ) { - if self.edits_since(old_version).next().is_none() { + if self.edits_since::(old_version).next().is_none() { return; } diff --git a/crates/lsp/Cargo.toml b/crates/lsp/Cargo.toml index 08c48b7ec35ca951f9cceac1bdbe8e593fcb0f18..263eed76fb9d515e0194835a94bcf9c79c08d909 100644 --- a/crates/lsp/Cargo.toml +++ b/crates/lsp/Cargo.toml @@ -13,7 +13,7 @@ anyhow = "1.0" async-pipe = { git = "https://github.com/routerify/async-pipe-rs", rev = "feeb77e83142a9ff837d0767652ae41bfc5d8e47", optional = true } futures = "0.3" log = "0.4" -lsp-types = { version = "0.91", features = ["proposed"] } +lsp-types = "0.91" parking_lot = "0.11" postage = { version = "0.4.1", features = ["futures-traits"] } serde = { version = "1.0", features = ["derive"] } diff --git a/crates/lsp/src/lib.rs b/crates/lsp/src/lib.rs index 1d6ed4439460fdb99840ffed825cf9cf966776fa..642595c5adb0255e9a1193b5ad841ebaaa5d8007 100644 --- a/crates/lsp/src/lib.rs +++ b/crates/lsp/src/lib.rs @@ -226,7 +226,6 @@ impl LanguageServer { experimental: Some(json!({ "serverStatusNotification": true, })), - offset_encoding: Some(vec!["utf-8".to_string()]), ..Default::default() }, trace: Default::default(), diff --git a/crates/sum_tree/src/cursor.rs b/crates/sum_tree/src/cursor.rs index 324cf0eadb5773c6a8597f30b13bedcefddb202f..7799bb2ff004f65c168a56505fdaac5b40492221 100644 --- a/crates/sum_tree/src/cursor.rs +++ b/crates/sum_tree/src/cursor.rs @@ -537,6 +537,10 @@ where self.cursor.start() } + pub fn end(&self, cx: &::Context) -> D { + self.cursor.end(cx) + } + pub fn item(&self) -> Option<&'a T> { self.cursor.item() }