Detailed changes
@@ -416,7 +416,6 @@ dependencies = [
"serde",
"serde_json_lenient",
"settings",
- "similar",
"smol",
"streaming_diff",
"telemetry",
@@ -482,7 +481,6 @@ dependencies = [
"serde",
"serde_json",
"settings",
- "similar",
"smol",
"streaming_diff",
"telemetry_events",
@@ -4064,7 +4062,6 @@ dependencies = [
"serde",
"serde_json",
"settings",
- "similar",
"smallvec",
"smol",
"snippet",
@@ -6376,6 +6373,15 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edcd27d72f2f071c64249075f42e205ff93c9a4c5f6c6da53e79ed9f9832c285"
+[[package]]
+name = "imara-diff"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17d34b7d42178945f775e84bc4c36dde7c1c6cdfea656d3354d009056f2bb3d2"
+dependencies = [
+ "hashbrown 0.15.2",
+]
+
[[package]]
name = "imgref"
version = "1.11.0"
@@ -6868,6 +6874,7 @@ dependencies = [
"globset",
"gpui",
"http_client",
+ "imara-diff",
"indoc",
"itertools 0.14.0",
"log",
@@ -6882,7 +6889,6 @@ dependencies = [
"serde",
"serde_json",
"settings",
- "similar",
"smallvec",
"smol",
"streaming-iterator",
@@ -10129,7 +10135,6 @@ dependencies = [
"sha2",
"shellexpand 2.1.2",
"shlex",
- "similar",
"smol",
"snippet",
"snippet_provider",
@@ -12270,12 +12275,6 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
-[[package]]
-name = "similar"
-version = "1.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ad1d488a557b235fc46dae55512ffbfc429d2482b08b4d9435ab07384ca8aec"
-
[[package]]
name = "simple_asn1"
version = "0.6.2"
@@ -17140,7 +17139,6 @@ dependencies = [
"serde",
"serde_json",
"settings",
- "similar",
"telemetry",
"telemetry_events",
"theme",
@@ -421,6 +421,7 @@ hyper = "0.14"
http = "1.1"
ignore = "0.4.22"
image = "0.25.1"
+imara-diff = "0.1.8"
indexmap = { version = "2.7.0", features = ["serde"] }
indoc = "2"
inventory = "0.3.19"
@@ -499,7 +500,6 @@ sha2 = "0.10"
shellexpand = "2.1.0"
shlex = "1.3.0"
signal-hook = "0.3.17"
-similar = "1.3"
simplelog = "0.12.2"
smallvec = { version = "1.6", features = ["union"] }
smol = "2.0"
@@ -59,7 +59,6 @@ search.workspace = true
semantic_index.workspace = true
serde.workspace = true
settings.workspace = true
-similar.workspace = true
smol.workspace = true
streaming_diff.workspace = true
telemetry.workspace = true
@@ -30,7 +30,7 @@ use gpui::{
EventEmitter, FocusHandle, Focusable, FontWeight, Global, HighlightStyle, Subscription, Task,
TextStyle, UpdateGlobal, WeakEntity, Window,
};
-use language::{Buffer, IndentKind, Point, Selection, TransactionId};
+use language::{line_diff, Buffer, IndentKind, Point, Selection, TransactionId};
use language_model::{
LanguageModel, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
LanguageModelTextStream, Role,
@@ -3350,52 +3350,29 @@ impl CodegenAlternative {
)
.collect::<String>();
- let mut old_row = old_range.start.row;
- let mut new_row = new_range.start.row;
- let batch_diff =
- similar::TextDiff::from_lines(old_text.as_str(), new_text.as_str());
-
+ let old_start_row = old_range.start.row;
+ let new_start_row = new_range.start.row;
let mut deleted_row_ranges: Vec<(Anchor, RangeInclusive<u32>)> = Vec::new();
let mut inserted_row_ranges = Vec::new();
- for change in batch_diff.iter_all_changes() {
- let line_count = change.value().lines().count() as u32;
- match change.tag() {
- similar::ChangeTag::Equal => {
- old_row += line_count;
- new_row += line_count;
- }
- similar::ChangeTag::Delete => {
- let old_end_row = old_row + line_count - 1;
- let new_row = new_snapshot.anchor_before(Point::new(new_row, 0));
-
- if let Some((_, last_deleted_row_range)) =
- deleted_row_ranges.last_mut()
- {
- if *last_deleted_row_range.end() + 1 == old_row {
- *last_deleted_row_range =
- *last_deleted_row_range.start()..=old_end_row;
- } else {
- deleted_row_ranges.push((new_row, old_row..=old_end_row));
- }
- } else {
- deleted_row_ranges.push((new_row, old_row..=old_end_row));
- }
-
- old_row += line_count;
- }
- similar::ChangeTag::Insert => {
- let new_end_row = new_row + line_count - 1;
- let start = new_snapshot.anchor_before(Point::new(new_row, 0));
- let end = new_snapshot.anchor_before(Point::new(
- new_end_row,
- new_snapshot.line_len(MultiBufferRow(new_end_row)),
- ));
- inserted_row_ranges.push(start..end);
- new_row += line_count;
- }
+ for (old_rows, new_rows) in line_diff(&old_text, &new_text) {
+ let old_rows = old_start_row + old_rows.start..old_start_row + old_rows.end;
+ let new_rows = new_start_row + new_rows.start..new_start_row + new_rows.end;
+ if !old_rows.is_empty() {
+ deleted_row_ranges.push((
+ new_snapshot.anchor_before(Point::new(new_rows.start, 0)),
+ old_rows.start..=old_rows.end - 1,
+ ));
+ }
+ if !new_rows.is_empty() {
+ let start = new_snapshot.anchor_before(Point::new(new_rows.start, 0));
+ let new_end_row = new_rows.end - 1;
+ let end = new_snapshot.anchor_before(Point::new(
+ new_end_row,
+ new_snapshot.line_len(MultiBufferRow(new_end_row)),
+ ));
+ inserted_row_ranges.push(start..end);
}
}
-
(deleted_row_ranges, inserted_row_ranges)
})
.await;
@@ -62,7 +62,6 @@ rope.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
-similar.workspace = true
smol.workspace = true
streaming_diff.workspace = true
telemetry_events.workspace = true
@@ -79,5 +78,10 @@ workspace.workspace = true
zed_actions.workspace = true
[dev-dependencies]
+editor = { workspace = true, features = ["test-support"] }
+gpui = { workspace = true, features = ["test-support"] }
+language = { workspace = true, features = ["test-support"] }
+language_model = { workspace = true, features = ["test-support"] }
+project = { workspace = true, features = ["test-support"] }
rand.workspace = true
indoc.workspace = true
@@ -7,7 +7,7 @@ use collections::HashSet;
use editor::{Anchor, AnchorRangeExt, MultiBuffer, MultiBufferSnapshot, ToOffset as _, ToPoint};
use futures::{channel::mpsc, future::LocalBoxFuture, join, SinkExt, Stream, StreamExt};
use gpui::{App, AppContext as _, Context, Entity, EventEmitter, Subscription, Task};
-use language::{Buffer, IndentKind, Point, TransactionId};
+use language::{line_diff, Buffer, IndentKind, Point, TransactionId};
use language_model::{
LanguageModel, LanguageModelRegistry, LanguageModelRequest, LanguageModelRequestMessage,
LanguageModelTextStream, Role,
@@ -827,52 +827,29 @@ impl CodegenAlternative {
)
.collect::<String>();
- let mut old_row = old_range.start.row;
- let mut new_row = new_range.start.row;
- let batch_diff =
- similar::TextDiff::from_lines(old_text.as_str(), new_text.as_str());
-
+ let old_start_row = old_range.start.row;
+ let new_start_row = new_range.start.row;
let mut deleted_row_ranges: Vec<(Anchor, RangeInclusive<u32>)> = Vec::new();
let mut inserted_row_ranges = Vec::new();
- for change in batch_diff.iter_all_changes() {
- let line_count = change.value().lines().count() as u32;
- match change.tag() {
- similar::ChangeTag::Equal => {
- old_row += line_count;
- new_row += line_count;
- }
- similar::ChangeTag::Delete => {
- let old_end_row = old_row + line_count - 1;
- let new_row = new_snapshot.anchor_before(Point::new(new_row, 0));
-
- if let Some((_, last_deleted_row_range)) =
- deleted_row_ranges.last_mut()
- {
- if *last_deleted_row_range.end() + 1 == old_row {
- *last_deleted_row_range =
- *last_deleted_row_range.start()..=old_end_row;
- } else {
- deleted_row_ranges.push((new_row, old_row..=old_end_row));
- }
- } else {
- deleted_row_ranges.push((new_row, old_row..=old_end_row));
- }
-
- old_row += line_count;
- }
- similar::ChangeTag::Insert => {
- let new_end_row = new_row + line_count - 1;
- let start = new_snapshot.anchor_before(Point::new(new_row, 0));
- let end = new_snapshot.anchor_before(Point::new(
- new_end_row,
- new_snapshot.line_len(MultiBufferRow(new_end_row)),
- ));
- inserted_row_ranges.push(start..end);
- new_row += line_count;
- }
+ for (old_rows, new_rows) in line_diff(&old_text, &new_text) {
+ let old_rows = old_start_row + old_rows.start..old_start_row + old_rows.end;
+ let new_rows = new_start_row + new_rows.start..new_start_row + new_rows.end;
+ if !old_rows.is_empty() {
+ deleted_row_ranges.push((
+ new_snapshot.anchor_before(Point::new(new_rows.start, 0)),
+ old_rows.start..=old_rows.end - 1,
+ ));
+ }
+ if !new_rows.is_empty() {
+ let start = new_snapshot.anchor_before(Point::new(new_rows.start, 0));
+ let new_end_row = new_rows.end - 1;
+ let end = new_snapshot.anchor_before(Point::new(
+ new_end_row,
+ new_snapshot.line_len(MultiBufferRow(new_end_row)),
+ ));
+ inserted_row_ranges.push(start..end);
}
}
-
(deleted_row_ranges, inserted_row_ranges)
})
.await;
@@ -66,7 +66,6 @@ schemars.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
-similar.workspace = true
smallvec.workspace = true
smol.workspace = true
snippet.workspace = true
@@ -100,10 +100,10 @@ use language::{
language_settings::{
self, all_language_settings, language_settings, InlayHintSettings, RewrapBehavior,
},
- point_from_lsp, AutoindentMode, BracketPair, Buffer, Capability, CharKind, CodeLabel,
- CursorShape, Diagnostic, DiskState, EditPredictionsMode, EditPreview, HighlightedText,
- IndentKind, IndentSize, Language, OffsetRangeExt, Point, Selection, SelectionGoal, TextObject,
- TransactionId, TreeSitterOptions,
+ point_from_lsp, text_diff_with_options, AutoindentMode, BracketPair, Buffer, Capability,
+ CharKind, CodeLabel, CursorShape, Diagnostic, DiffOptions, DiskState, EditPredictionsMode,
+ EditPreview, HighlightedText, IndentKind, IndentSize, Language, OffsetRangeExt, Point,
+ Selection, SelectionGoal, TextObject, TransactionId, TreeSitterOptions,
};
use language::{point_to_lsp, BufferRow, CharClassifier, Runnable, RunnableRange};
use linked_editing_ranges::refresh_linked_ranges;
@@ -112,7 +112,6 @@ use persistence::DB;
pub use proposed_changes_editor::{
ProposedChangeLocation, ProposedChangesEditor, ProposedChangesEditorToolbar,
};
-use similar::{ChangeTag, TextDiff};
use std::iter::Peekable;
use task::{ResolvedTask, TaskTemplate, TaskVariables};
@@ -202,7 +201,7 @@ pub(crate) const CURSORS_VISIBLE_FOR: Duration = Duration::from_millis(2000);
#[doc(hidden)]
pub const CODE_ACTIONS_DEBOUNCE_TIMEOUT: Duration = Duration::from_millis(250);
-pub(crate) const FORMAT_TIMEOUT: Duration = Duration::from_secs(2);
+pub(crate) const FORMAT_TIMEOUT: Duration = Duration::from_secs(5);
pub(crate) const SCROLL_CENTER_TOP_BOTTOM_DEBOUNCE_TIMEOUT: Duration = Duration::from_secs(1);
pub(crate) const EDIT_PREDICTION_KEY_CONTEXT: &str = "edit_prediction";
@@ -7829,6 +7828,7 @@ impl Editor {
}
let start = Point::new(start_row, 0);
+ let start_offset = start.to_offset(&buffer);
let end = Point::new(end_row, buffer.line_len(MultiBufferRow(end_row)));
let selection_text = buffer.text_for_range(start..end).collect::<String>();
let Some(lines_without_prefixes) = selection_text
@@ -7858,44 +7858,21 @@ impl Editor {
// TODO: should always use char-based diff while still supporting cursor behavior that
// matches vim.
- let diff = match is_vim_mode {
- IsVimMode::Yes => TextDiff::from_lines(&selection_text, &wrapped_text),
- IsVimMode::No => TextDiff::from_chars(&selection_text, &wrapped_text),
- };
- let mut offset = start.to_offset(&buffer);
- let mut moved_since_edit = true;
-
- for change in diff.iter_all_changes() {
- let value = change.value();
- match change.tag() {
- ChangeTag::Equal => {
- offset += value.len();
- moved_since_edit = true;
- }
- ChangeTag::Delete => {
- let start = buffer.anchor_after(offset);
- let end = buffer.anchor_before(offset + value.len());
-
- if moved_since_edit {
- edits.push((start..end, String::new()));
- } else {
- edits.last_mut().unwrap().0.end = end;
- }
-
- offset += value.len();
- moved_since_edit = false;
- }
- ChangeTag::Insert => {
- if moved_since_edit {
- let anchor = buffer.anchor_after(offset);
- edits.push((anchor..anchor, value.to_string()));
- } else {
- edits.last_mut().unwrap().1.push_str(value);
- }
+ let mut diff_options = DiffOptions::default();
+ if is_vim_mode == IsVimMode::Yes {
+ diff_options.max_word_diff_len = 0;
+ diff_options.max_word_diff_line_count = 0;
+ } else {
+ diff_options.max_word_diff_len = usize::MAX;
+ diff_options.max_word_diff_line_count = usize::MAX;
+ }
- moved_since_edit = false;
- }
- }
+ for (old_range, new_text) in
+ text_diff_with_options(&selection_text, &wrapped_text, diff_options)
+ {
+ let edit_start = buffer.anchor_after(start_offset + old_range.start);
+ let edit_end = buffer.anchor_after(start_offset + old_range.end);
+ edits.push((edit_start..edit_end, new_text));
}
rewrapped_row_ranges.push(start_row..=end_row);
@@ -8409,7 +8409,6 @@ mod tests {
use gpui::{TestAppContext, VisualTestContext};
use language::language_settings;
use log::info;
- use similar::DiffableStr;
use std::num::NonZeroU32;
use util::test::sample_text;
@@ -8709,7 +8708,7 @@ mod tests {
state
.line_numbers
.get(&MultiBufferRow(0))
- .and_then(|line_number| line_number.shaped_line.text.as_str()),
+ .map(|line_number| line_number.shaped_line.text.as_ref()),
Some("1")
);
}
@@ -37,6 +37,7 @@ fuzzy.workspace = true
globset.workspace = true
gpui.workspace = true
http_client.workspace = true
+imara-diff.workspace = true
itertools.workspace = true
log.workspace = true
lsp.workspace = true
@@ -49,7 +50,6 @@ schemars.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
-similar.workspace = true
smallvec.workspace = true
smol.workspace = true
streaming-iterator.workspace = true
@@ -12,6 +12,7 @@ use crate::{
SyntaxMapMatches, SyntaxSnapshot, ToTreeSitterPoint,
},
task_context::RunnableRange,
+ text_diff::text_diff,
LanguageScope, Outline, OutlineConfig, RunnableCapture, RunnableTag, TextObject,
TreeSitterOptions,
};
@@ -32,7 +33,6 @@ use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use settings::WorktreeId;
-use similar::{ChangeTag, TextDiff};
use smallvec::SmallVec;
use smol::future::yield_now;
use std::{
@@ -1792,61 +1792,7 @@ impl Buffer {
let old_text = old_text.to_string();
let line_ending = LineEnding::detect(&new_text);
LineEnding::normalize(&mut new_text);
-
- let diff = TextDiff::from_chars(old_text.as_str(), new_text.as_str());
- let empty: Arc<str> = Arc::default();
-
- let mut edits = Vec::new();
- let mut old_offset = 0;
- let mut new_offset = 0;
- let mut last_edit: Option<(Range<usize>, Range<usize>)> = None;
- for change in diff.iter_all_changes().map(Some).chain([None]) {
- if let Some(change) = &change {
- let len = change.value().len();
- match change.tag() {
- ChangeTag::Equal => {
- old_offset += len;
- new_offset += len;
- }
- ChangeTag::Delete => {
- let old_end_offset = old_offset + len;
- if let Some((last_old_range, _)) = &mut last_edit {
- last_old_range.end = old_end_offset;
- } else {
- last_edit =
- Some((old_offset..old_end_offset, new_offset..new_offset));
- }
- old_offset = old_end_offset;
- }
- ChangeTag::Insert => {
- let new_end_offset = new_offset + len;
- if let Some((_, last_new_range)) = &mut last_edit {
- last_new_range.end = new_end_offset;
- } else {
- last_edit =
- Some((old_offset..old_offset, new_offset..new_end_offset));
- }
- new_offset = new_end_offset;
- }
- }
- }
-
- if let Some((old_range, new_range)) = &last_edit {
- if old_offset > old_range.end
- || new_offset > new_range.end
- || change.is_none()
- {
- let text = if new_range.is_empty() {
- empty.clone()
- } else {
- new_text[new_range.clone()].into()
- };
- edits.push((old_range.clone(), text));
- last_edit.take();
- }
- }
- }
-
+ let edits = text_diff(&old_text, &new_text);
Diff {
base_version,
line_ending,
@@ -25,6 +25,7 @@ use text::{BufferId, LineEnding};
use text::{Point, ToPoint};
use theme::ActiveTheme;
use unindent::Unindent as _;
+use util::test::marked_text_offsets;
use util::{assert_set_eq, post_inc, test::marked_text_ranges, RandomCharIter};
pub static TRAILING_WHITESPACE_REGEX: LazyLock<regex::Regex> = LazyLock::new(|| {
@@ -354,24 +355,44 @@ fn test_edit_events(cx: &mut gpui::App) {
#[gpui::test]
async fn test_apply_diff(cx: &mut TestAppContext) {
- let text = "a\nbb\nccc\ndddd\neeeee\nffffff\n";
+    let (text, offsets) = marked_text_offsets(
+        "one two three\nfour fiˇve six\nseven eightˇ nine\nten eleven twelve\n",
+    );
let buffer = cx.new(|cx| Buffer::local(text, cx));
- let anchor = buffer.update(cx, |buffer, _| buffer.anchor_before(Point::new(3, 3)));
+ let anchors = buffer.update(cx, |buffer, _| {
+ offsets
+ .iter()
+ .map(|offset| buffer.anchor_before(offset))
+ .collect::<Vec<_>>()
+ });
+
+    let (text, offsets) = marked_text_offsets(
+        "one two three\n{\nfour FIVEˇ six\n}\nseven AND EIGHTˇ nine\nten eleven twelve\n",
+    );
- let text = "a\nccc\ndddd\nffffff\n";
- let diff = buffer.update(cx, |b, cx| b.diff(text.into(), cx)).await;
+ let diff = buffer.update(cx, |b, cx| b.diff(text.clone(), cx)).await;
buffer.update(cx, |buffer, cx| {
buffer.apply_diff(diff, cx).unwrap();
assert_eq!(buffer.text(), text);
- assert_eq!(anchor.to_point(buffer), Point::new(2, 3));
+ let actual_offsets = anchors
+ .iter()
+ .map(|anchor| anchor.to_offset(buffer))
+ .collect::<Vec<_>>();
+ assert_eq!(actual_offsets, offsets);
});
- let text = "a\n1\n\nccc\ndd2dd\nffffff\n";
- let diff = buffer.update(cx, |b, cx| b.diff(text.into(), cx)).await;
+    let (text, offsets) =
+        marked_text_offsets("one two three\n{\nˇ}\nseven AND EIGHTEENˇ nine\nten eleven twelve\n");
+
+ let diff = buffer.update(cx, |b, cx| b.diff(text.clone(), cx)).await;
buffer.update(cx, |buffer, cx| {
buffer.apply_diff(diff, cx).unwrap();
assert_eq!(buffer.text(), text);
- assert_eq!(anchor.to_point(buffer), Point::new(4, 4));
+ let actual_offsets = anchors
+ .iter()
+ .map(|anchor| anchor.to_offset(buffer))
+ .collect::<Vec<_>>();
+ assert_eq!(actual_offsets, offsets);
});
}
@@ -15,6 +15,7 @@ mod outline;
pub mod proto;
mod syntax_map;
mod task_context;
+mod text_diff;
mod toolchain;
#[cfg(test)]
@@ -62,6 +63,7 @@ use std::{num::NonZeroU32, sync::OnceLock};
use syntax_map::{QueryCursorHandle, SyntaxSnapshot};
use task::RunnableTag;
pub use task_context::{ContextProvider, RunnableRange};
+pub use text_diff::{line_diff, text_diff, text_diff_with_options, unified_diff, DiffOptions};
use theme::SyntaxTheme;
pub use toolchain::{LanguageToolchainStore, Toolchain, ToolchainList, ToolchainLister};
use tree_sitter::{self, wasmtime, Query, QueryCursor, WasmStore};
@@ -0,0 +1,274 @@
+use crate::{CharClassifier, CharKind, LanguageScope};
+use imara_diff::{
+ diff,
+ intern::{InternedInput, Token},
+ sources::lines_with_terminator,
+ Algorithm, UnifiedDiffBuilder,
+};
+use std::{iter, ops::Range, sync::Arc};
+
+const MAX_WORD_DIFF_LEN: usize = 512;
+const MAX_WORD_DIFF_LINE_COUNT: usize = 8;
+
+/// Computes a diff between two strings, returning a unified diff string.
+pub fn unified_diff(old_text: &str, new_text: &str) -> String {
+ let input = InternedInput::new(old_text, new_text);
+ diff(
+ Algorithm::Histogram,
+ &input,
+ UnifiedDiffBuilder::new(&input),
+ )
+}
+
+/// Computes a diff between two strings, returning a vector of old and new row
+/// ranges.
+pub fn line_diff(old_text: &str, new_text: &str) -> Vec<(Range<u32>, Range<u32>)> {
+ let mut edits = Vec::new();
+ let input = InternedInput::new(
+ lines_with_terminator(old_text),
+ lines_with_terminator(new_text),
+ );
+ diff_internal(&input, |_, _, old_rows, new_rows| {
+ edits.push((old_rows, new_rows));
+ });
+ edits
+}
+
+/// Computes a diff between two strings, returning a vector of edits.
+///
+/// The edits are represented as tuples of byte ranges and replacement strings.
+///
+/// Internally, this function first performs a line-based diff, and then performs a second
+/// word-based diff within hunks that replace small numbers of lines.
+pub fn text_diff(old_text: &str, new_text: &str) -> Vec<(Range<usize>, Arc<str>)> {
+ text_diff_with_options(old_text, new_text, DiffOptions::default())
+}
+
+pub struct DiffOptions {
+ pub language_scope: Option<LanguageScope>,
+ pub max_word_diff_len: usize,
+ pub max_word_diff_line_count: usize,
+}
+
+impl Default for DiffOptions {
+ fn default() -> Self {
+ Self {
+ language_scope: Default::default(),
+ max_word_diff_len: MAX_WORD_DIFF_LEN,
+ max_word_diff_line_count: MAX_WORD_DIFF_LINE_COUNT,
+ }
+ }
+}
+
+/// Computes a diff between two strings, using a specific language scope's
+/// word characters for word-level diffing.
+pub fn text_diff_with_options(
+ old_text: &str,
+ new_text: &str,
+ options: DiffOptions,
+) -> Vec<(Range<usize>, Arc<str>)> {
+ let empty: Arc<str> = Arc::default();
+ let mut edits = Vec::new();
+ let mut hunk_input = InternedInput::default();
+ let input = InternedInput::new(
+ lines_with_terminator(old_text),
+ lines_with_terminator(new_text),
+ );
+ diff_internal(
+ &input,
+ |old_byte_range, new_byte_range, old_rows, new_rows| {
+ if should_perform_word_diff_within_hunk(
+ &old_rows,
+ &old_byte_range,
+ &new_rows,
+ &new_byte_range,
+ &options,
+ ) {
+ let old_offset = old_byte_range.start;
+ let new_offset = new_byte_range.start;
+ hunk_input.clear();
+ hunk_input.update_before(tokenize(
+ &old_text[old_byte_range.clone()],
+ options.language_scope.clone(),
+ ));
+ hunk_input.update_after(tokenize(
+ &new_text[new_byte_range.clone()],
+ options.language_scope.clone(),
+ ));
+ diff_internal(&hunk_input, |old_byte_range, new_byte_range, _, _| {
+ let old_byte_range =
+ old_offset + old_byte_range.start..old_offset + old_byte_range.end;
+ let new_byte_range =
+ new_offset + new_byte_range.start..new_offset + new_byte_range.end;
+ let replacement_text = if new_byte_range.is_empty() {
+ empty.clone()
+ } else {
+ new_text[new_byte_range.clone()].into()
+ };
+ edits.push((old_byte_range, replacement_text));
+ });
+ } else {
+ let replacement_text = if new_byte_range.is_empty() {
+ empty.clone()
+ } else {
+ new_text[new_byte_range.clone()].into()
+ };
+ edits.push((old_byte_range.clone(), replacement_text));
+ }
+ },
+ );
+ edits
+}
+
+fn should_perform_word_diff_within_hunk(
+ old_row_range: &Range<u32>,
+ old_byte_range: &Range<usize>,
+ new_row_range: &Range<u32>,
+ new_byte_range: &Range<usize>,
+ options: &DiffOptions,
+) -> bool {
+ !old_byte_range.is_empty()
+ && !new_byte_range.is_empty()
+ && old_byte_range.len() <= options.max_word_diff_len
+ && new_byte_range.len() <= options.max_word_diff_len
+ && old_row_range.len() <= options.max_word_diff_line_count
+ && new_row_range.len() <= options.max_word_diff_line_count
+}
+
+fn diff_internal(
+ input: &InternedInput<&str>,
+ mut on_change: impl FnMut(Range<usize>, Range<usize>, Range<u32>, Range<u32>),
+) {
+ let mut old_offset = 0;
+ let mut new_offset = 0;
+ let mut old_token_ix = 0;
+ let mut new_token_ix = 0;
+ diff(
+ Algorithm::Histogram,
+ input,
+ |old_tokens: Range<u32>, new_tokens: Range<u32>| {
+ old_offset += token_len(
+ &input,
+ &input.before[old_token_ix as usize..old_tokens.start as usize],
+ );
+ new_offset += token_len(
+ &input,
+ &input.after[new_token_ix as usize..new_tokens.start as usize],
+ );
+ let old_len = token_len(
+ &input,
+ &input.before[old_tokens.start as usize..old_tokens.end as usize],
+ );
+ let new_len = token_len(
+ &input,
+ &input.after[new_tokens.start as usize..new_tokens.end as usize],
+ );
+ let old_byte_range = old_offset..old_offset + old_len;
+ let new_byte_range = new_offset..new_offset + new_len;
+ old_token_ix = old_tokens.end;
+ new_token_ix = new_tokens.end;
+ old_offset = old_byte_range.end;
+ new_offset = new_byte_range.end;
+ on_change(old_byte_range, new_byte_range, old_tokens, new_tokens);
+ },
+ );
+}
+
+fn tokenize(text: &str, language_scope: Option<LanguageScope>) -> impl Iterator<Item = &str> {
+ let classifier = CharClassifier::new(language_scope).for_completion(true);
+ let mut chars = text.char_indices();
+ let mut prev = None;
+ let mut start_ix = 0;
+ iter::from_fn(move || {
+ while let Some((ix, c)) = chars.next() {
+ let mut token = None;
+ let kind = classifier.kind(c);
+ if let Some((prev_char, prev_kind)) = prev {
+ if kind != prev_kind || (kind == CharKind::Punctuation && c != prev_char) {
+ token = Some(&text[start_ix..ix]);
+ start_ix = ix;
+ }
+ }
+ prev = Some((c, kind));
+ if token.is_some() {
+ return token;
+ }
+ }
+ if start_ix < text.len() {
+ let token = &text[start_ix..];
+ start_ix = text.len();
+ return Some(token);
+ }
+ None
+ })
+}
+
+fn token_len(input: &InternedInput<&str>, tokens: &[Token]) -> usize {
+ tokens
+ .iter()
+ .map(|token| input.interner[*token].len())
+ .sum()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_tokenize() {
+ let text = "";
+ assert_eq!(tokenize(text, None).collect::<Vec<_>>(), Vec::<&str>::new());
+
+ let text = " ";
+ assert_eq!(tokenize(text, None).collect::<Vec<_>>(), vec![" "]);
+
+ let text = "one";
+ assert_eq!(tokenize(text, None).collect::<Vec<_>>(), vec!["one"]);
+
+ let text = "one\n";
+ assert_eq!(tokenize(text, None).collect::<Vec<_>>(), vec!["one", "\n"]);
+
+ let text = "one.two(three)";
+ assert_eq!(
+ tokenize(text, None).collect::<Vec<_>>(),
+ vec!["one", ".", "two", "(", "three", ")"]
+ );
+
+ let text = "one two three()";
+ assert_eq!(
+ tokenize(text, None).collect::<Vec<_>>(),
+ vec!["one", " ", "two", " ", "three", "(", ")"]
+ );
+
+ let text = " one\n two three";
+ assert_eq!(
+ tokenize(text, None).collect::<Vec<_>>(),
+ vec![" ", "one", "\n ", "two", " ", "three"]
+ );
+ }
+
+ #[test]
+ fn test_text_diff() {
+ let old_text = "one two three";
+ let new_text = "one TWO three";
+ assert_eq!(text_diff(old_text, new_text), [(4..7, "TWO".into()),]);
+
+ let old_text = "one\ntwo\nthree\n";
+ let new_text = "one\ntwo\nAND\nTHEN\nthree\n";
+ assert_eq!(
+ text_diff(old_text, new_text),
+ [(8..8, "AND\nTHEN\n".into()),]
+ );
+
+ let old_text = "one two\nthree four five\nsix seven eight nine\nten\n";
+ let new_text = "one two\nthree FOUR five\nsix SEVEN eight nine\nten\nELEVEN\n";
+ assert_eq!(
+ text_diff(old_text, new_text),
+ [
+ (14..18, "FOUR".into()),
+ (28..33, "SEVEN".into()),
+ (49..49, "ELEVEN\n".into())
+ ]
+ );
+ }
+}
@@ -64,7 +64,6 @@ settings.workspace = true
sha2.workspace = true
shellexpand.workspace = true
shlex.workspace = true
-similar = "1.3"
smol.workspace = true
snippet.workspace = true
snippet_provider.workspace = true
@@ -63,7 +63,6 @@ use rpc::{
use serde::Serialize;
use settings::{Settings, SettingsLocation, SettingsStore};
use sha2::{Digest, Sha256};
-use similar::{ChangeTag, TextDiff};
use smol::channel::Sender;
use snippet::Snippet;
use std::{
@@ -110,15 +109,13 @@ pub enum LspFormatTarget {
Ranges(BTreeMap<BufferId, Vec<Range<Anchor>>>),
}
-// proto::RegisterBufferWithLanguageServer {}
-
pub type OpenLspBufferHandle = Entity<Entity<Buffer>>;
// Currently, formatting operations are represented differently depending on
// whether they come from a language server or an external command.
#[derive(Debug)]
pub enum FormatOperation {
- Lsp(Vec<(Range<Anchor>, String)>),
+ Lsp(Vec<(Range<Anchor>, Arc<str>)>),
External(Diff),
Prettier(Diff),
}
@@ -1388,7 +1385,7 @@ impl LocalLspStore {
language_server: &Arc<LanguageServer>,
settings: &LanguageSettings,
cx: &mut AsyncApp,
- ) -> Result<Vec<(Range<Anchor>, String)>> {
+ ) -> Result<Vec<(Range<Anchor>, Arc<str>)>> {
let capabilities = &language_server.capabilities();
let range_formatting_provider = capabilities.document_range_formatting_provider.as_ref();
if range_formatting_provider.map_or(false, |provider| provider == &OneOf::Left(false)) {
@@ -1459,7 +1456,7 @@ impl LocalLspStore {
language_server: &Arc<LanguageServer>,
settings: &LanguageSettings,
cx: &mut AsyncApp,
- ) -> Result<Vec<(Range<Anchor>, String)>> {
+ ) -> Result<Vec<(Range<Anchor>, Arc<str>)>> {
let uri = lsp::Url::from_file_path(abs_path)
.map_err(|_| anyhow!("failed to convert abs path to uri"))?;
let text_document = lsp::TextDocumentIdentifier::new(uri);
@@ -2144,7 +2141,7 @@ impl LocalLspStore {
server_id: LanguageServerId,
version: Option<i32>,
cx: &mut Context<LspStore>,
- ) -> Task<Result<Vec<(Range<Anchor>, String)>>> {
+ ) -> Task<Result<Vec<(Range<Anchor>, Arc<str>)>>> {
let snapshot = self.buffer_snapshot_for_lsp_version(buffer, server_id, version, cx);
cx.background_spawn(async move {
let snapshot = snapshot?;
@@ -2192,48 +2189,23 @@ impl LocalLspStore {
// we can identify the changes more precisely, preserving the locations
// of any anchors positioned in the unchanged regions.
if range.end.row > range.start.row {
- let mut offset = range.start.to_offset(&snapshot);
+ let offset = range.start.to_offset(&snapshot);
let old_text = snapshot.text_for_range(range).collect::<String>();
-
- let diff = TextDiff::from_lines(old_text.as_str(), &new_text);
- let mut moved_since_edit = true;
- for change in diff.iter_all_changes() {
- let tag = change.tag();
- let value = change.value();
- match tag {
- ChangeTag::Equal => {
- offset += value.len();
- moved_since_edit = true;
- }
- ChangeTag::Delete => {
- let start = snapshot.anchor_after(offset);
- let end = snapshot.anchor_before(offset + value.len());
- if moved_since_edit {
- edits.push((start..end, String::new()));
- } else {
- edits.last_mut().unwrap().0.end = end;
- }
- offset += value.len();
- moved_since_edit = false;
- }
- ChangeTag::Insert => {
- if moved_since_edit {
- let anchor = snapshot.anchor_after(offset);
- edits.push((anchor..anchor, value.to_string()));
- } else {
- edits.last_mut().unwrap().1.push_str(value);
- }
- moved_since_edit = false;
- }
- }
- }
+ let range_edits = language::text_diff(old_text.as_str(), &new_text);
+ edits.extend(range_edits.into_iter().map(|(range, replacement)| {
+ (
+ snapshot.anchor_after(offset + range.start)
+ ..snapshot.anchor_before(offset + range.end),
+ replacement,
+ )
+ }));
} else if range.end == range.start {
let anchor = snapshot.anchor_after(range.start);
- edits.push((anchor..anchor, new_text));
+ edits.push((anchor..anchor, new_text.into()));
} else {
let edit_start = snapshot.anchor_after(range.start);
let edit_end = snapshot.anchor_before(range.end);
- edits.push((edit_start..edit_end, new_text));
+ edits.push((edit_start..edit_end, new_text.into()));
}
}
@@ -25,7 +25,11 @@ use std::{mem, num::NonZeroU32, ops::Range, task::Poll};
use task::{ResolvedTask, TaskContext};
use unindent::Unindent as _;
use util::{
- assert_set_eq, path, paths::PathMatcher, separator, test::TempTree, uri, TryFutureExt as _,
+ assert_set_eq, path,
+ paths::PathMatcher,
+ separator,
+ test::{marked_text_offsets, TempTree},
+ uri, TryFutureExt as _,
};
#[gpui::test]
@@ -3625,7 +3629,8 @@ async fn test_buffer_is_dirty(cx: &mut gpui::TestAppContext) {
async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) {
init_test(cx);
- let initial_contents = "aaa\nbbbbb\nc\n";
+    let (initial_contents, initial_offsets) =
+        marked_text_offsets("one twoˇ\nthree ˇfourˇ five\nsixˇ seven\n");
let fs = FakeFs::new(cx.executor());
fs.insert_tree(
path!("/dir"),
@@ -3640,8 +3645,9 @@ async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) {
.await
.unwrap();
- let anchors = (0..3)
- .map(|row| buffer.update(cx, |b, _| b.anchor_before(Point::new(row, 1))))
+ let anchors = initial_offsets
+ .iter()
+ .map(|offset| buffer.update(cx, |b, _| b.anchor_before(offset)))
.collect::<Vec<_>>();
// Change the file on disk, adding two new lines of text, and removing
@@ -3650,10 +3656,12 @@ async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) {
assert!(!buffer.is_dirty());
assert!(!buffer.has_conflict());
});
- let new_contents = "AAAA\naaa\nBB\nbbbbb\n";
+
+    let (new_contents, new_offsets) =
+        marked_text_offsets("oneˇ\nthree ˇFOURˇ five\nsixtyˇ seven\n");
fs.save(
path!("/dir/the-file").as_ref(),
- &new_contents.into(),
+ &new_contents.as_str().into(),
LineEnding::Unix,
)
.await
@@ -3668,14 +3676,11 @@ async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) {
assert!(!buffer.is_dirty());
assert!(!buffer.has_conflict());
- let anchor_positions = anchors
+ let anchor_offsets = anchors
.iter()
- .map(|anchor| anchor.to_point(&*buffer))
+ .map(|anchor| anchor.to_offset(&*buffer))
.collect::<Vec<_>>();
- assert_eq!(
- anchor_positions,
- [Point::new(1, 1), Point::new(3, 1), Point::new(3, 5)]
- );
+ assert_eq!(anchor_offsets, new_offsets);
});
// Modify the buffer
@@ -3698,6 +3703,7 @@ async fn test_buffer_file_changes_on_disk(cx: &mut gpui::TestAppContext) {
// marked as having a conflict.
cx.executor().run_until_parked();
buffer.update(cx, |buffer, _| {
+ assert_eq!(buffer.text(), " ".to_string() + &new_contents);
assert!(buffer.has_conflict());
});
}
@@ -45,7 +45,6 @@ release_channel.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
-similar.workspace = true
telemetry.workspace = true
telemetry_events.workspace = true
theme.workspace = true
@@ -29,8 +29,7 @@ use gpui::{
use http_client::{HttpClient, Method};
use input_excerpt::excerpt_for_cursor_position;
use language::{
- Anchor, Buffer, BufferSnapshot, CharClassifier, CharKind, EditPreview, OffsetRangeExt,
- ToOffset, ToPoint,
+ text_diff, Anchor, Buffer, BufferSnapshot, EditPreview, OffsetRangeExt, ToOffset, ToPoint,
};
use language_models::LlmApiToken;
use postage::watch;
@@ -919,77 +918,18 @@ and then another
offset: usize,
snapshot: &BufferSnapshot,
) -> Vec<(Range<Anchor>, String)> {
- fn tokenize(text: &str) -> Vec<&str> {
- let classifier = CharClassifier::new(None).for_completion(true);
- let mut chars = text.chars().peekable();
- let mut prev_ch = chars.peek().copied();
- let mut tokens = Vec::new();
- let mut start = 0;
- let mut end = 0;
- while let Some(ch) = chars.next() {
- let prev_kind = prev_ch.map(|ch| classifier.kind(ch));
- let kind = classifier.kind(ch);
- if Some(kind) != prev_kind || (kind == CharKind::Punctuation && Some(ch) != prev_ch)
- {
- tokens.push(&text[start..end]);
- start = end;
- }
- end += ch.len_utf8();
- prev_ch = Some(ch);
- }
- tokens.push(&text[start..end]);
- tokens
- }
-
- let old_tokens = tokenize(&old_text);
- let new_tokens = tokenize(new_text);
-
- let diff = similar::TextDiffConfig::default()
- .algorithm(similar::Algorithm::Patience)
- .diff_slices(&old_tokens, &new_tokens);
- let mut edits: Vec<(Range<usize>, String)> = Vec::new();
- let mut old_start = offset;
- for change in diff.iter_all_changes() {
- let value = change.value();
- match change.tag() {
- similar::ChangeTag::Equal => {
- old_start += value.len();
- }
- similar::ChangeTag::Delete => {
- let old_end = old_start + value.len();
- if let Some((last_old_range, _)) = edits.last_mut() {
- if last_old_range.end == old_start {
- last_old_range.end = old_end;
- } else {
- edits.push((old_start..old_end, String::new()));
- }
- } else {
- edits.push((old_start..old_end, String::new()));
- }
- old_start = old_end;
- }
- similar::ChangeTag::Insert => {
- if let Some((last_old_range, last_new_text)) = edits.last_mut() {
- if last_old_range.end == old_start {
- last_new_text.push_str(value);
- } else {
- edits.push((old_start..old_start, value.into()));
- }
- } else {
- edits.push((old_start..old_start, value.into()));
- }
- }
- }
- }
-
- edits
+ text_diff(&old_text, &new_text)
.into_iter()
.map(|(mut old_range, new_text)| {
+ old_range.start += offset;
+ old_range.end += offset;
+
let prefix_len = common_prefix(
snapshot.chars_for_range(old_range.clone()),
new_text.chars(),
);
old_range.start += prefix_len;
+
let suffix_len = common_prefix(
snapshot.reversed_chars_for_range(old_range.clone()),
new_text[prefix_len..].chars().rev(),
@@ -1248,10 +1188,7 @@ impl Event {
writeln!(prompt, "User renamed {:?} to {:?}\n", old_path, new_path).unwrap();
}
- let diff =
- similar::TextDiff::from_lines(&old_snapshot.text(), &new_snapshot.text())
- .unified_diff()
- .to_string();
+ let diff = language::unified_diff(&old_snapshot.text(), &new_snapshot.text());
if !diff.is_empty() {
write!(
prompt,