1use language::{Point, TextBufferSnapshot};
2use std::{cmp, ops::Range};
3
/// Cost added on a diagonal step when the query line and the buffer line are
/// not identical but `fuzzy_eq` considers them similar.
const REPLACEMENT_COST: u32 = 1;
/// Cost added on a `Left` step, which advances over a buffer line without
/// consuming a query line.
const INSERTION_COST: u32 = 3;
/// Cost added on an `Up` step, which consumes a query line without advancing
/// over a buffer line.
const DELETION_COST: u32 = 10;
7
/// A streaming fuzzy matcher that can process text chunks incrementally
/// and return the best match found so far at each step.
pub struct StreamingFuzzyMatcher {
    /// Snapshot of the buffer that query lines are matched against.
    snapshot: TextBufferSnapshot,
    /// Complete query lines received so far, in order.
    query_lines: Vec<String>,
    /// Line hint taken from `push`; used by `select_best_match` to choose
    /// between several candidate ranges.
    line_hint: Option<u32>,
    /// Text received after the last newline, held until the line is completed
    /// or `finish` is called.
    incomplete_line: String,
    /// Candidate buffer ranges produced by the most recent matching pass.
    matches: Vec<Range<usize>>,
    /// Cost matrix that is extended by one row per newly added query line.
    matrix: SearchMatrix,
}
18
19impl StreamingFuzzyMatcher {
20 pub fn new(snapshot: TextBufferSnapshot) -> Self {
21 let buffer_line_count = snapshot.max_point().row as usize + 1;
22 Self {
23 snapshot,
24 query_lines: Vec::new(),
25 line_hint: None,
26 incomplete_line: String::new(),
27 matches: Vec::new(),
28 matrix: SearchMatrix::new(buffer_line_count + 1),
29 }
30 }
31
32 /// Returns the query lines.
33 pub fn query_lines(&self) -> &[String] {
34 &self.query_lines
35 }
36
37 /// Push a new chunk of text and get the best match found so far.
38 ///
39 /// This method accumulates text chunks and processes complete lines.
40 /// Partial lines are buffered internally until a newline is received.
41 ///
42 /// # Returns
43 ///
44 /// Returns `Some(range)` if a match has been found with the accumulated
45 /// query so far, or `None` if no suitable match exists yet.
46 pub fn push(&mut self, chunk: &str, line_hint: Option<u32>) -> Option<Range<usize>> {
47 if line_hint.is_some() {
48 self.line_hint = line_hint;
49 }
50
51 // Add the chunk to our incomplete line buffer
52 self.incomplete_line.push_str(chunk);
53 self.line_hint = line_hint;
54
55 if let Some((last_pos, _)) = self.incomplete_line.match_indices('\n').next_back() {
56 let complete_part = &self.incomplete_line[..=last_pos];
57
58 // Split into lines and add to query_lines
59 for line in complete_part.lines() {
60 self.query_lines.push(line.to_string());
61 }
62
63 self.incomplete_line.replace_range(..last_pos + 1, "");
64
65 self.matches = self.resolve_location_fuzzy();
66 }
67
68 let best_match = self.select_best_match();
69 best_match.or_else(|| self.matches.first().cloned())
70 }
71
72 /// Finish processing and return the final best match(es).
73 ///
74 /// This processes any remaining incomplete line before returning the final
75 /// match result.
76 pub fn finish(&mut self) -> Vec<Range<usize>> {
77 // Process any remaining incomplete line
78 if !self.incomplete_line.is_empty() {
79 self.query_lines.push(self.incomplete_line.clone());
80 self.incomplete_line.clear();
81 self.matches = self.resolve_location_fuzzy();
82 }
83 self.matches.clone()
84 }
85
86 fn resolve_location_fuzzy(&mut self) -> Vec<Range<usize>> {
87 let new_query_line_count = self.query_lines.len();
88 let old_query_line_count = self.matrix.rows.saturating_sub(1);
89 if new_query_line_count == old_query_line_count {
90 return Vec::new();
91 }
92
93 self.matrix.resize_rows(new_query_line_count + 1);
94
95 // Process only the new query lines
96 for row in old_query_line_count..new_query_line_count {
97 let query_line = self.query_lines[row].trim();
98 let leading_deletion_cost = (row + 1) as u32 * DELETION_COST;
99
100 self.matrix.set(
101 row + 1,
102 0,
103 SearchState::new(leading_deletion_cost, SearchDirection::Up),
104 );
105
106 let mut buffer_lines = self.snapshot.as_rope().chunks().lines();
107 let mut col = 0;
108 while let Some(buffer_line) = buffer_lines.next() {
109 let buffer_line = buffer_line.trim();
110 let up = SearchState::new(
111 self.matrix
112 .get(row, col + 1)
113 .cost
114 .saturating_add(DELETION_COST),
115 SearchDirection::Up,
116 );
117 let left = SearchState::new(
118 self.matrix
119 .get(row + 1, col)
120 .cost
121 .saturating_add(INSERTION_COST),
122 SearchDirection::Left,
123 );
124 let diagonal = SearchState::new(
125 if query_line == buffer_line {
126 self.matrix.get(row, col).cost
127 } else if fuzzy_eq(query_line, buffer_line) {
128 self.matrix.get(row, col).cost + REPLACEMENT_COST
129 } else {
130 self.matrix
131 .get(row, col)
132 .cost
133 .saturating_add(DELETION_COST + INSERTION_COST)
134 },
135 SearchDirection::Diagonal,
136 );
137 self.matrix
138 .set(row + 1, col + 1, up.min(left).min(diagonal));
139 col += 1;
140 }
141 }
142
143 // Find all matches with the best cost
144 let buffer_line_count = self.snapshot.max_point().row as usize + 1;
145 let mut best_cost = u32::MAX;
146 let mut matches_with_best_cost = Vec::new();
147
148 for col in 1..=buffer_line_count {
149 let cost = self.matrix.get(new_query_line_count, col).cost;
150 if cost < best_cost {
151 best_cost = cost;
152 matches_with_best_cost.clear();
153 matches_with_best_cost.push(col as u32);
154 } else if cost == best_cost {
155 matches_with_best_cost.push(col as u32);
156 }
157 }
158
159 // Find ranges for the matches
160 let mut valid_matches = Vec::new();
161 for &buffer_row_end in &matches_with_best_cost {
162 let mut matched_lines = 0;
163 let mut query_row = new_query_line_count;
164 let mut buffer_row_start = buffer_row_end;
165 while query_row > 0 && buffer_row_start > 0 {
166 let current = self.matrix.get(query_row, buffer_row_start as usize);
167 match current.direction {
168 SearchDirection::Diagonal => {
169 query_row -= 1;
170 buffer_row_start -= 1;
171 matched_lines += 1;
172 }
173 SearchDirection::Up => {
174 query_row -= 1;
175 }
176 SearchDirection::Left => {
177 buffer_row_start -= 1;
178 }
179 }
180 }
181
182 let matched_buffer_row_count = buffer_row_end - buffer_row_start;
183 let matched_ratio = matched_lines as f32
184 / (matched_buffer_row_count as f32).max(new_query_line_count as f32);
185 if matched_ratio >= 0.8 {
186 let buffer_start_ix = self
187 .snapshot
188 .point_to_offset(Point::new(buffer_row_start, 0));
189 let buffer_end_ix = self.snapshot.point_to_offset(Point::new(
190 buffer_row_end - 1,
191 self.snapshot.line_len(buffer_row_end - 1),
192 ));
193 valid_matches.push((buffer_row_start, buffer_start_ix..buffer_end_ix));
194 }
195 }
196
197 valid_matches.into_iter().map(|(_, range)| range).collect()
198 }
199
200 /// Return the best match with starting position close enough to line_hint.
201 pub fn select_best_match(&self) -> Option<Range<usize>> {
202 // Allow line hint to be off by that many lines.
203 // Higher values increase probability of applying edits to a wrong place,
204 // Lower values increase edits failures and overall conversation length.
205 const LINE_HINT_TOLERANCE: u32 = 200;
206
207 if self.matches.is_empty() {
208 return None;
209 }
210
211 if self.matches.len() == 1 {
212 return self.matches.first().cloned();
213 }
214
215 let Some(line_hint) = self.line_hint else {
216 // Multiple ambiguous matches
217 return None;
218 };
219
220 let mut best_match = None;
221 let mut best_distance = u32::MAX;
222
223 for range in &self.matches {
224 let start_point = self.snapshot.offset_to_point(range.start);
225 let start_line = start_point.row;
226 let distance = start_line.abs_diff(line_hint);
227
228 if distance <= LINE_HINT_TOLERANCE && distance < best_distance {
229 best_distance = distance;
230 best_match = Some(range.clone());
231 }
232 }
233
234 best_match
235 }
236}
237
238fn fuzzy_eq(left: &str, right: &str) -> bool {
239 const THRESHOLD: f64 = 0.8;
240
241 let min_levenshtein = left.len().abs_diff(right.len());
242 let min_normalized_levenshtein =
243 1. - (min_levenshtein as f64 / cmp::max(left.len(), right.len()) as f64);
244 if min_normalized_levenshtein < THRESHOLD {
245 return false;
246 }
247
248 strsim::normalized_levenshtein(left, right) >= THRESHOLD
249}
250
/// The step taken to reach a cell of the search matrix.
///
/// Variant order matters: `SearchState` derives `Ord` with `cost` first and
/// `direction` second, so among states of equal cost `min` prefers `Up`, then
/// `Left`, then `Diagonal`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum SearchDirection {
    /// Reached from the previous row, same column.
    Up,
    /// Reached from the same row, previous column.
    Left,
    /// Reached from the previous row and previous column.
    Diagonal,
}
257
258#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
259struct SearchState {
260 cost: u32,
261 direction: SearchDirection,
262}
263
264impl SearchState {
265 fn new(cost: u32, direction: SearchDirection) -> Self {
266 Self { cost, direction }
267 }
268}
269
270struct SearchMatrix {
271 cols: usize,
272 rows: usize,
273 data: Vec<SearchState>,
274}
275
276impl SearchMatrix {
277 fn new(cols: usize) -> Self {
278 SearchMatrix {
279 cols,
280 rows: 0,
281 data: Vec::new(),
282 }
283 }
284
285 fn resize_rows(&mut self, needed_rows: usize) {
286 debug_assert!(needed_rows > self.rows);
287 self.rows = needed_rows;
288 self.data.resize(
289 self.rows * self.cols,
290 SearchState::new(0, SearchDirection::Diagonal),
291 );
292 }
293
294 fn get(&self, row: usize, col: usize) -> SearchState {
295 debug_assert!(row < self.rows && col < self.cols);
296 self.data[row * self.cols + col]
297 }
298
299 fn set(&mut self, row: usize, col: usize, state: SearchState) {
300 debug_assert!(row < self.rows && col < self.cols);
301 self.data[row * self.cols + col] = state;
302 }
303}
304
305#[cfg(test)]
306mod tests {
307 use super::*;
308 use indoc::indoc;
309 use language::{BufferId, TextBuffer};
310 use rand::prelude::*;
311 use util::test::{generate_marked_text, marked_text_ranges};
312
313 #[test]
314 fn test_empty_query() {
315 let buffer = TextBuffer::new(
316 0,
317 BufferId::new(1).unwrap(),
318 "Hello world\nThis is a test\nFoo bar baz",
319 );
320 let snapshot = buffer.snapshot();
321
322 let mut finder = StreamingFuzzyMatcher::new(snapshot);
323 assert_eq!(push(&mut finder, ""), None);
324 assert_eq!(finish(finder), None);
325 }
326
327 #[test]
328 fn test_streaming_exact_match() {
329 let buffer = TextBuffer::new(
330 0,
331 BufferId::new(1).unwrap(),
332 "Hello world\nThis is a test\nFoo bar baz",
333 );
334 let snapshot = buffer.snapshot();
335
336 let mut finder = StreamingFuzzyMatcher::new(snapshot);
337
338 // Push partial query
339 assert_eq!(push(&mut finder, "This"), None);
340
341 // Complete the line
342 assert_eq!(
343 push(&mut finder, " is a test\n"),
344 Some("This is a test".to_string())
345 );
346
347 // Finish should return the same result
348 assert_eq!(finish(finder), Some("This is a test".to_string()));
349 }
350
    /// Streams a query whose lines differ slightly from the buffer (`c` where
    /// the buffer has `b`) and checks the buffer text reported after each push.
    #[test]
    fn test_streaming_fuzzy_match() {
        let buffer = TextBuffer::new(
            0,
            BufferId::new(1).unwrap(),
            indoc! {"
                function foo(a, b) {
                    return a + b;
                }

                function bar(x, y) {
                    return x * y;
                }
            "},
        );
        let snapshot = buffer.snapshot();

        let mut finder = StreamingFuzzyMatcher::new(snapshot);

        // Push a fuzzy query that should match the first function
        assert_eq!(
            push(&mut finder, "function foo(a, c) {\n").as_deref(),
            Some("function foo(a, b) {")
        );
        // The reported text is the buffer's own content, not the query's.
        assert_eq!(
            push(&mut finder, "    return a + c;\n}\n").as_deref(),
            Some(concat!(
                "function foo(a, b) {\n",
                "    return a + b;\n",
                "}"
            ))
        );
    }
384
385 #[test]
386 fn test_incremental_improvement() {
387 let buffer = TextBuffer::new(
388 0,
389 BufferId::new(1).unwrap(),
390 "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
391 );
392 let snapshot = buffer.snapshot();
393
394 let mut finder = StreamingFuzzyMatcher::new(snapshot);
395
396 // No match initially
397 assert_eq!(push(&mut finder, "Lin"), None);
398
399 // Get a match when we complete a line
400 assert_eq!(push(&mut finder, "e 3\n"), Some("Line 3".to_string()));
401
402 // The match might change if we add more specific content
403 assert_eq!(
404 push(&mut finder, "Line 4\n"),
405 Some("Line 3\nLine 4".to_string())
406 );
407 assert_eq!(finish(finder), Some("Line 3\nLine 4".to_string()));
408 }
409
410 #[test]
411 fn test_incomplete_lines_buffering() {
412 let buffer = TextBuffer::new(
413 0,
414 BufferId::new(1).unwrap(),
415 indoc! {"
416 The quick brown fox
417 jumps over the lazy dog
418 Pack my box with five dozen liquor jugs
419 "},
420 );
421 let snapshot = buffer.snapshot();
422
423 let mut finder = StreamingFuzzyMatcher::new(snapshot);
424
425 // Push text in small chunks across line boundaries
426 assert_eq!(push(&mut finder, "jumps "), None); // No newline yet
427 assert_eq!(push(&mut finder, "over the"), None); // Still no newline
428 assert_eq!(push(&mut finder, " lazy"), None); // Still incomplete
429
430 // Complete the line
431 assert_eq!(
432 push(&mut finder, " dog\n"),
433 Some("jumps over the lazy dog".to_string())
434 );
435 }
436
    /// Streams a multi-line query against a buffer that has two similar `impl`
    /// blocks and checks that the reported text grows line by line inside the
    /// `Debug` impl, even where the query differs from the buffer.
    #[test]
    fn test_multiline_fuzzy_match() {
        let buffer = TextBuffer::new(
            0,
            BufferId::new(1).unwrap(),
            indoc! {r#"
                impl Display for User {
                    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
                        write!(f, "User: {} ({})", self.name, self.email)
                    }
                }

                impl Debug for User {
                    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
                        f.debug_struct("User")
                            .field("name", &self.name)
                            .field("email", &self.email)
                            .finish()
                    }
                }
            "#},
        );
        let snapshot = buffer.snapshot();

        let mut finder = StreamingFuzzyMatcher::new(snapshot);

        assert_eq!(
            push(&mut finder, "impl Debug for User {\n"),
            Some("impl Debug for User {".to_string())
        );
        // Query says `Result`, buffer says `fmt::Result`.
        assert_eq!(
            push(
                &mut finder,
                "    fn fmt(&self, f: &mut Formatter) -> Result {\n"
            )
            .as_deref(),
            Some(concat!(
                "impl Debug for User {\n",
                "    fn fmt(&self, f: &mut Formatter) -> fmt::Result {"
            ))
        );
        assert_eq!(
            push(&mut finder, "        f.debug_struct(\"User\")\n").as_deref(),
            Some(concat!(
                "impl Debug for User {\n",
                "    fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
                "        f.debug_struct(\"User\")"
            ))
        );
        // Query says `self.username`, buffer says `self.name`.
        assert_eq!(
            push(
                &mut finder,
                "            .field(\"name\", &self.username)\n"
            )
            .as_deref(),
            Some(concat!(
                "impl Debug for User {\n",
                "    fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
                "        f.debug_struct(\"User\")\n",
                "            .field(\"name\", &self.name)"
            ))
        );
        // Nothing is buffered at this point, so `finish` repeats the last result.
        assert_eq!(
            finish(finder).as_deref(),
            Some(concat!(
                "impl Debug for User {\n",
                "    fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
                "        f.debug_struct(\"User\")\n",
                "            .field(\"name\", &self.name)"
            ))
        );
    }
509
    /// A one-line query resolves to the whole buffer line that contains it;
    /// the expected range is marked with `«…»` in the fixture.
    #[gpui::test(iterations = 100)]
    fn test_resolve_location_single_line(mut rng: StdRng) {
        assert_location_resolution(
            concat!(
                " Lorem\n",
                "« ipsum»\n",
                " dolor sit amet\n",
                " consecteur",
            ),
            "ipsum",
            &mut rng,
        );
    }
523
    /// A two-line query resolves to the two consecutive buffer lines marked
    /// with `«…»` in the fixture.
    #[gpui::test(iterations = 100)]
    fn test_resolve_location_multiline(mut rng: StdRng) {
        assert_location_resolution(
            concat!(
                " Lorem\n",
                "« ipsum\n",
                " dolor sit amet»\n",
                " consecteur",
            ),
            "ipsum\ndolor sit amet",
            &mut rng,
        );
    }
537
    /// A query whose first line differs from the buffer (`u32` instead of
    /// `usize`) still resolves to the marked function.
    #[gpui::test(iterations = 100)]
    fn test_resolve_location_function_with_typo(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {"
                «fn foo1(a: usize) -> usize {
                    40
                }»

                fn foo2(b: usize) -> usize {
                    42
                }
            "},
            "fn foo1(a: usize) -> u32 {\n40\n}",
            &mut rng,
        );
    }
554
    /// A query that omits one buffer line (`three()`) still resolves to the
    /// marked range, which includes the omitted line.
    #[gpui::test(iterations = 100)]
    fn test_resolve_location_class_methods(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {"
                class Something {
                    one() { return 1; }
                «    two() { return 2222; }
                    three() { return 333; }
                    four() { return 4444; }
                    five() { return 5555; }
                    six() { return 6666; }»
                    seven() { return 7; }
                    eight() { return 8; }
                }
            "},
            indoc! {"
                two() { return 2222; }
                four() { return 4444; }
                five() { return 5555; }
                six() { return 6666; }
            "},
            &mut rng,
        );
    }
579
    /// The fixture carries no `«…»` markers, so the helper asserts that this
    /// query produces no match at all.
    #[gpui::test(iterations = 100)]
    fn test_resolve_location_imports_no_match(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {"
                use std::ops::Range;
                use std::sync::Mutex;
                use std::{
                    collections::HashMap,
                    env,
                    ffi::{OsStr, OsString},
                    fs,
                    io::{BufRead, BufReader},
                    mem,
                    path::{Path, PathBuf},
                    process::Command,
                    sync::LazyLock,
                    time::SystemTime,
                };
            "},
            indoc! {"
                use std::collections::{HashMap, HashSet};
                use std::ffi::{OsStr, OsString};
                use std::fmt::Write as _;
                use std::fs;
                use std::io::{BufReader, Read, Write};
                use std::mem;
                use std::path::{Path, PathBuf};
                use std::process::Command;
                use std::sync::Arc;
            "},
            &mut rng,
        );
    }
613
    /// The fixture carries no `«…»` markers, so the helper asserts that a
    /// query skipping most lines of the closure body produces no match.
    #[gpui::test(iterations = 100)]
    fn test_resolve_location_nested_closure(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {"
                impl Foo {
                    fn new() -> Self {
                        Self {
                            subscriptions: vec![
                                cx.observe_window_activation(window, |editor, window, cx| {
                                    let active = window.is_window_active();
                                    editor.blink_manager.update(cx, |blink_manager, cx| {
                                        if active {
                                            blink_manager.enable(cx);
                                        } else {
                                            blink_manager.disable(cx);
                                        }
                                    });
                                }),
                            ];
                        }
                    }
                }
            "},
            concat!(
                "                    editor.blink_manager.update(cx, |blink_manager, cx| {\n",
                "                        blink_manager.enable(cx);\n",
                "                    });",
            ),
            &mut rng,
        );
    }
645
    /// The fixture carries no `«…»` markers, so the helper asserts that a
    /// query whose last line does not appear in the buffer produces no match.
    #[gpui::test(iterations = 100)]
    fn test_resolve_location_tool_invocation(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {r#"
                let tool = cx
                    .update(|cx| working_set.tool(&tool_name, cx))
                    .map_err(|err| {
                        anyhow!("Failed to look up tool '{}': {}", tool_name, err)
                    })?;

                let Some(tool) = tool else {
                    return Err(anyhow!("Tool '{}' not found", tool_name));
                };

                let project = project.clone();
                let action_log = action_log.clone();
                let messages = messages.clone();
                let tool_result = cx
                    .update(|cx| tool.run(invocation.input, &messages, project, action_log, cx))
                    .map_err(|err| anyhow!("Failed to start tool '{}': {}", tool_name, err))?;

                tasks.push(tool_result.output);
            "#},
            concat!(
                "let tool_result = cx\n",
                "    .update(|cx| tool.run(invocation.input, &messages, project, action_log, cx))\n",
                "    .output;",
            ),
            &mut rng,
        );
    }
677
    /// When the same line occurs three times, a line hint selects the nearest
    /// occurrence; without a hint `select_best_match` returns `None`.
    #[gpui::test]
    fn test_line_hint_selection() {
        let text = indoc! {r#"
            fn first_function() {
                return 42;
            }

            fn second_function() {
                return 42;
            }

            fn third_function() {
                return 42;
            }
        "#};

        let buffer = TextBuffer::new(0, BufferId::new(1).unwrap(), text.to_string());
        let snapshot = buffer.snapshot();
        let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone());

        // Given a query that matches all three functions
        let query = "return 42;\n";

        // Test with line hint pointing to second function (around line 5)
        let best_match = matcher.push(query, Some(5)).expect("Failed to match query");

        let matched_text = snapshot
            .text_for_range(best_match.clone())
            .collect::<String>();
        assert!(matched_text.contains("return 42;"));
        // 63..77 is the whole `return 42;` line (including its indentation)
        // inside `second_function`.
        assert_eq!(
            best_match,
            63..77,
            "Expected to match `second_function` based on the line hint"
        );

        // Same query on a fresh matcher, this time without any hint.
        let mut matcher = StreamingFuzzyMatcher::new(snapshot);
        matcher.push(query, None);
        matcher.finish();
        let best_match = matcher.select_best_match();
        assert!(
            best_match.is_none(),
            "Best match should be None when query cannot be uniquely resolved"
        );
    }
723
724 #[track_caller]
725 fn assert_location_resolution(text_with_expected_range: &str, query: &str, rng: &mut StdRng) {
726 let (text, expected_ranges) = marked_text_ranges(text_with_expected_range, false);
727 let buffer = TextBuffer::new(0, BufferId::new(1).unwrap(), text.clone());
728 let snapshot = buffer.snapshot();
729
730 let mut matcher = StreamingFuzzyMatcher::new(snapshot);
731
732 // Split query into random chunks
733 let chunks = to_random_chunks(rng, query);
734
735 // Push chunks incrementally
736 for chunk in &chunks {
737 matcher.push(chunk, None);
738 }
739
740 let actual_ranges = matcher.finish();
741
742 // If no expected ranges, we expect no match
743 if expected_ranges.is_empty() {
744 assert!(
745 actual_ranges.is_empty(),
746 "Expected no match for query: {:?}, but found: {:?}",
747 query,
748 actual_ranges
749 );
750 } else {
751 let text_with_actual_range = generate_marked_text(&text, &actual_ranges, false);
752 pretty_assertions::assert_eq!(
753 text_with_actual_range,
754 text_with_expected_range,
755 indoc! {"
756 Query: {:?}
757 Chunks: {:?}
758 Expected marked text: {}
759 Actual marked text: {}
760 Expected ranges: {:?}
761 Actual ranges: {:?}"
762 },
763 query,
764 chunks,
765 text_with_expected_range,
766 text_with_actual_range,
767 expected_ranges,
768 actual_ranges
769 );
770 }
771 }
772
773 fn to_random_chunks(rng: &mut StdRng, input: &str) -> Vec<String> {
774 let chunk_count = rng.random_range(1..=cmp::min(input.len(), 50));
775 let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
776 chunk_indices.sort();
777 chunk_indices.push(input.len());
778
779 let mut chunks = Vec::new();
780 let mut last_ix = 0;
781 for chunk_ix in chunk_indices {
782 chunks.push(input[last_ix..chunk_ix].to_string());
783 last_ix = chunk_ix;
784 }
785 chunks
786 }
787
788 fn push(finder: &mut StreamingFuzzyMatcher, chunk: &str) -> Option<String> {
789 finder
790 .push(chunk, None)
791 .map(|range| finder.snapshot.text_for_range(range).collect::<String>())
792 }
793
794 fn finish(mut finder: StreamingFuzzyMatcher) -> Option<String> {
795 let snapshot = finder.snapshot.clone();
796 let matches = finder.finish();
797 matches
798 .first()
799 .map(|range| snapshot.text_for_range(range.clone()).collect::<String>())
800 }
801}