1use language::{Point, TextBufferSnapshot};
2use std::{cmp, ops::Range};
3
// Edit costs used when aligning query lines against buffer lines.

/// Cost added on a diagonal step when the query line and the buffer line are
/// not identical but are considered similar by `fuzzy_eq`.
const REPLACEMENT_COST: u32 = 1;
/// Cost added on a "left" step, i.e. when a buffer line is consumed without
/// consuming a query line.
const INSERTION_COST: u32 = 3;
/// Cost added on an "up" step, i.e. when a query line is consumed without
/// consuming a buffer line.
const DELETION_COST: u32 = 10;
7
/// A streaming fuzzy matcher that can process text chunks incrementally
/// and return the best match found so far at each step.
///
/// The query is compared against the buffer line by line; leading and
/// trailing whitespace of each line is ignored during comparison.
pub struct StreamingFuzzyMatcher {
    /// Snapshot of the buffer the query is matched against.
    snapshot: TextBufferSnapshot,
    /// Complete query lines received so far, in the order they arrived.
    query_lines: Vec<String>,
    /// The line hint passed to the most recent `push` call.
    line_hint: Option<u32>,
    /// Text received after the last newline; held back until its line is
    /// completed or `finish` is called.
    incomplete_line: String,
    /// Offset ranges in `snapshot` produced by the most recent resolution.
    matches: Vec<Range<usize>>,
    /// Alignment matrix; it grows by one row per processed query line.
    matrix: SearchMatrix,
}
18
19impl StreamingFuzzyMatcher {
20 pub fn new(snapshot: TextBufferSnapshot) -> Self {
21 let buffer_line_count = snapshot.max_point().row as usize + 1;
22 Self {
23 snapshot,
24 query_lines: Vec::new(),
25 line_hint: None,
26 incomplete_line: String::new(),
27 matches: Vec::new(),
28 matrix: SearchMatrix::new(buffer_line_count + 1),
29 }
30 }
31
32 /// Returns the query lines.
33 pub fn query_lines(&self) -> &[String] {
34 &self.query_lines
35 }
36
37 /// Push a new chunk of text and get the best match found so far.
38 ///
39 /// This method accumulates text chunks and processes complete lines.
40 /// Partial lines are buffered internally until a newline is received.
41 ///
42 /// # Returns
43 ///
44 /// Returns `Some(range)` if a match has been found with the accumulated
45 /// query so far, or `None` if no suitable match exists yet.
46 pub fn push(&mut self, chunk: &str, line_hint: Option<u32>) -> Option<Range<usize>> {
47 // Add the chunk to our incomplete line buffer
48 self.incomplete_line.push_str(chunk);
49 self.line_hint = line_hint;
50
51 if let Some((last_pos, _)) = self.incomplete_line.match_indices('\n').next_back() {
52 let complete_part = &self.incomplete_line[..=last_pos];
53
54 // Split into lines and add to query_lines
55 for line in complete_part.lines() {
56 self.query_lines.push(line.to_string());
57 }
58
59 self.incomplete_line.replace_range(..last_pos + 1, "");
60
61 self.matches = self.resolve_location_fuzzy();
62 }
63
64 let best_match = self.select_best_match();
65 best_match.or_else(|| self.matches.first().cloned())
66 }
67
68 /// Finish processing and return the final best match(es).
69 ///
70 /// This processes any remaining incomplete line before returning the final
71 /// match result.
72 pub fn finish(&mut self) -> Vec<Range<usize>> {
73 // Process any remaining incomplete line
74 if !self.incomplete_line.is_empty() {
75 self.query_lines.push(self.incomplete_line.clone());
76 self.incomplete_line.clear();
77 self.matches = self.resolve_location_fuzzy();
78 }
79 self.matches.clone()
80 }
81
82 fn resolve_location_fuzzy(&mut self) -> Vec<Range<usize>> {
83 let new_query_line_count = self.query_lines.len();
84 let old_query_line_count = self.matrix.rows.saturating_sub(1);
85 if new_query_line_count == old_query_line_count {
86 return Vec::new();
87 }
88
89 self.matrix.resize_rows(new_query_line_count + 1);
90
91 // Process only the new query lines
92 for row in old_query_line_count..new_query_line_count {
93 let query_line = self.query_lines[row].trim();
94 let leading_deletion_cost = (row + 1) as u32 * DELETION_COST;
95
96 self.matrix.set(
97 row + 1,
98 0,
99 SearchState::new(leading_deletion_cost, SearchDirection::Up),
100 );
101
102 let mut buffer_lines = self.snapshot.as_rope().chunks().lines();
103 let mut col = 0;
104 while let Some(buffer_line) = buffer_lines.next() {
105 let buffer_line = buffer_line.trim();
106 let up = SearchState::new(
107 self.matrix
108 .get(row, col + 1)
109 .cost
110 .saturating_add(DELETION_COST),
111 SearchDirection::Up,
112 );
113 let left = SearchState::new(
114 self.matrix
115 .get(row + 1, col)
116 .cost
117 .saturating_add(INSERTION_COST),
118 SearchDirection::Left,
119 );
120 let diagonal = SearchState::new(
121 if query_line == buffer_line {
122 self.matrix.get(row, col).cost
123 } else if fuzzy_eq(query_line, buffer_line) {
124 self.matrix.get(row, col).cost + REPLACEMENT_COST
125 } else {
126 self.matrix
127 .get(row, col)
128 .cost
129 .saturating_add(DELETION_COST + INSERTION_COST)
130 },
131 SearchDirection::Diagonal,
132 );
133 self.matrix
134 .set(row + 1, col + 1, up.min(left).min(diagonal));
135 col += 1;
136 }
137 }
138
139 // Find all matches with the best cost
140 let buffer_line_count = self.snapshot.max_point().row as usize + 1;
141 let mut best_cost = u32::MAX;
142 let mut matches_with_best_cost = Vec::new();
143
144 for col in 1..=buffer_line_count {
145 let cost = self.matrix.get(new_query_line_count, col).cost;
146 if cost < best_cost {
147 best_cost = cost;
148 matches_with_best_cost.clear();
149 matches_with_best_cost.push(col as u32);
150 } else if cost == best_cost {
151 matches_with_best_cost.push(col as u32);
152 }
153 }
154
155 // Find ranges for the matches
156 let mut valid_matches = Vec::new();
157 for &buffer_row_end in &matches_with_best_cost {
158 let mut matched_lines = 0;
159 let mut query_row = new_query_line_count;
160 let mut buffer_row_start = buffer_row_end;
161 while query_row > 0 && buffer_row_start > 0 {
162 let current = self.matrix.get(query_row, buffer_row_start as usize);
163 match current.direction {
164 SearchDirection::Diagonal => {
165 query_row -= 1;
166 buffer_row_start -= 1;
167 matched_lines += 1;
168 }
169 SearchDirection::Up => {
170 query_row -= 1;
171 }
172 SearchDirection::Left => {
173 buffer_row_start -= 1;
174 }
175 }
176 }
177
178 let matched_buffer_row_count = buffer_row_end - buffer_row_start;
179 let matched_ratio = matched_lines as f32
180 / (matched_buffer_row_count as f32).max(new_query_line_count as f32);
181 if matched_ratio >= 0.8 {
182 let buffer_start_ix = self
183 .snapshot
184 .point_to_offset(Point::new(buffer_row_start, 0));
185 let buffer_end_ix = self.snapshot.point_to_offset(Point::new(
186 buffer_row_end - 1,
187 self.snapshot.line_len(buffer_row_end - 1),
188 ));
189 valid_matches.push((buffer_row_start, buffer_start_ix..buffer_end_ix));
190 }
191 }
192
193 valid_matches.into_iter().map(|(_, range)| range).collect()
194 }
195
196 /// Return the best match with starting position close enough to line_hint.
197 pub fn select_best_match(&self) -> Option<Range<usize>> {
198 // Allow line hint to be off by that many lines.
199 // Higher values increase probability of applying edits to a wrong place,
200 // Lower values increase edits failures and overall conversation length.
201 const LINE_HINT_TOLERANCE: u32 = 200;
202
203 if self.matches.is_empty() {
204 return None;
205 }
206
207 if self.matches.len() == 1 {
208 return self.matches.first().cloned();
209 }
210
211 let Some(line_hint) = self.line_hint else {
212 // Multiple ambiguous matches
213 return None;
214 };
215
216 let mut best_match = None;
217 let mut best_distance = u32::MAX;
218
219 for range in &self.matches {
220 let start_point = self.snapshot.offset_to_point(range.start);
221 let start_line = start_point.row;
222 let distance = start_line.abs_diff(line_hint);
223
224 if distance <= LINE_HINT_TOLERANCE && distance < best_distance {
225 best_distance = distance;
226 best_match = Some(range.clone());
227 }
228 }
229
230 best_match
231 }
232}
233
234fn fuzzy_eq(left: &str, right: &str) -> bool {
235 const THRESHOLD: f64 = 0.8;
236
237 let min_levenshtein = left.len().abs_diff(right.len());
238 let min_normalized_levenshtein =
239 1. - (min_levenshtein as f64 / cmp::max(left.len(), right.len()) as f64);
240 if min_normalized_levenshtein < THRESHOLD {
241 return false;
242 }
243
244 strsim::normalized_levenshtein(left, right) >= THRESHOLD
245}
246
/// The step that led to a cell of the alignment matrix.
///
/// The variant order matters: `SearchState` derives `Ord` with `cost` first
/// and `direction` second, so when several candidate states share the same
/// cost, `min` prefers `Up`, then `Left`, then `Diagonal`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum SearchDirection {
    /// Came from the previous query row in the same buffer column.
    Up,
    /// Came from the previous buffer column in the same query row.
    Left,
    /// Came from the previous query row and the previous buffer column.
    Diagonal,
}
253
/// One cell of the alignment matrix.
///
/// The derived ordering compares `cost` first and `direction` second.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct SearchState {
    /// Accumulated cost of the alignment ending in this cell.
    cost: u32,
    /// The step through which this cell was reached.
    direction: SearchDirection,
}
259
260impl SearchState {
261 fn new(cost: u32, direction: SearchDirection) -> Self {
262 Self { cost, direction }
263 }
264}
265
/// A row-major matrix of `SearchState` cells with a fixed number of columns
/// and a growable number of rows.
struct SearchMatrix {
    /// Number of columns; set at construction.
    cols: usize,
    /// Number of rows currently allocated.
    rows: usize,
    /// Cell storage; the cell at (`row`, `col`) lives at `row * cols + col`.
    data: Vec<SearchState>,
}
271
272impl SearchMatrix {
273 fn new(cols: usize) -> Self {
274 SearchMatrix {
275 cols,
276 rows: 0,
277 data: Vec::new(),
278 }
279 }
280
281 fn resize_rows(&mut self, needed_rows: usize) {
282 debug_assert!(needed_rows > self.rows);
283 self.rows = needed_rows;
284 self.data.resize(
285 self.rows * self.cols,
286 SearchState::new(0, SearchDirection::Diagonal),
287 );
288 }
289
290 fn get(&self, row: usize, col: usize) -> SearchState {
291 debug_assert!(row < self.rows && col < self.cols);
292 self.data[row * self.cols + col]
293 }
294
295 fn set(&mut self, row: usize, col: usize, state: SearchState) {
296 debug_assert!(row < self.rows && col < self.cols);
297 self.data[row * self.cols + col] = state;
298 }
299}
300
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;
    use language::{BufferId, TextBuffer};
    use rand::prelude::*;
    use util::test::{generate_marked_text, marked_text_ranges};

    /// An empty query never produces a match.
    #[test]
    fn test_empty_query() {
        let buffer = TextBuffer::new(
            0,
            BufferId::new(1).unwrap(),
            "Hello world\nThis is a test\nFoo bar baz",
        );
        let snapshot = buffer.snapshot();

        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
        assert_eq!(push(&mut finder, ""), None);
        assert_eq!(finish(finder), None);
    }

    /// A line is only matched once its terminating newline has arrived.
    #[test]
    fn test_streaming_exact_match() {
        let buffer = TextBuffer::new(
            0,
            BufferId::new(1).unwrap(),
            "Hello world\nThis is a test\nFoo bar baz",
        );
        let snapshot = buffer.snapshot();

        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());

        // Push partial query
        assert_eq!(push(&mut finder, "This"), None);

        // Complete the line
        assert_eq!(
            push(&mut finder, " is a test\n"),
            Some("This is a test".to_string())
        );

        // Finish should return the same result
        assert_eq!(finish(finder), Some("This is a test".to_string()));
    }

    /// Lines that differ slightly from the buffer still match.
    #[test]
    fn test_streaming_fuzzy_match() {
        let buffer = TextBuffer::new(
            0,
            BufferId::new(1).unwrap(),
            indoc! {"
                function foo(a, b) {
                return a + b;
                }

                function bar(x, y) {
                return x * y;
                }
            "},
        );
        let snapshot = buffer.snapshot();

        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());

        // Push a fuzzy query that should match the first function
        assert_eq!(
            push(&mut finder, "function foo(a, c) {\n").as_deref(),
            Some("function foo(a, b) {")
        );
        assert_eq!(
            push(&mut finder, " return a + c;\n}\n").as_deref(),
            Some(concat!(
                "function foo(a, b) {\n",
                " return a + b;\n",
                "}"
            ))
        );
    }

    /// The matched range grows as more query lines arrive.
    #[test]
    fn test_incremental_improvement() {
        let buffer = TextBuffer::new(
            0,
            BufferId::new(1).unwrap(),
            "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
        );
        let snapshot = buffer.snapshot();

        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());

        // No match initially
        assert_eq!(push(&mut finder, "Lin"), None);

        // Get a match when we complete a line
        assert_eq!(push(&mut finder, "e 3\n"), Some("Line 3".to_string()));

        // The match might change if we add more specific content
        assert_eq!(
            push(&mut finder, "Line 4\n"),
            Some("Line 3\nLine 4".to_string())
        );
        assert_eq!(finish(finder), Some("Line 3\nLine 4".to_string()));
    }

    /// Chunks without a newline are buffered and yield no match on their own.
    #[test]
    fn test_incomplete_lines_buffering() {
        let buffer = TextBuffer::new(
            0,
            BufferId::new(1).unwrap(),
            indoc! {"
                The quick brown fox
                jumps over the lazy dog
                Pack my box with five dozen liquor jugs
            "},
        );
        let snapshot = buffer.snapshot();

        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());

        // Push text in small chunks across line boundaries
        assert_eq!(push(&mut finder, "jumps "), None); // No newline yet
        assert_eq!(push(&mut finder, "over the"), None); // Still no newline
        assert_eq!(push(&mut finder, " lazy"), None); // Still incomplete

        // Complete the line
        assert_eq!(
            push(&mut finder, " dog\n"),
            Some("jumps over the lazy dog".to_string())
        );
    }

    /// A multi-line query with small deviations tracks the right block.
    #[test]
    fn test_multiline_fuzzy_match() {
        let buffer = TextBuffer::new(
            0,
            BufferId::new(1).unwrap(),
            indoc! {r#"
                impl Display for User {
                fn fmt(&self, f: &mut Formatter) -> fmt::Result {
                write!(f, "User: {} ({})", self.name, self.email)
                }
                }

                impl Debug for User {
                fn fmt(&self, f: &mut Formatter) -> fmt::Result {
                f.debug_struct("User")
                .field("name", &self.name)
                .field("email", &self.email)
                .finish()
                }
                }
            "#},
        );
        let snapshot = buffer.snapshot();

        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());

        assert_eq!(
            push(&mut finder, "impl Debug for User {\n"),
            Some("impl Debug for User {".to_string())
        );
        assert_eq!(
            push(
                &mut finder,
                " fn fmt(&self, f: &mut Formatter) -> Result {\n"
            )
            .as_deref(),
            Some(concat!(
                "impl Debug for User {\n",
                " fn fmt(&self, f: &mut Formatter) -> fmt::Result {"
            ))
        );
        assert_eq!(
            push(&mut finder, " f.debug_struct(\"User\")\n").as_deref(),
            Some(concat!(
                "impl Debug for User {\n",
                " fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
                " f.debug_struct(\"User\")"
            ))
        );
        assert_eq!(
            push(
                &mut finder,
                " .field(\"name\", &self.username)\n"
            )
            .as_deref(),
            Some(concat!(
                "impl Debug for User {\n",
                " fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
                " f.debug_struct(\"User\")\n",
                " .field(\"name\", &self.name)"
            ))
        );
        assert_eq!(
            finish(finder).as_deref(),
            Some(concat!(
                "impl Debug for User {\n",
                " fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
                " f.debug_struct(\"User\")\n",
                " .field(\"name\", &self.name)"
            ))
        );
    }

    #[gpui::test(iterations = 100)]
    fn test_resolve_location_single_line(mut rng: StdRng) {
        assert_location_resolution(
            concat!(
                " Lorem\n",
                "« ipsum»\n",
                " dolor sit amet\n",
                " consecteur",
            ),
            "ipsum",
            &mut rng,
        );
    }

    #[gpui::test(iterations = 100)]
    fn test_resolve_location_multiline(mut rng: StdRng) {
        assert_location_resolution(
            concat!(
                " Lorem\n",
                "« ipsum\n",
                " dolor sit amet»\n",
                " consecteur",
            ),
            "ipsum\ndolor sit amet",
            &mut rng,
        );
    }

    #[gpui::test(iterations = 100)]
    fn test_resolve_location_function_with_typo(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {"
                «fn foo1(a: usize) -> usize {
                40
                }»

                fn foo2(b: usize) -> usize {
                42
                }
            "},
            "fn foo1(a: usize) -> u32 {\n40\n}",
            &mut rng,
        );
    }

    #[gpui::test(iterations = 100)]
    fn test_resolve_location_class_methods(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {"
                class Something {
                one() { return 1; }
                « two() { return 2222; }
                three() { return 333; }
                four() { return 4444; }
                five() { return 5555; }
                six() { return 6666; }»
                seven() { return 7; }
                eight() { return 8; }
                }
            "},
            indoc! {"
                two() { return 2222; }
                four() { return 4444; }
                five() { return 5555; }
                six() { return 6666; }
            "},
            &mut rng,
        );
    }

    #[gpui::test(iterations = 100)]
    fn test_resolve_location_imports_no_match(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {"
                use std::ops::Range;
                use std::sync::Mutex;
                use std::{
                collections::HashMap,
                env,
                ffi::{OsStr, OsString},
                fs,
                io::{BufRead, BufReader},
                mem,
                path::{Path, PathBuf},
                process::Command,
                sync::LazyLock,
                time::SystemTime,
                };
            "},
            indoc! {"
                use std::collections::{HashMap, HashSet};
                use std::ffi::{OsStr, OsString};
                use std::fmt::Write as _;
                use std::fs;
                use std::io::{BufReader, Read, Write};
                use std::mem;
                use std::path::{Path, PathBuf};
                use std::process::Command;
                use std::sync::Arc;
            "},
            &mut rng,
        );
    }

    #[gpui::test(iterations = 100)]
    fn test_resolve_location_nested_closure(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {"
                impl Foo {
                fn new() -> Self {
                Self {
                subscriptions: vec![
                cx.observe_window_activation(window, |editor, window, cx| {
                let active = window.is_window_active();
                editor.blink_manager.update(cx, |blink_manager, cx| {
                if active {
                blink_manager.enable(cx);
                } else {
                blink_manager.disable(cx);
                }
                });
                }),
                ];
                }
                }
                }
            "},
            concat!(
                " editor.blink_manager.update(cx, |blink_manager, cx| {\n",
                " blink_manager.enable(cx);\n",
                " });",
            ),
            &mut rng,
        );
    }

    #[gpui::test(iterations = 100)]
    fn test_resolve_location_tool_invocation(mut rng: StdRng) {
        assert_location_resolution(
            indoc! {r#"
                let tool = cx
                .update(|cx| working_set.tool(&tool_name, cx))
                .map_err(|err| {
                anyhow!("Failed to look up tool '{}': {}", tool_name, err)
                })?;

                let Some(tool) = tool else {
                return Err(anyhow!("Tool '{}' not found", tool_name));
                };

                let project = project.clone();
                let action_log = action_log.clone();
                let messages = messages.clone();
                let tool_result = cx
                .update(|cx| tool.run(invocation.input, &messages, project, action_log, cx))
                .map_err(|err| anyhow!("Failed to start tool '{}': {}", tool_name, err))?;

                tasks.push(tool_result.output);
            "#},
            concat!(
                "let tool_result = cx\n",
                " .update(|cx| tool.run(invocation.input, &messages, project, action_log, cx))\n",
                " .output;",
            ),
            &mut rng,
        );
    }

    /// With several equally good matches, the line hint picks the closest
    /// one; without a hint no single best match is selected.
    #[gpui::test]
    fn test_line_hint_selection() {
        let text = indoc! {r#"
            fn first_function() {
            return 42;
            }

            fn second_function() {
            return 42;
            }

            fn third_function() {
            return 42;
            }
        "#};

        let buffer = TextBuffer::new(0, BufferId::new(1).unwrap(), text.to_string());
        let snapshot = buffer.snapshot();
        let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone());

        // Given a query that matches all three functions
        let query = "return 42;\n";

        // Test with line hint pointing to second function (around line 5)
        let best_match = matcher.push(query, Some(5)).expect("Failed to match query");

        let matched_text = snapshot
            .text_for_range(best_match.clone())
            .collect::<String>();
        assert!(matched_text.contains("return 42;"));
        assert_eq!(
            best_match,
            63..77,
            "Expected to match `second_function` based on the line hint"
        );

        let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone());
        matcher.push(query, None);
        matcher.finish();
        let best_match = matcher.select_best_match();
        assert!(
            best_match.is_none(),
            "Best match should be None when query cannot be uniquely resolved"
        );
    }

    /// Streams `query` into a matcher in random chunks and checks that the
    /// resulting ranges equal the ranges marked with `«…»` in
    /// `text_with_expected_range` (or that nothing matches when no range is
    /// marked).
    #[track_caller]
    fn assert_location_resolution(text_with_expected_range: &str, query: &str, rng: &mut StdRng) {
        let (text, expected_ranges) = marked_text_ranges(text_with_expected_range, false);
        let buffer = TextBuffer::new(0, BufferId::new(1).unwrap(), text.clone());
        let snapshot = buffer.snapshot();

        let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone());

        // Split query into random chunks
        let chunks = to_random_chunks(rng, query);

        // Push chunks incrementally
        for chunk in &chunks {
            matcher.push(chunk, None);
        }

        let actual_ranges = matcher.finish();

        // If no expected ranges, we expect no match
        if expected_ranges.is_empty() {
            assert!(
                actual_ranges.is_empty(),
                "Expected no match for query: {:?}, but found: {:?}",
                query,
                actual_ranges
            );
        } else {
            let text_with_actual_range = generate_marked_text(&text, &actual_ranges, false);
            pretty_assertions::assert_eq!(
                text_with_actual_range,
                text_with_expected_range,
                indoc! {"
                    Query: {:?}
                    Chunks: {:?}
                    Expected marked text: {}
                    Actual marked text: {}
                    Expected ranges: {:?}
                    Actual ranges: {:?}"
                },
                query,
                chunks,
                text_with_expected_range,
                text_with_actual_range,
                expected_ranges,
                actual_ranges
            );
        }
    }

    /// Splits `input` into up to 51 consecutive pieces at random positions.
    ///
    /// Split points are restricted to `char` boundaries so that non-ASCII
    /// input cannot be cut in the middle of a character, and an empty input
    /// yields no chunks instead of panicking on an empty random range.
    fn to_random_chunks(rng: &mut StdRng, input: &str) -> Vec<String> {
        let boundaries: Vec<usize> = input.char_indices().map(|(ix, _)| ix).collect();
        if boundaries.is_empty() {
            return Vec::new();
        }

        let chunk_count = rng.gen_range(1..=cmp::min(boundaries.len(), 50));
        let mut chunk_indices = boundaries.into_iter().choose_multiple(rng, chunk_count);
        chunk_indices.sort();
        chunk_indices.push(input.len());

        let mut chunks = Vec::new();
        let mut last_ix = 0;
        for chunk_ix in chunk_indices {
            chunks.push(input[last_ix..chunk_ix].to_string());
            last_ix = chunk_ix;
        }
        chunks
    }

    /// Pushes `chunk` without a line hint and returns the text of the best
    /// match so far.
    fn push(finder: &mut StreamingFuzzyMatcher, chunk: &str) -> Option<String> {
        finder
            .push(chunk, None)
            .map(|range| finder.snapshot.text_for_range(range).collect::<String>())
    }

    /// Finishes the matcher and returns the text of its first match, if any.
    fn finish(mut finder: StreamingFuzzyMatcher) -> Option<String> {
        let snapshot = finder.snapshot.clone();
        let matches = finder.finish();
        matches
            .first()
            .map(|range| snapshot.text_for_range(range.clone()).collect::<String>())
    }
}