1use language::{Point, TextBufferSnapshot};
2use std::{cmp, ops::Range};
3
4const REPLACEMENT_COST: u32 = 1;
5const INSERTION_COST: u32 = 3;
6const DELETION_COST: u32 = 10;
7
8/// A streaming fuzzy matcher that can process text chunks incrementally
9/// and return the best match found so far at each step.
10pub struct StreamingFuzzyMatcher {
11 snapshot: TextBufferSnapshot,
12 query_lines: Vec<String>,
13 incomplete_line: String,
14 best_match: Option<Range<usize>>,
15 matrix: SearchMatrix,
16}
17
18impl StreamingFuzzyMatcher {
19 pub fn new(snapshot: TextBufferSnapshot) -> Self {
20 let buffer_line_count = snapshot.max_point().row as usize + 1;
21 Self {
22 snapshot,
23 query_lines: Vec::new(),
24 incomplete_line: String::new(),
25 best_match: None,
26 matrix: SearchMatrix::new(buffer_line_count + 1),
27 }
28 }
29
30 /// Returns the query lines.
31 pub fn query_lines(&self) -> &[String] {
32 &self.query_lines
33 }
34
35 /// Push a new chunk of text and get the best match found so far.
36 ///
37 /// This method accumulates text chunks and processes complete lines.
38 /// Partial lines are buffered internally until a newline is received.
39 ///
40 /// # Returns
41 ///
42 /// Returns `Some(range)` if a match has been found with the accumulated
43 /// query so far, or `None` if no suitable match exists yet.
44 pub fn push(&mut self, chunk: &str) -> Option<Range<usize>> {
45 // Add the chunk to our incomplete line buffer
46 self.incomplete_line.push_str(chunk);
47
48 if let Some((last_pos, _)) = self.incomplete_line.match_indices('\n').next_back() {
49 let complete_part = &self.incomplete_line[..=last_pos];
50
51 // Split into lines and add to query_lines
52 for line in complete_part.lines() {
53 self.query_lines.push(line.to_string());
54 }
55
56 self.incomplete_line.replace_range(..last_pos + 1, "");
57
58 self.best_match = self.resolve_location_fuzzy();
59 }
60
61 self.best_match.clone()
62 }
63
64 /// Finish processing and return the final best match.
65 ///
66 /// This processes any remaining incomplete line before returning the final
67 /// match result.
68 pub fn finish(&mut self) -> Option<Range<usize>> {
69 // Process any remaining incomplete line
70 if !self.incomplete_line.is_empty() {
71 self.query_lines.push(self.incomplete_line.clone());
72 self.best_match = self.resolve_location_fuzzy();
73 }
74
75 self.best_match.clone()
76 }
77
78 fn resolve_location_fuzzy(&mut self) -> Option<Range<usize>> {
79 let new_query_line_count = self.query_lines.len();
80 let old_query_line_count = self.matrix.rows.saturating_sub(1);
81 if new_query_line_count == old_query_line_count {
82 return None;
83 }
84
85 self.matrix.resize_rows(new_query_line_count + 1);
86
87 // Process only the new query lines
88 for row in old_query_line_count..new_query_line_count {
89 let query_line = self.query_lines[row].trim();
90 let leading_deletion_cost = (row + 1) as u32 * DELETION_COST;
91
92 self.matrix.set(
93 row + 1,
94 0,
95 SearchState::new(leading_deletion_cost, SearchDirection::Up),
96 );
97
98 let mut buffer_lines = self.snapshot.as_rope().chunks().lines();
99 let mut col = 0;
100 while let Some(buffer_line) = buffer_lines.next() {
101 let buffer_line = buffer_line.trim();
102 let up = SearchState::new(
103 self.matrix
104 .get(row, col + 1)
105 .cost
106 .saturating_add(DELETION_COST),
107 SearchDirection::Up,
108 );
109 let left = SearchState::new(
110 self.matrix
111 .get(row + 1, col)
112 .cost
113 .saturating_add(INSERTION_COST),
114 SearchDirection::Left,
115 );
116 let diagonal = SearchState::new(
117 if query_line == buffer_line {
118 self.matrix.get(row, col).cost
119 } else if fuzzy_eq(query_line, buffer_line) {
120 self.matrix.get(row, col).cost + REPLACEMENT_COST
121 } else {
122 self.matrix
123 .get(row, col)
124 .cost
125 .saturating_add(DELETION_COST + INSERTION_COST)
126 },
127 SearchDirection::Diagonal,
128 );
129 self.matrix
130 .set(row + 1, col + 1, up.min(left).min(diagonal));
131 col += 1;
132 }
133 }
134
135 // Traceback to find the best match
136 let buffer_line_count = self.snapshot.max_point().row as usize + 1;
137 let mut buffer_row_end = buffer_line_count as u32;
138 let mut best_cost = u32::MAX;
139 for col in 1..=buffer_line_count {
140 let cost = self.matrix.get(new_query_line_count, col).cost;
141 if cost < best_cost {
142 best_cost = cost;
143 buffer_row_end = col as u32;
144 }
145 }
146
147 let mut matched_lines = 0;
148 let mut query_row = new_query_line_count;
149 let mut buffer_row_start = buffer_row_end;
150 while query_row > 0 && buffer_row_start > 0 {
151 let current = self.matrix.get(query_row, buffer_row_start as usize);
152 match current.direction {
153 SearchDirection::Diagonal => {
154 query_row -= 1;
155 buffer_row_start -= 1;
156 matched_lines += 1;
157 }
158 SearchDirection::Up => {
159 query_row -= 1;
160 }
161 SearchDirection::Left => {
162 buffer_row_start -= 1;
163 }
164 }
165 }
166
167 let matched_buffer_row_count = buffer_row_end - buffer_row_start;
168 let matched_ratio = matched_lines as f32
169 / (matched_buffer_row_count as f32).max(new_query_line_count as f32);
170 if matched_ratio >= 0.8 {
171 let buffer_start_ix = self
172 .snapshot
173 .point_to_offset(Point::new(buffer_row_start, 0));
174 let buffer_end_ix = self.snapshot.point_to_offset(Point::new(
175 buffer_row_end - 1,
176 self.snapshot.line_len(buffer_row_end - 1),
177 ));
178 Some(buffer_start_ix..buffer_end_ix)
179 } else {
180 None
181 }
182 }
183}
184
185fn fuzzy_eq(left: &str, right: &str) -> bool {
186 const THRESHOLD: f64 = 0.8;
187
188 let min_levenshtein = left.len().abs_diff(right.len());
189 let min_normalized_levenshtein =
190 1. - (min_levenshtein as f64 / cmp::max(left.len(), right.len()) as f64);
191 if min_normalized_levenshtein < THRESHOLD {
192 return false;
193 }
194
195 strsim::normalized_levenshtein(left, right) >= THRESHOLD
196}
197
198#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
199enum SearchDirection {
200 Up,
201 Left,
202 Diagonal,
203}
204
205#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
206struct SearchState {
207 cost: u32,
208 direction: SearchDirection,
209}
210
211impl SearchState {
212 fn new(cost: u32, direction: SearchDirection) -> Self {
213 Self { cost, direction }
214 }
215}
216
217struct SearchMatrix {
218 cols: usize,
219 rows: usize,
220 data: Vec<SearchState>,
221}
222
223impl SearchMatrix {
224 fn new(cols: usize) -> Self {
225 SearchMatrix {
226 cols,
227 rows: 0,
228 data: Vec::new(),
229 }
230 }
231
232 fn resize_rows(&mut self, needed_rows: usize) {
233 debug_assert!(needed_rows > self.rows);
234 self.rows = needed_rows;
235 self.data.resize(
236 self.rows * self.cols,
237 SearchState::new(0, SearchDirection::Diagonal),
238 );
239 }
240
241 fn get(&self, row: usize, col: usize) -> SearchState {
242 debug_assert!(row < self.rows && col < self.cols);
243 self.data[row * self.cols + col]
244 }
245
246 fn set(&mut self, row: usize, col: usize, state: SearchState) {
247 debug_assert!(row < self.rows && col < self.cols);
248 self.data[row * self.cols + col] = state;
249 }
250}
251
252#[cfg(test)]
253mod tests {
254 use super::*;
255 use indoc::indoc;
256 use language::{BufferId, TextBuffer};
257 use rand::prelude::*;
258 use util::test::{generate_marked_text, marked_text_ranges};
259
260 #[test]
261 fn test_empty_query() {
262 let buffer = TextBuffer::new(
263 0,
264 BufferId::new(1).unwrap(),
265 "Hello world\nThis is a test\nFoo bar baz",
266 );
267 let snapshot = buffer.snapshot();
268
269 let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
270 assert_eq!(push(&mut finder, ""), None);
271 assert_eq!(finish(finder), None);
272 }
273
274 #[test]
275 fn test_streaming_exact_match() {
276 let buffer = TextBuffer::new(
277 0,
278 BufferId::new(1).unwrap(),
279 "Hello world\nThis is a test\nFoo bar baz",
280 );
281 let snapshot = buffer.snapshot();
282
283 let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
284
285 // Push partial query
286 assert_eq!(push(&mut finder, "This"), None);
287
288 // Complete the line
289 assert_eq!(
290 push(&mut finder, " is a test\n"),
291 Some("This is a test".to_string())
292 );
293
294 // Finish should return the same result
295 assert_eq!(finish(finder), Some("This is a test".to_string()));
296 }
297
298 #[test]
299 fn test_streaming_fuzzy_match() {
300 let buffer = TextBuffer::new(
301 0,
302 BufferId::new(1).unwrap(),
303 indoc! {"
304 function foo(a, b) {
305 return a + b;
306 }
307
308 function bar(x, y) {
309 return x * y;
310 }
311 "},
312 );
313 let snapshot = buffer.snapshot();
314
315 let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
316
317 // Push a fuzzy query that should match the first function
318 assert_eq!(
319 push(&mut finder, "function foo(a, c) {\n").as_deref(),
320 Some("function foo(a, b) {")
321 );
322 assert_eq!(
323 push(&mut finder, " return a + c;\n}\n").as_deref(),
324 Some(concat!(
325 "function foo(a, b) {\n",
326 " return a + b;\n",
327 "}"
328 ))
329 );
330 }
331
332 #[test]
333 fn test_incremental_improvement() {
334 let buffer = TextBuffer::new(
335 0,
336 BufferId::new(1).unwrap(),
337 "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
338 );
339 let snapshot = buffer.snapshot();
340
341 let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
342
343 // No match initially
344 assert_eq!(push(&mut finder, "Lin"), None);
345
346 // Get a match when we complete a line
347 assert_eq!(push(&mut finder, "e 3\n"), Some("Line 3".to_string()));
348
349 // The match might change if we add more specific content
350 assert_eq!(
351 push(&mut finder, "Line 4\n"),
352 Some("Line 3\nLine 4".to_string())
353 );
354 assert_eq!(finish(finder), Some("Line 3\nLine 4".to_string()));
355 }
356
357 #[test]
358 fn test_incomplete_lines_buffering() {
359 let buffer = TextBuffer::new(
360 0,
361 BufferId::new(1).unwrap(),
362 indoc! {"
363 The quick brown fox
364 jumps over the lazy dog
365 Pack my box with five dozen liquor jugs
366 "},
367 );
368 let snapshot = buffer.snapshot();
369
370 let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
371
372 // Push text in small chunks across line boundaries
373 assert_eq!(push(&mut finder, "jumps "), None); // No newline yet
374 assert_eq!(push(&mut finder, "over the"), None); // Still no newline
375 assert_eq!(push(&mut finder, " lazy"), None); // Still incomplete
376
377 // Complete the line
378 assert_eq!(
379 push(&mut finder, " dog\n"),
380 Some("jumps over the lazy dog".to_string())
381 );
382 }
383
384 #[test]
385 fn test_multiline_fuzzy_match() {
386 let buffer = TextBuffer::new(
387 0,
388 BufferId::new(1).unwrap(),
389 indoc! {r#"
390 impl Display for User {
391 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
392 write!(f, "User: {} ({})", self.name, self.email)
393 }
394 }
395
396 impl Debug for User {
397 fn fmt(&self, f: &mut Formatter) -> fmt::Result {
398 f.debug_struct("User")
399 .field("name", &self.name)
400 .field("email", &self.email)
401 .finish()
402 }
403 }
404 "#},
405 );
406 let snapshot = buffer.snapshot();
407
408 let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
409
410 assert_eq!(
411 push(&mut finder, "impl Debug for User {\n"),
412 Some("impl Debug for User {".to_string())
413 );
414 assert_eq!(
415 push(
416 &mut finder,
417 " fn fmt(&self, f: &mut Formatter) -> Result {\n"
418 )
419 .as_deref(),
420 Some(concat!(
421 "impl Debug for User {\n",
422 " fn fmt(&self, f: &mut Formatter) -> fmt::Result {"
423 ))
424 );
425 assert_eq!(
426 push(&mut finder, " f.debug_struct(\"User\")\n").as_deref(),
427 Some(concat!(
428 "impl Debug for User {\n",
429 " fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
430 " f.debug_struct(\"User\")"
431 ))
432 );
433 assert_eq!(
434 push(
435 &mut finder,
436 " .field(\"name\", &self.username)\n"
437 )
438 .as_deref(),
439 Some(concat!(
440 "impl Debug for User {\n",
441 " fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
442 " f.debug_struct(\"User\")\n",
443 " .field(\"name\", &self.name)"
444 ))
445 );
446 assert_eq!(
447 finish(finder).as_deref(),
448 Some(concat!(
449 "impl Debug for User {\n",
450 " fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
451 " f.debug_struct(\"User\")\n",
452 " .field(\"name\", &self.name)"
453 ))
454 );
455 }
456
457 #[gpui::test(iterations = 100)]
458 fn test_resolve_location_single_line(mut rng: StdRng) {
459 assert_location_resolution(
460 concat!(
461 " Lorem\n",
462 "« ipsum»\n",
463 " dolor sit amet\n",
464 " consecteur",
465 ),
466 "ipsum",
467 &mut rng,
468 );
469 }
470
471 #[gpui::test(iterations = 100)]
472 fn test_resolve_location_multiline(mut rng: StdRng) {
473 assert_location_resolution(
474 concat!(
475 " Lorem\n",
476 "« ipsum\n",
477 " dolor sit amet»\n",
478 " consecteur",
479 ),
480 "ipsum\ndolor sit amet",
481 &mut rng,
482 );
483 }
484
485 #[gpui::test(iterations = 100)]
486 fn test_resolve_location_function_with_typo(mut rng: StdRng) {
487 assert_location_resolution(
488 indoc! {"
489 «fn foo1(a: usize) -> usize {
490 40
491 }»
492
493 fn foo2(b: usize) -> usize {
494 42
495 }
496 "},
497 "fn foo1(a: usize) -> u32 {\n40\n}",
498 &mut rng,
499 );
500 }
501
502 #[gpui::test(iterations = 100)]
503 fn test_resolve_location_class_methods(mut rng: StdRng) {
504 assert_location_resolution(
505 indoc! {"
506 class Something {
507 one() { return 1; }
508 « two() { return 2222; }
509 three() { return 333; }
510 four() { return 4444; }
511 five() { return 5555; }
512 six() { return 6666; }»
513 seven() { return 7; }
514 eight() { return 8; }
515 }
516 "},
517 indoc! {"
518 two() { return 2222; }
519 four() { return 4444; }
520 five() { return 5555; }
521 six() { return 6666; }
522 "},
523 &mut rng,
524 );
525 }
526
527 #[gpui::test(iterations = 100)]
528 fn test_resolve_location_imports_no_match(mut rng: StdRng) {
529 assert_location_resolution(
530 indoc! {"
531 use std::ops::Range;
532 use std::sync::Mutex;
533 use std::{
534 collections::HashMap,
535 env,
536 ffi::{OsStr, OsString},
537 fs,
538 io::{BufRead, BufReader},
539 mem,
540 path::{Path, PathBuf},
541 process::Command,
542 sync::LazyLock,
543 time::SystemTime,
544 };
545 "},
546 indoc! {"
547 use std::collections::{HashMap, HashSet};
548 use std::ffi::{OsStr, OsString};
549 use std::fmt::Write as _;
550 use std::fs;
551 use std::io::{BufReader, Read, Write};
552 use std::mem;
553 use std::path::{Path, PathBuf};
554 use std::process::Command;
555 use std::sync::Arc;
556 "},
557 &mut rng,
558 );
559 }
560
561 #[gpui::test(iterations = 100)]
562 fn test_resolve_location_nested_closure(mut rng: StdRng) {
563 assert_location_resolution(
564 indoc! {"
565 impl Foo {
566 fn new() -> Self {
567 Self {
568 subscriptions: vec![
569 cx.observe_window_activation(window, |editor, window, cx| {
570 let active = window.is_window_active();
571 editor.blink_manager.update(cx, |blink_manager, cx| {
572 if active {
573 blink_manager.enable(cx);
574 } else {
575 blink_manager.disable(cx);
576 }
577 });
578 }),
579 ];
580 }
581 }
582 }
583 "},
584 concat!(
585 " editor.blink_manager.update(cx, |blink_manager, cx| {\n",
586 " blink_manager.enable(cx);\n",
587 " });",
588 ),
589 &mut rng,
590 );
591 }
592
593 #[gpui::test(iterations = 100)]
594 fn test_resolve_location_tool_invocation(mut rng: StdRng) {
595 assert_location_resolution(
596 indoc! {r#"
597 let tool = cx
598 .update(|cx| working_set.tool(&tool_name, cx))
599 .map_err(|err| {
600 anyhow!("Failed to look up tool '{}': {}", tool_name, err)
601 })?;
602
603 let Some(tool) = tool else {
604 return Err(anyhow!("Tool '{}' not found", tool_name));
605 };
606
607 let project = project.clone();
608 let action_log = action_log.clone();
609 let messages = messages.clone();
610 let tool_result = cx
611 .update(|cx| tool.run(invocation.input, &messages, project, action_log, cx))
612 .map_err(|err| anyhow!("Failed to start tool '{}': {}", tool_name, err))?;
613
614 tasks.push(tool_result.output);
615 "#},
616 concat!(
617 "let tool_result = cx\n",
618 " .update(|cx| tool.run(invocation.input, &messages, project, action_log, cx))\n",
619 " .output;",
620 ),
621 &mut rng,
622 );
623 }
624
625 #[track_caller]
626 fn assert_location_resolution(text_with_expected_range: &str, query: &str, rng: &mut StdRng) {
627 let (text, expected_ranges) = marked_text_ranges(text_with_expected_range, false);
628 let buffer = TextBuffer::new(0, BufferId::new(1).unwrap(), text.clone());
629 let snapshot = buffer.snapshot();
630
631 let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone());
632
633 // Split query into random chunks
634 let chunks = to_random_chunks(rng, query);
635
636 // Push chunks incrementally
637 for chunk in &chunks {
638 matcher.push(chunk);
639 }
640
641 let result = matcher.finish();
642
643 // If no expected ranges, we expect no match
644 if expected_ranges.is_empty() {
645 assert_eq!(
646 result, None,
647 "Expected no match for query: {:?}, but found: {:?}",
648 query, result
649 );
650 } else {
651 let mut actual_ranges = Vec::new();
652 if let Some(range) = result {
653 actual_ranges.push(range);
654 }
655
656 let text_with_actual_range = generate_marked_text(&text, &actual_ranges, false);
657 pretty_assertions::assert_eq!(
658 text_with_actual_range,
659 text_with_expected_range,
660 "Query: {:?}, Chunks: {:?}",
661 query,
662 chunks
663 );
664 }
665 }
666
667 fn to_random_chunks(rng: &mut StdRng, input: &str) -> Vec<String> {
668 let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50));
669 let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
670 chunk_indices.sort();
671 chunk_indices.push(input.len());
672
673 let mut chunks = Vec::new();
674 let mut last_ix = 0;
675 for chunk_ix in chunk_indices {
676 chunks.push(input[last_ix..chunk_ix].to_string());
677 last_ix = chunk_ix;
678 }
679 chunks
680 }
681
682 fn push(finder: &mut StreamingFuzzyMatcher, chunk: &str) -> Option<String> {
683 finder
684 .push(chunk)
685 .map(|range| finder.snapshot.text_for_range(range).collect::<String>())
686 }
687
688 fn finish(mut finder: StreamingFuzzyMatcher) -> Option<String> {
689 let snapshot = finder.snapshot.clone();
690 finder
691 .finish()
692 .map(|range| snapshot.text_for_range(range).collect::<String>())
693 }
694}