streaming_fuzzy_matcher.rs

  1use language::{Point, TextBufferSnapshot};
  2use std::{cmp, ops::Range};
  3
  4const REPLACEMENT_COST: u32 = 1;
  5const INSERTION_COST: u32 = 3;
  6const DELETION_COST: u32 = 10;
  7
  8/// A streaming fuzzy matcher that can process text chunks incrementally
  9/// and return the best match found so far at each step.
 10pub struct StreamingFuzzyMatcher {
 11    snapshot: TextBufferSnapshot,
 12    query_lines: Vec<String>,
 13    incomplete_line: String,
 14    best_match: Option<Range<usize>>,
 15    matrix: SearchMatrix,
 16}
 17
 18impl StreamingFuzzyMatcher {
 19    pub fn new(snapshot: TextBufferSnapshot) -> Self {
 20        let buffer_line_count = snapshot.max_point().row as usize + 1;
 21        Self {
 22            snapshot,
 23            query_lines: Vec::new(),
 24            incomplete_line: String::new(),
 25            best_match: None,
 26            matrix: SearchMatrix::new(buffer_line_count + 1),
 27        }
 28    }
 29
 30    /// Returns the query lines.
 31    pub fn query_lines(&self) -> &[String] {
 32        &self.query_lines
 33    }
 34
 35    /// Push a new chunk of text and get the best match found so far.
 36    ///
 37    /// This method accumulates text chunks and processes complete lines.
 38    /// Partial lines are buffered internally until a newline is received.
 39    ///
 40    /// # Returns
 41    ///
 42    /// Returns `Some(range)` if a match has been found with the accumulated
 43    /// query so far, or `None` if no suitable match exists yet.
 44    pub fn push(&mut self, chunk: &str) -> Option<Range<usize>> {
 45        // Add the chunk to our incomplete line buffer
 46        self.incomplete_line.push_str(chunk);
 47
 48        if let Some((last_pos, _)) = self.incomplete_line.match_indices('\n').next_back() {
 49            let complete_part = &self.incomplete_line[..=last_pos];
 50
 51            // Split into lines and add to query_lines
 52            for line in complete_part.lines() {
 53                self.query_lines.push(line.to_string());
 54            }
 55
 56            self.incomplete_line.replace_range(..last_pos + 1, "");
 57
 58            self.best_match = self.resolve_location_fuzzy();
 59        }
 60
 61        self.best_match.clone()
 62    }
 63
 64    /// Finish processing and return the final best match.
 65    ///
 66    /// This processes any remaining incomplete line before returning the final
 67    /// match result.
 68    pub fn finish(&mut self) -> Option<Range<usize>> {
 69        // Process any remaining incomplete line
 70        if !self.incomplete_line.is_empty() {
 71            self.query_lines.push(self.incomplete_line.clone());
 72            self.best_match = self.resolve_location_fuzzy();
 73        }
 74
 75        self.best_match.clone()
 76    }
 77
 78    fn resolve_location_fuzzy(&mut self) -> Option<Range<usize>> {
 79        let new_query_line_count = self.query_lines.len();
 80        let old_query_line_count = self.matrix.rows.saturating_sub(1);
 81        if new_query_line_count == old_query_line_count {
 82            return None;
 83        }
 84
 85        self.matrix.resize_rows(new_query_line_count + 1);
 86
 87        // Process only the new query lines
 88        for row in old_query_line_count..new_query_line_count {
 89            let query_line = self.query_lines[row].trim();
 90            let leading_deletion_cost = (row + 1) as u32 * DELETION_COST;
 91
 92            self.matrix.set(
 93                row + 1,
 94                0,
 95                SearchState::new(leading_deletion_cost, SearchDirection::Up),
 96            );
 97
 98            let mut buffer_lines = self.snapshot.as_rope().chunks().lines();
 99            let mut col = 0;
100            while let Some(buffer_line) = buffer_lines.next() {
101                let buffer_line = buffer_line.trim();
102                let up = SearchState::new(
103                    self.matrix
104                        .get(row, col + 1)
105                        .cost
106                        .saturating_add(DELETION_COST),
107                    SearchDirection::Up,
108                );
109                let left = SearchState::new(
110                    self.matrix
111                        .get(row + 1, col)
112                        .cost
113                        .saturating_add(INSERTION_COST),
114                    SearchDirection::Left,
115                );
116                let diagonal = SearchState::new(
117                    if query_line == buffer_line {
118                        self.matrix.get(row, col).cost
119                    } else if fuzzy_eq(query_line, buffer_line) {
120                        self.matrix.get(row, col).cost + REPLACEMENT_COST
121                    } else {
122                        self.matrix
123                            .get(row, col)
124                            .cost
125                            .saturating_add(DELETION_COST + INSERTION_COST)
126                    },
127                    SearchDirection::Diagonal,
128                );
129                self.matrix
130                    .set(row + 1, col + 1, up.min(left).min(diagonal));
131                col += 1;
132            }
133        }
134
135        // Traceback to find the best match
136        let buffer_line_count = self.snapshot.max_point().row as usize + 1;
137        let mut buffer_row_end = buffer_line_count as u32;
138        let mut best_cost = u32::MAX;
139        for col in 1..=buffer_line_count {
140            let cost = self.matrix.get(new_query_line_count, col).cost;
141            if cost < best_cost {
142                best_cost = cost;
143                buffer_row_end = col as u32;
144            }
145        }
146
147        let mut matched_lines = 0;
148        let mut query_row = new_query_line_count;
149        let mut buffer_row_start = buffer_row_end;
150        while query_row > 0 && buffer_row_start > 0 {
151            let current = self.matrix.get(query_row, buffer_row_start as usize);
152            match current.direction {
153                SearchDirection::Diagonal => {
154                    query_row -= 1;
155                    buffer_row_start -= 1;
156                    matched_lines += 1;
157                }
158                SearchDirection::Up => {
159                    query_row -= 1;
160                }
161                SearchDirection::Left => {
162                    buffer_row_start -= 1;
163                }
164            }
165        }
166
167        let matched_buffer_row_count = buffer_row_end - buffer_row_start;
168        let matched_ratio = matched_lines as f32
169            / (matched_buffer_row_count as f32).max(new_query_line_count as f32);
170        if matched_ratio >= 0.8 {
171            let buffer_start_ix = self
172                .snapshot
173                .point_to_offset(Point::new(buffer_row_start, 0));
174            let buffer_end_ix = self.snapshot.point_to_offset(Point::new(
175                buffer_row_end - 1,
176                self.snapshot.line_len(buffer_row_end - 1),
177            ));
178            Some(buffer_start_ix..buffer_end_ix)
179        } else {
180            None
181        }
182    }
183}
184
185fn fuzzy_eq(left: &str, right: &str) -> bool {
186    const THRESHOLD: f64 = 0.8;
187
188    let min_levenshtein = left.len().abs_diff(right.len());
189    let min_normalized_levenshtein =
190        1. - (min_levenshtein as f64 / cmp::max(left.len(), right.len()) as f64);
191    if min_normalized_levenshtein < THRESHOLD {
192        return false;
193    }
194
195    strsim::normalized_levenshtein(left, right) >= THRESHOLD
196}
197
198#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
199enum SearchDirection {
200    Up,
201    Left,
202    Diagonal,
203}
204
205#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
206struct SearchState {
207    cost: u32,
208    direction: SearchDirection,
209}
210
211impl SearchState {
212    fn new(cost: u32, direction: SearchDirection) -> Self {
213        Self { cost, direction }
214    }
215}
216
217struct SearchMatrix {
218    cols: usize,
219    rows: usize,
220    data: Vec<SearchState>,
221}
222
223impl SearchMatrix {
224    fn new(cols: usize) -> Self {
225        SearchMatrix {
226            cols,
227            rows: 0,
228            data: Vec::new(),
229        }
230    }
231
232    fn resize_rows(&mut self, needed_rows: usize) {
233        debug_assert!(needed_rows > self.rows);
234        self.rows = needed_rows;
235        self.data.resize(
236            self.rows * self.cols,
237            SearchState::new(0, SearchDirection::Diagonal),
238        );
239    }
240
241    fn get(&self, row: usize, col: usize) -> SearchState {
242        debug_assert!(row < self.rows && col < self.cols);
243        self.data[row * self.cols + col]
244    }
245
246    fn set(&mut self, row: usize, col: usize, state: SearchState) {
247        debug_assert!(row < self.rows && col < self.cols);
248        self.data[row * self.cols + col] = state;
249    }
250}
251
252#[cfg(test)]
253mod tests {
254    use super::*;
255    use indoc::indoc;
256    use language::{BufferId, TextBuffer};
257    use rand::prelude::*;
258    use util::test::{generate_marked_text, marked_text_ranges};
259
260    #[test]
261    fn test_empty_query() {
262        let buffer = TextBuffer::new(
263            0,
264            BufferId::new(1).unwrap(),
265            "Hello world\nThis is a test\nFoo bar baz",
266        );
267        let snapshot = buffer.snapshot();
268
269        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
270        assert_eq!(push(&mut finder, ""), None);
271        assert_eq!(finish(finder), None);
272    }
273
274    #[test]
275    fn test_streaming_exact_match() {
276        let buffer = TextBuffer::new(
277            0,
278            BufferId::new(1).unwrap(),
279            "Hello world\nThis is a test\nFoo bar baz",
280        );
281        let snapshot = buffer.snapshot();
282
283        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
284
285        // Push partial query
286        assert_eq!(push(&mut finder, "This"), None);
287
288        // Complete the line
289        assert_eq!(
290            push(&mut finder, " is a test\n"),
291            Some("This is a test".to_string())
292        );
293
294        // Finish should return the same result
295        assert_eq!(finish(finder), Some("This is a test".to_string()));
296    }
297
298    #[test]
299    fn test_streaming_fuzzy_match() {
300        let buffer = TextBuffer::new(
301            0,
302            BufferId::new(1).unwrap(),
303            indoc! {"
304                function foo(a, b) {
305                    return a + b;
306                }
307
308                function bar(x, y) {
309                    return x * y;
310                }
311            "},
312        );
313        let snapshot = buffer.snapshot();
314
315        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
316
317        // Push a fuzzy query that should match the first function
318        assert_eq!(
319            push(&mut finder, "function foo(a, c) {\n").as_deref(),
320            Some("function foo(a, b) {")
321        );
322        assert_eq!(
323            push(&mut finder, "    return a + c;\n}\n").as_deref(),
324            Some(concat!(
325                "function foo(a, b) {\n",
326                "    return a + b;\n",
327                "}"
328            ))
329        );
330    }
331
332    #[test]
333    fn test_incremental_improvement() {
334        let buffer = TextBuffer::new(
335            0,
336            BufferId::new(1).unwrap(),
337            "Line 1\nLine 2\nLine 3\nLine 4\nLine 5",
338        );
339        let snapshot = buffer.snapshot();
340
341        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
342
343        // No match initially
344        assert_eq!(push(&mut finder, "Lin"), None);
345
346        // Get a match when we complete a line
347        assert_eq!(push(&mut finder, "e 3\n"), Some("Line 3".to_string()));
348
349        // The match might change if we add more specific content
350        assert_eq!(
351            push(&mut finder, "Line 4\n"),
352            Some("Line 3\nLine 4".to_string())
353        );
354        assert_eq!(finish(finder), Some("Line 3\nLine 4".to_string()));
355    }
356
357    #[test]
358    fn test_incomplete_lines_buffering() {
359        let buffer = TextBuffer::new(
360            0,
361            BufferId::new(1).unwrap(),
362            indoc! {"
363                The quick brown fox
364                jumps over the lazy dog
365                Pack my box with five dozen liquor jugs
366            "},
367        );
368        let snapshot = buffer.snapshot();
369
370        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
371
372        // Push text in small chunks across line boundaries
373        assert_eq!(push(&mut finder, "jumps "), None); // No newline yet
374        assert_eq!(push(&mut finder, "over the"), None); // Still no newline
375        assert_eq!(push(&mut finder, " lazy"), None); // Still incomplete
376
377        // Complete the line
378        assert_eq!(
379            push(&mut finder, " dog\n"),
380            Some("jumps over the lazy dog".to_string())
381        );
382    }
383
384    #[test]
385    fn test_multiline_fuzzy_match() {
386        let buffer = TextBuffer::new(
387            0,
388            BufferId::new(1).unwrap(),
389            indoc! {r#"
390                impl Display for User {
391                    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
392                        write!(f, "User: {} ({})", self.name, self.email)
393                    }
394                }
395
396                impl Debug for User {
397                    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
398                        f.debug_struct("User")
399                            .field("name", &self.name)
400                            .field("email", &self.email)
401                            .finish()
402                    }
403                }
404            "#},
405        );
406        let snapshot = buffer.snapshot();
407
408        let mut finder = StreamingFuzzyMatcher::new(snapshot.clone());
409
410        assert_eq!(
411            push(&mut finder, "impl Debug for User {\n"),
412            Some("impl Debug for User {".to_string())
413        );
414        assert_eq!(
415            push(
416                &mut finder,
417                "    fn fmt(&self, f: &mut Formatter) -> Result {\n"
418            )
419            .as_deref(),
420            Some(concat!(
421                "impl Debug for User {\n",
422                "    fn fmt(&self, f: &mut Formatter) -> fmt::Result {"
423            ))
424        );
425        assert_eq!(
426            push(&mut finder, "        f.debug_struct(\"User\")\n").as_deref(),
427            Some(concat!(
428                "impl Debug for User {\n",
429                "    fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
430                "        f.debug_struct(\"User\")"
431            ))
432        );
433        assert_eq!(
434            push(
435                &mut finder,
436                "            .field(\"name\", &self.username)\n"
437            )
438            .as_deref(),
439            Some(concat!(
440                "impl Debug for User {\n",
441                "    fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
442                "        f.debug_struct(\"User\")\n",
443                "            .field(\"name\", &self.name)"
444            ))
445        );
446        assert_eq!(
447            finish(finder).as_deref(),
448            Some(concat!(
449                "impl Debug for User {\n",
450                "    fn fmt(&self, f: &mut Formatter) -> fmt::Result {\n",
451                "        f.debug_struct(\"User\")\n",
452                "            .field(\"name\", &self.name)"
453            ))
454        );
455    }
456
457    #[gpui::test(iterations = 100)]
458    fn test_resolve_location_single_line(mut rng: StdRng) {
459        assert_location_resolution(
460            concat!(
461                "    Lorem\n",
462                "«    ipsum»\n",
463                "    dolor sit amet\n",
464                "    consecteur",
465            ),
466            "ipsum",
467            &mut rng,
468        );
469    }
470
471    #[gpui::test(iterations = 100)]
472    fn test_resolve_location_multiline(mut rng: StdRng) {
473        assert_location_resolution(
474            concat!(
475                "    Lorem\n",
476                "«    ipsum\n",
477                "    dolor sit amet»\n",
478                "    consecteur",
479            ),
480            "ipsum\ndolor sit amet",
481            &mut rng,
482        );
483    }
484
485    #[gpui::test(iterations = 100)]
486    fn test_resolve_location_function_with_typo(mut rng: StdRng) {
487        assert_location_resolution(
488            indoc! {"
489                «fn foo1(a: usize) -> usize {
490                    40
491492
493                fn foo2(b: usize) -> usize {
494                    42
495                }
496            "},
497            "fn foo1(a: usize) -> u32 {\n40\n}",
498            &mut rng,
499        );
500    }
501
502    #[gpui::test(iterations = 100)]
503    fn test_resolve_location_class_methods(mut rng: StdRng) {
504        assert_location_resolution(
505            indoc! {"
506                class Something {
507                    one() { return 1; }
508                «    two() { return 2222; }
509                    three() { return 333; }
510                    four() { return 4444; }
511                    five() { return 5555; }
512                    six() { return 6666; }»
513                    seven() { return 7; }
514                    eight() { return 8; }
515                }
516            "},
517            indoc! {"
518                two() { return 2222; }
519                four() { return 4444; }
520                five() { return 5555; }
521                six() { return 6666; }
522            "},
523            &mut rng,
524        );
525    }
526
527    #[gpui::test(iterations = 100)]
528    fn test_resolve_location_imports_no_match(mut rng: StdRng) {
529        assert_location_resolution(
530            indoc! {"
531                use std::ops::Range;
532                use std::sync::Mutex;
533                use std::{
534                    collections::HashMap,
535                    env,
536                    ffi::{OsStr, OsString},
537                    fs,
538                    io::{BufRead, BufReader},
539                    mem,
540                    path::{Path, PathBuf},
541                    process::Command,
542                    sync::LazyLock,
543                    time::SystemTime,
544                };
545            "},
546            indoc! {"
547                use std::collections::{HashMap, HashSet};
548                use std::ffi::{OsStr, OsString};
549                use std::fmt::Write as _;
550                use std::fs;
551                use std::io::{BufReader, Read, Write};
552                use std::mem;
553                use std::path::{Path, PathBuf};
554                use std::process::Command;
555                use std::sync::Arc;
556            "},
557            &mut rng,
558        );
559    }
560
561    #[gpui::test(iterations = 100)]
562    fn test_resolve_location_nested_closure(mut rng: StdRng) {
563        assert_location_resolution(
564            indoc! {"
565                impl Foo {
566                    fn new() -> Self {
567                        Self {
568                            subscriptions: vec![
569                                cx.observe_window_activation(window, |editor, window, cx| {
570                                    let active = window.is_window_active();
571                                    editor.blink_manager.update(cx, |blink_manager, cx| {
572                                        if active {
573                                            blink_manager.enable(cx);
574                                        } else {
575                                            blink_manager.disable(cx);
576                                        }
577                                    });
578                                }),
579                            ];
580                        }
581                    }
582                }
583            "},
584            concat!(
585                "                    editor.blink_manager.update(cx, |blink_manager, cx| {\n",
586                "                        blink_manager.enable(cx);\n",
587                "                    });",
588            ),
589            &mut rng,
590        );
591    }
592
593    #[gpui::test(iterations = 100)]
594    fn test_resolve_location_tool_invocation(mut rng: StdRng) {
595        assert_location_resolution(
596            indoc! {r#"
597                let tool = cx
598                    .update(|cx| working_set.tool(&tool_name, cx))
599                    .map_err(|err| {
600                        anyhow!("Failed to look up tool '{}': {}", tool_name, err)
601                    })?;
602
603                let Some(tool) = tool else {
604                    return Err(anyhow!("Tool '{}' not found", tool_name));
605                };
606
607                let project = project.clone();
608                let action_log = action_log.clone();
609                let messages = messages.clone();
610                let tool_result = cx
611                    .update(|cx| tool.run(invocation.input, &messages, project, action_log, cx))
612                    .map_err(|err| anyhow!("Failed to start tool '{}': {}", tool_name, err))?;
613
614                tasks.push(tool_result.output);
615            "#},
616            concat!(
617                "let tool_result = cx\n",
618                "    .update(|cx| tool.run(invocation.input, &messages, project, action_log, cx))\n",
619                "    .output;",
620            ),
621            &mut rng,
622        );
623    }
624
625    #[track_caller]
626    fn assert_location_resolution(text_with_expected_range: &str, query: &str, rng: &mut StdRng) {
627        let (text, expected_ranges) = marked_text_ranges(text_with_expected_range, false);
628        let buffer = TextBuffer::new(0, BufferId::new(1).unwrap(), text.clone());
629        let snapshot = buffer.snapshot();
630
631        let mut matcher = StreamingFuzzyMatcher::new(snapshot.clone());
632
633        // Split query into random chunks
634        let chunks = to_random_chunks(rng, query);
635
636        // Push chunks incrementally
637        for chunk in &chunks {
638            matcher.push(chunk);
639        }
640
641        let result = matcher.finish();
642
643        // If no expected ranges, we expect no match
644        if expected_ranges.is_empty() {
645            assert_eq!(
646                result, None,
647                "Expected no match for query: {:?}, but found: {:?}",
648                query, result
649            );
650        } else {
651            let mut actual_ranges = Vec::new();
652            if let Some(range) = result {
653                actual_ranges.push(range);
654            }
655
656            let text_with_actual_range = generate_marked_text(&text, &actual_ranges, false);
657            pretty_assertions::assert_eq!(
658                text_with_actual_range,
659                text_with_expected_range,
660                "Query: {:?}, Chunks: {:?}",
661                query,
662                chunks
663            );
664        }
665    }
666
667    fn to_random_chunks(rng: &mut StdRng, input: &str) -> Vec<String> {
668        let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50));
669        let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
670        chunk_indices.sort();
671        chunk_indices.push(input.len());
672
673        let mut chunks = Vec::new();
674        let mut last_ix = 0;
675        for chunk_ix in chunk_indices {
676            chunks.push(input[last_ix..chunk_ix].to_string());
677            last_ix = chunk_ix;
678        }
679        chunks
680    }
681
682    fn push(finder: &mut StreamingFuzzyMatcher, chunk: &str) -> Option<String> {
683        finder
684            .push(chunk)
685            .map(|range| finder.snapshot.text_for_range(range).collect::<String>())
686    }
687
688    fn finish(mut finder: StreamingFuzzyMatcher) -> Option<String> {
689        let snapshot = finder.snapshot.clone();
690        finder
691            .finish()
692            .map(|range| snapshot.text_for_range(range).collect::<String>())
693    }
694}