Apply a score boost that doubles for every three consecutive matching characters

Created by Antonio Scandurra

Change summary

crates/ai/src/diff.rs | 35 ++++++++++++++++++++---------------
1 file changed, 20 insertions(+), 15 deletions(-)

Detailed changes

crates/ai/src/diff.rs 🔗

@@ -1,3 +1,4 @@
+use collections::HashMap;
 use std::{
     fmt::{self, Debug},
     ops::Range,
@@ -74,12 +75,13 @@ pub struct Diff {
     scores: Matrix,
     old_text_ix: usize,
     new_text_ix: usize,
+    equal_runs: HashMap<(usize, usize), u32>,
 }
 
 impl Diff {
     const INSERTION_SCORE: isize = -1;
-    const DELETION_SCORE: isize = -4;
-    const EQUALITY_SCORE: isize = 15;
+    const DELETION_SCORE: isize = -5;
+    const EQUALITY_BASE: isize = 2;
 
     pub fn new(old: String) -> Self {
         let old = old.chars().collect::<Vec<_>>();
@@ -94,6 +96,7 @@ impl Diff {
             scores,
             old_text_ix: 0,
             new_text_ix: 0,
+            equal_runs: Default::default(),
         }
     }
 
@@ -107,36 +110,38 @@ impl Diff {
                 let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
                 let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
                 let equality_score = if self.old[i - 1] == self.new[j - 1] {
+                    let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
+                    equal_run += 1;
+                    self.equal_runs.insert((i, j), equal_run);
+
                     if self.old[i - 1] == ' ' {
                         self.scores.get(i - 1, j - 1)
                     } else {
-                        self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
+                        self.scores.get(i - 1, j - 1) + Self::EQUALITY_BASE.pow(equal_run / 3)
                     }
                 } else {
                     isize::MIN
                 };
+
                 let score = insertion_score.max(deletion_score).max(equality_score);
                 self.scores.set(i, j, score);
             }
         }
 
         let mut max_score = isize::MIN;
-        let mut best_row = self.old_text_ix;
-        let mut best_col = self.new_text_ix;
+        let mut next_old_text_ix = self.old_text_ix;
+        let next_new_text_ix = self.new.len();
         for i in self.old_text_ix..=self.old.len() {
-            for j in self.new_text_ix..=self.new.len() {
-                let score = self.scores.get(i, j);
-                if score > max_score {
-                    max_score = score;
-                    best_row = i;
-                    best_col = j;
-                }
+            let score = self.scores.get(i, next_new_text_ix);
+            if score > max_score {
+                max_score = score;
+                next_old_text_ix = i;
             }
         }
 
-        let hunks = self.backtrack(best_row, best_col);
-        self.old_text_ix = best_row;
-        self.new_text_ix = best_col;
+        let hunks = self.backtrack(next_old_text_ix, next_new_text_ix);
+        self.old_text_ix = next_old_text_ix;
+        self.new_text_ix = next_new_text_ix;
         hunks
     }