ep: Add metrics for inserted/deleted tokens (#49330)

Created by Oleksiy Syvokon

Other changes:
- Changed tokenization to a more code-aware tokenization, now shared with split-commit
- Fixed the word-diff implementation, which was inefficient and sometimes
incorrect

Release Notes:

- N/A

Change summary

crates/edit_prediction_cli/src/example.rs      |   4 
crates/edit_prediction_cli/src/metrics.rs      | 198 +++++++++++++++++++
crates/edit_prediction_cli/src/score.rs        |  85 ++++++++
crates/edit_prediction_cli/src/split_commit.rs |  66 +----
crates/edit_prediction_cli/src/word_diff.rs    | 207 ++++++-------------
5 files changed, 369 insertions(+), 191 deletions(-)

Detailed changes

crates/edit_prediction_cli/src/example.rs 🔗

@@ -172,6 +172,10 @@ pub struct ExampleScore {
     pub wrong_editable_region: Option<bool>,
     #[serde(default)]
     pub has_isolated_whitespace_changes: bool,
+    #[serde(default)]
+    pub inserted_tokens: usize,
+    #[serde(default)]
+    pub deleted_tokens: usize,
 }
 
 impl Example {

crates/edit_prediction_cli/src/metrics.rs 🔗

@@ -3,6 +3,7 @@ use collections::HashMap;
 use crate::{
     example::ActualCursor,
     reorder_patch::{Patch, PatchLine},
+    word_diff::{DiffOp, diff_tokens, tokenize},
 };
 
 pub type Counts = HashMap<String, usize>;
@@ -486,6 +487,91 @@ pub fn is_editable_region_correct(actual_patch: &str) -> bool {
     true
 }
 
+#[derive(Debug, Default, Clone)]
+pub struct TokenChangeCounts {
+    pub inserted_tokens: usize,
+    pub deleted_tokens: usize,
+}
+
+/// Counts the number of inserted and deleted tokens in a unified diff patch.
+///
+/// Tokens are words and whitespace sequences (as defined by `word_diff::tokenize`).
+/// Within each hunk, the old (`-`) and new (`+`) lines are compared at the token level
+/// using an LCS-based diff, so modified lines only count the actually changed tokens
+/// rather than the entire line.
+pub fn count_patch_token_changes(patch: &str) -> TokenChangeCounts {
+    let mut counts = TokenChangeCounts::default();
+    let mut old_lines: Vec<&str> = Vec::new();
+    let mut new_lines: Vec<&str> = Vec::new();
+
+    let flush =
+        |old_lines: &mut Vec<&str>, new_lines: &mut Vec<&str>, counts: &mut TokenChangeCounts| {
+            if old_lines.is_empty() && new_lines.is_empty() {
+                return;
+            }
+
+            let old_text: String = old_lines
+                .iter()
+                .map(|line| if line.len() > 1 { &line[1..] } else { "" })
+                .collect::<Vec<_>>()
+                .join("\n");
+
+            let new_text: String = new_lines
+                .iter()
+                .map(|line| if line.len() > 1 { &line[1..] } else { "" })
+                .collect::<Vec<_>>()
+                .join("\n");
+
+            let old_tokens = tokenize(&old_text);
+            let new_tokens = tokenize(&new_text);
+            let ops = diff_tokens(&old_tokens, &new_tokens);
+
+            for op in ops {
+                match op {
+                    DiffOp::Equal(..) => {}
+                    DiffOp::Delete(start, end) => {
+                        counts.deleted_tokens += end - start;
+                    }
+                    DiffOp::Insert(start, end) => {
+                        counts.inserted_tokens += end - start;
+                    }
+                    DiffOp::Replace {
+                        old_start,
+                        old_end,
+                        new_start,
+                        new_end,
+                    } => {
+                        counts.deleted_tokens += old_end - old_start;
+                        counts.inserted_tokens += new_end - new_start;
+                    }
+                }
+            }
+
+            old_lines.clear();
+            new_lines.clear();
+        };
+
+    for line in patch.lines() {
+        if line.starts_with("---")
+            || line.starts_with("+++")
+            || line.starts_with("@@")
+            || line.starts_with("diff ")
+            || line.starts_with("index ")
+        {
+            flush(&mut old_lines, &mut new_lines, &mut counts);
+        } else if line.starts_with('-') {
+            old_lines.push(line);
+        } else if line.starts_with('+') {
+            new_lines.push(line);
+        } else {
+            flush(&mut old_lines, &mut new_lines, &mut counts);
+        }
+    }
+
+    flush(&mut old_lines, &mut new_lines, &mut counts);
+    counts
+}
+
 #[cfg(test)]
 mod test_optimization {
     use super::*;
@@ -977,4 +1063,116 @@ index abc123..def456 100644
         let cursor = cursor_on_line(2);
         assert!(has_isolated_whitespace_changes(patch, Some(&cursor)));
     }
+
+    #[test]
+    fn test_count_patch_token_changes_real_world_rename() {
+        // Real-world patch that was reported as returning 0 tokens
+        let patch = "--- a/sip_call\\README.md\n+++ b/sip_call\\README.md\n@@ -1,1 +1,1 @@\n-# \n+# SIP Call\n";
+        let counts = count_patch_token_changes(patch);
+        // "# " vs "# SIP Call" — the "SIP" and "Call" tokens (and a whitespace token) are inserted
+        assert!(
+            counts.inserted_tokens > 0,
+            "expected inserted tokens > 0, got {}",
+            counts.inserted_tokens
+        );
+        assert_eq!(counts.deleted_tokens, 0);
+    }
+
+    #[test]
+    fn test_count_patch_token_changes_real_world_expansion() {
+        // Real-world patch: single token expanded to multiple lines
+        let patch = "--- a/task1/src/app/app.html\n+++ b/task1/src/app/app.html\n@@ -1,7 +1,9 @@\n <style>\n-  m\n+  main {\n+    \n+  }\n </style>\n \n <main>\n   \n </main>\n";
+        let counts = count_patch_token_changes(patch);
+        assert!(
+            counts.inserted_tokens > 0,
+            "expected inserted tokens > 0, got {}",
+            counts.inserted_tokens
+        );
+        assert!(
+            counts.deleted_tokens > 0,
+            "expected deleted tokens > 0, got {}",
+            counts.deleted_tokens
+        );
+    }
+
+    #[test]
+    fn test_count_patch_token_changes_simple_replacement() {
+        let patch = indoc! {"
+            @@ -1,3 +1,3 @@
+             fn main() {
+            -    println!(\"hello\");
+            +    println!(\"world\");
+             }
+        "};
+        let counts = count_patch_token_changes(patch);
+        assert_eq!(counts.deleted_tokens, 1, "deleted: \"hello\"");
+        assert_eq!(counts.inserted_tokens, 1, "inserted: \"world\"");
+    }
+
+    #[test]
+    fn test_count_patch_token_changes_insertion_only() {
+        let patch = indoc! {"
+            @@ -1,2 +1,3 @@
+             fn main() {
+            +    println!(\"hello\");
+             }
+        "};
+        let counts = count_patch_token_changes(patch);
+        assert_eq!(counts.deleted_tokens, 0);
+        assert!(counts.inserted_tokens > 0);
+    }
+
+    #[test]
+    fn test_count_patch_token_changes_deletion_only() {
+        let patch = indoc! {"
+            @@ -1,3 +1,2 @@
+             fn main() {
+            -    println!(\"hello\");
+             }
+        "};
+        let counts = count_patch_token_changes(patch);
+        assert!(counts.deleted_tokens > 0);
+        assert_eq!(counts.inserted_tokens, 0);
+    }
+
+    #[test]
+    fn test_count_patch_token_changes_empty_patch() {
+        let patch = "";
+        let counts = count_patch_token_changes(patch);
+        assert_eq!(counts.deleted_tokens, 0);
+        assert_eq!(counts.inserted_tokens, 0);
+    }
+
+    #[test]
+    fn test_count_patch_token_changes_multiple_hunks() {
+        let patch = indoc! {"
+            @@ -1,3 +1,3 @@
+             fn main() {
+            -    let x = 1;
+            +    let x = 2;
+             }
+            @@ -10,3 +10,3 @@
+             fn other() {
+            -    let y = 3;
+            +    let y = 4;
+             }
+        "};
+        let counts = count_patch_token_changes(patch);
+        assert_eq!(counts.deleted_tokens, 2, "deleted: \"1\" and \"3\"");
+        assert_eq!(counts.inserted_tokens, 2, "inserted: \"2\" and \"4\"");
+    }
+
+    #[test]
+    fn test_count_patch_token_changes_multiword_change() {
+        let patch = indoc! {"
+            @@ -1,1 +1,1 @@
+            -hello world foo
+            +hello bar baz
+        "};
+        let counts = count_patch_token_changes(patch);
+        // "world" and "foo" deleted, "bar" and "baz" inserted
+        // (whitespace tokens between them may also count)
+        assert!(counts.deleted_tokens >= 2);
+        assert!(counts.inserted_tokens >= 2);
+    }
 }

crates/edit_prediction_cli/src/score.rs 🔗

@@ -76,6 +76,8 @@ pub async fn run_scoring(
         cursor_exact_match: None,
         wrong_editable_region: None,
         has_isolated_whitespace_changes: false,
+        inserted_tokens: 0,
+        deleted_tokens: 0,
     };
 
     let prompt_inputs = example.prompt_inputs.as_ref().unwrap();
@@ -95,10 +97,15 @@ pub async fn run_scoring(
             continue;
         };
 
+        let token_changes = metrics::count_patch_token_changes(&actual_patch);
+
         let actual_text = match apply_diff_to_string(&actual_patch, original_text) {
             Ok(text) => text,
             Err(_) => {
-                scores.push(zero_scores.clone());
+                let mut s = zero_scores.clone();
+                s.inserted_tokens = token_changes.inserted_tokens;
+                s.deleted_tokens = token_changes.deleted_tokens;
+                scores.push(s);
                 continue;
             }
         };
@@ -181,6 +188,8 @@ pub async fn run_scoring(
             cursor_exact_match,
             wrong_editable_region,
             has_isolated_whitespace_changes,
+            inserted_tokens: token_changes.inserted_tokens,
+            deleted_tokens: token_changes.deleted_tokens,
         });
     }
 
@@ -238,6 +247,9 @@ pub fn print_report(examples: &[Example]) {
     let mut wrong_editable_region_count: usize = 0;
     let mut wrong_editable_region_total: usize = 0;
     let mut isolated_whitespace_count: usize = 0;
+    let mut patch_inserted_tokens: Vec<usize> = Vec::new();
+    let mut patch_deleted_tokens: Vec<usize> = Vec::new();
+    let mut predictions_with_patch: usize = 0;
 
     for example in examples {
         for (score_idx, score) in example.score.iter().enumerate() {
@@ -321,6 +333,18 @@ pub fn print_report(examples: &[Example]) {
                 isolated_whitespace_count += 1;
             }
 
+            // Accumulate token change metrics (only for predictions that produced a patch)
+            let has_patch = example
+                .predictions
+                .get(score_idx)
+                .and_then(|p| p.actual_patch.as_ref())
+                .is_some_and(|p| !p.is_empty());
+            if has_patch {
+                predictions_with_patch += 1;
+                patch_inserted_tokens.push(score.inserted_tokens);
+                patch_deleted_tokens.push(score.deleted_tokens);
+            }
+
             // Accumulate cursor metrics
             if let Some(exact_match) = score.cursor_exact_match {
                 cursor_total += 1;
@@ -421,11 +445,70 @@ pub fn print_report(examples: &[Example]) {
         if total_scores > 0 {
             println!("Isolated whitespace changes: {}", isolated_ws_str);
         }
+
+        // Print token change percentile summary (only for predictions with a patch)
+        if !patch_inserted_tokens.is_empty() {
+            patch_inserted_tokens.sort_unstable();
+            patch_deleted_tokens.sort_unstable();
+            let mut patch_total_tokens: Vec<usize> = patch_inserted_tokens
+                .iter()
+                .zip(patch_deleted_tokens.iter())
+                .map(|(i, d)| i + d)
+                .collect();
+            patch_total_tokens.sort_unstable();
+
+            let patch_rate = predictions_with_patch as f32 / total_scores as f32 * 100.0;
+            println!();
+            println!(
+                "Token changes ({}/{} predictions produced a patch, {:.1}% — table includes only those)",
+                predictions_with_patch, total_scores, patch_rate
+            );
+            println!(
+                "{:<20} {:>8} {:>8} {:>8} {:>8} {:>8}",
+                "", "p25", "p50", "p75", "p90", "p99"
+            );
+            println!("{}", "─".repeat(LINE_WIDTH));
+            println!(
+                "{:<20} {:>8} {:>8} {:>8} {:>8} {:>8}",
+                "Inserted tokens",
+                percentile(&patch_inserted_tokens, 25),
+                percentile(&patch_inserted_tokens, 50),
+                percentile(&patch_inserted_tokens, 75),
+                percentile(&patch_inserted_tokens, 90),
+                percentile(&patch_inserted_tokens, 99),
+            );
+            println!(
+                "{:<20} {:>8} {:>8} {:>8} {:>8} {:>8}",
+                "Deleted tokens",
+                percentile(&patch_deleted_tokens, 25),
+                percentile(&patch_deleted_tokens, 50),
+                percentile(&patch_deleted_tokens, 75),
+                percentile(&patch_deleted_tokens, 90),
+                percentile(&patch_deleted_tokens, 99),
+            );
+            println!(
+                "{:<20} {:>8} {:>8} {:>8} {:>8} {:>8}",
+                "Total tokens",
+                percentile(&patch_total_tokens, 25),
+                percentile(&patch_total_tokens, 50),
+                percentile(&patch_total_tokens, 75),
+                percentile(&patch_total_tokens, 90),
+                percentile(&patch_total_tokens, 99),
+            );
+        }
     }
 
     println!("\n");
 }
 
+fn percentile(sorted_values: &[usize], p: usize) -> usize {
+    if sorted_values.is_empty() {
+        return 0;
+    }
+    let idx = (p as f64 / 100.0 * (sorted_values.len() as f64 - 1.0)).round() as usize;
+    sorted_values[idx.min(sorted_values.len() - 1)]
+}
+
 fn truncate_name(name: &str, max_len: usize) -> String {
     if name.len() <= max_len {
         name.to_string()

crates/edit_prediction_cli/src/split_commit.rs 🔗

@@ -6,6 +6,7 @@
 //! TODO: Port Python code to generate chronologically-ordered commits
 use crate::FailedHandling;
 use crate::reorder_patch::{Patch, PatchLine, extract_edits, locate_edited_line};
+use crate::word_diff::tokenize;
 
 /// Find the largest valid UTF-8 char boundary at or before `index` in `s`.
 fn floor_char_boundary(s: &str, index: usize) -> usize {
@@ -413,37 +414,6 @@ pub fn split_ordered_commit(commit: &str, split_pos: usize) -> (String, String)
     (source_str, target_str)
 }
 
-/// Tokenize text into words and non-word characters.
-fn tokenize(text: &str) -> Vec<String> {
-    let mut tokens = Vec::new();
-    let mut current = String::new();
-
-    for ch in text.chars() {
-        if ch.is_alphanumeric() {
-            current.push(ch);
-        } else if ch == '_' {
-            // Include underscore with the current word, then flush
-            current.push(ch);
-            if !current.is_empty() {
-                tokens.push(std::mem::take(&mut current));
-            }
-        } else {
-            // Punctuation or whitespace - flush current word first
-            if !current.is_empty() {
-                tokens.push(std::mem::take(&mut current));
-            }
-            // Each punctuation/whitespace is its own token
-            tokens.push(ch.to_string());
-        }
-    }
-
-    if !current.is_empty() {
-        tokens.push(current);
-    }
-
-    tokens
-}
-
 /// Calculate the weight for a split position based on the character at that position.
 ///
 /// Higher weights indicate more natural pause points (e.g., after punctuation,
@@ -647,12 +617,8 @@ pub fn imitate_human_edits(
     let src_tokens = tokenize(&src_line);
     let tgt_tokens = tokenize(&tgt_line);
 
-    // Convert to slices for similar
-    let src_refs: Vec<&str> = src_tokens.iter().map(|s| s.as_str()).collect();
-    let tgt_refs: Vec<&str> = tgt_tokens.iter().map(|s| s.as_str()).collect();
-
     // Use similar to get diff operations
-    let diff = TextDiff::from_slices(&src_refs, &tgt_refs);
+    let diff = TextDiff::from_slices(&src_tokens, &tgt_tokens);
 
     // Build weights for each possible split position
     let mut position_weights: Vec<u32> = Vec::new();
@@ -665,12 +631,12 @@ pub fn imitate_human_edits(
             match op.tag() {
                 DiffTag::Equal => {
                     for i in op.old_range() {
-                        current_text.push_str(&src_tokens[i]);
+                        current_text.push_str(src_tokens[i]);
                     }
                 }
                 DiffTag::Replace => {
-                    let ins: String = op.new_range().map(|i| tgt_tokens[i].as_str()).collect();
-                    let del: String = op.old_range().map(|i| src_tokens[i].as_str()).collect();
+                    let ins: String = op.new_range().map(|i| tgt_tokens[i]).collect();
+                    let del: String = op.old_range().map(|i| src_tokens[i]).collect();
 
                     // For insertion part
                     for ch in ins.chars() {
@@ -686,7 +652,7 @@ pub fn imitate_human_edits(
                     }
                 }
                 DiffTag::Insert => {
-                    let ins: String = op.new_range().map(|i| tgt_tokens[i].as_str()).collect();
+                    let ins: String = op.new_range().map(|i| tgt_tokens[i]).collect();
                     for ch in ins.chars() {
                         current_text.push(ch);
                         let weight = position_weight(&current_text, current_text.len());
@@ -694,7 +660,7 @@ pub fn imitate_human_edits(
                     }
                 }
                 DiffTag::Delete => {
-                    let del: String = op.old_range().map(|i| src_tokens[i].as_str()).collect();
+                    let del: String = op.old_range().map(|i| src_tokens[i]).collect();
                     for _ in del.chars() {
                         // Weight deletions lower
                         position_weights.push(2);
@@ -719,14 +685,14 @@ pub fn imitate_human_edits(
         match op.tag() {
             DiffTag::Equal => {
                 for i in op.old_range() {
-                    new_src.push_str(&src_tokens[i]);
+                    new_src.push_str(src_tokens[i]);
                 }
                 last_old_end = op.old_range().end;
             }
             DiffTag::Replace => {
                 // Handle replace as delete + insert
-                let del: String = op.old_range().map(|i| src_tokens[i].as_str()).collect();
-                let ins: String = op.new_range().map(|i| tgt_tokens[i].as_str()).collect();
+                let del: String = op.old_range().map(|i| src_tokens[i]).collect();
+                let ins: String = op.new_range().map(|i| tgt_tokens[i]).collect();
                 let repl_len = del.len() + ins.len();
                 if edit_index + repl_len >= split_index {
                     // Split within this replace operation
@@ -750,7 +716,7 @@ pub fn imitate_human_edits(
                 }
             }
             DiffTag::Insert => {
-                let repl: String = op.new_range().map(|i| tgt_tokens[i].as_str()).collect();
+                let repl: String = op.new_range().map(|i| tgt_tokens[i]).collect();
                 if edit_index + repl.len() >= split_index {
                     let offset = split_index - edit_index;
                     let safe_offset = floor_char_boundary(&repl, offset);
@@ -763,7 +729,7 @@ pub fn imitate_human_edits(
                 }
             }
             DiffTag::Delete => {
-                let repl: String = op.old_range().map(|i| src_tokens[i].as_str()).collect();
+                let repl: String = op.old_range().map(|i| src_tokens[i]).collect();
                 if edit_index + repl.len() >= split_index {
                     let offset = split_index - edit_index;
                     let safe_offset = floor_char_boundary(&repl, offset);
@@ -797,10 +763,10 @@ pub fn imitate_human_edits(
 
     // Add remainder of source if similar enough to target remainder
     let remainder_src: String = (last_old_end..src_tokens.len())
-        .map(|i| src_tokens[i].as_str())
+        .map(|i| src_tokens[i])
         .collect();
     let remainder_tgt: String = (last_old_end..tgt_tokens.len())
-        .filter_map(|i| tgt_tokens.get(i).map(|s| s.as_str()))
+        .filter_map(|i| tgt_tokens.get(i).copied())
         .collect();
 
     let ratio = fuzzy_ratio(&remainder_src, &remainder_tgt);
@@ -1104,13 +1070,13 @@ mod tests {
         assert_eq!(tokens, vec!["hello", " ", "world"]);
 
         let tokens = tokenize("foo_bar123 + baz");
-        assert_eq!(tokens, vec!["foo_", "bar123", " ", "+", " ", "baz"]);
+        assert_eq!(tokens, vec!["foo_bar123", " ", "+", " ", "baz"]);
 
         let tokens = tokenize("print(\"hello\")");
         assert_eq!(tokens, vec!["print", "(", "\"", "hello", "\"", ")"]);
 
         let tokens = tokenize("hello_world");
-        assert_eq!(tokens, vec!["hello_", "world"]);
+        assert_eq!(tokens, vec!["hello_world"]);
 
         let tokens = tokenize("fn();");
         assert_eq!(tokens, vec!["fn", "(", ")", ";"]);

crates/edit_prediction_cli/src/word_diff.rs 🔗

@@ -1,5 +1,7 @@
 //! Word-diff utilities for converting unified diffs to word-diff format.
 
+use similar::{DiffTag, TextDiff};
+
 /// Convert unified diff to word-diff format.
 ///
 /// This transforms line-based diffs into word-level diffs where:
@@ -129,29 +131,38 @@ fn compute_word_diff(old_text: &str, new_text: &str) -> String {
     result
 }
 
-/// Tokenize text into words and whitespace sequences.
-fn tokenize(text: &str) -> Vec<&str> {
+/// Classify a character into one of three token classes:
+/// - 0: identifier (alphanumeric or `_`)
+/// - 1: whitespace
+/// - 2: punctuation (everything else, each character becomes its own token)
+fn char_class(ch: char) -> u8 {
+    if ch.is_alphanumeric() || ch == '_' {
+        0
+    } else if ch.is_whitespace() {
+        1
+    } else {
+        2
+    }
+}
+
+/// Tokenize text into identifier words, whitespace runs, and individual punctuation characters.
+///
+/// This splitting aligns with the syntactic atoms of source code so that the
+/// LCS-based diff can produce fine-grained, meaningful change regions.
+pub(crate) fn tokenize(text: &str) -> Vec<&str> {
     let mut tokens = Vec::new();
     let mut chars = text.char_indices().peekable();
 
     while let Some((start, ch)) = chars.next() {
-        if ch.is_whitespace() {
-            // Collect contiguous whitespace
-            let mut end = start + ch.len_utf8();
-            while let Some(&(_, next_ch)) = chars.peek() {
-                if next_ch.is_whitespace() {
-                    end += next_ch.len_utf8();
-                    chars.next();
-                } else {
-                    break;
-                }
-            }
-            tokens.push(&text[start..end]);
+        let class = char_class(ch);
+        if class == 2 {
+            // Punctuation: each character is a separate token
+            tokens.push(&text[start..start + ch.len_utf8()]);
         } else {
-            // Collect contiguous non-whitespace
+            // Identifier or whitespace: collect contiguous run of same class
             let mut end = start + ch.len_utf8();
             while let Some(&(_, next_ch)) = chars.peek() {
-                if !next_ch.is_whitespace() {
+                if char_class(next_ch) == class {
                     end += next_ch.len_utf8();
                     chars.next();
                 } else {
@@ -166,7 +177,7 @@ fn tokenize(text: &str) -> Vec<&str> {
 }
 
 #[derive(Debug)]
-enum DiffOp {
+pub(crate) enum DiffOp {
     Equal(usize, usize),
     Delete(usize, usize),
     Insert(usize, usize),
@@ -178,130 +189,28 @@ enum DiffOp {
     },
 }
 
-/// Compute diff operations between two token sequences using a simple LCS-based algorithm.
-fn diff_tokens<'a>(old: &[&'a str], new: &[&'a str]) -> Vec<DiffOp> {
-    // Build LCS table
-    let m = old.len();
-    let n = new.len();
-
-    if m == 0 && n == 0 {
-        return vec![];
-    }
-    if m == 0 {
-        return vec![DiffOp::Insert(0, n)];
-    }
-    if n == 0 {
-        return vec![DiffOp::Delete(0, m)];
-    }
-
-    // LCS dynamic programming
-    let mut dp = vec![vec![0usize; n + 1]; m + 1];
-    for i in 1..=m {
-        for j in 1..=n {
-            if old[i - 1] == new[j - 1] {
-                dp[i][j] = dp[i - 1][j - 1] + 1;
-            } else {
-                dp[i][j] = dp[i - 1][j].max(dp[i][j - 1]);
-            }
-        }
-    }
-
-    // Backtrack to find operations
-    let mut ops = Vec::new();
-    let mut i = m;
-    let mut j = n;
-
-    // We'll collect in reverse order, then reverse at the end
-    let mut stack: Vec<(usize, usize, bool)> = Vec::new(); // (index, end, is_old)
-
-    while i > 0 || j > 0 {
-        if i > 0 && j > 0 && old[i - 1] == new[j - 1] {
-            stack.push((i - 1, i, true)); // Equal marker (using old index)
-            stack.push((j - 1, j, false)); // Paired with new index
-            i -= 1;
-            j -= 1;
-        } else if j > 0 && (i == 0 || dp[i][j - 1] >= dp[i - 1][j]) {
-            // Insert from new
-            stack.push((j - 1, j, false));
-            j -= 1;
-        } else {
-            // Delete from old
-            stack.push((i - 1, i, true));
-            i -= 1;
-        }
-    }
-
-    // Process the stack to build proper DiffOps
-    // This is a simplified approach - just iterate through and build ops
-    let mut old_idx = 0;
-    let mut new_idx = 0;
-
-    while old_idx < m || new_idx < n {
-        // Find next matching pair
-        let mut old_match = None;
-        let mut new_match = None;
-
-        for oi in old_idx..m {
-            for ni in new_idx..n {
-                if old[oi] == new[ni] {
-                    old_match = Some(oi);
-                    new_match = Some(ni);
-                    break;
-                }
-            }
-            if old_match.is_some() {
-                break;
-            }
-        }
-
-        match (old_match, new_match) {
-            (Some(om), Some(nm)) => {
-                // Handle any deletions/insertions before the match
-                if old_idx < om && new_idx < nm {
-                    ops.push(DiffOp::Replace {
-                        old_start: old_idx,
-                        old_end: om,
-                        new_start: new_idx,
-                        new_end: nm,
-                    });
-                } else if old_idx < om {
-                    ops.push(DiffOp::Delete(old_idx, om));
-                } else if new_idx < nm {
-                    ops.push(DiffOp::Insert(new_idx, nm));
-                }
-
-                // Find the extent of the equal sequence
-                let mut eq_end_old = om;
-                let mut eq_end_new = nm;
-                while eq_end_old < m && eq_end_new < n && old[eq_end_old] == new[eq_end_new] {
-                    eq_end_old += 1;
-                    eq_end_new += 1;
-                }
-
-                ops.push(DiffOp::Equal(om, eq_end_old));
-                old_idx = eq_end_old;
-                new_idx = eq_end_new;
-            }
-            _ => {
-                // No more matches, handle remaining
-                if old_idx < m && new_idx < n {
-                    ops.push(DiffOp::Replace {
-                        old_start: old_idx,
-                        old_end: m,
-                        new_start: new_idx,
-                        new_end: n,
-                    });
-                } else if old_idx < m {
-                    ops.push(DiffOp::Delete(old_idx, m));
-                } else if new_idx < n {
-                    ops.push(DiffOp::Insert(new_idx, n));
-                }
-                break;
+/// Compute diff operations between two token sequences using `similar`'s Myers diff.
+pub(crate) fn diff_tokens<'a>(old: &[&'a str], new: &[&'a str]) -> Vec<DiffOp> {
+    let diff = TextDiff::from_slices(old, new);
+    diff.ops()
+        .iter()
+        .map(|op| {
+            let tag = op.tag();
+            let old_range = op.old_range();
+            let new_range = op.new_range();
+            match tag {
+                DiffTag::Equal => DiffOp::Equal(old_range.start, old_range.end),
+                DiffTag::Delete => DiffOp::Delete(old_range.start, old_range.end),
+                DiffTag::Insert => DiffOp::Insert(new_range.start, new_range.end),
+                DiffTag::Replace => DiffOp::Replace {
+                    old_start: old_range.start,
+                    old_end: old_range.end,
+                    new_start: new_range.start,
+                    new_end: new_range.end,
+                },
             }
-        }
-    }
-
-    ops
+        })
+        .collect()
 }
 
 #[cfg(test)]
@@ -315,6 +224,24 @@ mod tests {
 
         let tokens = tokenize("  multiple   spaces  ");
         assert_eq!(tokens, vec!["  ", "multiple", "   ", "spaces", "  "]);
+
+        let tokens = tokenize("self.name");
+        assert_eq!(tokens, vec!["self", ".", "name"]);
+
+        let tokens = tokenize("foo(bar, baz)");
+        assert_eq!(tokens, vec!["foo", "(", "bar", ",", " ", "baz", ")"]);
+
+        let tokens = tokenize("hello_world");
+        assert_eq!(tokens, vec!["hello_world"]);
+
+        let tokens = tokenize("fn();");
+        assert_eq!(tokens, vec!["fn", "(", ")", ";"]);
+
+        let tokens = tokenize("foo_bar123 + baz");
+        assert_eq!(tokens, vec!["foo_bar123", " ", "+", " ", "baz"]);
+
+        let tokens = tokenize("print(\"hello\")");
+        assert_eq!(tokens, vec!["print", "(", "\"", "hello", "\"", ")"]);
     }
 
     #[test]