terminal: Fix performance issues with hyperlink regex matching (#44407)

Piotr Osiewicz and Dave Waggoner created

Problem statement: When given a line that contained a lot of matches of
your hyperlink regex of choice (thanks to #40305), we would look for
matches that intersected with the currently hovered point. This is
extremely expensive, because we would re-walk the whole alacritty grid
for each match. With the repro that Joseph shared, we had to go through
4000 such matches on each frame render.

Problem solution: We now convert the hovered point into a byte offset
within the line in order to throw away matches whose byte range does not
contain that offset. This lets us avoid performing the unnecessary
conversion when we know it's never going to yield a match range that
intersects the hovered point.

Release Notes:

- terminal: Fixed performance regression when handling long lines.

---------

Co-authored-by: Dave Waggoner <waggoner.dave@gmail.com>

Change summary

crates/terminal/src/terminal_hyperlinks.rs | 51 +++++++++++++++++------
1 file changed, 38 insertions(+), 13 deletions(-)

Detailed changes

crates/terminal/src/terminal_hyperlinks.rs 🔗

@@ -208,7 +208,8 @@ fn path_match<T>(
     if path_hyperlink_regexes.is_empty() || path_hyperlink_timeout.as_millis() == 0 {
         return None;
     }
-
+    debug_assert!(line_start <= hovered);
+    debug_assert!(line_end >= hovered);
     let search_start_time = Instant::now();
 
     let timed_out = || {
@@ -224,13 +225,35 @@ fn path_match<T>(
     let mut line = String::with_capacity(
         (line_end.line.0 - line_start.line.0 + 1) as usize * term.grid().columns(),
     );
-    line.push(term.grid()[line_start].c);
+    let first_cell = &term.grid()[line_start];
+    line.push(first_cell.c);
+    let mut start_offset = 0;
+    let mut hovered_point_byte_offset = None;
+
+    if !first_cell.flags.intersects(WIDE_CHAR_SPACERS) {
+        start_offset += first_cell.c.len_utf8();
+        if line_start == hovered {
+            hovered_point_byte_offset = Some(0);
+        }
+    }
+
     for cell in term.grid().iter_from(line_start) {
         if cell.point > line_end {
             break;
         }
+        let is_spacer = cell.flags.intersects(WIDE_CHAR_SPACERS);
+        if cell.point == hovered {
+            debug_assert!(hovered_point_byte_offset.is_none());
+            if start_offset > 0 && cell.flags.contains(Flags::WIDE_CHAR_SPACER) {
+                // If we hovered on a trailing spacer, back up to the end of the previous char's bytes.
+                start_offset -= 1;
+            }
+            hovered_point_byte_offset = Some(start_offset);
+        } else if cell.point < hovered && !is_spacer {
+            start_offset += cell.c.len_utf8();
+        }
 
-        if !cell.flags.intersects(WIDE_CHAR_SPACERS) {
+        if !is_spacer {
             line.push(match cell.c {
                 '\t' => ' ',
                 c @ _ => c,
@@ -238,7 +261,7 @@ fn path_match<T>(
         }
     }
     let line = line.trim_ascii_end();
-
+    let hovered_point_byte_offset = hovered_point_byte_offset?;
     let found_from_range = |path_range: Range<usize>,
                             link_range: Range<usize>,
                             position: Option<(u32, Option<u32>)>| {
@@ -268,7 +291,7 @@ fn path_match<T>(
                 .expand_wide(link_end, AlacDirection::Left)
                 .sub(term, Boundary::Grid, 1);
 
-        Some((
+        (
             {
                 let mut path = line[path_range].to_string();
                 position.inspect(|(line, column)| {
@@ -278,7 +301,7 @@ fn path_match<T>(
                 path
             },
             link_match,
-        ))
+        )
     };
 
     for regex in path_hyperlink_regexes {
@@ -296,7 +319,7 @@ fn path_match<T>(
                     continue;
                 }
             };
-
+            path_found = true;
             let match_range = captures.get(0).unwrap().range();
             let (path_range, line_column) = if let Some(path) = captures.name("path") {
                 let parse = |name: &str| {
@@ -314,14 +337,16 @@ fn path_match<T>(
             };
             let link_range = captures
                 .name("link")
-                .map_or(match_range, |link| link.range());
+                .map_or_else(|| match_range.clone(), |link| link.range());
+
+            if !link_range.contains(&hovered_point_byte_offset) {
+                // No match, just skip.
+                continue;
+            }
             let found = found_from_range(path_range, link_range, line_column);
 
-            if let Some(found) = found {
-                path_found = true;
-                if found.1.contains(&hovered) {
-                    return Some(found);
-                }
+            if found.1.contains(&hovered) {
+                return Some(found);
             }
         }