Terminal regex perf improvements (#44679)

Conrad Irwin created

Closes #44510

Release Notes:

- Improved performance of terminal link matching even further

Change summary

Cargo.lock                                 |  2 
assets/settings/default.json               | 34 ++++++++++-------------
crates/terminal/Cargo.toml                 |  2 
crates/terminal/src/terminal_hyperlinks.rs | 33 ++++-------------------
4 files changed, 23 insertions(+), 48 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -16365,13 +16365,13 @@ dependencies = [
  "alacritty_terminal",
  "anyhow",
  "collections",
- "fancy-regex",
  "futures 0.3.31",
  "gpui",
  "itertools 0.14.0",
  "libc",
  "log",
  "rand 0.9.2",
+ "regex",
  "release_channel",
  "schemars",
  "serde",

assets/settings/default.json 🔗

@@ -1646,25 +1646,21 @@
       // surrounding symbols or quotes
       [
         "(?x)",
-        "# optionally starts with 0-2 opening prefix symbols",
-        "[({\\[<]{0,2}",
-        "# which may be followed by an opening quote",
-        "(?<quote>[\"'`])?",
-        "# `path` is the shortest sequence of any non-space character",
-        "(?<link>(?<path>[^ ]+?",
-        "    # which may end with a line and optionally a column,",
-        "    (?<line_column>:+[0-9]+(:[0-9]+)?|:?\\([0-9]+([,:][0-9]+)?\\))?",
-        "))",
-        "# which must be followed by a matching quote",
-        "(?(<quote>)\\k<quote>)",
-        "# and optionally a single closing symbol",
-        "[)}\\]>]?",
-        "# if line/column matched, may be followed by a description",
-        "(?(<line_column>):[^ 0-9][^ ]*)?",
-        "# which may be followed by trailing punctuation",
-        "[.,:)}\\]>]*",
-        "# and always includes trailing whitespace or end of line",
-        "([ ]+|$)",
+        "(?<path>",
+        "    (",
+        "        # multi-char path: first char (not opening delimiter or space)",
+        "        [^({\\[<\"'`\\ ]",
+        "        # middle chars: non-space, and colon/paren only if not followed by digit/paren",
+        "        ([^\\ :(]|[:(][^0-9()])*",
+        "        # last char: not closing delimiter or colon",
+        "        [^()}\\]>\"'`.,;:\\ ]",
+        "    |",
+        "        # single-char path: not delimiter, punctuation, or space",
+        "        [^(){}\\[\\]<>\"'`.,;:\\ ]",
+        "    )",
+        "    # optional line/column suffix (included in path for PathWithPosition::parse_str)",
+        "    (:+[0-9]+(:[0-9]+)?|:?\\([0-9]+([,:]?[0-9]+)?\\))?",
+        ")",
       ],
     ],
     // Timeout for hover and Cmd-click path hyperlink discovery in milliseconds. Specifying a

crates/terminal/Cargo.toml 🔗

@@ -28,6 +28,7 @@ gpui.workspace = true
 itertools.workspace = true
 libc.workspace = true
 log.workspace = true
+regex.workspace = true
 release_channel.workspace = true
 schemars.workspace = true
 serde.workspace = true
@@ -38,7 +39,6 @@ task.workspace = true
 theme.workspace = true
 thiserror.workspace = true
 util.workspace = true
-fancy-regex.workspace = true
 urlencoding.workspace = true
 
 [target.'cfg(windows)'.dependencies]

crates/terminal/src/terminal_hyperlinks.rs 🔗

@@ -8,8 +8,8 @@ use alacritty_terminal::{
         search::{Match, RegexIter, RegexSearch},
     },
 };
-use fancy_regex::Regex;
 use log::{info, warn};
+use regex::Regex;
 use std::{
     ops::{Index, Range},
     time::{Duration, Instant},
@@ -308,17 +308,6 @@ fn path_match<T>(
         let mut path_found = false;
 
         for captures in regex.captures_iter(&line) {
-            let captures = match captures {
-                Ok(captures) => captures,
-                Err(error) => {
-                    warn!("Error '{error}' searching for path hyperlinks in line: {line}");
-                    info!(
-                        "Skipping match from path hyperlinks with regex: {}",
-                        regex.as_str()
-                    );
-                    continue;
-                }
-            };
             path_found = true;
             let match_range = captures.get(0).unwrap().range();
             let (path_range, line_column) = if let Some(path) = captures.name("path") {
@@ -376,7 +365,7 @@ mod tests {
         term::{Config, cell::Flags, test::TermSize},
         vte::ansi::Handler,
     };
-    use fancy_regex::Regex;
+    use regex::Regex;
     use settings::{self, Settings, SettingsContent};
     use std::{cell::RefCell, ops::RangeInclusive, path::PathBuf, rc::Rc};
     use url::Url;
@@ -386,7 +375,7 @@ mod tests {
         let results: Vec<_> = Regex::new(re)
             .unwrap()
             .find_iter(hay)
-            .map(|m| m.unwrap().as_str())
+            .map(|m| m.as_str())
             .collect();
         assert_eq!(results, expected);
     }
@@ -578,8 +567,6 @@ mod tests {
             test_path!("/test/cool.rs(4,2)👉:", "What is this?");
 
             // path, line, column, and description
-            test_path!("/test/cool.rs:4:2👉:Error!");
-            test_path!("/test/cool.rs:4:2:👉Error!");
             test_path!("‹«/test/co👉ol.rs»:«4»:«2»›:Error!");
             test_path!("‹«/test/co👉ol.rs»(«4»,«2»)›:Error!");
 
@@ -590,6 +577,7 @@ mod tests {
 
             // Python
             test_path!("‹«awe👉some.py»›");
+            test_path!("‹«👉a»› ");
 
             test_path!("    ‹F👉ile \"«/awesome.py»\", line «42»›: Wat?");
             test_path!("    ‹File \"«/awe👉some.py»\", line «42»›");
@@ -602,18 +590,14 @@ mod tests {
             // path, line, column and description
             test_path!("‹«/👉test/cool.rs»:«4»:«2»›:例Desc例例例");
             test_path!("‹«/test/cool.rs»:«4»:«👉2»›:例Desc例例例");
-            test_path!("/test/cool.rs:4:2:例Desc例👉例例");
             test_path!("‹«/👉test/cool.rs»(«4»,«2»)›:例Desc例例例");
             test_path!("‹«/test/cool.rs»(«4»👉,«2»)›:例Desc例例例");
-            test_path!("/test/cool.rs(4,2):例Desc例👉例例");
 
             // path, line, column and description w/extra colons
             test_path!("‹«/👉test/cool.rs»:«4»:«2»›::例Desc例例例");
             test_path!("‹«/test/cool.rs»:«4»:«👉2»›::例Desc例例例");
-            test_path!("/test/cool.rs:4:2::例Desc例👉例例");
             test_path!("‹«/👉test/cool.rs»(«4»,«2»)›::例Desc例例例");
             test_path!("‹«/test/cool.rs»(«4»,«2»👉)›::例Desc例例例");
-            test_path!("/test/cool.rs(4,2)::例Desc例👉例例");
         }
 
         #[test]
@@ -658,8 +642,6 @@ mod tests {
             test_path!("‹«/test/co👉ol.rs»(«1»,«618»)›:");
             test_path!("‹«/test/co👉ol.rs»::«42»›");
             test_path!("‹«/test/co👉ol.rs»::«42»›:");
-            test_path!("‹«/test/co👉ol.rs:4:2»(«1»,«618»)›");
-            test_path!("‹«/test/co👉ol.rs:4:2»(«1»,«618»)›:");
             test_path!("‹«/test/co👉ol.rs»(«1»,«618»)›::");
         }
 
@@ -675,7 +657,7 @@ mod tests {
             test_path!("<‹«/test/co👉ol.rs»:«4»›>");
 
             test_path!("[\"‹«/test/co👉ol.rs»:«4»›\"]");
-            test_path!("'‹«(/test/co👉ol.rs:4)»›'");
+            test_path!("'(‹«/test/co👉ol.rs»:«4»›)'");
 
             test_path!("\"‹«/test/co👉ol.rs»:«4»:«2»›\"");
             test_path!("'‹«/test/co👉ol.rs»:«4»:«2»›'");
@@ -724,7 +706,7 @@ mod tests {
             test_path!("‹«/test/co👉ol.rs»:«4»›:,");
             test_path!("/test/cool.rs:4:👉,");
             test_path!("[\"‹«/test/co👉ol.rs»:«4»›\"]:,");
-            test_path!("'‹«(/test/co👉ol.rs:4),,»›'..");
+            test_path!("'(‹«/test/co👉ol.rs»:«4»›),,'...");
             test_path!("('‹«/test/co👉ol.rs»:«4»›'::: was here...)");
             test_path!("[Here's <‹«/test/co👉ol.rs»:«4»›>]::: ");
         }
@@ -849,9 +831,6 @@ mod tests {
                 test_path!(
                     "‹«test/c👉ontrollers/template_items_controller_test.rb»:«20»›:in 'block (2 levels) in <class:TemplateItemsControllerTest>'"
                 );
-                test_path!(
-                    "test/controllers/template_items_controller_test.rb:19:i👉n 'block in <class:TemplateItemsControllerTest>'"
-                );
             }
 
             #[test]