pattern_extraction.rs

  1use url::Url;
  2
  3/// Extracts a regex pattern from a terminal command based on the first token (command name).
  4///
  5/// Returns `None` for commands starting with `./`, `/`, or other path-like prefixes.
  6/// This is a deliberate security decision: we only allow pattern-based "always allow"
  7/// rules for well-known command names (like `cargo`, `npm`, `git`), not for arbitrary
  8/// scripts or absolute paths which could be manipulated by an attacker.
  9pub fn extract_terminal_pattern(command: &str) -> Option<String> {
 10    let first_token = command.split_whitespace().next()?;
 11    // Only allow alphanumeric commands with hyphens/underscores.
 12    // Reject paths like "./script.sh" or "/usr/bin/python" to prevent
 13    // users from accidentally allowing arbitrary script execution.
 14    if first_token
 15        .chars()
 16        .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
 17    {
 18        Some(format!("^{}\\s", regex::escape(first_token)))
 19    } else {
 20        None
 21    }
 22}
 23
 24pub fn extract_terminal_pattern_display(command: &str) -> Option<String> {
 25    let first_token = command.split_whitespace().next()?;
 26    if first_token
 27        .chars()
 28        .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
 29    {
 30        Some(first_token.to_string())
 31    } else {
 32        None
 33    }
 34}
 35
 36pub fn extract_path_pattern(path: &str) -> Option<String> {
 37    let parent = std::path::Path::new(path).parent()?;
 38    let parent_str = parent.to_str()?;
 39    if parent_str.is_empty() || parent_str == "/" {
 40        return None;
 41    }
 42    Some(format!("^{}/", regex::escape(parent_str)))
 43}
 44
 45pub fn extract_path_pattern_display(path: &str) -> Option<String> {
 46    let parent = std::path::Path::new(path).parent()?;
 47    let parent_str = parent.to_str()?;
 48    if parent_str.is_empty() || parent_str == "/" {
 49        return None;
 50    }
 51    Some(format!("{}/", parent_str))
 52}
 53
 54pub fn extract_url_pattern(url: &str) -> Option<String> {
 55    let parsed = Url::parse(url).ok()?;
 56    let domain = parsed.host_str()?;
 57    Some(format!("^https?://{}", regex::escape(domain)))
 58}
 59
 60pub fn extract_url_pattern_display(url: &str) -> Option<String> {
 61    let parsed = Url::parse(url).ok()?;
 62    let domain = parsed.host_str()?;
 63    Some(domain.to_string())
 64}
 65
 66#[cfg(test)]
 67mod tests {
 68    use super::*;
 69
 70    #[test]
 71    fn test_extract_terminal_pattern() {
 72        assert_eq!(
 73            extract_terminal_pattern("cargo build --release"),
 74            Some("^cargo\\s".to_string())
 75        );
 76        assert_eq!(
 77            extract_terminal_pattern("npm install"),
 78            Some("^npm\\s".to_string())
 79        );
 80        assert_eq!(
 81            extract_terminal_pattern("git-lfs pull"),
 82            Some("^git\\-lfs\\s".to_string())
 83        );
 84        assert_eq!(
 85            extract_terminal_pattern("my_script arg"),
 86            Some("^my_script\\s".to_string())
 87        );
 88        assert_eq!(extract_terminal_pattern("./script.sh arg"), None);
 89        assert_eq!(extract_terminal_pattern("/usr/bin/python arg"), None);
 90    }
 91
 92    #[test]
 93    fn test_extract_terminal_pattern_display() {
 94        assert_eq!(
 95            extract_terminal_pattern_display("cargo build --release"),
 96            Some("cargo".to_string())
 97        );
 98        assert_eq!(
 99            extract_terminal_pattern_display("npm install"),
100            Some("npm".to_string())
101        );
102    }
103
104    #[test]
105    fn test_extract_path_pattern() {
106        assert_eq!(
107            extract_path_pattern("/Users/alice/project/src/main.rs"),
108            Some("^/Users/alice/project/src/".to_string())
109        );
110        assert_eq!(
111            extract_path_pattern("src/lib.rs"),
112            Some("^src/".to_string())
113        );
114        assert_eq!(extract_path_pattern("file.txt"), None);
115        assert_eq!(extract_path_pattern("/file.txt"), None);
116    }
117
118    #[test]
119    fn test_extract_path_pattern_display() {
120        assert_eq!(
121            extract_path_pattern_display("/Users/alice/project/src/main.rs"),
122            Some("/Users/alice/project/src/".to_string())
123        );
124        assert_eq!(
125            extract_path_pattern_display("src/lib.rs"),
126            Some("src/".to_string())
127        );
128    }
129
130    #[test]
131    fn test_extract_url_pattern() {
132        assert_eq!(
133            extract_url_pattern("https://github.com/user/repo"),
134            Some("^https?://github\\.com".to_string())
135        );
136        assert_eq!(
137            extract_url_pattern("http://example.com/path?query=1"),
138            Some("^https?://example\\.com".to_string())
139        );
140        assert_eq!(extract_url_pattern("not a url"), None);
141    }
142
143    #[test]
144    fn test_extract_url_pattern_display() {
145        assert_eq!(
146            extract_url_pattern_display("https://github.com/user/repo"),
147            Some("github.com".to_string())
148        );
149        assert_eq!(
150            extract_url_pattern_display("http://api.example.com/v1/users"),
151            Some("api.example.com".to_string())
152        );
153    }
154
155    #[test]
156    fn test_special_chars_are_escaped() {
157        assert_eq!(
158            extract_path_pattern("/path/with (parens)/file.txt"),
159            Some("^/path/with \\(parens\\)/".to_string())
160        );
161        assert_eq!(
162            extract_url_pattern("https://test.example.com/path"),
163            Some("^https?://test\\.example\\.com".to_string())
164        );
165    }
166}