paths.rs

  1use std::sync::OnceLock;
  2use std::{
  3    ffi::OsStr,
  4    path::{Path, PathBuf},
  5    sync::LazyLock,
  6};
  7
  8use globset::{Glob, GlobSet, GlobSetBuilder};
  9use regex::Regex;
 10use serde::{Deserialize, Serialize};
 11
 12/// Returns the path to the user's home directory.
 13pub fn home_dir() -> &'static PathBuf {
 14    static HOME_DIR: OnceLock<PathBuf> = OnceLock::new();
 15    HOME_DIR.get_or_init(|| dirs::home_dir().expect("failed to determine home directory"))
 16}
 17
 18pub trait PathExt {
 19    fn compact(&self) -> PathBuf;
 20    fn icon_stem_or_suffix(&self) -> Option<&str>;
 21    fn extension_or_hidden_file_name(&self) -> Option<&str>;
 22    fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
 23    where
 24        Self: From<&'a Path>,
 25    {
 26        #[cfg(unix)]
 27        {
 28            use std::os::unix::prelude::OsStrExt;
 29            Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
 30        }
 31        #[cfg(windows)]
 32        {
 33            use anyhow::anyhow;
 34            use tendril::fmt::{Format, WTF8};
 35            WTF8::validate(bytes)
 36                .then(|| {
 37                    // Safety: bytes are valid WTF-8 sequence.
 38                    Self::from(Path::new(unsafe {
 39                        OsStr::from_encoded_bytes_unchecked(bytes)
 40                    }))
 41                })
 42                .ok_or_else(|| anyhow!("Invalid WTF-8 sequence: {bytes:?}"))
 43        }
 44    }
 45}
 46
 47impl<T: AsRef<Path>> PathExt for T {
 48    /// Compacts a given file path by replacing the user's home directory
 49    /// prefix with a tilde (`~`).
 50    ///
 51    /// # Returns
 52    ///
 53    /// * A `PathBuf` containing the compacted file path. If the input path
 54    ///   does not have the user's home directory prefix, or if we are not on
 55    ///   Linux or macOS, the original path is returned unchanged.
 56    fn compact(&self) -> PathBuf {
 57        if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
 58            match self.as_ref().strip_prefix(home_dir().as_path()) {
 59                Ok(relative_path) => {
 60                    let mut shortened_path = PathBuf::new();
 61                    shortened_path.push("~");
 62                    shortened_path.push(relative_path);
 63                    shortened_path
 64                }
 65                Err(_) => self.as_ref().to_path_buf(),
 66            }
 67        } else {
 68            self.as_ref().to_path_buf()
 69        }
 70    }
 71
 72    /// Returns either the suffix if available, or the file stem otherwise to determine which file icon to use
 73    fn icon_stem_or_suffix(&self) -> Option<&str> {
 74        let path = self.as_ref();
 75        let file_name = path.file_name()?.to_str()?;
 76        if file_name.starts_with('.') {
 77            return file_name.strip_prefix('.');
 78        }
 79
 80        path.extension()
 81            .and_then(|e| e.to_str())
 82            .or_else(|| path.file_stem()?.to_str())
 83    }
 84
 85    /// Returns a file's extension or, if the file is hidden, its name without the leading dot
 86    fn extension_or_hidden_file_name(&self) -> Option<&str> {
 87        if let Some(extension) = self.as_ref().extension() {
 88            return extension.to_str();
 89        }
 90
 91        self.as_ref().file_name()?.to_str()?.split('.').last()
 92    }
 93}
 94
 95/// A delimiter to use in `path_query:row_number:column_number` strings parsing.
 96pub const FILE_ROW_COLUMN_DELIMITER: char = ':';
 97
 98/// Extracts filename and row-column suffixes.
 99/// Parenthesis format is used by [MSBuild](https://learn.microsoft.com/en-us/visualstudio/msbuild/msbuild-diagnostic-format-for-tasks) compatible tools
100// NOTE: All cases need to have exactly three capture groups for extract(): file_name, row and column.
101// Valid patterns that don't contain row and/or column should have empty groups in their place.
102const ROW_COL_CAPTURE_REGEX: &str = r"(?x)
103    ([^\(]+)(?:
104        \((\d+),(\d+)\) # filename(row,column)
105        |
106        \((\d+)\)()     # filename(row)
107    )
108    |
109    ([^\:]+)(?:
110        \:(\d+)\:(\d+)  # filename:row:column
111        |
112        \:(\d+)()       # filename:row
113        |
114        \:()()          # filename:
115    )";
116
117/// A representation of a path-like string with optional row and column numbers.
118/// Matching values example: `te`, `test.rs:22`, `te:22:5`, `test.c(22)`, `test.c(22,5)`etc.
119#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
120pub struct PathWithPosition {
121    pub path: PathBuf,
122    pub row: Option<u32>,
123    // Absent if row is absent.
124    pub column: Option<u32>,
125}
126
127impl PathWithPosition {
128    /// Returns a PathWithPosition from a path.
129    pub fn from_path(path: PathBuf) -> Self {
130        Self {
131            path,
132            row: None,
133            column: None,
134        }
135    }
136    /// Parses a string that possibly has `:row:column` or `(row, column)` suffix.
137    /// Ignores trailing `:`s, so `test.rs:22:` is parsed as `test.rs:22`.
138    /// If the suffix parsing fails, the whole string is parsed as a path.
139    pub fn parse_str(s: &str) -> Self {
140        let trimmed = s.trim();
141        let path = Path::new(trimmed);
142        let maybe_file_name_with_row_col = path
143            .file_name()
144            .unwrap_or_default()
145            .to_str()
146            .unwrap_or_default();
147        if maybe_file_name_with_row_col.is_empty() {
148            return Self {
149                path: Path::new(s).to_path_buf(),
150                row: None,
151                column: None,
152            };
153        }
154
155        // Let's avoid repeated init cost on this. It is subject to thread contention, but
156        // so far this code isn't called from multiple hot paths. Getting contention here
157        // in the future seems unlikely.
158        static SUFFIX_RE: LazyLock<Regex> =
159            LazyLock::new(|| Regex::new(ROW_COL_CAPTURE_REGEX).unwrap());
160        match SUFFIX_RE
161            .captures(maybe_file_name_with_row_col)
162            .map(|caps| caps.extract())
163        {
164            Some((_, [file_name, maybe_row, maybe_column])) => {
165                let row = maybe_row.parse::<u32>().ok();
166                let column = maybe_column.parse::<u32>().ok();
167
168                let suffix_length = maybe_file_name_with_row_col.len() - file_name.len();
169                let path_without_suffix = &trimmed[..trimmed.len() - suffix_length];
170
171                Self {
172                    path: Path::new(path_without_suffix).to_path_buf(),
173                    row,
174                    column,
175                }
176            }
177            None => Self {
178                path: Path::new(s).to_path_buf(),
179                row: None,
180                column: None,
181            },
182        }
183    }
184
185    pub fn map_path<E>(
186        self,
187        mapping: impl FnOnce(PathBuf) -> Result<PathBuf, E>,
188    ) -> Result<PathWithPosition, E> {
189        Ok(PathWithPosition {
190            path: mapping(self.path)?,
191            row: self.row,
192            column: self.column,
193        })
194    }
195
196    pub fn to_string(&self, path_to_string: impl Fn(&PathBuf) -> String) -> String {
197        let path_string = path_to_string(&self.path);
198        if let Some(row) = self.row {
199            if let Some(column) = self.column {
200                format!("{path_string}:{row}:{column}")
201            } else {
202                format!("{path_string}:{row}")
203            }
204        } else {
205            path_string
206        }
207    }
208}
209
210#[derive(Clone, Debug, Default)]
211pub struct PathMatcher {
212    sources: Vec<String>,
213    glob: GlobSet,
214}
215
216// impl std::fmt::Display for PathMatcher {
217//     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
218//         self.sources.fmt(f)
219//     }
220// }
221
222impl PartialEq for PathMatcher {
223    fn eq(&self, other: &Self) -> bool {
224        self.sources.eq(&other.sources)
225    }
226}
227
228impl Eq for PathMatcher {}
229
230impl PathMatcher {
231    pub fn new(globs: &[String]) -> Result<Self, globset::Error> {
232        let globs = globs
233            .into_iter()
234            .map(|glob| Glob::new(&glob))
235            .collect::<Result<Vec<_>, _>>()?;
236        let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
237        let mut glob_builder = GlobSetBuilder::new();
238        for single_glob in globs {
239            glob_builder.add(single_glob);
240        }
241        let glob = glob_builder.build()?;
242        Ok(PathMatcher { glob, sources })
243    }
244
245    pub fn sources(&self) -> &[String] {
246        &self.sources
247    }
248
249    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
250        let other_path = other.as_ref();
251        self.sources.iter().any(|source| {
252            let as_bytes = other_path.as_os_str().as_encoded_bytes();
253            as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
254        }) || self.glob.is_match(other_path)
255            || self.check_with_end_separator(other_path)
256    }
257
258    fn check_with_end_separator(&self, path: &Path) -> bool {
259        let path_str = path.to_string_lossy();
260        let separator = std::path::MAIN_SEPARATOR_STR;
261        if path_str.ends_with(separator) {
262            self.glob.is_match(path)
263        } else {
264            self.glob.is_match(path_str.to_string() + separator)
265        }
266    }
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected parse result for a test case.
    fn expected_position(path: &str, row: Option<u32>, column: Option<u32>) -> PathWithPosition {
        PathWithPosition {
            path: PathBuf::from(path),
            row,
            column,
        }
    }

    /// Builds a matcher from a single glob pattern.
    fn single_glob_matcher(pattern: &str) -> PathMatcher {
        PathMatcher::new(&[pattern.to_owned()]).unwrap()
    }

    #[test]
    fn path_with_position_parsing_positive() {
        let cases = [
            ("test_file.rs", expected_position("test_file.rs", None, None)),
            (
                "test_file.rs:1",
                expected_position("test_file.rs", Some(1), None),
            ),
            (
                "test_file.rs:1:2",
                expected_position("test_file.rs", Some(1), Some(2)),
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(
                PathWithPosition::parse_str(input),
                expected,
                "For positive case input str '{input}', got a parse mismatch"
            );
        }
    }

    #[test]
    fn path_with_position_parsing_negative() {
        let cases: [(&str, Option<u32>, Option<u32>); 8] = [
            ("test_file.rs:a", None, None),
            ("test_file.rs:a:b", None, None),
            ("test_file.rs::", None, None),
            ("test_file.rs::1", None, None),
            ("test_file.rs:1::", Some(1), None),
            ("test_file.rs::1:2", None, None),
            ("test_file.rs:1::2", Some(1), None),
            ("test_file.rs:1:2:3", Some(1), Some(2)),
        ];

        for (input, row, column) in cases {
            let expected = expected_position("test_file.rs", row, column);
            assert_eq!(
                PathWithPosition::parse_str(input),
                expected,
                "For negative case input str '{input}', got a parse mismatch"
            );
        }
    }

    // Trim off trailing `:`s for otherwise valid input.
    #[test]
    fn path_with_position_parsing_special() {
        // Cases shared by every platform.
        #[allow(unused_mut)]
        let mut cases = vec![
            ("test_file.rs:", expected_position("test_file.rs", None, None)),
            (
                "test_file.rs:1:",
                expected_position("test_file.rs", Some(1), None),
            ),
        ];

        #[cfg(not(target_os = "windows"))]
        cases.push((
            "crates/file_finder/src/file_finder.rs:1902:13:",
            expected_position("crates/file_finder/src/file_finder.rs", Some(1902), Some(13)),
        ));

        #[cfg(target_os = "windows")]
        cases.extend([
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:",
                expected_position(
                    "\\\\?\\C:\\Users\\someone\\test_file.rs",
                    Some(1902),
                    Some(13),
                ),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:15:",
                expected_position(
                    "\\\\?\\C:\\Users\\someone\\test_file.rs",
                    Some(1902),
                    Some(13),
                ),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:::15:",
                expected_position("\\\\?\\C:\\Users\\someone\\test_file.rs", Some(1902), None),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902,13):",
                expected_position(
                    "\\\\?\\C:\\Users\\someone\\test_file.rs",
                    Some(1902),
                    Some(13),
                ),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902):",
                expected_position("\\\\?\\C:\\Users\\someone\\test_file.rs", Some(1902), None),
            ),
            (
                "C:\\Users\\someone\\test_file.rs:1902:13:",
                expected_position("C:\\Users\\someone\\test_file.rs", Some(1902), Some(13)),
            ),
            (
                "crates/utils/paths.rs",
                expected_position("crates\\utils\\paths.rs", None, None),
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902,13):",
                expected_position("C:\\Users\\someone\\test_file.rs", Some(1902), Some(13)),
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902):",
                expected_position("C:\\Users\\someone\\test_file.rs", Some(1902), None),
            ),
            (
                "crates/utils/paths.rs:101",
                expected_position("crates\\utils\\paths.rs", Some(101), None),
            ),
        ]);

        for (input, expected) in cases {
            assert_eq!(
                PathWithPosition::parse_str(input),
                expected,
                "For special case input str '{input}', got a parse mismatch"
            );
        }
    }

    #[test]
    fn test_path_compact() {
        let mut path = PathBuf::from(home_dir().to_string_lossy().into_owned());
        path.push("some_file.txt");

        let compacted = path.compact();
        let expected = if cfg!(any(target_os = "linux", target_os = "macos")) {
            Some("~/some_file.txt")
        } else {
            path.to_str()
        };
        assert_eq!(compacted.to_str(), expected);
    }

    #[test]
    fn test_icon_stem_or_suffix() {
        let cases = [
            // No dots in name
            ("/a/b/c/file_name.rs", "rs"),
            // Single dot in name
            ("/a/b/c/file.name.rs", "rs"),
            // No suffix
            ("/a/b/c/file", "file"),
            // Multiple dots in name
            ("/a/b/c/long.file.name.rs", "rs"),
            // Hidden file, no extension
            ("/a/b/c/.gitignore", "gitignore"),
            // Hidden file, with extension
            ("/a/b/c/.eslintrc.js", "eslintrc.js"),
        ];

        for (raw_path, expected) in cases {
            let path = Path::new(raw_path);
            assert_eq!(path.icon_stem_or_suffix(), Some(expected));
        }
    }

    #[test]
    fn test_extension_or_hidden_file_name() {
        let cases = [
            // No dots in name
            ("/a/b/c/file_name.rs", "rs"),
            // Single dot in name
            ("/a/b/c/file.name.rs", "rs"),
            // Multiple dots in name
            ("/a/b/c/long.file.name.rs", "rs"),
            // Hidden file, no extension
            ("/a/b/c/.gitignore", "gitignore"),
            // Hidden file, with extension
            ("/a/b/c/.eslintrc.js", "js"),
        ];

        for (raw_path, expected) in cases {
            let path = Path::new(raw_path);
            assert_eq!(path.extension_or_hidden_file_name(), Some(expected));
        }
    }

    #[test]
    fn edge_of_glob() {
        let path = Path::new("/work/node_modules");
        let path_matcher = single_glob_matcher("**/node_modules/**");
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }

    #[test]
    fn project_search() {
        let path = Path::new("/Users/someonetoignore/work/zed/zed.dev/node_modules");
        let path_matcher = single_glob_matcher("**/node_modules/**");
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }
}