// paths.rs

use std::cmp;
use std::sync::OnceLock;
use std::{
    ffi::OsStr,
    path::{Path, PathBuf},
    sync::LazyLock,
};

use globset::{Glob, GlobSet, GlobSetBuilder};
use regex::Regex;
use serde::{Deserialize, Serialize};
use unicase::UniCase;

use crate::{maybe, NumericPrefixWithSuffix};
 15
 16/// Returns the path to the user's home directory.
 17pub fn home_dir() -> &'static PathBuf {
 18    static HOME_DIR: OnceLock<PathBuf> = OnceLock::new();
 19    HOME_DIR.get_or_init(|| dirs::home_dir().expect("failed to determine home directory"))
 20}
 21
 22pub trait PathExt {
 23    fn compact(&self) -> PathBuf;
 24    fn icon_stem_or_suffix(&self) -> Option<&str>;
 25    fn extension_or_hidden_file_name(&self) -> Option<&str>;
 26    fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
 27    where
 28        Self: From<&'a Path>,
 29    {
 30        #[cfg(unix)]
 31        {
 32            use std::os::unix::prelude::OsStrExt;
 33            Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
 34        }
 35        #[cfg(windows)]
 36        {
 37            use anyhow::anyhow;
 38            use tendril::fmt::{Format, WTF8};
 39            WTF8::validate(bytes)
 40                .then(|| {
 41                    // Safety: bytes are valid WTF-8 sequence.
 42                    Self::from(Path::new(unsafe {
 43                        OsStr::from_encoded_bytes_unchecked(bytes)
 44                    }))
 45                })
 46                .ok_or_else(|| anyhow!("Invalid WTF-8 sequence: {bytes:?}"))
 47        }
 48    }
 49}
 50
 51impl<T: AsRef<Path>> PathExt for T {
 52    /// Compacts a given file path by replacing the user's home directory
 53    /// prefix with a tilde (`~`).
 54    ///
 55    /// # Returns
 56    ///
 57    /// * A `PathBuf` containing the compacted file path. If the input path
 58    ///   does not have the user's home directory prefix, or if we are not on
 59    ///   Linux or macOS, the original path is returned unchanged.
 60    fn compact(&self) -> PathBuf {
 61        if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
 62            match self.as_ref().strip_prefix(home_dir().as_path()) {
 63                Ok(relative_path) => {
 64                    let mut shortened_path = PathBuf::new();
 65                    shortened_path.push("~");
 66                    shortened_path.push(relative_path);
 67                    shortened_path
 68                }
 69                Err(_) => self.as_ref().to_path_buf(),
 70            }
 71        } else {
 72            self.as_ref().to_path_buf()
 73        }
 74    }
 75
 76    /// Returns either the suffix if available, or the file stem otherwise to determine which file icon to use
 77    fn icon_stem_or_suffix(&self) -> Option<&str> {
 78        let path = self.as_ref();
 79        let file_name = path.file_name()?.to_str()?;
 80        if file_name.starts_with('.') {
 81            return file_name.strip_prefix('.');
 82        }
 83
 84        path.extension()
 85            .and_then(|e| e.to_str())
 86            .or_else(|| path.file_stem()?.to_str())
 87    }
 88
 89    /// Returns a file's extension or, if the file is hidden, its name without the leading dot
 90    fn extension_or_hidden_file_name(&self) -> Option<&str> {
 91        if let Some(extension) = self.as_ref().extension() {
 92            return extension.to_str();
 93        }
 94
 95        self.as_ref().file_name()?.to_str()?.split('.').last()
 96    }
 97}
 98
 99/// A delimiter to use in `path_query:row_number:column_number` strings parsing.
100pub const FILE_ROW_COLUMN_DELIMITER: char = ':';
101
/// Extracts filename and row-column suffixes.
/// Parenthesis format is used by [MSBuild](https://learn.microsoft.com/en-us/visualstudio/msbuild/msbuild-diagnostic-format-for-tasks) compatible tools
// NOTE: All cases need to have exactly three capture groups for extract(): file_name, row and column.
// Valid patterns that don't contain row and/or column should have empty groups in their place.
const ROW_COL_CAPTURE_REGEX: &str = r"(?x)
    ([^\(]+)(?:
        \((\d+),(\d+)\) # filename(row,column)
        |
        \((\d+)\)()     # filename(row)
    )
    |
    ([^\:]+)(?:
        \:(\d+)\:(\d+)  # filename:row:column
        |
        \:(\d+)()       # filename:row
        |
        \:()()          # filename:
    )";
120
121/// A representation of a path-like string with optional row and column numbers.
122/// Matching values example: `te`, `test.rs:22`, `te:22:5`, `test.c(22)`, `test.c(22,5)`etc.
123#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
124pub struct PathWithPosition {
125    pub path: PathBuf,
126    pub row: Option<u32>,
127    // Absent if row is absent.
128    pub column: Option<u32>,
129}
130
131impl PathWithPosition {
132    /// Returns a PathWithPosition from a path.
133    pub fn from_path(path: PathBuf) -> Self {
134        Self {
135            path,
136            row: None,
137            column: None,
138        }
139    }
140    /// Parses a string that possibly has `:row:column` or `(row, column)` suffix.
141    /// Ignores trailing `:`s, so `test.rs:22:` is parsed as `test.rs:22`.
142    /// If the suffix parsing fails, the whole string is parsed as a path.
143    pub fn parse_str(s: &str) -> Self {
144        let trimmed = s.trim();
145        let path = Path::new(trimmed);
146        let maybe_file_name_with_row_col = path
147            .file_name()
148            .unwrap_or_default()
149            .to_str()
150            .unwrap_or_default();
151        if maybe_file_name_with_row_col.is_empty() {
152            return Self {
153                path: Path::new(s).to_path_buf(),
154                row: None,
155                column: None,
156            };
157        }
158
159        // Let's avoid repeated init cost on this. It is subject to thread contention, but
160        // so far this code isn't called from multiple hot paths. Getting contention here
161        // in the future seems unlikely.
162        static SUFFIX_RE: LazyLock<Regex> =
163            LazyLock::new(|| Regex::new(ROW_COL_CAPTURE_REGEX).unwrap());
164        match SUFFIX_RE
165            .captures(maybe_file_name_with_row_col)
166            .map(|caps| caps.extract())
167        {
168            Some((_, [file_name, maybe_row, maybe_column])) => {
169                let row = maybe_row.parse::<u32>().ok();
170                let column = maybe_column.parse::<u32>().ok();
171
172                let suffix_length = maybe_file_name_with_row_col.len() - file_name.len();
173                let path_without_suffix = &trimmed[..trimmed.len() - suffix_length];
174
175                Self {
176                    path: Path::new(path_without_suffix).to_path_buf(),
177                    row,
178                    column,
179                }
180            }
181            None => Self {
182                path: Path::new(s).to_path_buf(),
183                row: None,
184                column: None,
185            },
186        }
187    }
188
189    pub fn map_path<E>(
190        self,
191        mapping: impl FnOnce(PathBuf) -> Result<PathBuf, E>,
192    ) -> Result<PathWithPosition, E> {
193        Ok(PathWithPosition {
194            path: mapping(self.path)?,
195            row: self.row,
196            column: self.column,
197        })
198    }
199
200    pub fn to_string(&self, path_to_string: impl Fn(&PathBuf) -> String) -> String {
201        let path_string = path_to_string(&self.path);
202        if let Some(row) = self.row {
203            if let Some(column) = self.column {
204                format!("{path_string}:{row}:{column}")
205            } else {
206                format!("{path_string}:{row}")
207            }
208        } else {
209            path_string
210        }
211    }
212}
213
214#[derive(Clone, Debug, Default)]
215pub struct PathMatcher {
216    sources: Vec<String>,
217    glob: GlobSet,
218}
219
// impl std::fmt::Display for PathMatcher {
//     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
//         self.sources.fmt(f)
//     }
// }
225
226impl PartialEq for PathMatcher {
227    fn eq(&self, other: &Self) -> bool {
228        self.sources.eq(&other.sources)
229    }
230}
231
232impl Eq for PathMatcher {}
233
234impl PathMatcher {
235    pub fn new(globs: &[String]) -> Result<Self, globset::Error> {
236        let globs = globs
237            .iter()
238            .map(|glob| Glob::new(glob))
239            .collect::<Result<Vec<_>, _>>()?;
240        let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
241        let mut glob_builder = GlobSetBuilder::new();
242        for single_glob in globs {
243            glob_builder.add(single_glob);
244        }
245        let glob = glob_builder.build()?;
246        Ok(PathMatcher { glob, sources })
247    }
248
249    pub fn sources(&self) -> &[String] {
250        &self.sources
251    }
252
253    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
254        let other_path = other.as_ref();
255        self.sources.iter().any(|source| {
256            let as_bytes = other_path.as_os_str().as_encoded_bytes();
257            as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
258        }) || self.glob.is_match(other_path)
259            || self.check_with_end_separator(other_path)
260    }
261
262    fn check_with_end_separator(&self, path: &Path) -> bool {
263        let path_str = path.to_string_lossy();
264        let separator = std::path::MAIN_SEPARATOR_STR;
265        if path_str.ends_with(separator) {
266            false
267        } else {
268            self.glob.is_match(path_str.to_string() + separator)
269        }
270    }
271}
272
273pub fn compare_paths(
274    (path_a, a_is_file): (&Path, bool),
275    (path_b, b_is_file): (&Path, bool),
276) -> cmp::Ordering {
277    let mut components_a = path_a.components().peekable();
278    let mut components_b = path_b.components().peekable();
279    loop {
280        match (components_a.next(), components_b.next()) {
281            (Some(component_a), Some(component_b)) => {
282                let a_is_file = components_a.peek().is_none() && a_is_file;
283                let b_is_file = components_b.peek().is_none() && b_is_file;
284                let ordering = a_is_file.cmp(&b_is_file).then_with(|| {
285                    let maybe_numeric_ordering = maybe!({
286                        let path_a = Path::new(component_a.as_os_str());
287                        let num_and_remainder_a = if a_is_file {
288                            path_a.file_stem()
289                        } else {
290                            path_a.file_name()
291                        }
292                        .and_then(|s| s.to_str())
293                        .and_then(NumericPrefixWithSuffix::from_numeric_prefixed_str)?;
294
295                        let path_b = Path::new(component_b.as_os_str());
296                        let num_and_remainder_b = if b_is_file {
297                            path_b.file_stem()
298                        } else {
299                            path_b.file_name()
300                        }
301                        .and_then(|s| s.to_str())
302                        .and_then(NumericPrefixWithSuffix::from_numeric_prefixed_str)?;
303
304                        num_and_remainder_a.partial_cmp(&num_and_remainder_b)
305                    });
306
307                    maybe_numeric_ordering.unwrap_or_else(|| {
308                        let name_a = UniCase::new(component_a.as_os_str().to_string_lossy());
309                        let name_b = UniCase::new(component_b.as_os_str().to_string_lossy());
310
311                        name_a.cmp(&name_b)
312                    })
313                });
314                if !ordering.is_eq() {
315                    return ordering;
316                }
317            }
318            (Some(_), None) => break cmp::Ordering::Greater,
319            (None, Some(_)) => break cmp::Ordering::Less,
320            (None, None) => break cmp::Ordering::Equal,
321        }
322    }
323}
324
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn compare_paths_with_dots() {
        let mut paths = vec![
            (Path::new("test_dirs"), false),
            (Path::new("test_dirs/1.46"), false),
            (Path::new("test_dirs/1.46/bar_1"), true),
            (Path::new("test_dirs/1.46/bar_2"), true),
            (Path::new("test_dirs/1.45"), false),
            (Path::new("test_dirs/1.45/foo_2"), true),
            (Path::new("test_dirs/1.45/foo_1"), true),
        ];
        paths.sort_by(|&a, &b| compare_paths(a, b));
        assert_eq!(
            paths,
            vec![
                (Path::new("test_dirs"), false),
                (Path::new("test_dirs/1.45"), false),
                (Path::new("test_dirs/1.45/foo_1"), true),
                (Path::new("test_dirs/1.45/foo_2"), true),
                (Path::new("test_dirs/1.46"), false),
                (Path::new("test_dirs/1.46/bar_1"), true),
                (Path::new("test_dirs/1.46/bar_2"), true),
            ]
        );
    }

    #[test]
    fn path_with_position_parsing_positive() {
        let input_and_expected = [
            (
                "test_file.rs",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: None,
                    column: None,
                },
            ),
            (
                "test_file.rs:1",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: Some(1),
                    column: None,
                },
            ),
            (
                "test_file.rs:1:2",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: Some(1),
                    column: Some(2),
                },
            ),
        ];

        for (input, expected) in input_and_expected {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual, expected,
                "For positive case input str '{input}', got a parse mismatch"
            );
        }
    }

    #[test]
    fn path_with_position_parsing_negative() {
        for (input, row, column) in [
            ("test_file.rs:a", None, None),
            ("test_file.rs:a:b", None, None),
            ("test_file.rs::", None, None),
            ("test_file.rs::1", None, None),
            ("test_file.rs:1::", Some(1), None),
            ("test_file.rs::1:2", None, None),
            ("test_file.rs:1::2", Some(1), None),
            ("test_file.rs:1:2:3", Some(1), Some(2)),
        ] {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual,
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row,
                    column,
                },
                "For negative case input str '{input}', got a parse mismatch"
            );
        }
    }

    // Trim off trailing `:`s for otherwise valid input.
    #[test]
    fn path_with_position_parsing_special() {
        #[cfg(not(target_os = "windows"))]
        let input_and_expected = [
            (
                "test_file.rs:",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: None,
                    column: None,
                },
            ),
            (
                "test_file.rs:1:",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: Some(1),
                    column: None,
                },
            ),
            (
                "crates/file_finder/src/file_finder.rs:1902:13:",
                PathWithPosition {
                    path: PathBuf::from("crates/file_finder/src/file_finder.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
        ];

        #[cfg(target_os = "windows")]
        let input_and_expected = [
            (
                "test_file.rs:",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: None,
                    column: None,
                },
            ),
            (
                "test_file.rs:1:",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: Some(1),
                    column: None,
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:15:",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:::15:",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: None,
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902,13):",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902):",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: None,
                },
            ),
            (
                "C:\\Users\\someone\\test_file.rs:1902:13:",
                PathWithPosition {
                    path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "crates/utils/paths.rs",
                PathWithPosition {
                    path: PathBuf::from("crates\\utils\\paths.rs"),
                    row: None,
                    column: None,
                },
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902,13):",
                PathWithPosition {
                    path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902):",
                PathWithPosition {
                    path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: None,
                },
            ),
            (
                "crates/utils/paths.rs:101",
                PathWithPosition {
                    path: PathBuf::from("crates\\utils\\paths.rs"),
                    row: Some(101),
                    column: None,
                },
            ),
        ];

        for (input, expected) in input_and_expected {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual, expected,
                "For special case input str '{input}', got a parse mismatch"
            );
        }
    }

    #[test]
    fn test_path_compact() {
        let path: PathBuf = [
            home_dir().to_string_lossy().to_string(),
            "some_file.txt".to_string(),
        ]
        .iter()
        .collect();
        if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
            assert_eq!(path.compact().to_str(), Some("~/some_file.txt"));
        } else {
            assert_eq!(path.compact().to_str(), path.to_str());
        }
    }

    #[test]
    fn test_icon_stem_or_suffix() {
        // No dots in name
        let path = Path::new("/a/b/c/file_name.rs");
        assert_eq!(path.icon_stem_or_suffix(), Some("rs"));

        // Single dot in name
        let path = Path::new("/a/b/c/file.name.rs");
        assert_eq!(path.icon_stem_or_suffix(), Some("rs"));

        // No suffix
        let path = Path::new("/a/b/c/file");
        assert_eq!(path.icon_stem_or_suffix(), Some("file"));

        // Multiple dots in name
        let path = Path::new("/a/b/c/long.file.name.rs");
        assert_eq!(path.icon_stem_or_suffix(), Some("rs"));

        // Hidden file, no extension
        let path = Path::new("/a/b/c/.gitignore");
        assert_eq!(path.icon_stem_or_suffix(), Some("gitignore"));

        // Hidden file, with extension
        let path = Path::new("/a/b/c/.eslintrc.js");
        assert_eq!(path.icon_stem_or_suffix(), Some("eslintrc.js"));
    }

    #[test]
    fn test_extension_or_hidden_file_name() {
        // No dots in name
        let path = Path::new("/a/b/c/file_name.rs");
        assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));

        // Single dot in name
        let path = Path::new("/a/b/c/file.name.rs");
        assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));

        // Multiple dots in name
        let path = Path::new("/a/b/c/long.file.name.rs");
        assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));

        // Hidden file, no extension
        let path = Path::new("/a/b/c/.gitignore");
        assert_eq!(path.extension_or_hidden_file_name(), Some("gitignore"));

        // Hidden file, with extension
        let path = Path::new("/a/b/c/.eslintrc.js");
        assert_eq!(path.extension_or_hidden_file_name(), Some("js"));
    }

    #[test]
    fn edge_of_glob() {
        let path = Path::new("/work/node_modules");
        let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }

    #[test]
    fn project_search() {
        let path = Path::new("/Users/someonetoignore/work/zed/zed.dev/node_modules");
        let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }
}