paths.rs

  1use std::cmp;
  2use std::sync::OnceLock;
  3use std::{
  4    ffi::OsStr,
  5    path::{Path, PathBuf},
  6    sync::LazyLock,
  7};
  8
  9use globset::{Glob, GlobSet, GlobSetBuilder};
 10use regex::Regex;
 11use serde::{Deserialize, Serialize};
 12
 13use crate::NumericPrefixWithSuffix;
 14
 15/// Returns the path to the user's home directory.
 16pub fn home_dir() -> &'static PathBuf {
 17    static HOME_DIR: OnceLock<PathBuf> = OnceLock::new();
 18    HOME_DIR.get_or_init(|| dirs::home_dir().expect("failed to determine home directory"))
 19}
 20
 21pub trait PathExt {
 22    fn compact(&self) -> PathBuf;
 23    fn icon_stem_or_suffix(&self) -> Option<&str>;
 24    fn extension_or_hidden_file_name(&self) -> Option<&str>;
 25    fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
 26    where
 27        Self: From<&'a Path>,
 28    {
 29        #[cfg(unix)]
 30        {
 31            use std::os::unix::prelude::OsStrExt;
 32            Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
 33        }
 34        #[cfg(windows)]
 35        {
 36            use anyhow::anyhow;
 37            use tendril::fmt::{Format, WTF8};
 38            WTF8::validate(bytes)
 39                .then(|| {
 40                    // Safety: bytes are valid WTF-8 sequence.
 41                    Self::from(Path::new(unsafe {
 42                        OsStr::from_encoded_bytes_unchecked(bytes)
 43                    }))
 44                })
 45                .ok_or_else(|| anyhow!("Invalid WTF-8 sequence: {bytes:?}"))
 46        }
 47    }
 48}
 49
 50impl<T: AsRef<Path>> PathExt for T {
 51    /// Compacts a given file path by replacing the user's home directory
 52    /// prefix with a tilde (`~`).
 53    ///
 54    /// # Returns
 55    ///
 56    /// * A `PathBuf` containing the compacted file path. If the input path
 57    ///   does not have the user's home directory prefix, or if we are not on
 58    ///   Linux or macOS, the original path is returned unchanged.
 59    fn compact(&self) -> PathBuf {
 60        if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
 61            match self.as_ref().strip_prefix(home_dir().as_path()) {
 62                Ok(relative_path) => {
 63                    let mut shortened_path = PathBuf::new();
 64                    shortened_path.push("~");
 65                    shortened_path.push(relative_path);
 66                    shortened_path
 67                }
 68                Err(_) => self.as_ref().to_path_buf(),
 69            }
 70        } else {
 71            self.as_ref().to_path_buf()
 72        }
 73    }
 74
 75    /// Returns either the suffix if available, or the file stem otherwise to determine which file icon to use
 76    fn icon_stem_or_suffix(&self) -> Option<&str> {
 77        let path = self.as_ref();
 78        let file_name = path.file_name()?.to_str()?;
 79        if file_name.starts_with('.') {
 80            return file_name.strip_prefix('.');
 81        }
 82
 83        path.extension()
 84            .and_then(|e| e.to_str())
 85            .or_else(|| path.file_stem()?.to_str())
 86    }
 87
 88    /// Returns a file's extension or, if the file is hidden, its name without the leading dot
 89    fn extension_or_hidden_file_name(&self) -> Option<&str> {
 90        if let Some(extension) = self.as_ref().extension() {
 91            return extension.to_str();
 92        }
 93
 94        self.as_ref().file_name()?.to_str()?.split('.').last()
 95    }
 96}
 97
/// The delimiter used when parsing `path_query:row_number:column_number` strings.
 99pub const FILE_ROW_COLUMN_DELIMITER: char = ':';
100
/// Extracts the file name and the optional row/column suffix.
/// The parenthesized format is used by [MSBuild](https://learn.microsoft.com/en-us/visualstudio/msbuild/msbuild-diagnostic-format-for-tasks) compatible tools.
// NOTE: Every alternative needs exactly three capture groups for extract(): file_name, row and column.
// Valid patterns that don't contain a row and/or a column should have empty groups in their place.
105const ROW_COL_CAPTURE_REGEX: &str = r"(?x)
106    ([^\(]+)(?:
107        \((\d+),(\d+)\) # filename(row,column)
108        |
109        \((\d+)\)()     # filename(row)
110    )
111    |
112    ([^\:]+)(?:
113        \:(\d+)\:(\d+)  # filename:row:column
114        |
115        \:(\d+)()       # filename:row
116        |
117        \:()()          # filename:
118    )";
119
120/// A representation of a path-like string with optional row and column numbers.
121/// Matching values example: `te`, `test.rs:22`, `te:22:5`, `test.c(22)`, `test.c(22,5)`etc.
122#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
123pub struct PathWithPosition {
124    pub path: PathBuf,
125    pub row: Option<u32>,
126    // Absent if row is absent.
127    pub column: Option<u32>,
128}
129
130impl PathWithPosition {
131    /// Returns a PathWithPosition from a path.
132    pub fn from_path(path: PathBuf) -> Self {
133        Self {
134            path,
135            row: None,
136            column: None,
137        }
138    }
139    /// Parses a string that possibly has `:row:column` or `(row, column)` suffix.
140    /// Ignores trailing `:`s, so `test.rs:22:` is parsed as `test.rs:22`.
141    /// If the suffix parsing fails, the whole string is parsed as a path.
142    pub fn parse_str(s: &str) -> Self {
143        let trimmed = s.trim();
144        let path = Path::new(trimmed);
145        let maybe_file_name_with_row_col = path
146            .file_name()
147            .unwrap_or_default()
148            .to_str()
149            .unwrap_or_default();
150        if maybe_file_name_with_row_col.is_empty() {
151            return Self {
152                path: Path::new(s).to_path_buf(),
153                row: None,
154                column: None,
155            };
156        }
157
158        // Let's avoid repeated init cost on this. It is subject to thread contention, but
159        // so far this code isn't called from multiple hot paths. Getting contention here
160        // in the future seems unlikely.
161        static SUFFIX_RE: LazyLock<Regex> =
162            LazyLock::new(|| Regex::new(ROW_COL_CAPTURE_REGEX).unwrap());
163        match SUFFIX_RE
164            .captures(maybe_file_name_with_row_col)
165            .map(|caps| caps.extract())
166        {
167            Some((_, [file_name, maybe_row, maybe_column])) => {
168                let row = maybe_row.parse::<u32>().ok();
169                let column = maybe_column.parse::<u32>().ok();
170
171                let suffix_length = maybe_file_name_with_row_col.len() - file_name.len();
172                let path_without_suffix = &trimmed[..trimmed.len() - suffix_length];
173
174                Self {
175                    path: Path::new(path_without_suffix).to_path_buf(),
176                    row,
177                    column,
178                }
179            }
180            None => Self {
181                path: Path::new(s).to_path_buf(),
182                row: None,
183                column: None,
184            },
185        }
186    }
187
188    pub fn map_path<E>(
189        self,
190        mapping: impl FnOnce(PathBuf) -> Result<PathBuf, E>,
191    ) -> Result<PathWithPosition, E> {
192        Ok(PathWithPosition {
193            path: mapping(self.path)?,
194            row: self.row,
195            column: self.column,
196        })
197    }
198
199    pub fn to_string(&self, path_to_string: impl Fn(&PathBuf) -> String) -> String {
200        let path_string = path_to_string(&self.path);
201        if let Some(row) = self.row {
202            if let Some(column) = self.column {
203                format!("{path_string}:{row}:{column}")
204            } else {
205                format!("{path_string}:{row}")
206            }
207        } else {
208            path_string
209        }
210    }
211}
212
213#[derive(Clone, Debug, Default)]
214pub struct PathMatcher {
215    sources: Vec<String>,
216    glob: GlobSet,
217}
218
219// impl std::fmt::Display for PathMatcher {
220//     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
221//         self.sources.fmt(f)
222//     }
223// }
224
225impl PartialEq for PathMatcher {
226    fn eq(&self, other: &Self) -> bool {
227        self.sources.eq(&other.sources)
228    }
229}
230
231impl Eq for PathMatcher {}
232
233impl PathMatcher {
234    pub fn new(globs: &[String]) -> Result<Self, globset::Error> {
235        let globs = globs
236            .iter()
237            .map(|glob| Glob::new(glob))
238            .collect::<Result<Vec<_>, _>>()?;
239        let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
240        let mut glob_builder = GlobSetBuilder::new();
241        for single_glob in globs {
242            glob_builder.add(single_glob);
243        }
244        let glob = glob_builder.build()?;
245        Ok(PathMatcher { glob, sources })
246    }
247
248    pub fn sources(&self) -> &[String] {
249        &self.sources
250    }
251
252    pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
253        let other_path = other.as_ref();
254        self.sources.iter().any(|source| {
255            let as_bytes = other_path.as_os_str().as_encoded_bytes();
256            as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
257        }) || self.glob.is_match(other_path)
258            || self.check_with_end_separator(other_path)
259    }
260
261    fn check_with_end_separator(&self, path: &Path) -> bool {
262        let path_str = path.to_string_lossy();
263        let separator = std::path::MAIN_SEPARATOR_STR;
264        if path_str.ends_with(separator) {
265            false
266        } else {
267            self.glob.is_match(path_str.to_string() + separator)
268        }
269    }
270}
271
272pub fn compare_paths(
273    (path_a, a_is_file): (&Path, bool),
274    (path_b, b_is_file): (&Path, bool),
275) -> cmp::Ordering {
276    let mut components_a = path_a.components().peekable();
277    let mut components_b = path_b.components().peekable();
278    loop {
279        match (components_a.next(), components_b.next()) {
280            (Some(component_a), Some(component_b)) => {
281                let a_is_file = components_a.peek().is_none() && a_is_file;
282                let b_is_file = components_b.peek().is_none() && b_is_file;
283                let ordering = a_is_file.cmp(&b_is_file).then_with(|| {
284                    let path_a = Path::new(component_a.as_os_str());
285                    let num_and_remainder_a = NumericPrefixWithSuffix::from_numeric_prefixed_str(
286                        if a_is_file {
287                            path_a.file_stem()
288                        } else {
289                            path_a.file_name()
290                        }
291                        .and_then(|s| s.to_str())
292                        .unwrap_or_default(),
293                    );
294
295                    let path_b = Path::new(component_b.as_os_str());
296                    let num_and_remainder_b = NumericPrefixWithSuffix::from_numeric_prefixed_str(
297                        if b_is_file {
298                            path_b.file_stem()
299                        } else {
300                            path_b.file_name()
301                        }
302                        .and_then(|s| s.to_str())
303                        .unwrap_or_default(),
304                    );
305
306                    num_and_remainder_a.cmp(&num_and_remainder_b)
307                });
308                if !ordering.is_eq() {
309                    return ordering;
310                }
311            }
312            (Some(_), None) => break cmp::Ordering::Greater,
313            (None, Some(_)) => break cmp::Ordering::Less,
314            (None, None) => break cmp::Ordering::Equal,
315        }
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building the expected parse result.
    fn position(path: &str, row: Option<u32>, column: Option<u32>) -> PathWithPosition {
        PathWithPosition {
            path: PathBuf::from(path),
            row,
            column,
        }
    }

    /// Asserts that the `**/node_modules/**` matcher accepts `path`.
    fn assert_node_modules_glob_matches(path: &Path) {
        let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }

    #[test]
    fn compare_paths_with_dots() {
        // Directories whose names contain dots.
        let sorted = vec![
            (Path::new("test_dirs"), false),
            (Path::new("test_dirs/1.45"), false),
            (Path::new("test_dirs/1.45/foo_1"), true),
            (Path::new("test_dirs/1.45/foo_2"), true),
            (Path::new("test_dirs/1.46"), false),
            (Path::new("test_dirs/1.46/bar_1"), true),
            (Path::new("test_dirs/1.46/bar_2"), true),
        ];
        let mut paths = vec![
            (Path::new("test_dirs"), false),
            (Path::new("test_dirs/1.46"), false),
            (Path::new("test_dirs/1.46/bar_1"), true),
            (Path::new("test_dirs/1.46/bar_2"), true),
            (Path::new("test_dirs/1.45"), false),
            (Path::new("test_dirs/1.45/foo_2"), true),
            (Path::new("test_dirs/1.45/foo_1"), true),
        ];
        paths.sort_by(|&a, &b| compare_paths(a, b));
        assert_eq!(paths, sorted);

        // Files whose names contain several dots.
        let sorted = vec![
            (Path::new("root1/one.txt"), true),
            (Path::new("root1/one.two.txt"), true),
        ];
        let mut paths = vec![
            (Path::new("root1/one.txt"), true),
            (Path::new("root1/one.two.txt"), true),
        ];
        paths.sort_by(|&a, &b| compare_paths(a, b));
        assert_eq!(paths, sorted);
    }

    #[test]
    fn path_with_position_parsing_positive() {
        let cases = [
            ("test_file.rs", None, None),
            ("test_file.rs:1", Some(1), None),
            ("test_file.rs:1:2", Some(1), Some(2)),
        ];

        for (input, row, column) in cases {
            let expected = position("test_file.rs", row, column);
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual, expected,
                "For positive case input str '{input}', got a parse mismatch"
            );
        }
    }

    #[test]
    fn path_with_position_parsing_negative() {
        let cases = [
            ("test_file.rs:a", None, None),
            ("test_file.rs:a:b", None, None),
            ("test_file.rs::", None, None),
            ("test_file.rs::1", None, None),
            ("test_file.rs:1::", Some(1), None),
            ("test_file.rs::1:2", None, None),
            ("test_file.rs:1::2", Some(1), None),
            ("test_file.rs:1:2:3", Some(1), Some(2)),
        ];

        for (input, row, column) in cases {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual,
                position("test_file.rs", row, column),
                "For negative case input str '{input}', got a parse mismatch"
            );
        }
    }

    // Trim off trailing `:`s for otherwise valid input.
    #[test]
    fn path_with_position_parsing_special() {
        #[cfg(not(target_os = "windows"))]
        let cases = [
            ("test_file.rs:", "test_file.rs", None, None),
            ("test_file.rs:1:", "test_file.rs", Some(1), None),
            (
                "crates/file_finder/src/file_finder.rs:1902:13:",
                "crates/file_finder/src/file_finder.rs",
                Some(1902),
                Some(13),
            ),
        ];

        #[cfg(target_os = "windows")]
        let cases = [
            ("test_file.rs:", "test_file.rs", None, None),
            ("test_file.rs:1:", "test_file.rs", Some(1), None),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:",
                "\\\\?\\C:\\Users\\someone\\test_file.rs",
                Some(1902),
                Some(13),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:15:",
                "\\\\?\\C:\\Users\\someone\\test_file.rs",
                Some(1902),
                Some(13),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:::15:",
                "\\\\?\\C:\\Users\\someone\\test_file.rs",
                Some(1902),
                None,
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902,13):",
                "\\\\?\\C:\\Users\\someone\\test_file.rs",
                Some(1902),
                Some(13),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902):",
                "\\\\?\\C:\\Users\\someone\\test_file.rs",
                Some(1902),
                None,
            ),
            (
                "C:\\Users\\someone\\test_file.rs:1902:13:",
                "C:\\Users\\someone\\test_file.rs",
                Some(1902),
                Some(13),
            ),
            (
                "crates/utils/paths.rs",
                "crates\\utils\\paths.rs",
                None,
                None,
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902,13):",
                "C:\\Users\\someone\\test_file.rs",
                Some(1902),
                Some(13),
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902):",
                "C:\\Users\\someone\\test_file.rs",
                Some(1902),
                None,
            ),
            (
                "crates/utils/paths.rs:101",
                "crates\\utils\\paths.rs",
                Some(101),
                None,
            ),
        ];

        for (input, path, row, column) in cases {
            let expected = position(path, row, column);
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual, expected,
                "For special case input str '{input}', got a parse mismatch"
            );
        }
    }

    #[test]
    fn test_path_compact() {
        let home = home_dir().to_string_lossy().to_string();
        let path = PathBuf::from(home).join("some_file.txt");
        let compacted = path.compact();
        if cfg!(any(target_os = "linux", target_os = "macos")) {
            assert_eq!(compacted.to_str(), Some("~/some_file.txt"));
        } else {
            assert_eq!(compacted.to_str(), path.to_str());
        }
    }

    #[test]
    fn test_icon_stem_or_suffix() {
        let cases = [
            // Plain name with an extension
            ("/a/b/c/file_name.rs", Some("rs")),
            // Extra dot inside the name
            ("/a/b/c/file.name.rs", Some("rs")),
            // No suffix
            ("/a/b/c/file", Some("file")),
            // Multiple dots in name
            ("/a/b/c/long.file.name.rs", Some("rs")),
            // Hidden file, no extension
            ("/a/b/c/.gitignore", Some("gitignore")),
            // Hidden file, with extension
            ("/a/b/c/.eslintrc.js", Some("eslintrc.js")),
        ];

        for (input, expected) in cases {
            assert_eq!(Path::new(input).icon_stem_or_suffix(), expected);
        }
    }

    #[test]
    fn test_extension_or_hidden_file_name() {
        let cases = [
            // Plain name with an extension
            ("/a/b/c/file_name.rs", Some("rs")),
            // Extra dot inside the name
            ("/a/b/c/file.name.rs", Some("rs")),
            // Multiple dots in name
            ("/a/b/c/long.file.name.rs", Some("rs")),
            // Hidden file, no extension
            ("/a/b/c/.gitignore", Some("gitignore")),
            // Hidden file, with extension
            ("/a/b/c/.eslintrc.js", Some("js")),
        ];

        for (input, expected) in cases {
            assert_eq!(Path::new(input).extension_or_hidden_file_name(), expected);
        }
    }

    #[test]
    fn edge_of_glob() {
        assert_node_modules_glob_matches(Path::new("/work/node_modules"));
    }

    #[test]
    fn project_search() {
        assert_node_modules_glob_matches(Path::new(
            "/Users/someonetoignore/work/zed/zed.dev/node_modules",
        ));
    }
}