1use std::cmp;
2use std::sync::OnceLock;
3use std::{
4 ffi::OsStr,
5 path::{Path, PathBuf},
6 sync::LazyLock,
7};
8
9use globset::{Glob, GlobSet, GlobSetBuilder};
10use regex::Regex;
11use serde::{Deserialize, Serialize};
12use unicase::UniCase;
13
14use crate::{maybe, NumericPrefixWithSuffix};
15
16/// Returns the path to the user's home directory.
17pub fn home_dir() -> &'static PathBuf {
18 static HOME_DIR: OnceLock<PathBuf> = OnceLock::new();
19 HOME_DIR.get_or_init(|| dirs::home_dir().expect("failed to determine home directory"))
20}
21
22pub trait PathExt {
23 fn compact(&self) -> PathBuf;
24 fn icon_stem_or_suffix(&self) -> Option<&str>;
25 fn extension_or_hidden_file_name(&self) -> Option<&str>;
26 fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
27 where
28 Self: From<&'a Path>,
29 {
30 #[cfg(unix)]
31 {
32 use std::os::unix::prelude::OsStrExt;
33 Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
34 }
35 #[cfg(windows)]
36 {
37 use anyhow::anyhow;
38 use tendril::fmt::{Format, WTF8};
39 WTF8::validate(bytes)
40 .then(|| {
41 // Safety: bytes are valid WTF-8 sequence.
42 Self::from(Path::new(unsafe {
43 OsStr::from_encoded_bytes_unchecked(bytes)
44 }))
45 })
46 .ok_or_else(|| anyhow!("Invalid WTF-8 sequence: {bytes:?}"))
47 }
48 }
49}
50
51impl<T: AsRef<Path>> PathExt for T {
52 /// Compacts a given file path by replacing the user's home directory
53 /// prefix with a tilde (`~`).
54 ///
55 /// # Returns
56 ///
57 /// * A `PathBuf` containing the compacted file path. If the input path
58 /// does not have the user's home directory prefix, or if we are not on
59 /// Linux or macOS, the original path is returned unchanged.
60 fn compact(&self) -> PathBuf {
61 if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
62 match self.as_ref().strip_prefix(home_dir().as_path()) {
63 Ok(relative_path) => {
64 let mut shortened_path = PathBuf::new();
65 shortened_path.push("~");
66 shortened_path.push(relative_path);
67 shortened_path
68 }
69 Err(_) => self.as_ref().to_path_buf(),
70 }
71 } else {
72 self.as_ref().to_path_buf()
73 }
74 }
75
76 /// Returns either the suffix if available, or the file stem otherwise to determine which file icon to use
77 fn icon_stem_or_suffix(&self) -> Option<&str> {
78 let path = self.as_ref();
79 let file_name = path.file_name()?.to_str()?;
80 if file_name.starts_with('.') {
81 return file_name.strip_prefix('.');
82 }
83
84 path.extension()
85 .and_then(|e| e.to_str())
86 .or_else(|| path.file_stem()?.to_str())
87 }
88
89 /// Returns a file's extension or, if the file is hidden, its name without the leading dot
90 fn extension_or_hidden_file_name(&self) -> Option<&str> {
91 if let Some(extension) = self.as_ref().extension() {
92 return extension.to_str();
93 }
94
95 self.as_ref().file_name()?.to_str()?.split('.').last()
96 }
97}
98
/// The delimiter to use when parsing `path_query:row_number:column_number` strings.
pub const FILE_ROW_COLUMN_DELIMITER: char = ':';
101
/// Extracts the file name and the row/column suffix from a string.
///
/// Two suffix styles are recognized: `filename(row,column)` / `filename(row)` and
/// `filename:row:column` / `filename:row` / `filename:`.
/// The parenthesized format is used by [MSBuild](https://learn.microsoft.com/en-us/visualstudio/msbuild/msbuild-diagnostic-format-for-tasks) compatible tools.
// NOTE: All cases need to have exactly three capture groups for extract(): file_name, row and column.
// Valid patterns that don't contain row and/or column should have empty groups in their place.
// NOTE(review): the pattern has no `^` anchor, so a match may begin after the start of the input.
const ROW_COL_CAPTURE_REGEX: &str = r"(?x)
    ([^\(]+)(?:
        \((\d+),(\d+)\) # filename(row,column)
        |
        \((\d+)\)() # filename(row)
    )
    |
    ([^\:]+)(?:
        \:(\d+)\:(\d+) # filename:row:column
        |
        \:(\d+)() # filename:row
        |
        \:()() # filename:
    )";
120
/// A representation of a path-like string with optional row and column numbers.
///
/// Examples of matching values: `te`, `test.rs:22`, `te:22:5`, `test.c(22)`, `test.c(22,5)`, etc.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct PathWithPosition {
    /// The path part of the string, without any row/column suffix.
    pub path: PathBuf,
    /// The row number, if one was given.
    pub row: Option<u32>,
    /// The column number, if one was given.
    // Absent if row is absent.
    pub column: Option<u32>,
}
130
131impl PathWithPosition {
132 /// Returns a PathWithPosition from a path.
133 pub fn from_path(path: PathBuf) -> Self {
134 Self {
135 path,
136 row: None,
137 column: None,
138 }
139 }
140 /// Parses a string that possibly has `:row:column` or `(row, column)` suffix.
141 /// Ignores trailing `:`s, so `test.rs:22:` is parsed as `test.rs:22`.
142 /// If the suffix parsing fails, the whole string is parsed as a path.
143 pub fn parse_str(s: &str) -> Self {
144 let trimmed = s.trim();
145 let path = Path::new(trimmed);
146 let maybe_file_name_with_row_col = path
147 .file_name()
148 .unwrap_or_default()
149 .to_str()
150 .unwrap_or_default();
151 if maybe_file_name_with_row_col.is_empty() {
152 return Self {
153 path: Path::new(s).to_path_buf(),
154 row: None,
155 column: None,
156 };
157 }
158
159 // Let's avoid repeated init cost on this. It is subject to thread contention, but
160 // so far this code isn't called from multiple hot paths. Getting contention here
161 // in the future seems unlikely.
162 static SUFFIX_RE: LazyLock<Regex> =
163 LazyLock::new(|| Regex::new(ROW_COL_CAPTURE_REGEX).unwrap());
164 match SUFFIX_RE
165 .captures(maybe_file_name_with_row_col)
166 .map(|caps| caps.extract())
167 {
168 Some((_, [file_name, maybe_row, maybe_column])) => {
169 let row = maybe_row.parse::<u32>().ok();
170 let column = maybe_column.parse::<u32>().ok();
171
172 let suffix_length = maybe_file_name_with_row_col.len() - file_name.len();
173 let path_without_suffix = &trimmed[..trimmed.len() - suffix_length];
174
175 Self {
176 path: Path::new(path_without_suffix).to_path_buf(),
177 row,
178 column,
179 }
180 }
181 None => Self {
182 path: Path::new(s).to_path_buf(),
183 row: None,
184 column: None,
185 },
186 }
187 }
188
189 pub fn map_path<E>(
190 self,
191 mapping: impl FnOnce(PathBuf) -> Result<PathBuf, E>,
192 ) -> Result<PathWithPosition, E> {
193 Ok(PathWithPosition {
194 path: mapping(self.path)?,
195 row: self.row,
196 column: self.column,
197 })
198 }
199
200 pub fn to_string(&self, path_to_string: impl Fn(&PathBuf) -> String) -> String {
201 let path_string = path_to_string(&self.path);
202 if let Some(row) = self.row {
203 if let Some(column) = self.column {
204 format!("{path_string}:{row}:{column}")
205 } else {
206 format!("{path_string}:{row}")
207 }
208 } else {
209 path_string
210 }
211 }
212}
213
/// Matches paths against a set of glob patterns.
#[derive(Clone, Debug, Default)]
pub struct PathMatcher {
    /// The pattern strings the matcher was built from, one per glob.
    sources: Vec<String>,
    /// The compiled set of all globs.
    glob: GlobSet,
}
219
220// impl std::fmt::Display for PathMatcher {
221// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
222// self.sources.fmt(f)
223// }
224// }
225
226impl PartialEq for PathMatcher {
227 fn eq(&self, other: &Self) -> bool {
228 self.sources.eq(&other.sources)
229 }
230}
231
// Equality is delegated to `sources`, a `Vec<String>`, so it is a full equivalence relation.
impl Eq for PathMatcher {}
233
234impl PathMatcher {
235 pub fn new(globs: &[String]) -> Result<Self, globset::Error> {
236 let globs = globs
237 .into_iter()
238 .map(|glob| Glob::new(&glob))
239 .collect::<Result<Vec<_>, _>>()?;
240 let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
241 let mut glob_builder = GlobSetBuilder::new();
242 for single_glob in globs {
243 glob_builder.add(single_glob);
244 }
245 let glob = glob_builder.build()?;
246 Ok(PathMatcher { glob, sources })
247 }
248
249 pub fn sources(&self) -> &[String] {
250 &self.sources
251 }
252
253 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
254 let other_path = other.as_ref();
255 self.sources.iter().any(|source| {
256 let as_bytes = other_path.as_os_str().as_encoded_bytes();
257 as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
258 }) || self.glob.is_match(other_path)
259 || self.check_with_end_separator(other_path)
260 }
261
262 fn check_with_end_separator(&self, path: &Path) -> bool {
263 let path_str = path.to_string_lossy();
264 let separator = std::path::MAIN_SEPARATOR_STR;
265 if path_str.ends_with(separator) {
266 return false;
267 } else {
268 self.glob.is_match(path_str.to_string() + separator)
269 }
270 }
271}
272
273pub fn compare_paths(
274 (path_a, a_is_file): (&Path, bool),
275 (path_b, b_is_file): (&Path, bool),
276) -> cmp::Ordering {
277 let mut components_a = path_a.components().peekable();
278 let mut components_b = path_b.components().peekable();
279 loop {
280 match (components_a.next(), components_b.next()) {
281 (Some(component_a), Some(component_b)) => {
282 let a_is_file = components_a.peek().is_none() && a_is_file;
283 let b_is_file = components_b.peek().is_none() && b_is_file;
284 let ordering = a_is_file.cmp(&b_is_file).then_with(|| {
285 let maybe_numeric_ordering = maybe!({
286 let path_a = Path::new(component_a.as_os_str());
287 let num_and_remainder_a = if a_is_file {
288 path_a.file_stem()
289 } else {
290 path_a.file_name()
291 }
292 .and_then(|s| s.to_str())
293 .and_then(NumericPrefixWithSuffix::from_numeric_prefixed_str)?;
294
295 let path_b = Path::new(component_b.as_os_str());
296 let num_and_remainder_b = if b_is_file {
297 path_b.file_stem()
298 } else {
299 path_b.file_name()
300 }
301 .and_then(|s| s.to_str())
302 .and_then(NumericPrefixWithSuffix::from_numeric_prefixed_str)?;
303
304 num_and_remainder_a.partial_cmp(&num_and_remainder_b)
305 });
306
307 maybe_numeric_ordering.unwrap_or_else(|| {
308 let name_a = UniCase::new(component_a.as_os_str().to_string_lossy());
309 let name_b = UniCase::new(component_b.as_os_str().to_string_lossy());
310
311 name_a.cmp(&name_b)
312 })
313 });
314 if !ordering.is_eq() {
315 return ordering;
316 }
317 }
318 (Some(_), None) => break cmp::Ordering::Greater,
319 (None, Some(_)) => break cmp::Ordering::Less,
320 (None, None) => break cmp::Ordering::Equal,
321 }
322 }
323}
324
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the `PathWithPosition` a parsing test expects.
    fn position(path: &str, row: Option<u32>, column: Option<u32>) -> PathWithPosition {
        PathWithPosition {
            path: PathBuf::from(path),
            row,
            column,
        }
    }

    /// Sorting entries whose directory names contain dots.
    #[test]
    fn compare_paths_with_dots() {
        let expected = vec![
            (Path::new("test_dirs"), false),
            (Path::new("test_dirs/1.45"), false),
            (Path::new("test_dirs/1.45/foo_1"), true),
            (Path::new("test_dirs/1.45/foo_2"), true),
            (Path::new("test_dirs/1.46"), false),
            (Path::new("test_dirs/1.46/bar_1"), true),
            (Path::new("test_dirs/1.46/bar_2"), true),
        ];

        let mut paths = vec![
            (Path::new("test_dirs"), false),
            (Path::new("test_dirs/1.46"), false),
            (Path::new("test_dirs/1.46/bar_1"), true),
            (Path::new("test_dirs/1.46/bar_2"), true),
            (Path::new("test_dirs/1.45"), false),
            (Path::new("test_dirs/1.45/foo_2"), true),
            (Path::new("test_dirs/1.45/foo_1"), true),
        ];
        paths.sort_by(|&a, &b| compare_paths(a, b));

        assert_eq!(paths, expected);
    }

    /// Well-formed inputs with zero, one or two position parts.
    #[test]
    fn path_with_position_parsing_positive() {
        let input_and_expected = [
            ("test_file.rs", position("test_file.rs", None, None)),
            ("test_file.rs:1", position("test_file.rs", Some(1), None)),
            ("test_file.rs:1:2", position("test_file.rs", Some(1), Some(2))),
        ];

        for (input, expected) in input_and_expected {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual, expected,
                "For positive case input str '{input}', got a parse mismatch"
            );
        }
    }

    /// Malformed suffixes: only the leading well-formed part of the suffix is used.
    #[test]
    fn path_with_position_parsing_negative() {
        let input_and_position = [
            ("test_file.rs:a", None, None),
            ("test_file.rs:a:b", None, None),
            ("test_file.rs::", None, None),
            ("test_file.rs::1", None, None),
            ("test_file.rs:1::", Some(1), None),
            ("test_file.rs::1:2", None, None),
            ("test_file.rs:1::2", Some(1), None),
            ("test_file.rs:1:2:3", Some(1), Some(2)),
        ];

        for (input, row, column) in input_and_position {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual,
                position("test_file.rs", row, column),
                "For negative case input str '{input}', got a parse mismatch"
            );
        }
    }

    // Trim off trailing `:`s for otherwise valid input.
    #[test]
    fn path_with_position_parsing_special() {
        #[cfg(not(target_os = "windows"))]
        let input_and_expected = [
            ("test_file.rs:", position("test_file.rs", None, None)),
            ("test_file.rs:1:", position("test_file.rs", Some(1), None)),
            (
                "crates/file_finder/src/file_finder.rs:1902:13:",
                position("crates/file_finder/src/file_finder.rs", Some(1902), Some(13)),
            ),
        ];

        #[cfg(target_os = "windows")]
        let input_and_expected = [
            ("test_file.rs:", position("test_file.rs", None, None)),
            ("test_file.rs:1:", position("test_file.rs", Some(1), None)),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:",
                position(
                    "\\\\?\\C:\\Users\\someone\\test_file.rs",
                    Some(1902),
                    Some(13),
                ),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:15:",
                position(
                    "\\\\?\\C:\\Users\\someone\\test_file.rs",
                    Some(1902),
                    Some(13),
                ),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:::15:",
                position("\\\\?\\C:\\Users\\someone\\test_file.rs", Some(1902), None),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902,13):",
                position(
                    "\\\\?\\C:\\Users\\someone\\test_file.rs",
                    Some(1902),
                    Some(13),
                ),
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902):",
                position("\\\\?\\C:\\Users\\someone\\test_file.rs", Some(1902), None),
            ),
            (
                "C:\\Users\\someone\\test_file.rs:1902:13:",
                position("C:\\Users\\someone\\test_file.rs", Some(1902), Some(13)),
            ),
            (
                "crates/utils/paths.rs",
                position("crates\\utils\\paths.rs", None, None),
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902,13):",
                position("C:\\Users\\someone\\test_file.rs", Some(1902), Some(13)),
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902):",
                position("C:\\Users\\someone\\test_file.rs", Some(1902), None),
            ),
            (
                "crates/utils/paths.rs:101",
                position("crates\\utils\\paths.rs", Some(101), None),
            ),
        ];

        for (input, expected) in input_and_expected {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual, expected,
                "For special case input str '{input}', got a parse mismatch"
            );
        }
    }

    /// A file directly inside the home directory is shown relative to `~` on Linux and macOS.
    #[test]
    fn test_path_compact() {
        let mut path = PathBuf::new();
        path.push(home_dir().to_string_lossy().to_string());
        path.push("some_file.txt".to_string());

        if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
            assert_eq!(path.compact().to_str(), Some("~/some_file.txt"));
        } else {
            assert_eq!(path.compact().to_str(), path.to_str());
        }
    }

    #[test]
    fn test_icon_stem_or_suffix() {
        let input_and_expected = [
            // No dots in name
            ("/a/b/c/file_name.rs", "rs"),
            // Single dot in name
            ("/a/b/c/file.name.rs", "rs"),
            // No suffix
            ("/a/b/c/file", "file"),
            // Multiple dots in name
            ("/a/b/c/long.file.name.rs", "rs"),
            // Hidden file, no extension
            ("/a/b/c/.gitignore", "gitignore"),
            // Hidden file, with extension
            ("/a/b/c/.eslintrc.js", "eslintrc.js"),
        ];

        for (input, expected) in input_and_expected {
            let path = Path::new(input);
            assert_eq!(path.icon_stem_or_suffix(), Some(expected));
        }
    }

    #[test]
    fn test_extension_or_hidden_file_name() {
        let input_and_expected = [
            // No dots in name
            ("/a/b/c/file_name.rs", "rs"),
            // Single dot in name
            ("/a/b/c/file.name.rs", "rs"),
            // Multiple dots in name
            ("/a/b/c/long.file.name.rs", "rs"),
            // Hidden file, no extension
            ("/a/b/c/.gitignore", "gitignore"),
            // Hidden file, with extension
            ("/a/b/c/.eslintrc.js", "js"),
        ];

        for (input, expected) in input_and_expected {
            let path = Path::new(input);
            assert_eq!(path.extension_or_hidden_file_name(), Some(expected));
        }
    }

    /// A directory itself must match a glob that targets its contents.
    #[test]
    fn edge_of_glob() {
        let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
        let path = Path::new("/work/node_modules");
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }

    #[test]
    fn project_search() {
        let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
        let path = Path::new("/Users/someonetoignore/work/zed/zed.dev/node_modules");
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }
}