1use std::sync::OnceLock;
2use std::{
3 ffi::OsStr,
4 path::{Path, PathBuf},
5 sync::LazyLock,
6};
7
8use globset::{Glob, GlobSet, GlobSetBuilder};
9use regex::Regex;
10use serde::{Deserialize, Serialize};
11
12/// Returns the path to the user's home directory.
13pub fn home_dir() -> &'static PathBuf {
14 static HOME_DIR: OnceLock<PathBuf> = OnceLock::new();
15 HOME_DIR.get_or_init(|| dirs::home_dir().expect("failed to determine home directory"))
16}
17
18pub trait PathExt {
19 fn compact(&self) -> PathBuf;
20 fn icon_stem_or_suffix(&self) -> Option<&str>;
21 fn extension_or_hidden_file_name(&self) -> Option<&str>;
22 fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
23 where
24 Self: From<&'a Path>,
25 {
26 #[cfg(unix)]
27 {
28 use std::os::unix::prelude::OsStrExt;
29 Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
30 }
31 #[cfg(windows)]
32 {
33 use anyhow::anyhow;
34 use tendril::fmt::{Format, WTF8};
35 WTF8::validate(bytes)
36 .then(|| {
37 // Safety: bytes are valid WTF-8 sequence.
38 Self::from(Path::new(unsafe {
39 OsStr::from_encoded_bytes_unchecked(bytes)
40 }))
41 })
42 .ok_or_else(|| anyhow!("Invalid WTF-8 sequence: {bytes:?}"))
43 }
44 }
45}
46
47impl<T: AsRef<Path>> PathExt for T {
48 /// Compacts a given file path by replacing the user's home directory
49 /// prefix with a tilde (`~`).
50 ///
51 /// # Returns
52 ///
53 /// * A `PathBuf` containing the compacted file path. If the input path
54 /// does not have the user's home directory prefix, or if we are not on
55 /// Linux or macOS, the original path is returned unchanged.
56 fn compact(&self) -> PathBuf {
57 if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
58 match self.as_ref().strip_prefix(home_dir().as_path()) {
59 Ok(relative_path) => {
60 let mut shortened_path = PathBuf::new();
61 shortened_path.push("~");
62 shortened_path.push(relative_path);
63 shortened_path
64 }
65 Err(_) => self.as_ref().to_path_buf(),
66 }
67 } else {
68 self.as_ref().to_path_buf()
69 }
70 }
71
72 /// Returns either the suffix if available, or the file stem otherwise to determine which file icon to use
73 fn icon_stem_or_suffix(&self) -> Option<&str> {
74 let path = self.as_ref();
75 let file_name = path.file_name()?.to_str()?;
76 if file_name.starts_with('.') {
77 return file_name.strip_prefix('.');
78 }
79
80 path.extension()
81 .and_then(|e| e.to_str())
82 .or_else(|| path.file_stem()?.to_str())
83 }
84
85 /// Returns a file's extension or, if the file is hidden, its name without the leading dot
86 fn extension_or_hidden_file_name(&self) -> Option<&str> {
87 if let Some(extension) = self.as_ref().extension() {
88 return extension.to_str();
89 }
90
91 self.as_ref().file_name()?.to_str()?.split('.').last()
92 }
93}
94
/// The delimiter between the parts of `path_query:row_number:column_number` strings.
pub const FILE_ROW_COLUMN_DELIMITER: char = ':';
97
/// Extracts a file name together with its optional row and column suffix.
/// The parenthesized format is used by [MSBuild](https://learn.microsoft.com/en-us/visualstudio/msbuild/msbuild-diagnostic-format-for-tasks) compatible tools.
// NOTE: Every alternative needs exactly three capture groups for extract(): file_name, row and column.
// Valid patterns that lack a row and/or a column keep empty groups in their place.
// The pattern is written in verbose mode (`(?x)`), so the whitespace and `#` comments inside it are ignored.
const ROW_COL_CAPTURE_REGEX: &str = r"(?x)
 ([^\(]+)(?:
 \((\d+),(\d+)\) # filename(row,column)
 |
 \((\d+)\)() # filename(row)
 )
 |
 ([^\:]+)(?:
 \:(\d+)\:(\d+) # filename:row:column
 |
 \:(\d+)() # filename:row
 |
 \:()() # filename:
 )";
116
/// A representation of a path-like string with optional row and column numbers.
/// Examples of matching values: `te`, `test.rs:22`, `te:22:5`, `test.c(22)`, `test.c(22,5)`, etc.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct PathWithPosition {
    /// The path part of the string, without any row/column suffix.
    pub path: PathBuf,
    /// The row number, if one was given.
    pub row: Option<u32>,
    /// The column number, if one was given.
    // Absent if row is absent.
    pub column: Option<u32>,
}
126
127impl PathWithPosition {
128 /// Returns a PathWithPosition from a path.
129 pub fn from_path(path: PathBuf) -> Self {
130 Self {
131 path,
132 row: None,
133 column: None,
134 }
135 }
136 /// Parses a string that possibly has `:row:column` or `(row, column)` suffix.
137 /// Ignores trailing `:`s, so `test.rs:22:` is parsed as `test.rs:22`.
138 /// If the suffix parsing fails, the whole string is parsed as a path.
139 pub fn parse_str(s: &str) -> Self {
140 let trimmed = s.trim();
141 let path = Path::new(trimmed);
142 let maybe_file_name_with_row_col = path
143 .file_name()
144 .unwrap_or_default()
145 .to_str()
146 .unwrap_or_default();
147 if maybe_file_name_with_row_col.is_empty() {
148 return Self {
149 path: Path::new(s).to_path_buf(),
150 row: None,
151 column: None,
152 };
153 }
154
155 // Let's avoid repeated init cost on this. It is subject to thread contention, but
156 // so far this code isn't called from multiple hot paths. Getting contention here
157 // in the future seems unlikely.
158 static SUFFIX_RE: LazyLock<Regex> =
159 LazyLock::new(|| Regex::new(ROW_COL_CAPTURE_REGEX).unwrap());
160 match SUFFIX_RE
161 .captures(maybe_file_name_with_row_col)
162 .map(|caps| caps.extract())
163 {
164 Some((_, [file_name, maybe_row, maybe_column])) => {
165 let row = maybe_row.parse::<u32>().ok();
166 let column = maybe_column.parse::<u32>().ok();
167
168 let suffix_length = maybe_file_name_with_row_col.len() - file_name.len();
169 let path_without_suffix = &trimmed[..trimmed.len() - suffix_length];
170
171 Self {
172 path: Path::new(path_without_suffix).to_path_buf(),
173 row,
174 column,
175 }
176 }
177 None => Self {
178 path: Path::new(s).to_path_buf(),
179 row: None,
180 column: None,
181 },
182 }
183 }
184
185 pub fn map_path<E>(
186 self,
187 mapping: impl FnOnce(PathBuf) -> Result<PathBuf, E>,
188 ) -> Result<PathWithPosition, E> {
189 Ok(PathWithPosition {
190 path: mapping(self.path)?,
191 row: self.row,
192 column: self.column,
193 })
194 }
195
196 pub fn to_string(&self, path_to_string: impl Fn(&PathBuf) -> String) -> String {
197 let path_string = path_to_string(&self.path);
198 if let Some(row) = self.row {
199 if let Some(column) = self.column {
200 format!("{path_string}:{row}:{column}")
201 } else {
202 format!("{path_string}:{row}")
203 }
204 } else {
205 path_string
206 }
207 }
208}
209
/// Matches paths against a set of glob patterns, keeping the pattern sources
/// next to the compiled set.
#[derive(Clone, Debug, Default)]
pub struct PathMatcher {
    // Text of each glob, in the order the globs were supplied.
    sources: Vec<String>,
    // Compiled set built from the same globs.
    glob: GlobSet,
}
215
216// impl std::fmt::Display for PathMatcher {
217// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
218// self.sources.fmt(f)
219// }
220// }
221
222impl PartialEq for PathMatcher {
223 fn eq(&self, other: &Self) -> bool {
224 self.sources.eq(&other.sources)
225 }
226}
227
228impl Eq for PathMatcher {}
229
230impl PathMatcher {
231 pub fn new(globs: &[String]) -> Result<Self, globset::Error> {
232 let globs = globs
233 .into_iter()
234 .map(|glob| Glob::new(&glob))
235 .collect::<Result<Vec<_>, _>>()?;
236 let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
237 let mut glob_builder = GlobSetBuilder::new();
238 for single_glob in globs {
239 glob_builder.add(single_glob);
240 }
241 let glob = glob_builder.build()?;
242 Ok(PathMatcher { glob, sources })
243 }
244
245 pub fn sources(&self) -> &[String] {
246 &self.sources
247 }
248
249 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
250 let other_path = other.as_ref();
251 self.sources.iter().any(|source| {
252 let as_bytes = other_path.as_os_str().as_encoded_bytes();
253 as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
254 }) || self.glob.is_match(other_path)
255 || self.check_with_end_separator(other_path)
256 }
257
258 fn check_with_end_separator(&self, path: &Path) -> bool {
259 let path_str = path.to_string_lossy();
260 let separator = std::path::MAIN_SEPARATOR_STR;
261 if path_str.ends_with(separator) {
262 self.glob.is_match(path)
263 } else {
264 self.glob.is_match(path_str.to_string() + separator)
265 }
266 }
267}
268
#[cfg(test)]
mod tests {
    use super::*;

    // Well-formed inputs: a bare file name, `name:row` and `name:row:column`.
    #[test]
    fn path_with_position_parsing_positive() {
        let input_and_expected = [
            (
                "test_file.rs",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: None,
                    column: None,
                },
            ),
            (
                "test_file.rs:1",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: Some(1),
                    column: None,
                },
            ),
            (
                "test_file.rs:1:2",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: Some(1),
                    column: Some(2),
                },
            ),
        ];

        for (input, expected) in input_and_expected {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual, expected,
                "For positive case input str '{input}', got a parse mismatch"
            );
        }
    }

    // Malformed suffixes: the path is always expected to be `test_file.rs`, and only
    // the leading well-formed row/column numbers (if any) are expected to be kept.
    #[test]
    fn path_with_position_parsing_negative() {
        for (input, row, column) in [
            ("test_file.rs:a", None, None),
            ("test_file.rs:a:b", None, None),
            ("test_file.rs::", None, None),
            ("test_file.rs::1", None, None),
            ("test_file.rs:1::", Some(1), None),
            ("test_file.rs::1:2", None, None),
            ("test_file.rs:1::2", Some(1), None),
            ("test_file.rs:1:2:3", Some(1), Some(2)),
        ] {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual,
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row,
                    column,
                },
                "For negative case input str '{input}', got a parse mismatch"
            );
        }
    }

    // Trim off trailing `:`s for otherwise valid input.
    // The expectations are platform-specific: the Windows table additionally covers
    // drive-letter and verbatim (`\\?\`) paths and the parenthesized row/column format.
    #[test]
    fn path_with_position_parsing_special() {
        #[cfg(not(target_os = "windows"))]
        let input_and_expected = [
            (
                "test_file.rs:",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: None,
                    column: None,
                },
            ),
            (
                "test_file.rs:1:",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: Some(1),
                    column: None,
                },
            ),
            (
                "crates/file_finder/src/file_finder.rs:1902:13:",
                PathWithPosition {
                    path: PathBuf::from("crates/file_finder/src/file_finder.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
        ];

        #[cfg(target_os = "windows")]
        let input_and_expected = [
            (
                "test_file.rs:",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: None,
                    column: None,
                },
            ),
            (
                "test_file.rs:1:",
                PathWithPosition {
                    path: PathBuf::from("test_file.rs"),
                    row: Some(1),
                    column: None,
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:15:",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:::15:",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: None,
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902,13):",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "\\\\?\\C:\\Users\\someone\\test_file.rs(1902):",
                PathWithPosition {
                    path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: None,
                },
            ),
            (
                "C:\\Users\\someone\\test_file.rs:1902:13:",
                PathWithPosition {
                    path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "crates/utils/paths.rs",
                PathWithPosition {
                    path: PathBuf::from("crates\\utils\\paths.rs"),
                    row: None,
                    column: None,
                },
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902,13):",
                PathWithPosition {
                    path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: Some(13),
                },
            ),
            (
                "C:\\Users\\someone\\test_file.rs(1902):",
                PathWithPosition {
                    path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
                    row: Some(1902),
                    column: None,
                },
            ),
            (
                "crates/utils/paths.rs:101",
                PathWithPosition {
                    path: PathBuf::from("crates\\utils\\paths.rs"),
                    row: Some(101),
                    column: None,
                },
            ),
        ];

        for (input, expected) in input_and_expected {
            let actual = PathWithPosition::parse_str(input);
            assert_eq!(
                actual, expected,
                "For special case input str '{input}', got a parse mismatch"
            );
        }
    }

    // A file directly inside the home directory is abbreviated with `~` on Linux and
    // macOS and expected to be left unchanged elsewhere.
    #[test]
    fn test_path_compact() {
        let path: PathBuf = [
            home_dir().to_string_lossy().to_string(),
            "some_file.txt".to_string(),
        ]
        .iter()
        .collect();
        if cfg!(target_os = "linux") || cfg!(target_os = "macos") {
            assert_eq!(path.compact().to_str(), Some("~/some_file.txt"));
        } else {
            assert_eq!(path.compact().to_str(), path.to_str());
        }
    }

    #[test]
    fn test_icon_stem_or_suffix() {
        // Stem without dots, plain extension
        let path = Path::new("/a/b/c/file_name.rs");
        assert_eq!(path.icon_stem_or_suffix(), Some("rs"));

        // One dot inside the stem
        let path = Path::new("/a/b/c/file.name.rs");
        assert_eq!(path.icon_stem_or_suffix(), Some("rs"));

        // No suffix: the stem itself is used
        let path = Path::new("/a/b/c/file");
        assert_eq!(path.icon_stem_or_suffix(), Some("file"));

        // Several dots inside the stem
        let path = Path::new("/a/b/c/long.file.name.rs");
        assert_eq!(path.icon_stem_or_suffix(), Some("rs"));

        // Hidden file, no extension
        let path = Path::new("/a/b/c/.gitignore");
        assert_eq!(path.icon_stem_or_suffix(), Some("gitignore"));

        // Hidden file, with extension: everything after the leading dot is kept
        let path = Path::new("/a/b/c/.eslintrc.js");
        assert_eq!(path.icon_stem_or_suffix(), Some("eslintrc.js"));
    }

    #[test]
    fn test_extension_or_hidden_file_name() {
        // Stem without dots, plain extension
        let path = Path::new("/a/b/c/file_name.rs");
        assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));

        // One dot inside the stem
        let path = Path::new("/a/b/c/file.name.rs");
        assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));

        // Several dots inside the stem
        let path = Path::new("/a/b/c/long.file.name.rs");
        assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));

        // Hidden file, no extension
        let path = Path::new("/a/b/c/.gitignore");
        assert_eq!(path.extension_or_hidden_file_name(), Some("gitignore"));

        // Hidden file, with extension: only the extension is returned
        let path = Path::new("/a/b/c/.eslintrc.js");
        assert_eq!(path.extension_or_hidden_file_name(), Some("js"));
    }

    // A directory path without a trailing separator is still expected to match a
    // `**/dir/**` pattern.
    #[test]
    fn edge_of_glob() {
        let path = Path::new("/work/node_modules");
        let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }

    // Same expectation as `edge_of_glob`, with a longer absolute path.
    #[test]
    fn project_search() {
        let path = Path::new("/Users/someonetoignore/work/zed/zed.dev/node_modules");
        let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
        assert!(
            path_matcher.is_match(path),
            "Path matcher should match {path:?}"
        );
    }
}