1use globset::{Glob, GlobSet, GlobSetBuilder};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::borrow::Cow;
5use std::cmp::Ordering;
6use std::fmt::{Display, Formatter};
7use std::mem;
8use std::ops::Deref;
9use std::path::StripPrefixError;
10use std::sync::{Arc, OnceLock};
11use std::{
12 ffi::OsStr,
13 path::{Path, PathBuf},
14 sync::LazyLock,
15};
16
17/// Returns the path to the user's home directory.
18pub fn home_dir() -> &'static SanitizedPathBuf {
19 static HOME_DIR: OnceLock<SanitizedPathBuf> = OnceLock::new();
20 HOME_DIR.get_or_init(|| {
21 dirs::home_dir()
22 .expect("failed to determine home directory")
23 .into()
24 })
25}
26
/// Extension methods for values that can be viewed as a filesystem path.
pub trait PathExt {
    /// Returns an owned, possibly abbreviated, copy of the path.
    fn compact(&self) -> PathBuf;
    /// Returns the file's extension or, for hidden (dot-prefixed) files, the
    /// file name without its leading dot.
    fn extension_or_hidden_file_name(&self) -> Option<&str>;
    /// Returns an owned string rendering of the path.
    fn to_sanitized_string(&self) -> String;
    /// Builds `Self` from the raw bytes of a path.
    ///
    /// On Unix the bytes are wrapped in an `OsStr` as-is. On Windows the bytes
    /// must form a valid WTF-8 sequence, which is checked with `WTF8::validate`.
    ///
    /// # Errors
    ///
    /// On Windows, returns an error when `bytes` is not valid WTF-8. The Unix
    /// branch always returns `Ok`.
    fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
    where
        Self: From<&'a Path>,
    {
        #[cfg(unix)]
        {
            use std::os::unix::prelude::OsStrExt;
            Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
        }
        #[cfg(windows)]
        {
            use anyhow::Context as _;
            use tendril::fmt::{Format, WTF8};
            WTF8::validate(bytes)
                .then(|| {
                    // SAFETY: `WTF8::validate` returned `true` just above, so the
                    // bytes are a valid WTF-8 sequence.
                    Self::from(Path::new(unsafe {
                        OsStr::from_encoded_bytes_unchecked(bytes)
                    }))
                })
                .with_context(|| format!("Invalid WTF-8 sequence: {bytes:?}"))
        }
    }
}
55
56impl<T: AsRef<Path>> PathExt for T {
57 /// Compacts a given file path by replacing the user's home directory
58 /// prefix with a tilde (`~`).
59 ///
60 /// # Returns
61 ///
62 /// * A `PathBuf` containing the compacted file path. If the input path
63 /// does not have the user's home directory prefix, or if we are not on
64 /// Linux or macOS, the original path is returned unchanged.
65 fn compact(&self) -> PathBuf {
66 if cfg!(any(target_os = "linux", target_os = "freebsd")) || cfg!(target_os = "macos") {
67 match self.as_ref().strip_prefix(home_dir().as_path()) {
68 Ok(relative_path) => {
69 let mut shortened_path = PathBuf::new();
70 shortened_path.push("~");
71 shortened_path.push(relative_path);
72 shortened_path
73 }
74 Err(_) => self.as_ref().to_path_buf(),
75 }
76 } else {
77 self.as_ref().to_path_buf()
78 }
79 }
80
81 /// Returns a file's extension or, if the file is hidden, its name without the leading dot
82 fn extension_or_hidden_file_name(&self) -> Option<&str> {
83 let path = self.as_ref();
84 let file_name = path.file_name()?.to_str()?;
85 if file_name.starts_with('.') {
86 return file_name.strip_prefix('.');
87 }
88
89 path.extension()
90 .and_then(|e| e.to_str())
91 .or_else(|| path.file_stem()?.to_str())
92 }
93
94 /// Returns a sanitized string representation of the path.
95 /// Note, on Windows, this assumes that the path is a valid UTF-8 string and
96 /// is not a UNC path.
97 fn to_sanitized_string(&self) -> String {
98 #[cfg(target_os = "windows")]
99 {
100 self.as_ref().to_string_lossy().replace("/", "\\")
101 }
102 #[cfg(not(target_os = "windows"))]
103 {
104 self.as_ref().to_string_lossy().to_string()
105 }
106 }
107}
108
/// A borrowed path that has been sanitized.
///
/// In memory, this is identical to `Path` (the type is `#[repr(transparent)]`).
/// On non-Windows platforms, conversions to this type are no-ops. On Windows,
/// these conversions sanitize UNC paths by removing the `\\?\` prefix.
#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)]
#[repr(transparent)]
pub struct SanitizedPath(Path);
114
115impl SanitizedPath {
116 pub fn new<T: AsRef<Path> + ?Sized>(path: &T) -> &Self {
117 #[cfg(not(target_os = "windows"))]
118 return Self::unchecked_new(path.as_ref());
119
120 #[cfg(target_os = "windows")]
121 return Self::unchecked_new(dunce::simplified(path.as_ref()));
122 }
123
124 pub fn unchecked_new<T: AsRef<Path> + ?Sized>(path: &T) -> &Self {
125 // safe because `Path` and `SanitizedPath` have the same repr and Drop impl
126 unsafe { mem::transmute::<&Path, &Self>(path.as_ref()) }
127 }
128
129 pub fn from_arc(path: Arc<Path>) -> Arc<Self> {
130 // safe because `Path` and `SanitizedPath` have the same repr and Drop impl
131 #[cfg(not(target_os = "windows"))]
132 return unsafe { mem::transmute::<Arc<Path>, Arc<Self>>(path) };
133
134 // TODO: could avoid allocating here if dunce::simplified results in the same path
135 #[cfg(target_os = "windows")]
136 return Self::new(&path).into();
137 }
138
139 pub fn new_arc<T: AsRef<Path> + ?Sized>(path: &T) -> Arc<Self> {
140 Self::new(path).into()
141 }
142
143 pub fn cast_arc(path: Arc<Self>) -> Arc<Path> {
144 // safe because `Path` and `SanitizedPath` have the same repr and Drop impl
145 unsafe { mem::transmute::<Arc<Self>, Arc<Path>>(path) }
146 }
147
148 pub fn cast_arc_ref(path: &Arc<Self>) -> &Arc<Path> {
149 // safe because `Path` and `SanitizedPath` have the same repr and Drop impl
150 unsafe { mem::transmute::<&Arc<Self>, &Arc<Path>>(path) }
151 }
152
153 pub fn starts_with(&self, prefix: &Self) -> bool {
154 self.0.starts_with(&prefix.0)
155 }
156
157 pub fn as_path(&self) -> &Path {
158 &self.0
159 }
160
161 pub fn file_name(&self) -> Option<&std::ffi::OsStr> {
162 self.0.file_name()
163 }
164
165 pub fn extension(&self) -> Option<&std::ffi::OsStr> {
166 self.0.extension()
167 }
168
169 pub fn join<P: AsRef<Path>>(&self, path: P) -> SanitizedPathBuf {
170 self.0.join(path).into()
171 }
172
173 pub fn parent(&self) -> Option<&Self> {
174 self.0.parent().map(Self::unchecked_new)
175 }
176
177 pub fn strip_prefix(&self, base: &Self) -> Result<&Path, StripPrefixError> {
178 self.0.strip_prefix(base.as_path())
179 }
180
181 pub fn to_str(&self) -> Option<&str> {
182 self.0.to_str()
183 }
184
185 pub fn to_path_buf(&self) -> PathBuf {
186 self.0.to_path_buf()
187 }
188
189 pub fn to_glob_string(&self) -> String {
190 #[cfg(target_os = "windows")]
191 {
192 self.0.to_string_lossy().replace("/", "\\")
193 }
194 #[cfg(not(target_os = "windows"))]
195 {
196 self.0.to_string_lossy().to_string()
197 }
198 }
199}
200
201impl std::fmt::Debug for SanitizedPath {
202 fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
203 std::fmt::Debug::fmt(&self.0, formatter)
204 }
205}
206
207impl Display for SanitizedPath {
208 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
209 write!(f, "{}", self.0.display())
210 }
211}
212
213impl From<&SanitizedPath> for Arc<SanitizedPath> {
214 fn from(sanitized_path: &SanitizedPath) -> Self {
215 let path: Arc<Path> = sanitized_path.0.into();
216 // safe because `Path` and `SanitizedPath` have the same repr and Drop impl
217 unsafe { mem::transmute(path) }
218 }
219}
220
221impl From<&SanitizedPath> for PathBuf {
222 fn from(sanitized_path: &SanitizedPath) -> Self {
223 sanitized_path.as_path().into()
224 }
225}
226
227impl AsRef<Path> for SanitizedPath {
228 fn as_ref(&self) -> &Path {
229 &self.0
230 }
231}
232
/// An owned path that has been sanitized.
///
/// In memory, this is identical to `PathBuf` (the type is `#[repr(transparent)]`).
/// On non-Windows platforms, conversions to this type do not change the path.
/// On Windows, these conversions sanitize UNC paths by removing the `\\?\` prefix.
#[derive(Default, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
#[repr(transparent)]
pub struct SanitizedPathBuf(PathBuf);
238
239impl SanitizedPathBuf {
240 pub fn new() -> Self {
241 PathBuf::new().into()
242 }
243
244 pub fn exists(&self) -> bool {
245 self.0.exists()
246 }
247
248 pub fn push<P: AsRef<Path>>(&mut self, path: P) {
249 if path.as_ref().is_absolute() {
250 self.0.push(SanitizedPath::new(path.as_ref()).as_path());
251 } else {
252 self.0.push(path);
253 }
254 }
255
256 pub fn is_relative(&self) -> bool {
257 self.0.is_relative()
258 }
259
260 pub fn canonicalize(&self) -> std::io::Result<Self> {
261 Ok(self.0.canonicalize()?.into())
262 }
263
264 pub fn join<P: AsRef<Path>>(&self, path: P) -> SanitizedPathBuf {
265 self.0.join(SanitizedPath::new(path.as_ref())).into()
266 }
267
268 pub fn display(&self) -> std::path::Display<'_> {
269 self.0.display()
270 }
271
272 pub fn to_string_lossy(&self) -> Cow<'_, str> {
273 self.0.to_string_lossy()
274 }
275
276 pub fn as_path(&self) -> &SanitizedPath {
277 SanitizedPath::unchecked_new(self)
278 }
279}
280
281impl std::fmt::Debug for SanitizedPathBuf {
282 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
283 self.0.fmt(f)
284 }
285}
286
287impl AsRef<PathBuf> for SanitizedPathBuf {
288 fn as_ref(&self) -> &PathBuf {
289 &self.0
290 }
291}
292
293impl Deref for SanitizedPathBuf {
294 type Target = SanitizedPath;
295
296 fn deref(&self) -> &Self::Target {
297 self.as_path()
298 }
299}
300
301impl AsRef<SanitizedPath> for SanitizedPathBuf {
302 fn as_ref(&self) -> &SanitizedPath {
303 self.as_path()
304 }
305}
306
307impl AsRef<OsStr> for SanitizedPathBuf {
308 fn as_ref(&self) -> &OsStr {
309 self.0.as_os_str()
310 }
311}
312
313impl AsRef<Path> for SanitizedPathBuf {
314 fn as_ref(&self) -> &Path {
315 &self.0
316 }
317}
318
319impl From<PathBuf> for SanitizedPathBuf {
320 fn from(path: PathBuf) -> Self {
321 Self::from(path.as_path())
322 }
323}
324
325impl From<&SanitizedPath> for SanitizedPathBuf {
326 fn from(value: &SanitizedPath) -> Self {
327 Self(PathBuf::from(value.as_path()))
328 }
329}
330
331impl From<&Path> for SanitizedPathBuf {
332 fn from(path: &Path) -> Self {
333 Self(PathBuf::from(dunce::simplified(path)))
334 }
335}
336
337impl From<&str> for SanitizedPathBuf {
338 fn from(path: &str) -> Self {
339 PathBuf::from(path).into()
340 }
341}
342
/// The separator convention a path follows.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathStyle {
    /// Components are separated by `/`.
    Posix,
    /// Components are separated by `\`.
    Windows,
}

impl PathStyle {
    /// Returns the style matching the operating system this code was compiled for.
    #[cfg(target_os = "windows")]
    pub const fn current() -> Self {
        PathStyle::Windows
    }

    /// Returns the style matching the operating system this code was compiled for.
    #[cfg(not(target_os = "windows"))]
    pub const fn current() -> Self {
        PathStyle::Posix
    }

    /// Returns the component separator used by this style.
    ///
    /// The result is a string literal, so it is returned as `&'static str`
    /// rather than being tied to the borrow of `self`.
    #[inline]
    pub const fn separator(&self) -> &'static str {
        match self {
            PathStyle::Posix => "/",
            PathStyle::Windows => "\\",
        }
    }
}
368
/// An owned path paired with the `PathStyle` it should be rendered in.
#[derive(Debug, Clone)]
pub struct RemotePathBuf {
    /// The path exactly as it was handed to the constructor.
    inner: PathBuf,
    /// The separator convention used when rendering the path.
    style: PathStyle,
    /// Cached string representation, computed once in `new`.
    string: String,
}
375
376impl RemotePathBuf {
377 pub fn new(path: PathBuf, style: PathStyle) -> Self {
378 #[cfg(target_os = "windows")]
379 let string = match style {
380 PathStyle::Posix => path.to_string_lossy().replace('\\', "/"),
381 PathStyle::Windows => path.to_string_lossy().into(),
382 };
383 #[cfg(not(target_os = "windows"))]
384 let string = match style {
385 PathStyle::Posix => path.to_string_lossy().to_string(),
386 PathStyle::Windows => path.to_string_lossy().replace('/', "\\"),
387 };
388 Self {
389 inner: path,
390 style,
391 string,
392 }
393 }
394
395 pub fn from_str(path: &str, style: PathStyle) -> Self {
396 let path_buf = PathBuf::from(path);
397 Self::new(path_buf, style)
398 }
399
400 #[cfg(target_os = "windows")]
401 pub fn to_proto(&self) -> String {
402 match self.path_style() {
403 PathStyle::Posix => self.to_string(),
404 PathStyle::Windows => self.inner.to_string_lossy().replace('\\', "/"),
405 }
406 }
407
408 #[cfg(not(target_os = "windows"))]
409 pub fn to_proto(&self) -> String {
410 match self.path_style() {
411 PathStyle::Posix => self.inner.to_string_lossy().to_string(),
412 PathStyle::Windows => self.to_string(),
413 }
414 }
415
416 pub fn as_path(&self) -> &Path {
417 &self.inner
418 }
419
420 pub fn path_style(&self) -> PathStyle {
421 self.style
422 }
423
424 pub fn parent(&self) -> Option<RemotePathBuf> {
425 self.inner
426 .parent()
427 .map(|p| RemotePathBuf::new(p.to_path_buf(), self.style))
428 }
429}
430
431impl Display for RemotePathBuf {
432 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
433 write!(f, "{}", self.string)
434 }
435}
436
/// The delimiter used when parsing `path_query:row_number:column_number` strings.
pub const FILE_ROW_COLUMN_DELIMITER: char = ':';
439
/// Pattern that splits a file name from a trailing row/column suffix.
///
/// It has three top-level alternatives (`name:(row,col)`, `name(row,col)` and
/// `name:row:col`). Each alternative captures the file name and then either a
/// row and a column, or a row followed by an empty group `()`, so a match always
/// reports three groups: file name, row, column (the column may be empty).
const ROW_COL_CAPTURE_REGEX: &str = r"(?xs)
 ([^\(]+)\:(?:
 \((\d+)[,:](\d+)\) # filename:(row,column), filename:(row:column)
 |
 \((\d+)\)() # filename:(row)
 )
 |
 ([^\(]+)(?:
 \((\d+)[,:](\d+)\) # filename(row,column), filename(row:column)
 |
 \((\d+)\)() # filename(row)
 )
 |
 (.+?)(?:
 \:+(\d+)\:(\d+)\:*$ # filename:row:column
 |
 \:+(\d+)\:*()$ # filename:row
 )";
458
/// A representation of a path-like string with optional row and column numbers.
/// Examples of matching values: `te`, `test.rs:22`, `te:22:5`, `test.c(22)`, `test.c(22,5)`, etc.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct PathWithPosition {
    /// The path with any position suffix removed.
    pub path: PathBuf,
    /// The row number, when one was given.
    pub row: Option<u32>,
    /// The column number. Absent if row is absent.
    pub column: Option<u32>,
}
468
impl PathWithPosition {
    /// Returns a `PathWithPosition` for `path` with no row or column.
    pub fn from_path(path: PathBuf) -> Self {
        Self {
            path,
            row: None,
            column: None,
        }
    }

    /// Parses a string that possibly has a `:row:column` or `(row, column)` suffix.
    /// The parenthesis format is used by [MSBuild](https://learn.microsoft.com/en-us/visualstudio/msbuild/msbuild-diagnostic-format-for-tasks) compatible tools.
    /// Ignores trailing `:`s, so `test.rs:22:` is parsed as `test.rs:22`.
    /// If the suffix parsing fails, the whole string is parsed as a path.
    ///
    /// Be mindful that `test_file:10:1:` is a valid posix filename.
    /// `PathWithPosition` assumes that the ending position-like suffix is **not** part of the filename.
    ///
    /// # Examples
    ///
    /// ```
    /// # use util::paths::PathWithPosition;
    /// # use std::path::PathBuf;
    /// assert_eq!(PathWithPosition::parse_str("test_file"), PathWithPosition {
    ///     path: PathBuf::from("test_file"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file:10"), PathWithPosition {
    ///     path: PathBuf::from("test_file"),
    ///     row: Some(10),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1:2"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: Some(2),
    /// });
    /// ```
    ///
    /// # Expected parsing results when encountering ill-formatted inputs
    /// ```
    /// # use util::paths::PathWithPosition;
    /// # use std::path::PathBuf;
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:a"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs:a"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:a:b"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs:a:b"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs::"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs::"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs::1"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1::"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs::1:2"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: Some(2),
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1::2"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs:1"),
    ///     row: Some(2),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1:2:3"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs:1"),
    ///     row: Some(2),
    ///     column: Some(3),
    /// });
    /// ```
    pub fn parse_str(s: &str) -> Self {
        let trimmed = s.trim();
        let path = Path::new(trimmed);
        // Only the last path component can carry a position suffix.
        let maybe_file_name_with_row_col = path.file_name().unwrap_or_default().to_string_lossy();
        if maybe_file_name_with_row_col.is_empty() {
            // No file name to inspect: the untrimmed input is used as the path.
            return Self {
                path: Path::new(s).to_path_buf(),
                row: None,
                column: None,
            };
        }

        // Let's avoid repeated init cost on this. It is subject to thread contention, but
        // so far this code isn't called from multiple hot paths. Getting contention here
        // in the future seems unlikely.
        static SUFFIX_RE: LazyLock<Regex> =
            LazyLock::new(|| Regex::new(ROW_COL_CAPTURE_REGEX).unwrap());
        match SUFFIX_RE
            .captures(&maybe_file_name_with_row_col)
            .map(|caps| caps.extract())
        {
            Some((_, [file_name, maybe_row, maybe_column])) => {
                // An empty capture fails to parse and therefore becomes `None`.
                let row = maybe_row.parse::<u32>().ok();
                let column = maybe_column.parse::<u32>().ok();

                // Everything after the captured file name is the suffix; cut the
                // same number of bytes off the end of the trimmed input so that
                // leading directories are kept.
                let suffix_length = maybe_file_name_with_row_col.len() - file_name.len();
                let path_without_suffix = &trimmed[..trimmed.len() - suffix_length];

                Self {
                    path: Path::new(path_without_suffix).to_path_buf(),
                    row,
                    column,
                }
            }
            None => {
                // The `ROW_COL_CAPTURE_REGEX` deals with separated digits only,
                // but in reality there could be `foo/bar.py:22:in` inputs which we want to match too.
                // The regex mentioned is not very extendable with "digit or random string" checks, so do this here instead.
                let delimiter = ':';
                // Split off at most two trailing `:`-separated parts, then restore
                // left-to-right order: path, maybe-row, maybe-column.
                // Note: this branch works on the untrimmed input `s`.
                let mut path_parts = s
                    .rsplitn(3, delimiter)
                    .collect::<Vec<_>>()
                    .into_iter()
                    .rev()
                    .fuse();
                let mut path_string = path_parts.next().expect("rsplitn should have the rest of the string as its last parameter that we reversed").to_owned();
                let mut row = None;
                let mut column = None;
                if let Some(maybe_row) = path_parts.next() {
                    if let Ok(parsed_row) = maybe_row.parse::<u32>() {
                        row = Some(parsed_row);
                        // A non-numeric part after the row (e.g. `in`) is dropped.
                        if let Some(parsed_column) = path_parts
                            .next()
                            .and_then(|maybe_col| maybe_col.parse::<u32>().ok())
                        {
                            column = Some(parsed_column);
                        }
                    } else {
                        // Not a row number: the part belongs to the path.
                        path_string.push(delimiter);
                        path_string.push_str(maybe_row);
                    }
                }
                // Any part not consumed above is appended back onto the path.
                for split in path_parts {
                    path_string.push(delimiter);
                    path_string.push_str(split);
                }

                Self {
                    path: PathBuf::from(path_string),
                    row,
                    column,
                }
            }
        }
    }

    /// Applies `mapping` to the path, keeping row and column unchanged.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `mapping`, if any.
    pub fn map_path<E>(
        self,
        mapping: impl FnOnce(PathBuf) -> Result<PathBuf, E>,
    ) -> Result<PathWithPosition, E> {
        Ok(PathWithPosition {
            path: mapping(self.path)?,
            row: self.row,
            column: self.column,
        })
    }

    /// Formats the value as `path`, `path:row` or `path:row:column`, using
    /// `path_to_string` to render the path. A column without a row is not printed.
    pub fn to_string(&self, path_to_string: impl Fn(&PathBuf) -> String) -> String {
        let path_string = path_to_string(&self.path);
        if let Some(row) = self.row {
            if let Some(column) = self.column {
                format!("{path_string}:{row}:{column}")
            } else {
                format!("{path_string}:{row}")
            }
        } else {
            path_string
        }
    }
}
664
/// Matches paths against a set of glob patterns.
#[derive(Clone, Debug, Default)]
pub struct PathMatcher {
    /// The glob pattern strings the matcher was built from.
    sources: Vec<String>,
    /// The compiled set of all patterns in `sources`.
    glob: GlobSet,
}
670
671// impl std::fmt::Display for PathMatcher {
672// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
673// self.sources.fmt(f)
674// }
675// }
676
677impl PartialEq for PathMatcher {
678 fn eq(&self, other: &Self) -> bool {
679 self.sources.eq(&other.sources)
680 }
681}
682
683impl Eq for PathMatcher {}
684
685impl PathMatcher {
686 pub fn new(globs: impl IntoIterator<Item = impl AsRef<str>>) -> Result<Self, globset::Error> {
687 let globs = globs
688 .into_iter()
689 .map(|as_str| Glob::new(as_str.as_ref()))
690 .collect::<Result<Vec<_>, _>>()?;
691 let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
692 let mut glob_builder = GlobSetBuilder::new();
693 for single_glob in globs {
694 glob_builder.add(single_glob);
695 }
696 let glob = glob_builder.build()?;
697 Ok(PathMatcher { glob, sources })
698 }
699
700 pub fn sources(&self) -> &[String] {
701 &self.sources
702 }
703
704 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
705 let other_path = other.as_ref();
706 self.sources.iter().any(|source| {
707 let as_bytes = other_path.as_os_str().as_encoded_bytes();
708 as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
709 }) || self.glob.is_match(other_path)
710 || self.check_with_end_separator(other_path)
711 }
712
713 fn check_with_end_separator(&self, path: &Path) -> bool {
714 let path_str = path.to_string_lossy();
715 let separator = std::path::MAIN_SEPARATOR_STR;
716 if path_str.ends_with(separator) {
717 false
718 } else {
719 self.glob.is_match(path_str.to_string() + separator)
720 }
721 }
722}
723
/// Orders two characters ignoring ASCII case, breaking ties so that an ASCII
/// lowercase letter sorts before its uppercase counterpart.
fn compare_chars(a: char, b: char) -> Ordering {
    let case_insensitive = a.to_ascii_lowercase().cmp(&b.to_ascii_lowercase());
    if case_insensitive != Ordering::Equal {
        return case_insensitive;
    }

    // Same letter: `true` (lowercase) must come first, so compare the flags in
    // reverse. Two characters of the same case, or two non-letters, stay equal.
    b.is_ascii_lowercase().cmp(&a.is_ascii_lowercase())
}
739
/// Compares two sequences of consecutive digits for natural sorting.
///
/// Both iterators are advanced past their leading run of ASCII digits, and the
/// two runs are compared the way a human would compare the numbers they spell.
///
/// # Behavior
///
/// 1. Different numeric values: ordered by numeric value (e.g., "2" < "10").
/// 2. Equal numeric values: the run with more leading zeros is greater
///    (e.g., "002" > "2").
/// 3. Arbitrarily long runs are supported: the comparison never converts the
///    digits to an integer, so it cannot overflow.
///
/// # Examples
///
/// ```text
/// "1" vs "2"            -> Less    (different values)
/// "2" vs "10"           -> Less    (numeric comparison)
/// "002" vs "2"          -> Greater (leading zeros)
/// "10" vs "010"         -> Less    (leading zeros)
/// "999..." vs "1000..." -> Less    (fewer significant digits)
/// ```
///
/// # Implementation Details
///
/// 1. Extracts the consecutive digits of each side into a string.
/// 2. Ignores leading zeros and compares the number of significant digits:
///    the number with fewer significant digits is smaller.
/// 3. With equally many significant digits, compares them left to right.
/// 4. If the values are equal, the longer run (more leading zeros) is greater.
///
/// The function advances both iterators past their respective numeric sequences,
/// regardless of the comparison result.
fn compare_numeric_segments<I>(
    a_iter: &mut std::iter::Peekable<I>,
    b_iter: &mut std::iter::Peekable<I>,
) -> Ordering
where
    I: Iterator<Item = char>,
{
    /// Removes the leading run of ASCII digits from `iter` and returns it.
    fn take_digits<It>(iter: &mut std::iter::Peekable<It>) -> String
    where
        It: Iterator<Item = char>,
    {
        let mut digits = String::new();
        while let Some(digit) = iter.next_if(char::is_ascii_digit) {
            digits.push(digit);
        }
        digits
    }

    let a_digits = take_digits(a_iter);
    let b_digits = take_digits(b_iter);

    // Leading zeros do not change the value; they only matter as a tie-break.
    let a_significant = a_digits.trim_start_matches('0');
    let b_significant = b_digits.trim_start_matches('0');

    a_significant
        .len()
        .cmp(&b_significant.len())
        // Same number of significant ASCII digits: lexicographic order is numeric order.
        .then_with(|| a_significant.cmp(b_significant))
        // Same value: the run with more leading zeros is considered greater.
        .then_with(|| a_digits.len().cmp(&b_digits.len()))
}
828
829/// Performs natural sorting comparison between two strings.
830///
831/// Natural sorting is an ordering that handles numeric sequences in a way that matches human expectations.
832/// For example, "file2" comes before "file10" (unlike standard lexicographic sorting).
833///
834/// # Characteristics
835///
836/// * Case-sensitive with lowercase priority: When comparing same letters, lowercase comes before uppercase
837/// * Numbers are compared by numeric value, not character by character
838/// * Leading zeros affect ordering when numeric values are equal
839/// * Can handle numbers larger than u128::MAX (falls back to string comparison)
840///
841/// # Algorithm
842///
843/// The function works by:
844/// 1. Processing strings character by character
845/// 2. When encountering digits, treating consecutive digits as a single number
846/// 3. Comparing numbers by their numeric value rather than lexicographically
847/// 4. For non-numeric characters, using case-sensitive comparison with lowercase priority
848fn natural_sort(a: &str, b: &str) -> Ordering {
849 let mut a_iter = a.chars().peekable();
850 let mut b_iter = b.chars().peekable();
851
852 loop {
853 match (a_iter.peek(), b_iter.peek()) {
854 (None, None) => return Ordering::Equal,
855 (None, _) => return Ordering::Less,
856 (_, None) => return Ordering::Greater,
857 (Some(&a_char), Some(&b_char)) => {
858 if a_char.is_ascii_digit() && b_char.is_ascii_digit() {
859 match compare_numeric_segments(&mut a_iter, &mut b_iter) {
860 Ordering::Equal => continue,
861 ordering => return ordering,
862 }
863 } else {
864 match compare_chars(a_char, b_char) {
865 Ordering::Equal => {
866 a_iter.next();
867 b_iter.next();
868 }
869 ordering => return ordering,
870 }
871 }
872 }
873 }
874 }
875}
876
877pub fn compare_paths(
878 (path_a, a_is_file): (&SanitizedPath, bool),
879 (path_b, b_is_file): (&SanitizedPath, bool),
880) -> Ordering {
881 let mut components_a = path_a.as_path().components().peekable();
882 let mut components_b = path_b.as_path().components().peekable();
883
884 loop {
885 match (components_a.next(), components_b.next()) {
886 (Some(component_a), Some(component_b)) => {
887 let a_is_file = components_a.peek().is_none() && a_is_file;
888 let b_is_file = components_b.peek().is_none() && b_is_file;
889
890 let ordering = a_is_file.cmp(&b_is_file).then_with(|| {
891 let path_a = Path::new(component_a.as_os_str());
892 let path_string_a = if a_is_file {
893 path_a.file_stem()
894 } else {
895 path_a.file_name()
896 }
897 .map(|s| s.to_string_lossy());
898
899 let path_b = Path::new(component_b.as_os_str());
900 let path_string_b = if b_is_file {
901 path_b.file_stem()
902 } else {
903 path_b.file_name()
904 }
905 .map(|s| s.to_string_lossy());
906
907 let compare_components = match (path_string_a, path_string_b) {
908 (Some(a), Some(b)) => natural_sort(&a, &b),
909 (Some(_), None) => Ordering::Greater,
910 (None, Some(_)) => Ordering::Less,
911 (None, None) => Ordering::Equal,
912 };
913
914 compare_components.then_with(|| {
915 if a_is_file && b_is_file {
916 let ext_a = path_a.extension().unwrap_or_default();
917 let ext_b = path_b.extension().unwrap_or_default();
918 ext_a.cmp(ext_b)
919 } else {
920 Ordering::Equal
921 }
922 })
923 });
924
925 if !ordering.is_eq() {
926 return ordering;
927 }
928 }
929 (Some(_), None) => break Ordering::Greater,
930 (None, Some(_)) => break Ordering::Less,
931 (None, None) => break Ordering::Equal,
932 }
933 }
934}
935
936#[cfg(test)]
937mod tests {
938 use super::*;
939
940 #[test]
941 fn compare_paths_with_dots() {
942 let mut paths = vec![
943 (SanitizedPath::new("test_dirs"), false),
944 (SanitizedPath::new("test_dirs/1.46"), false),
945 (SanitizedPath::new("test_dirs/1.46/bar_1"), true),
946 (SanitizedPath::new("test_dirs/1.46/bar_2"), true),
947 (SanitizedPath::new("test_dirs/1.45"), false),
948 (SanitizedPath::new("test_dirs/1.45/foo_2"), true),
949 (SanitizedPath::new("test_dirs/1.45/foo_1"), true),
950 ];
951 paths.sort_by(|&a, &b| compare_paths(a, b));
952 assert_eq!(
953 paths,
954 vec![
955 (SanitizedPath::new("test_dirs"), false),
956 (SanitizedPath::new("test_dirs/1.45"), false),
957 (SanitizedPath::new("test_dirs/1.45/foo_1"), true),
958 (SanitizedPath::new("test_dirs/1.45/foo_2"), true),
959 (SanitizedPath::new("test_dirs/1.46"), false),
960 (SanitizedPath::new("test_dirs/1.46/bar_1"), true),
961 (SanitizedPath::new("test_dirs/1.46/bar_2"), true),
962 ]
963 );
964 let mut paths = vec![
965 (SanitizedPath::new("root1/one.txt"), true),
966 (SanitizedPath::new("root1/one.two.txt"), true),
967 ];
968 paths.sort_by(|&a, &b| compare_paths(a, b));
969 assert_eq!(
970 paths,
971 vec![
972 (SanitizedPath::new("root1/one.txt"), true),
973 (SanitizedPath::new("root1/one.two.txt"), true),
974 ]
975 );
976 }
977
978 #[test]
979 fn compare_paths_with_same_name_different_extensions() {
980 let mut paths = vec![
981 (SanitizedPath::new("test_dirs/file.rs"), true),
982 (SanitizedPath::new("test_dirs/file.txt"), true),
983 (SanitizedPath::new("test_dirs/file.md"), true),
984 (SanitizedPath::new("test_dirs/file"), true),
985 (SanitizedPath::new("test_dirs/file.a"), true),
986 ];
987 paths.sort_by(|&a, &b| compare_paths(a, b));
988 assert_eq!(
989 paths,
990 vec![
991 (SanitizedPath::new("test_dirs/file"), true),
992 (SanitizedPath::new("test_dirs/file.a"), true),
993 (SanitizedPath::new("test_dirs/file.md"), true),
994 (SanitizedPath::new("test_dirs/file.rs"), true),
995 (SanitizedPath::new("test_dirs/file.txt"), true),
996 ]
997 );
998 }
999
1000 #[test]
1001 fn compare_paths_case_semi_sensitive() {
1002 let mut paths = vec![
1003 (SanitizedPath::new("test_DIRS"), false),
1004 (SanitizedPath::new("test_DIRS/foo_1"), true),
1005 (SanitizedPath::new("test_DIRS/foo_2"), true),
1006 (SanitizedPath::new("test_DIRS/bar"), true),
1007 (SanitizedPath::new("test_DIRS/BAR"), true),
1008 (SanitizedPath::new("test_dirs"), false),
1009 (SanitizedPath::new("test_dirs/foo_1"), true),
1010 (SanitizedPath::new("test_dirs/foo_2"), true),
1011 (SanitizedPath::new("test_dirs/bar"), true),
1012 (SanitizedPath::new("test_dirs/BAR"), true),
1013 ];
1014 paths.sort_by(|&a, &b| compare_paths(a, b));
1015 assert_eq!(
1016 paths,
1017 vec![
1018 (SanitizedPath::new("test_dirs"), false),
1019 (SanitizedPath::new("test_dirs/bar"), true),
1020 (SanitizedPath::new("test_dirs/BAR"), true),
1021 (SanitizedPath::new("test_dirs/foo_1"), true),
1022 (SanitizedPath::new("test_dirs/foo_2"), true),
1023 (SanitizedPath::new("test_DIRS"), false),
1024 (SanitizedPath::new("test_DIRS/bar"), true),
1025 (SanitizedPath::new("test_DIRS/BAR"), true),
1026 (SanitizedPath::new("test_DIRS/foo_1"), true),
1027 (SanitizedPath::new("test_DIRS/foo_2"), true),
1028 ]
1029 );
1030 }
1031
1032 #[test]
1033 fn path_with_position_parse_posix_path() {
1034 // Test POSIX filename edge cases
1035 // Read more at https://en.wikipedia.org/wiki/Filename
1036 assert_eq!(
1037 PathWithPosition::parse_str("test_file"),
1038 PathWithPosition {
1039 path: PathBuf::from("test_file"),
1040 row: None,
1041 column: None
1042 }
1043 );
1044
1045 assert_eq!(
1046 PathWithPosition::parse_str("a:bc:.zip:1"),
1047 PathWithPosition {
1048 path: PathBuf::from("a:bc:.zip"),
1049 row: Some(1),
1050 column: None
1051 }
1052 );
1053
1054 assert_eq!(
1055 PathWithPosition::parse_str("one.second.zip:1"),
1056 PathWithPosition {
1057 path: PathBuf::from("one.second.zip"),
1058 row: Some(1),
1059 column: None
1060 }
1061 );
1062
1063 // Trim off trailing `:`s for otherwise valid input.
1064 assert_eq!(
1065 PathWithPosition::parse_str("test_file:10:1:"),
1066 PathWithPosition {
1067 path: PathBuf::from("test_file"),
1068 row: Some(10),
1069 column: Some(1)
1070 }
1071 );
1072
1073 assert_eq!(
1074 PathWithPosition::parse_str("test_file.rs:"),
1075 PathWithPosition {
1076 path: PathBuf::from("test_file.rs:"),
1077 row: None,
1078 column: None
1079 }
1080 );
1081
1082 assert_eq!(
1083 PathWithPosition::parse_str("test_file.rs:1:"),
1084 PathWithPosition {
1085 path: PathBuf::from("test_file.rs"),
1086 row: Some(1),
1087 column: None
1088 }
1089 );
1090
1091 assert_eq!(
1092 PathWithPosition::parse_str("ab\ncd"),
1093 PathWithPosition {
1094 path: PathBuf::from("ab\ncd"),
1095 row: None,
1096 column: None
1097 }
1098 );
1099
1100 assert_eq!(
1101 PathWithPosition::parse_str("👋\nab"),
1102 PathWithPosition {
1103 path: PathBuf::from("👋\nab"),
1104 row: None,
1105 column: None
1106 }
1107 );
1108
1109 assert_eq!(
1110 PathWithPosition::parse_str("Types.hs:(617,9)-(670,28):"),
1111 PathWithPosition {
1112 path: PathBuf::from("Types.hs"),
1113 row: Some(617),
1114 column: Some(9),
1115 }
1116 );
1117 }
1118
1119 #[test]
1120 #[cfg(not(target_os = "windows"))]
1121 fn path_with_position_parse_posix_path_with_suffix() {
1122 assert_eq!(
1123 PathWithPosition::parse_str("foo/bar:34:in"),
1124 PathWithPosition {
1125 path: PathBuf::from("foo/bar"),
1126 row: Some(34),
1127 column: None,
1128 }
1129 );
1130 assert_eq!(
1131 PathWithPosition::parse_str("foo/bar.rs:1902:::15:"),
1132 PathWithPosition {
1133 path: PathBuf::from("foo/bar.rs:1902"),
1134 row: Some(15),
1135 column: None
1136 }
1137 );
1138
1139 assert_eq!(
1140 PathWithPosition::parse_str("app-editors:zed-0.143.6:20240710-201212.log:34:"),
1141 PathWithPosition {
1142 path: PathBuf::from("app-editors:zed-0.143.6:20240710-201212.log"),
1143 row: Some(34),
1144 column: None,
1145 }
1146 );
1147
1148 assert_eq!(
1149 PathWithPosition::parse_str("crates/file_finder/src/file_finder.rs:1902:13:"),
1150 PathWithPosition {
1151 path: PathBuf::from("crates/file_finder/src/file_finder.rs"),
1152 row: Some(1902),
1153 column: Some(13),
1154 }
1155 );
1156
1157 assert_eq!(
1158 PathWithPosition::parse_str("crate/utils/src/test:today.log:34"),
1159 PathWithPosition {
1160 path: PathBuf::from("crate/utils/src/test:today.log"),
1161 row: Some(34),
1162 column: None,
1163 }
1164 );
1165 assert_eq!(
1166 PathWithPosition::parse_str("/testing/out/src/file_finder.odin(7:15)"),
1167 PathWithPosition {
1168 path: PathBuf::from("/testing/out/src/file_finder.odin"),
1169 row: Some(7),
1170 column: Some(15),
1171 }
1172 );
1173 }
1174
1175 #[test]
1176 #[cfg(target_os = "windows")]
1177 fn path_with_position_parse_windows_path() {
1178 assert_eq!(
1179 PathWithPosition::parse_str("crates\\utils\\paths.rs"),
1180 PathWithPosition {
1181 path: PathBuf::from("crates\\utils\\paths.rs"),
1182 row: None,
1183 column: None
1184 }
1185 );
1186
1187 assert_eq!(
1188 PathWithPosition::parse_str("C:\\Users\\someone\\test_file.rs"),
1189 PathWithPosition {
1190 path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
1191 row: None,
1192 column: None
1193 }
1194 );
1195 }
1196
1197 #[test]
1198 #[cfg(target_os = "windows")]
1199 fn path_with_position_parse_windows_path_with_suffix() {
1200 assert_eq!(
1201 PathWithPosition::parse_str("crates\\utils\\paths.rs:101"),
1202 PathWithPosition {
1203 path: PathBuf::from("crates\\utils\\paths.rs"),
1204 row: Some(101),
1205 column: None
1206 }
1207 );
1208
1209 assert_eq!(
1210 PathWithPosition::parse_str("\\\\?\\C:\\Users\\someone\\test_file.rs:1:20"),
1211 PathWithPosition {
1212 path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
1213 row: Some(1),
1214 column: Some(20)
1215 }
1216 );
1217
1218 assert_eq!(
1219 PathWithPosition::parse_str("C:\\Users\\someone\\test_file.rs(1902,13)"),
1220 PathWithPosition {
1221 path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
1222 row: Some(1902),
1223 column: Some(13)
1224 }
1225 );
1226
1227 // Trim off trailing `:`s for otherwise valid input.
1228 assert_eq!(
1229 PathWithPosition::parse_str("\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:"),
1230 PathWithPosition {
1231 path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
1232 row: Some(1902),
1233 column: Some(13)
1234 }
1235 );
1236
1237 assert_eq!(
1238 PathWithPosition::parse_str("\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:15:"),
1239 PathWithPosition {
1240 path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs:1902"),
1241 row: Some(13),
1242 column: Some(15)
1243 }
1244 );
1245
1246 assert_eq!(
1247 PathWithPosition::parse_str("\\\\?\\C:\\Users\\someone\\test_file.rs:1902:::15:"),
1248 PathWithPosition {
1249 path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs:1902"),
1250 row: Some(15),
1251 column: None
1252 }
1253 );
1254
1255 assert_eq!(
1256 PathWithPosition::parse_str("\\\\?\\C:\\Users\\someone\\test_file.rs(1902,13):"),
1257 PathWithPosition {
1258 path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
1259 row: Some(1902),
1260 column: Some(13),
1261 }
1262 );
1263
1264 assert_eq!(
1265 PathWithPosition::parse_str("\\\\?\\C:\\Users\\someone\\test_file.rs(1902):"),
1266 PathWithPosition {
1267 path: PathBuf::from("\\\\?\\C:\\Users\\someone\\test_file.rs"),
1268 row: Some(1902),
1269 column: None,
1270 }
1271 );
1272
1273 assert_eq!(
1274 PathWithPosition::parse_str("C:\\Users\\someone\\test_file.rs:1902:13:"),
1275 PathWithPosition {
1276 path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
1277 row: Some(1902),
1278 column: Some(13),
1279 }
1280 );
1281
1282 assert_eq!(
1283 PathWithPosition::parse_str("C:\\Users\\someone\\test_file.rs(1902,13):"),
1284 PathWithPosition {
1285 path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
1286 row: Some(1902),
1287 column: Some(13),
1288 }
1289 );
1290
1291 assert_eq!(
1292 PathWithPosition::parse_str("C:\\Users\\someone\\test_file.rs(1902):"),
1293 PathWithPosition {
1294 path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
1295 row: Some(1902),
1296 column: None,
1297 }
1298 );
1299
1300 assert_eq!(
1301 PathWithPosition::parse_str("crates/utils/paths.rs:101"),
1302 PathWithPosition {
1303 path: PathBuf::from("crates\\utils\\paths.rs"),
1304 row: Some(101),
1305 column: None,
1306 }
1307 );
1308 }
1309
1310 #[test]
1311 fn test_path_compact() {
1312 let path: PathBuf = [
1313 home_dir().to_string_lossy().to_string(),
1314 "some_file.txt".to_string(),
1315 ]
1316 .iter()
1317 .collect();
1318 if cfg!(any(target_os = "linux", target_os = "freebsd")) || cfg!(target_os = "macos") {
1319 assert_eq!(path.compact().to_str(), Some("~/some_file.txt"));
1320 } else {
1321 assert_eq!(path.compact().to_str(), path.to_str());
1322 }
1323 }
1324
1325 #[test]
1326 fn test_extension_or_hidden_file_name() {
1327 // No dots in name
1328 let path = Path::new("/a/b/c/file_name.rs");
1329 assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));
1330
1331 // Single dot in name
1332 let path = Path::new("/a/b/c/file.name.rs");
1333 assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));
1334
1335 // Multiple dots in name
1336 let path = Path::new("/a/b/c/long.file.name.rs");
1337 assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));
1338
1339 // Hidden file, no extension
1340 let path = Path::new("/a/b/c/.gitignore");
1341 assert_eq!(path.extension_or_hidden_file_name(), Some("gitignore"));
1342
1343 // Hidden file, with extension
1344 let path = Path::new("/a/b/c/.eslintrc.js");
1345 assert_eq!(path.extension_or_hidden_file_name(), Some("eslintrc.js"));
1346 }
1347
1348 #[test]
1349 fn edge_of_glob() {
1350 let path = Path::new("/work/node_modules");
1351 let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
1352 assert!(
1353 path_matcher.is_match(path),
1354 "Path matcher should match {path:?}"
1355 );
1356 }
1357
1358 #[test]
1359 fn project_search() {
1360 let path = Path::new("/Users/someonetoignore/work/zed/zed.dev/node_modules");
1361 let path_matcher = PathMatcher::new(&["**/node_modules/**".to_owned()]).unwrap();
1362 assert!(
1363 path_matcher.is_match(path),
1364 "Path matcher should match {path:?}"
1365 );
1366 }
1367
1368 #[test]
1369 #[cfg(target_os = "windows")]
1370 fn test_sanitized_path() {
1371 let path = Path::new("C:\\Users\\someone\\test_file.rs");
1372 let sanitized_path = SanitizedPath::new(path);
1373 assert_eq!(
1374 sanitized_path.to_string(),
1375 "C:\\Users\\someone\\test_file.rs"
1376 );
1377
1378 let path = Path::new("\\\\?\\C:\\Users\\someone\\test_file.rs");
1379 let sanitized_path = SanitizedPath::new(path);
1380 assert_eq!(
1381 sanitized_path.to_string(),
1382 "C:\\Users\\someone\\test_file.rs"
1383 );
1384 }
1385
1386 #[test]
1387 fn test_compare_numeric_segments() {
1388 // Helper function to create peekable iterators and test
1389 fn compare(a: &str, b: &str) -> Ordering {
1390 let mut a_iter = a.chars().peekable();
1391 let mut b_iter = b.chars().peekable();
1392
1393 let result = compare_numeric_segments(&mut a_iter, &mut b_iter);
1394
1395 // Verify iterators advanced correctly
1396 assert!(
1397 !a_iter.next().is_some_and(|c| c.is_ascii_digit()),
1398 "Iterator a should have consumed all digits"
1399 );
1400 assert!(
1401 !b_iter.next().is_some_and(|c| c.is_ascii_digit()),
1402 "Iterator b should have consumed all digits"
1403 );
1404
1405 result
1406 }
1407
1408 // Basic numeric comparisons
1409 assert_eq!(compare("0", "0"), Ordering::Equal);
1410 assert_eq!(compare("1", "2"), Ordering::Less);
1411 assert_eq!(compare("9", "10"), Ordering::Less);
1412 assert_eq!(compare("10", "9"), Ordering::Greater);
1413 assert_eq!(compare("99", "100"), Ordering::Less);
1414
1415 // Leading zeros
1416 assert_eq!(compare("0", "00"), Ordering::Less);
1417 assert_eq!(compare("00", "0"), Ordering::Greater);
1418 assert_eq!(compare("01", "1"), Ordering::Greater);
1419 assert_eq!(compare("001", "1"), Ordering::Greater);
1420 assert_eq!(compare("001", "01"), Ordering::Greater);
1421
1422 // Same value different representation
1423 assert_eq!(compare("000100", "100"), Ordering::Greater);
1424 assert_eq!(compare("100", "0100"), Ordering::Less);
1425 assert_eq!(compare("0100", "00100"), Ordering::Less);
1426
1427 // Large numbers
1428 assert_eq!(compare("9999999999", "10000000000"), Ordering::Less);
1429 assert_eq!(
1430 compare(
1431 "340282366920938463463374607431768211455", // u128::MAX
1432 "340282366920938463463374607431768211456"
1433 ),
1434 Ordering::Less
1435 );
1436 assert_eq!(
1437 compare(
1438 "340282366920938463463374607431768211456", // > u128::MAX
1439 "340282366920938463463374607431768211455"
1440 ),
1441 Ordering::Greater
1442 );
1443
1444 // Iterator advancement verification
1445 let mut a_iter = "123abc".chars().peekable();
1446 let mut b_iter = "456def".chars().peekable();
1447
1448 compare_numeric_segments(&mut a_iter, &mut b_iter);
1449
1450 assert_eq!(a_iter.collect::<String>(), "abc");
1451 assert_eq!(b_iter.collect::<String>(), "def");
1452 }
1453
1454 #[test]
1455 fn test_natural_sort() {
1456 // Basic alphanumeric
1457 assert_eq!(natural_sort("a", "b"), Ordering::Less);
1458 assert_eq!(natural_sort("b", "a"), Ordering::Greater);
1459 assert_eq!(natural_sort("a", "a"), Ordering::Equal);
1460
1461 // Case sensitivity
1462 assert_eq!(natural_sort("a", "A"), Ordering::Less);
1463 assert_eq!(natural_sort("A", "a"), Ordering::Greater);
1464 assert_eq!(natural_sort("aA", "aa"), Ordering::Greater);
1465 assert_eq!(natural_sort("aa", "aA"), Ordering::Less);
1466
1467 // Numbers
1468 assert_eq!(natural_sort("1", "2"), Ordering::Less);
1469 assert_eq!(natural_sort("2", "10"), Ordering::Less);
1470 assert_eq!(natural_sort("02", "10"), Ordering::Less);
1471 assert_eq!(natural_sort("02", "2"), Ordering::Greater);
1472
1473 // Mixed alphanumeric
1474 assert_eq!(natural_sort("a1", "a2"), Ordering::Less);
1475 assert_eq!(natural_sort("a2", "a10"), Ordering::Less);
1476 assert_eq!(natural_sort("a02", "a2"), Ordering::Greater);
1477 assert_eq!(natural_sort("a1b", "a1c"), Ordering::Less);
1478
1479 // Multiple numeric segments
1480 assert_eq!(natural_sort("1a2", "1a10"), Ordering::Less);
1481 assert_eq!(natural_sort("1a10", "1a2"), Ordering::Greater);
1482 assert_eq!(natural_sort("2a1", "10a1"), Ordering::Less);
1483
1484 // Special characters
1485 assert_eq!(natural_sort("a-1", "a-2"), Ordering::Less);
1486 assert_eq!(natural_sort("a_1", "a_2"), Ordering::Less);
1487 assert_eq!(natural_sort("a.1", "a.2"), Ordering::Less);
1488
1489 // Unicode
1490 assert_eq!(natural_sort("文1", "文2"), Ordering::Less);
1491 assert_eq!(natural_sort("文2", "文10"), Ordering::Less);
1492 assert_eq!(natural_sort("🔤1", "🔤2"), Ordering::Less);
1493
1494 // Empty and special cases
1495 assert_eq!(natural_sort("", ""), Ordering::Equal);
1496 assert_eq!(natural_sort("", "a"), Ordering::Less);
1497 assert_eq!(natural_sort("a", ""), Ordering::Greater);
1498 assert_eq!(natural_sort(" ", " "), Ordering::Less);
1499
1500 // Mixed everything
1501 assert_eq!(natural_sort("File-1.txt", "File-2.txt"), Ordering::Less);
1502 assert_eq!(natural_sort("File-02.txt", "File-2.txt"), Ordering::Greater);
1503 assert_eq!(natural_sort("File-2.txt", "File-10.txt"), Ordering::Less);
1504 assert_eq!(natural_sort("File_A1", "File_A2"), Ordering::Less);
1505 assert_eq!(natural_sort("File_a1", "File_A1"), Ordering::Less);
1506 }
1507
1508 #[test]
1509 fn test_compare_paths() {
1510 // Helper function for cleaner tests
1511 fn compare(a: &str, is_a_file: bool, b: &str, is_b_file: bool) -> Ordering {
1512 compare_paths(
1513 (SanitizedPath::new(a), is_a_file),
1514 (SanitizedPath::new(b), is_b_file),
1515 )
1516 }
1517
1518 // Basic path comparison
1519 assert_eq!(compare("a", true, "b", true), Ordering::Less);
1520 assert_eq!(compare("b", true, "a", true), Ordering::Greater);
1521 assert_eq!(compare("a", true, "a", true), Ordering::Equal);
1522
1523 // Files vs Directories
1524 assert_eq!(compare("a", true, "a", false), Ordering::Greater);
1525 assert_eq!(compare("a", false, "a", true), Ordering::Less);
1526 assert_eq!(compare("b", false, "a", true), Ordering::Less);
1527
1528 // Extensions
1529 assert_eq!(compare("a.txt", true, "a.md", true), Ordering::Greater);
1530 assert_eq!(compare("a.md", true, "a.txt", true), Ordering::Less);
1531 assert_eq!(compare("a", true, "a.txt", true), Ordering::Less);
1532
1533 // Nested paths
1534 assert_eq!(compare("dir/a", true, "dir/b", true), Ordering::Less);
1535 assert_eq!(compare("dir1/a", true, "dir2/a", true), Ordering::Less);
1536 assert_eq!(compare("dir/sub/a", true, "dir/a", true), Ordering::Less);
1537
1538 // Case sensitivity in paths
1539 assert_eq!(
1540 compare("Dir/file", true, "dir/file", true),
1541 Ordering::Greater
1542 );
1543 assert_eq!(
1544 compare("dir/File", true, "dir/file", true),
1545 Ordering::Greater
1546 );
1547 assert_eq!(compare("dir/file", true, "Dir/File", true), Ordering::Less);
1548
1549 // Hidden files and special names
1550 assert_eq!(compare(".hidden", true, "visible", true), Ordering::Less);
1551 assert_eq!(compare("_special", true, "normal", true), Ordering::Less);
1552 assert_eq!(compare(".config", false, ".data", false), Ordering::Less);
1553
1554 // Mixed numeric paths
1555 assert_eq!(
1556 compare("dir1/file", true, "dir2/file", true),
1557 Ordering::Less
1558 );
1559 assert_eq!(
1560 compare("dir2/file", true, "dir10/file", true),
1561 Ordering::Less
1562 );
1563 assert_eq!(
1564 compare("dir02/file", true, "dir2/file", true),
1565 Ordering::Greater
1566 );
1567
1568 // Root paths
1569 assert_eq!(compare("/a", true, "/b", true), Ordering::Less);
1570 assert_eq!(compare("/", false, "/a", true), Ordering::Less);
1571
1572 // Complex real-world examples
1573 assert_eq!(
1574 compare("project/src/main.rs", true, "project/src/lib.rs", true),
1575 Ordering::Greater
1576 );
1577 assert_eq!(
1578 compare(
1579 "project/tests/test_1.rs",
1580 true,
1581 "project/tests/test_2.rs",
1582 true
1583 ),
1584 Ordering::Less
1585 );
1586 assert_eq!(
1587 compare(
1588 "project/v1.0.0/README.md",
1589 true,
1590 "project/v1.10.0/README.md",
1591 true
1592 ),
1593 Ordering::Less
1594 );
1595 }
1596
1597 #[test]
1598 fn test_natural_sort_case_sensitivity() {
1599 // Same letter different case - lowercase should come first
1600 assert_eq!(natural_sort("a", "A"), Ordering::Less);
1601 assert_eq!(natural_sort("A", "a"), Ordering::Greater);
1602 assert_eq!(natural_sort("a", "a"), Ordering::Equal);
1603 assert_eq!(natural_sort("A", "A"), Ordering::Equal);
1604
1605 // Mixed case strings
1606 assert_eq!(natural_sort("aaa", "AAA"), Ordering::Less);
1607 assert_eq!(natural_sort("AAA", "aaa"), Ordering::Greater);
1608 assert_eq!(natural_sort("aAa", "AaA"), Ordering::Less);
1609
1610 // Different letters
1611 assert_eq!(natural_sort("a", "b"), Ordering::Less);
1612 assert_eq!(natural_sort("A", "b"), Ordering::Less);
1613 assert_eq!(natural_sort("a", "B"), Ordering::Less);
1614 }
1615
1616 #[test]
1617 fn test_natural_sort_with_numbers() {
1618 // Basic number ordering
1619 assert_eq!(natural_sort("file1", "file2"), Ordering::Less);
1620 assert_eq!(natural_sort("file2", "file10"), Ordering::Less);
1621 assert_eq!(natural_sort("file10", "file2"), Ordering::Greater);
1622
1623 // Numbers in different positions
1624 assert_eq!(natural_sort("1file", "2file"), Ordering::Less);
1625 assert_eq!(natural_sort("file1text", "file2text"), Ordering::Less);
1626 assert_eq!(natural_sort("text1file", "text2file"), Ordering::Less);
1627
1628 // Multiple numbers in string
1629 assert_eq!(natural_sort("file1-2", "file1-10"), Ordering::Less);
1630 assert_eq!(natural_sort("2-1file", "10-1file"), Ordering::Less);
1631
1632 // Leading zeros
1633 assert_eq!(natural_sort("file002", "file2"), Ordering::Greater);
1634 assert_eq!(natural_sort("file002", "file10"), Ordering::Less);
1635
1636 // Very large numbers
1637 assert_eq!(
1638 natural_sort("file999999999999999999999", "file999999999999999999998"),
1639 Ordering::Greater
1640 );
1641
1642 // u128 edge cases
1643
1644 // Numbers near u128::MAX (340,282,366,920,938,463,463,374,607,431,768,211,455)
1645 assert_eq!(
1646 natural_sort(
1647 "file340282366920938463463374607431768211454",
1648 "file340282366920938463463374607431768211455"
1649 ),
1650 Ordering::Less
1651 );
1652
1653 // Equal length numbers that overflow u128
1654 assert_eq!(
1655 natural_sort(
1656 "file340282366920938463463374607431768211456",
1657 "file340282366920938463463374607431768211455"
1658 ),
1659 Ordering::Greater
1660 );
1661
1662 // Different length numbers that overflow u128
1663 assert_eq!(
1664 natural_sort(
1665 "file3402823669209384634633746074317682114560",
1666 "file340282366920938463463374607431768211455"
1667 ),
1668 Ordering::Greater
1669 );
1670
1671 // Leading zeros with numbers near u128::MAX
1672 assert_eq!(
1673 natural_sort(
1674 "file0340282366920938463463374607431768211455",
1675 "file340282366920938463463374607431768211455"
1676 ),
1677 Ordering::Greater
1678 );
1679
1680 // Very large numbers with different lengths (both overflow u128)
1681 assert_eq!(
1682 natural_sort(
1683 "file999999999999999999999999999999999999999999999999",
1684 "file9999999999999999999999999999999999999999999999999"
1685 ),
1686 Ordering::Less
1687 );
1688
1689 // Mixed case with numbers
1690 assert_eq!(natural_sort("File1", "file2"), Ordering::Greater);
1691 assert_eq!(natural_sort("file1", "File2"), Ordering::Less);
1692 }
1693
1694 #[test]
1695 fn test_natural_sort_edge_cases() {
1696 // Empty strings
1697 assert_eq!(natural_sort("", ""), Ordering::Equal);
1698 assert_eq!(natural_sort("", "a"), Ordering::Less);
1699 assert_eq!(natural_sort("a", ""), Ordering::Greater);
1700
1701 // Special characters
1702 assert_eq!(natural_sort("file-1", "file_1"), Ordering::Less);
1703 assert_eq!(natural_sort("file.1", "file_1"), Ordering::Less);
1704 assert_eq!(natural_sort("file 1", "file_1"), Ordering::Less);
1705
1706 // Unicode characters
1707 // 9312 vs 9313
1708 assert_eq!(natural_sort("file①", "file②"), Ordering::Less);
1709 // 9321 vs 9313
1710 assert_eq!(natural_sort("file⑩", "file②"), Ordering::Greater);
1711 // 28450 vs 23383
1712 assert_eq!(natural_sort("file漢", "file字"), Ordering::Greater);
1713
1714 // Mixed alphanumeric with special chars
1715 assert_eq!(natural_sort("file-1a", "file-1b"), Ordering::Less);
1716 assert_eq!(natural_sort("file-1.2", "file-1.10"), Ordering::Less);
1717 assert_eq!(natural_sort("file-1.10", "file-1.2"), Ordering::Greater);
1718 }
1719}