1use globset::{Glob, GlobSet, GlobSetBuilder};
2use regex::Regex;
3use serde::{Deserialize, Serialize};
4use std::cmp::Ordering;
5use std::fmt::{Display, Formatter};
6use std::mem;
7use std::path::StripPrefixError;
8use std::sync::{Arc, OnceLock};
9use std::{
10 ffi::OsStr,
11 path::{Path, PathBuf},
12 sync::LazyLock,
13};
14
15/// Returns the path to the user's home directory.
16pub fn home_dir() -> &'static PathBuf {
17 static HOME_DIR: OnceLock<PathBuf> = OnceLock::new();
18 HOME_DIR.get_or_init(|| dirs::home_dir().expect("failed to determine home directory"))
19}
20
/// Convenience helpers for path-like values.
///
/// A blanket implementation for every `T: AsRef<Path>` is provided in this file.
pub trait PathExt {
    /// Returns the path with a leading home directory replaced by `~` where supported.
    fn compact(&self) -> PathBuf;
    /// Returns the file's extension or, for a dot-file, its name without the leading dot.
    fn extension_or_hidden_file_name(&self) -> Option<&str>;
    /// Returns a lossy string form of the path (with `/` turned into `\` on Windows).
    fn to_sanitized_string(&self) -> String;
    /// Builds `Self` from the raw bytes of a path.
    ///
    /// On Unix the bytes are used as-is. On Windows the bytes must be valid WTF-8.
    ///
    /// # Errors
    ///
    /// On Windows, returns an error when `bytes` fails WTF-8 validation.
    fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
    where
        Self: From<&'a Path>,
    {
        #[cfg(unix)]
        {
            // Unix paths are arbitrary byte strings, so no validation is required.
            use std::os::unix::prelude::OsStrExt;
            Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
        }
        #[cfg(windows)]
        {
            use anyhow::Context as _;
            use tendril::fmt::{Format, WTF8};
            // `then` only runs the closure when validation succeeded; a `None`
            // result is turned into an error by `with_context`.
            WTF8::validate(bytes)
                .then(|| {
                    // SAFETY: `bytes` was just validated as a WTF-8 sequence.
                    Self::from(Path::new(unsafe {
                        OsStr::from_encoded_bytes_unchecked(bytes)
                    }))
                })
                .with_context(|| format!("Invalid WTF-8 sequence: {bytes:?}"))
        }
    }
}
49
50impl<T: AsRef<Path>> PathExt for T {
51 /// Compacts a given file path by replacing the user's home directory
52 /// prefix with a tilde (`~`).
53 ///
54 /// # Returns
55 ///
56 /// * A `PathBuf` containing the compacted file path. If the input path
57 /// does not have the user's home directory prefix, or if we are not on
58 /// Linux or macOS, the original path is returned unchanged.
59 fn compact(&self) -> PathBuf {
60 if cfg!(any(target_os = "linux", target_os = "freebsd")) || cfg!(target_os = "macos") {
61 match self.as_ref().strip_prefix(home_dir().as_path()) {
62 Ok(relative_path) => {
63 let mut shortened_path = PathBuf::new();
64 shortened_path.push("~");
65 shortened_path.push(relative_path);
66 shortened_path
67 }
68 Err(_) => self.as_ref().to_path_buf(),
69 }
70 } else {
71 self.as_ref().to_path_buf()
72 }
73 }
74
75 /// Returns a file's extension or, if the file is hidden, its name without the leading dot
76 fn extension_or_hidden_file_name(&self) -> Option<&str> {
77 let path = self.as_ref();
78 let file_name = path.file_name()?.to_str()?;
79 if file_name.starts_with('.') {
80 return file_name.strip_prefix('.');
81 }
82
83 path.extension()
84 .and_then(|e| e.to_str())
85 .or_else(|| path.file_stem()?.to_str())
86 }
87
88 /// Returns a sanitized string representation of the path.
89 /// Note, on Windows, this assumes that the path is a valid UTF-8 string and
90 /// is not a UNC path.
91 fn to_sanitized_string(&self) -> String {
92 #[cfg(target_os = "windows")]
93 {
94 self.as_ref().to_string_lossy().replace("/", "\\")
95 }
96 #[cfg(not(target_os = "windows"))]
97 {
98 self.as_ref().to_string_lossy().to_string()
99 }
100 }
101}
102
/// A path that has been passed through this module's sanitization step.
///
/// In memory, this is identical to `Path`. On non-Windows targets, conversions to this type are
/// no-ops. On Windows, these conversions sanitize UNC paths by removing the `\\?\` prefix
/// (via `dunce::simplified`).
#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)]
#[repr(transparent)]
pub struct SanitizedPath(Path);

impl SanitizedPath {
    /// Borrows `path` as a `SanitizedPath`, sanitizing it first on Windows.
    pub fn new<T: AsRef<Path> + ?Sized>(path: &T) -> &Self {
        #[cfg(not(target_os = "windows"))]
        return Self::unchecked_new(path.as_ref());

        #[cfg(target_os = "windows")]
        return Self::unchecked_new(dunce::simplified(path.as_ref()));
    }

    /// Reinterprets `path` as a `SanitizedPath` without performing any sanitization.
    pub fn unchecked_new<T: AsRef<Path> + ?Sized>(path: &T) -> &Self {
        let path: &Path = path.as_ref();
        // SAFETY: `SanitizedPath` is `#[repr(transparent)]` over `Path`, so a pointer to
        // one is a valid pointer to the other, and the returned reference keeps the
        // lifetime of the input borrow.
        unsafe { &*(path as *const Path as *const Self) }
    }

    /// Converts a shared `Path` into a shared `SanitizedPath`.
    ///
    /// On non-Windows targets this reuses the existing allocation; on Windows the path is
    /// sanitized and copied into a new `Arc`.
    pub fn from_arc(path: Arc<Path>) -> Arc<Self> {
        // SAFETY: the pointer comes straight from `Arc::into_raw`, and `SanitizedPath` is
        // `#[repr(transparent)]` over `Path`, so size and alignment of the pointee match.
        #[cfg(not(target_os = "windows"))]
        return unsafe { Arc::from_raw(Arc::into_raw(path) as *const Self) };

        // TODO: could avoid allocating here if dunce::simplified results in the same path
        #[cfg(target_os = "windows")]
        return Self::new(&path).into();
    }

    /// Sanitizes `path` and copies it into a new `Arc`.
    pub fn new_arc<T: AsRef<Path> + ?Sized>(path: &T) -> Arc<Self> {
        Self::new(path).into()
    }

    /// Converts a shared `SanitizedPath` back into a shared `Path`, reusing the allocation.
    pub fn cast_arc(path: Arc<Self>) -> Arc<Path> {
        // SAFETY: the pointer comes straight from `Arc::into_raw`, and `SanitizedPath` is
        // `#[repr(transparent)]` over `Path`, so size and alignment of the pointee match.
        unsafe { Arc::from_raw(Arc::into_raw(path) as *const Path) }
    }

    /// Views a reference to a shared `SanitizedPath` as a reference to a shared `Path`.
    pub fn cast_arc_ref(path: &Arc<Self>) -> &Arc<Path> {
        // SAFETY: `Path` and `SanitizedPath` have the same repr and Drop impl. This relies on
        // `Arc<Self>` and `Arc<Path>` sharing one layout; there is no pointer-based
        // alternative for a borrowed `Arc`.
        unsafe { mem::transmute::<&Arc<Self>, &Arc<Path>>(path) }
    }

    /// Returns `true` when `prefix` is a component-wise prefix of this path.
    pub fn starts_with(&self, prefix: &Self) -> bool {
        self.0.starts_with(&prefix.0)
    }

    /// Returns the underlying `Path`.
    pub fn as_path(&self) -> &Path {
        &self.0
    }

    /// Returns the final component of the path, if there is one.
    pub fn file_name(&self) -> Option<&std::ffi::OsStr> {
        self.0.file_name()
    }

    /// Returns the extension of the final component, if there is one.
    pub fn extension(&self) -> Option<&std::ffi::OsStr> {
        self.0.extension()
    }

    /// Joins `path` onto this path. The result is a plain `PathBuf`.
    pub fn join<P: AsRef<Path>>(&self, path: P) -> PathBuf {
        self.0.join(path)
    }

    /// Returns the parent path, if there is one.
    pub fn parent(&self) -> Option<&Self> {
        self.0.parent().map(Self::unchecked_new)
    }

    /// Returns this path relative to `base`.
    ///
    /// # Errors
    ///
    /// Returns `StripPrefixError` when `base` is not a prefix of this path.
    pub fn strip_prefix(&self, base: &Self) -> Result<&Path, StripPrefixError> {
        self.0.strip_prefix(base.as_path())
    }

    /// Returns the path as `&str` when it is valid Unicode.
    pub fn to_str(&self) -> Option<&str> {
        self.0.to_str()
    }

    /// Copies the path into an owned `PathBuf`.
    pub fn to_path_buf(&self) -> PathBuf {
        self.0.to_path_buf()
    }

    /// Returns a lossy string form of the path; on Windows `/` is replaced by `\`.
    pub fn to_glob_string(&self) -> String {
        #[cfg(target_os = "windows")]
        {
            self.0.to_string_lossy().replace("/", "\\")
        }
        #[cfg(not(target_os = "windows"))]
        {
            self.0.to_string_lossy().to_string()
        }
    }
}

impl std::fmt::Debug for SanitizedPath {
    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(&self.0, formatter)
    }
}

impl Display for SanitizedPath {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0.display())
    }
}

impl From<&SanitizedPath> for Arc<SanitizedPath> {
    fn from(sanitized_path: &SanitizedPath) -> Self {
        let path: Arc<Path> = Arc::from(&sanitized_path.0);
        // SAFETY: the pointer comes straight from `Arc::into_raw`, and `SanitizedPath` is
        // `#[repr(transparent)]` over `Path`, so size and alignment of the pointee match.
        // The cast is done inline (not via `from_arc`) because `from_arc` calls this
        // conversion on Windows.
        unsafe { Arc::from_raw(Arc::into_raw(path) as *const SanitizedPath) }
    }
}

impl From<&SanitizedPath> for PathBuf {
    fn from(sanitized_path: &SanitizedPath) -> Self {
        sanitized_path.as_path().into()
    }
}

impl AsRef<Path> for SanitizedPath {
    fn as_ref(&self) -> &Path {
        &self.0
    }
}
226
/// The path syntax a path string is written in.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PathStyle {
    /// `/`-separated paths.
    Posix,
    /// `\`-separated paths.
    Windows,
}

impl PathStyle {
    /// Returns the path style of the platform this binary was compiled for.
    #[cfg(target_os = "windows")]
    pub const fn current() -> Self {
        PathStyle::Windows
    }

    /// Returns the path style of the platform this binary was compiled for.
    #[cfg(not(target_os = "windows"))]
    pub const fn current() -> Self {
        PathStyle::Posix
    }

    /// Returns the component separator used by this path style.
    ///
    /// The result is a string literal, so it is `'static` and can outlive the
    /// `PathStyle` value it was obtained from.
    #[inline]
    pub fn separator(&self) -> &'static str {
        match self {
            PathStyle::Posix => "/",
            PathStyle::Windows => "\\",
        }
    }
}
252
253#[derive(Debug, Clone)]
254pub struct RemotePathBuf {
255 inner: PathBuf,
256 style: PathStyle,
257 string: String, // Cached string representation
258}
259
260impl RemotePathBuf {
261 pub fn new(path: PathBuf, style: PathStyle) -> Self {
262 #[cfg(target_os = "windows")]
263 let string = match style {
264 PathStyle::Posix => path.to_string_lossy().replace('\\', "/"),
265 PathStyle::Windows => path.to_string_lossy().into(),
266 };
267 #[cfg(not(target_os = "windows"))]
268 let string = match style {
269 PathStyle::Posix => path.to_string_lossy().to_string(),
270 PathStyle::Windows => path.to_string_lossy().replace('/', "\\"),
271 };
272 Self {
273 inner: path,
274 style,
275 string,
276 }
277 }
278
279 pub fn from_str(path: &str, style: PathStyle) -> Self {
280 let path_buf = PathBuf::from(path);
281 Self::new(path_buf, style)
282 }
283
284 #[cfg(target_os = "windows")]
285 pub fn to_proto(&self) -> String {
286 match self.path_style() {
287 PathStyle::Posix => self.to_string(),
288 PathStyle::Windows => self.inner.to_string_lossy().replace('\\', "/"),
289 }
290 }
291
292 #[cfg(not(target_os = "windows"))]
293 pub fn to_proto(&self) -> String {
294 match self.path_style() {
295 PathStyle::Posix => self.inner.to_string_lossy().to_string(),
296 PathStyle::Windows => self.to_string(),
297 }
298 }
299
300 pub fn as_path(&self) -> &Path {
301 &self.inner
302 }
303
304 pub fn path_style(&self) -> PathStyle {
305 self.style
306 }
307
308 pub fn parent(&self) -> Option<RemotePathBuf> {
309 self.inner
310 .parent()
311 .map(|p| RemotePathBuf::new(p.to_path_buf(), self.style))
312 }
313}
314
315impl Display for RemotePathBuf {
316 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
317 write!(f, "{}", self.string)
318 }
319}
320
/// A delimiter to use in `path_query:row_number:column_number` strings parsing.
pub const FILE_ROW_COLUMN_DELIMITER: char = ':';

/// Pattern matching a file name followed by a row and optional column.
///
/// Written in verbose mode (`x`) with `.` also matching newlines (`s`). Every
/// alternative contributes exactly three capture groups: file name, row, column.
/// Row-only alternatives use an empty group `()` in place of the column, so
/// callers can always extract three groups.
const ROW_COL_CAPTURE_REGEX: &str = r"(?xs)
    ([^\(]+)\:(?:
        \((\d+)[,:](\d+)\) # filename:(row,column), filename:(row:column)
        |
        \((\d+)\)() # filename:(row)
    )
    |
    ([^\(]+)(?:
        \((\d+)[,:](\d+)\) # filename(row,column), filename(row:column)
        |
        \((\d+)\)() # filename(row)
    )
    |
    (.+?)(?:
        \:+(\d+)\:(\d+)\:*$ # filename:row:column
        |
        \:+(\d+)\:*()$ # filename:row
    )";
342
/// A representation of a path-like string with optional row and column numbers.
/// Matching values example: `te`, `test.rs:22`, `te:22:5`, `test.c(22)`, `test.c(22,5)` etc.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct PathWithPosition {
    /// The path with any position suffix removed.
    pub path: PathBuf,
    /// Row number taken from the position suffix, if one was present.
    pub row: Option<u32>,
    /// Column number taken from the position suffix. Absent if row is absent.
    pub column: Option<u32>,
}
352
353impl PathWithPosition {
354 /// Returns a PathWithPosition from a path.
355 pub fn from_path(path: PathBuf) -> Self {
356 Self {
357 path,
358 row: None,
359 column: None,
360 }
361 }
362
363 /// Parses a string that possibly has `:row:column` or `(row, column)` suffix.
364 /// Parenthesis format is used by [MSBuild](https://learn.microsoft.com/en-us/visualstudio/msbuild/msbuild-diagnostic-format-for-tasks) compatible tools
365 /// Ignores trailing `:`s, so `test.rs:22:` is parsed as `test.rs:22`.
366 /// If the suffix parsing fails, the whole string is parsed as a path.
367 ///
368 /// Be mindful that `test_file:10:1:` is a valid posix filename.
369 /// `PathWithPosition` class assumes that the ending position-like suffix is **not** part of the filename.
370 ///
371 /// # Examples
372 ///
373 /// ```
374 /// # use util::paths::PathWithPosition;
375 /// # use std::path::PathBuf;
376 /// assert_eq!(PathWithPosition::parse_str("test_file"), PathWithPosition {
377 /// path: PathBuf::from("test_file"),
378 /// row: None,
379 /// column: None,
380 /// });
381 /// assert_eq!(PathWithPosition::parse_str("test_file:10"), PathWithPosition {
382 /// path: PathBuf::from("test_file"),
383 /// row: Some(10),
384 /// column: None,
385 /// });
386 /// assert_eq!(PathWithPosition::parse_str("test_file.rs"), PathWithPosition {
387 /// path: PathBuf::from("test_file.rs"),
388 /// row: None,
389 /// column: None,
390 /// });
391 /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1"), PathWithPosition {
392 /// path: PathBuf::from("test_file.rs"),
393 /// row: Some(1),
394 /// column: None,
395 /// });
396 /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1:2"), PathWithPosition {
397 /// path: PathBuf::from("test_file.rs"),
398 /// row: Some(1),
399 /// column: Some(2),
400 /// });
401 /// ```
402 ///
403 /// # Expected parsing results when encounter ill-formatted inputs.
404 /// ```
405 /// # use util::paths::PathWithPosition;
406 /// # use std::path::PathBuf;
407 /// assert_eq!(PathWithPosition::parse_str("test_file.rs:a"), PathWithPosition {
408 /// path: PathBuf::from("test_file.rs:a"),
409 /// row: None,
410 /// column: None,
411 /// });
412 /// assert_eq!(PathWithPosition::parse_str("test_file.rs:a:b"), PathWithPosition {
413 /// path: PathBuf::from("test_file.rs:a:b"),
414 /// row: None,
415 /// column: None,
416 /// });
417 /// assert_eq!(PathWithPosition::parse_str("test_file.rs::"), PathWithPosition {
418 /// path: PathBuf::from("test_file.rs::"),
419 /// row: None,
420 /// column: None,
421 /// });
422 /// assert_eq!(PathWithPosition::parse_str("test_file.rs::1"), PathWithPosition {
423 /// path: PathBuf::from("test_file.rs"),
424 /// row: Some(1),
425 /// column: None,
426 /// });
427 /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1::"), PathWithPosition {
428 /// path: PathBuf::from("test_file.rs"),
429 /// row: Some(1),
430 /// column: None,
431 /// });
432 /// assert_eq!(PathWithPosition::parse_str("test_file.rs::1:2"), PathWithPosition {
433 /// path: PathBuf::from("test_file.rs"),
434 /// row: Some(1),
435 /// column: Some(2),
436 /// });
437 /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1::2"), PathWithPosition {
438 /// path: PathBuf::from("test_file.rs:1"),
439 /// row: Some(2),
440 /// column: None,
441 /// });
442 /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1:2:3"), PathWithPosition {
443 /// path: PathBuf::from("test_file.rs:1"),
444 /// row: Some(2),
445 /// column: Some(3),
446 /// });
447 /// ```
448 pub fn parse_str(s: &str) -> Self {
449 let trimmed = s.trim();
450 let path = Path::new(trimmed);
451 let maybe_file_name_with_row_col = path.file_name().unwrap_or_default().to_string_lossy();
452 if maybe_file_name_with_row_col.is_empty() {
453 return Self {
454 path: Path::new(s).to_path_buf(),
455 row: None,
456 column: None,
457 };
458 }
459
460 // Let's avoid repeated init cost on this. It is subject to thread contention, but
461 // so far this code isn't called from multiple hot paths. Getting contention here
462 // in the future seems unlikely.
463 static SUFFIX_RE: LazyLock<Regex> =
464 LazyLock::new(|| Regex::new(ROW_COL_CAPTURE_REGEX).unwrap());
465 match SUFFIX_RE
466 .captures(&maybe_file_name_with_row_col)
467 .map(|caps| caps.extract())
468 {
469 Some((_, [file_name, maybe_row, maybe_column])) => {
470 let row = maybe_row.parse::<u32>().ok();
471 let column = maybe_column.parse::<u32>().ok();
472
473 let suffix_length = maybe_file_name_with_row_col.len() - file_name.len();
474 let path_without_suffix = &trimmed[..trimmed.len() - suffix_length];
475
476 Self {
477 path: Path::new(path_without_suffix).to_path_buf(),
478 row,
479 column,
480 }
481 }
482 None => {
483 // The `ROW_COL_CAPTURE_REGEX` deals with separated digits only,
484 // but in reality there could be `foo/bar.py:22:in` inputs which we want to match too.
485 // The regex mentioned is not very extendable with "digit or random string" checks, so do this here instead.
486 let delimiter = ':';
487 let mut path_parts = s
488 .rsplitn(3, delimiter)
489 .collect::<Vec<_>>()
490 .into_iter()
491 .rev()
492 .fuse();
493 let mut path_string = path_parts.next().expect("rsplitn should have the rest of the string as its last parameter that we reversed").to_owned();
494 let mut row = None;
495 let mut column = None;
496 if let Some(maybe_row) = path_parts.next() {
497 if let Ok(parsed_row) = maybe_row.parse::<u32>() {
498 row = Some(parsed_row);
499 if let Some(parsed_column) = path_parts
500 .next()
501 .and_then(|maybe_col| maybe_col.parse::<u32>().ok())
502 {
503 column = Some(parsed_column);
504 }
505 } else {
506 path_string.push(delimiter);
507 path_string.push_str(maybe_row);
508 }
509 }
510 for split in path_parts {
511 path_string.push(delimiter);
512 path_string.push_str(split);
513 }
514
515 Self {
516 path: PathBuf::from(path_string),
517 row,
518 column,
519 }
520 }
521 }
522 }
523
524 pub fn map_path<E>(
525 self,
526 mapping: impl FnOnce(PathBuf) -> Result<PathBuf, E>,
527 ) -> Result<PathWithPosition, E> {
528 Ok(PathWithPosition {
529 path: mapping(self.path)?,
530 row: self.row,
531 column: self.column,
532 })
533 }
534
535 pub fn to_string(&self, path_to_string: impl Fn(&PathBuf) -> String) -> String {
536 let path_string = path_to_string(&self.path);
537 if let Some(row) = self.row {
538 if let Some(column) = self.column {
539 format!("{path_string}:{row}:{column}")
540 } else {
541 format!("{path_string}:{row}")
542 }
543 } else {
544 path_string
545 }
546 }
547}
548
/// A set of glob patterns kept together with the pattern strings it was built from.
#[derive(Clone, Debug, Default)]
pub struct PathMatcher {
    /// The glob pattern strings, as reported by each parsed `Glob`.
    sources: Vec<String>,
    /// The compiled set of all patterns in `sources`.
    glob: GlobSet,
}
554
555// impl std::fmt::Display for PathMatcher {
556// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
557// self.sources.fmt(f)
558// }
559// }
560
561impl PartialEq for PathMatcher {
562 fn eq(&self, other: &Self) -> bool {
563 self.sources.eq(&other.sources)
564 }
565}
566
567impl Eq for PathMatcher {}
568
569impl PathMatcher {
570 pub fn new(globs: impl IntoIterator<Item = impl AsRef<str>>) -> Result<Self, globset::Error> {
571 let globs = globs
572 .into_iter()
573 .map(|as_str| Glob::new(as_str.as_ref()))
574 .collect::<Result<Vec<_>, _>>()?;
575 let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
576 let mut glob_builder = GlobSetBuilder::new();
577 for single_glob in globs {
578 glob_builder.add(single_glob);
579 }
580 let glob = glob_builder.build()?;
581 Ok(PathMatcher { glob, sources })
582 }
583
584 pub fn sources(&self) -> &[String] {
585 &self.sources
586 }
587
588 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
589 let other_path = other.as_ref();
590 self.sources.iter().any(|source| {
591 let as_bytes = other_path.as_os_str().as_encoded_bytes();
592 as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
593 }) || self.glob.is_match(other_path)
594 || self.check_with_end_separator(other_path)
595 }
596
597 fn check_with_end_separator(&self, path: &Path) -> bool {
598 let path_str = path.to_string_lossy();
599 let separator = std::path::MAIN_SEPARATOR_STR;
600 if path_str.ends_with(separator) {
601 false
602 } else {
603 self.glob.is_match(path_str.to_string() + separator)
604 }
605 }
606}
607
/// Orders two characters ignoring ASCII case first; when that ties, an ASCII
/// lowercase character sorts before any character that is not ASCII lowercase.
fn compare_chars(a: char, b: char) -> Ordering {
    a.to_ascii_lowercase()
        .cmp(&b.to_ascii_lowercase())
        // `true > false`, so comparing the flags in reverse puts lowercase first.
        .then_with(|| b.is_ascii_lowercase().cmp(&a.is_ascii_lowercase()))
}
623
/// Compares two sequences of consecutive digits for natural sorting.
///
/// Both iterators are expected to be positioned at the start of a run of ASCII
/// digits. Each run is consumed and the two runs are compared as numbers.
///
/// # Behavior
///
/// 1. Different numeric values: compares by numeric value (e.g., "2" < "10").
/// 2. Leading zeros: when the values are equal, the longer run is greater
///    (e.g., "002" > "2").
/// 3. Large numbers: runs of any length are supported. The comparison works on
///    the digits themselves and never parses into an integer, so it cannot overflow.
///
/// # Examples
///
/// ```text
/// "1" vs "2" -> Less (different values)
/// "2" vs "10" -> Less (numeric comparison)
/// "002" vs "2" -> Greater (leading zeros)
/// "10" vs "010" -> Less (leading zeros)
/// "999..." vs "1000..." -> Less (large number comparison)
/// ```
///
/// # Implementation Details
///
/// With leading zeros removed, a run with more digits is the larger number, and
/// two runs with the same number of digits order the same way as their text.
/// Only when that is a tie does the original run length decide.
///
/// The function advances both iterators past their respective numeric sequences,
/// regardless of the comparison result.
fn compare_numeric_segments<I>(
    a_iter: &mut std::iter::Peekable<I>,
    b_iter: &mut std::iter::Peekable<I>,
) -> Ordering
where
    I: Iterator<Item = char>,
{
    /// Consumes the leading run of ASCII digits from `iter` and returns it.
    fn take_digits<I>(iter: &mut std::iter::Peekable<I>) -> String
    where
        I: Iterator<Item = char>,
    {
        let mut digits = String::new();
        while let Some(digit) = iter.next_if(char::is_ascii_digit) {
            digits.push(digit);
        }
        digits
    }

    let a_digits = take_digits(a_iter);
    let b_digits = take_digits(b_iter);

    // Leading zeros do not change the value, so set them aside for the value comparison.
    let a_significant = a_digits.trim_start_matches('0');
    let b_significant = b_digits.trim_start_matches('0');

    a_significant
        .len()
        .cmp(&b_significant.len())
        .then_with(|| a_significant.cmp(b_significant))
        // Same value: the run with more leading zeros is the greater one.
        .then_with(|| a_digits.len().cmp(&b_digits.len()))
}
712
713/// Performs natural sorting comparison between two strings.
714///
715/// Natural sorting is an ordering that handles numeric sequences in a way that matches human expectations.
716/// For example, "file2" comes before "file10" (unlike standard lexicographic sorting).
717///
718/// # Characteristics
719///
720/// * Case-sensitive with lowercase priority: When comparing same letters, lowercase comes before uppercase
721/// * Numbers are compared by numeric value, not character by character
722/// * Leading zeros affect ordering when numeric values are equal
723/// * Can handle numbers larger than u128::MAX (falls back to string comparison)
724///
725/// # Algorithm
726///
727/// The function works by:
728/// 1. Processing strings character by character
729/// 2. When encountering digits, treating consecutive digits as a single number
730/// 3. Comparing numbers by their numeric value rather than lexicographically
731/// 4. For non-numeric characters, using case-sensitive comparison with lowercase priority
732fn natural_sort(a: &str, b: &str) -> Ordering {
733 let mut a_iter = a.chars().peekable();
734 let mut b_iter = b.chars().peekable();
735
736 loop {
737 match (a_iter.peek(), b_iter.peek()) {
738 (None, None) => return Ordering::Equal,
739 (None, _) => return Ordering::Less,
740 (_, None) => return Ordering::Greater,
741 (Some(&a_char), Some(&b_char)) => {
742 if a_char.is_ascii_digit() && b_char.is_ascii_digit() {
743 match compare_numeric_segments(&mut a_iter, &mut b_iter) {
744 Ordering::Equal => continue,
745 ordering => return ordering,
746 }
747 } else {
748 match compare_chars(a_char, b_char) {
749 Ordering::Equal => {
750 a_iter.next();
751 b_iter.next();
752 }
753 ordering => return ordering,
754 }
755 }
756 }
757 }
758 }
759}
760
761pub fn compare_paths(
762 (path_a, a_is_file): (&Path, bool),
763 (path_b, b_is_file): (&Path, bool),
764) -> Ordering {
765 let mut components_a = path_a.components().peekable();
766 let mut components_b = path_b.components().peekable();
767
768 loop {
769 match (components_a.next(), components_b.next()) {
770 (Some(component_a), Some(component_b)) => {
771 let a_is_file = components_a.peek().is_none() && a_is_file;
772 let b_is_file = components_b.peek().is_none() && b_is_file;
773
774 let ordering = a_is_file.cmp(&b_is_file).then_with(|| {
775 let path_a = Path::new(component_a.as_os_str());
776 let path_string_a = if a_is_file {
777 path_a.file_stem()
778 } else {
779 path_a.file_name()
780 }
781 .map(|s| s.to_string_lossy());
782
783 let path_b = Path::new(component_b.as_os_str());
784 let path_string_b = if b_is_file {
785 path_b.file_stem()
786 } else {
787 path_b.file_name()
788 }
789 .map(|s| s.to_string_lossy());
790
791 let compare_components = match (path_string_a, path_string_b) {
792 (Some(a), Some(b)) => natural_sort(&a, &b),
793 (Some(_), None) => Ordering::Greater,
794 (None, Some(_)) => Ordering::Less,
795 (None, None) => Ordering::Equal,
796 };
797
798 compare_components.then_with(|| {
799 if a_is_file && b_is_file {
800 let ext_a = path_a.extension().unwrap_or_default();
801 let ext_b = path_b.extension().unwrap_or_default();
802 ext_a.cmp(ext_b)
803 } else {
804 Ordering::Equal
805 }
806 })
807 });
808
809 if !ordering.is_eq() {
810 return ordering;
811 }
812 }
813 (Some(_), None) => break Ordering::Greater,
814 (None, Some(_)) => break Ordering::Less,
815 (None, None) => break Ordering::Equal,
816 }
817 }
818}
819
820#[cfg(test)]
821mod tests {
822 use super::*;
823
824 #[test]
825 fn compare_paths_with_dots() {
826 let mut paths = vec![
827 (Path::new("test_dirs"), false),
828 (Path::new("test_dirs/1.46"), false),
829 (Path::new("test_dirs/1.46/bar_1"), true),
830 (Path::new("test_dirs/1.46/bar_2"), true),
831 (Path::new("test_dirs/1.45"), false),
832 (Path::new("test_dirs/1.45/foo_2"), true),
833 (Path::new("test_dirs/1.45/foo_1"), true),
834 ];
835 paths.sort_by(|&a, &b| compare_paths(a, b));
836 assert_eq!(
837 paths,
838 vec![
839 (Path::new("test_dirs"), false),
840 (Path::new("test_dirs/1.45"), false),
841 (Path::new("test_dirs/1.45/foo_1"), true),
842 (Path::new("test_dirs/1.45/foo_2"), true),
843 (Path::new("test_dirs/1.46"), false),
844 (Path::new("test_dirs/1.46/bar_1"), true),
845 (Path::new("test_dirs/1.46/bar_2"), true),
846 ]
847 );
848 let mut paths = vec![
849 (Path::new("root1/one.txt"), true),
850 (Path::new("root1/one.two.txt"), true),
851 ];
852 paths.sort_by(|&a, &b| compare_paths(a, b));
853 assert_eq!(
854 paths,
855 vec![
856 (Path::new("root1/one.txt"), true),
857 (Path::new("root1/one.two.txt"), true),
858 ]
859 );
860 }
861
862 #[test]
863 fn compare_paths_with_same_name_different_extensions() {
864 let mut paths = vec![
865 (Path::new("test_dirs/file.rs"), true),
866 (Path::new("test_dirs/file.txt"), true),
867 (Path::new("test_dirs/file.md"), true),
868 (Path::new("test_dirs/file"), true),
869 (Path::new("test_dirs/file.a"), true),
870 ];
871 paths.sort_by(|&a, &b| compare_paths(a, b));
872 assert_eq!(
873 paths,
874 vec![
875 (Path::new("test_dirs/file"), true),
876 (Path::new("test_dirs/file.a"), true),
877 (Path::new("test_dirs/file.md"), true),
878 (Path::new("test_dirs/file.rs"), true),
879 (Path::new("test_dirs/file.txt"), true),
880 ]
881 );
882 }
883
884 #[test]
885 fn compare_paths_case_semi_sensitive() {
886 let mut paths = vec![
887 (Path::new("test_DIRS"), false),
888 (Path::new("test_DIRS/foo_1"), true),
889 (Path::new("test_DIRS/foo_2"), true),
890 (Path::new("test_DIRS/bar"), true),
891 (Path::new("test_DIRS/BAR"), true),
892 (Path::new("test_dirs"), false),
893 (Path::new("test_dirs/foo_1"), true),
894 (Path::new("test_dirs/foo_2"), true),
895 (Path::new("test_dirs/bar"), true),
896 (Path::new("test_dirs/BAR"), true),
897 ];
898 paths.sort_by(|&a, &b| compare_paths(a, b));
899 assert_eq!(
900 paths,
901 vec![
902 (Path::new("test_dirs"), false),
903 (Path::new("test_dirs/bar"), true),
904 (Path::new("test_dirs/BAR"), true),
905 (Path::new("test_dirs/foo_1"), true),
906 (Path::new("test_dirs/foo_2"), true),
907 (Path::new("test_DIRS"), false),
908 (Path::new("test_DIRS/bar"), true),
909 (Path::new("test_DIRS/BAR"), true),
910 (Path::new("test_DIRS/foo_1"), true),
911 (Path::new("test_DIRS/foo_2"), true),
912 ]
913 );
914 }
915
916 #[test]
917 fn path_with_position_parse_posix_path() {
918 // Test POSIX filename edge cases
919 // Read more at https://en.wikipedia.org/wiki/Filename
920 assert_eq!(
921 PathWithPosition::parse_str("test_file"),
922 PathWithPosition {
923 path: PathBuf::from("test_file"),
924 row: None,
925 column: None
926 }
927 );
928
929 assert_eq!(
930 PathWithPosition::parse_str("a:bc:.zip:1"),
931 PathWithPosition {
932 path: PathBuf::from("a:bc:.zip"),
933 row: Some(1),
934 column: None
935 }
936 );
937
938 assert_eq!(
939 PathWithPosition::parse_str("one.second.zip:1"),
940 PathWithPosition {
941 path: PathBuf::from("one.second.zip"),
942 row: Some(1),
943 column: None
944 }
945 );
946
947 // Trim off trailing `:`s for otherwise valid input.
948 assert_eq!(
949 PathWithPosition::parse_str("test_file:10:1:"),
950 PathWithPosition {
951 path: PathBuf::from("test_file"),
952 row: Some(10),
953 column: Some(1)
954 }
955 );
956
957 assert_eq!(
958 PathWithPosition::parse_str("test_file.rs:"),
959 PathWithPosition {
960 path: PathBuf::from("test_file.rs:"),
961 row: None,
962 column: None
963 }
964 );
965
966 assert_eq!(
967 PathWithPosition::parse_str("test_file.rs:1:"),
968 PathWithPosition {
969 path: PathBuf::from("test_file.rs"),
970 row: Some(1),
971 column: None
972 }
973 );
974
975 assert_eq!(
976 PathWithPosition::parse_str("ab\ncd"),
977 PathWithPosition {
978 path: PathBuf::from("ab\ncd"),
979 row: None,
980 column: None
981 }
982 );
983
984 assert_eq!(
985 PathWithPosition::parse_str("👋\nab"),
986 PathWithPosition {
987 path: PathBuf::from("👋\nab"),
988 row: None,
989 column: None
990 }
991 );
992
993 assert_eq!(
994 PathWithPosition::parse_str("Types.hs:(617,9)-(670,28):"),
995 PathWithPosition {
996 path: PathBuf::from("Types.hs"),
997 row: Some(617),
998 column: Some(9),
999 }
1000 );
1001 }
1002
1003 #[test]
1004 #[cfg(not(target_os = "windows"))]
1005 fn path_with_position_parse_posix_path_with_suffix() {
1006 assert_eq!(
1007 PathWithPosition::parse_str("foo/bar:34:in"),
1008 PathWithPosition {
1009 path: PathBuf::from("foo/bar"),
1010 row: Some(34),
1011 column: None,
1012 }
1013 );
1014 assert_eq!(
1015 PathWithPosition::parse_str("foo/bar.rs:1902:::15:"),
1016 PathWithPosition {
1017 path: PathBuf::from("foo/bar.rs:1902"),
1018 row: Some(15),
1019 column: None
1020 }
1021 );
1022
1023 assert_eq!(
1024 PathWithPosition::parse_str("app-editors:zed-0.143.6:20240710-201212.log:34:"),
1025 PathWithPosition {
1026 path: PathBuf::from("app-editors:zed-0.143.6:20240710-201212.log"),
1027 row: Some(34),
1028 column: None,
1029 }
1030 );
1031
1032 assert_eq!(
1033 PathWithPosition::parse_str("crates/file_finder/src/file_finder.rs:1902:13:"),
1034 PathWithPosition {
1035 path: PathBuf::from("crates/file_finder/src/file_finder.rs"),
1036 row: Some(1902),
1037 column: Some(13),
1038 }
1039 );
1040
1041 assert_eq!(
1042 PathWithPosition::parse_str("crate/utils/src/test:today.log:34"),
1043 PathWithPosition {
1044 path: PathBuf::from("crate/utils/src/test:today.log"),
1045 row: Some(34),
1046 column: None,
1047 }
1048 );
1049 assert_eq!(
1050 PathWithPosition::parse_str("/testing/out/src/file_finder.odin(7:15)"),
1051 PathWithPosition {
1052 path: PathBuf::from("/testing/out/src/file_finder.odin"),
1053 row: Some(7),
1054 column: Some(15),
1055 }
1056 );
1057 }
1058
1059 #[test]
1060 #[cfg(target_os = "windows")]
1061 fn path_with_position_parse_windows_path() {
1062 assert_eq!(
1063 PathWithPosition::parse_str("crates\\utils\\paths.rs"),
1064 PathWithPosition {
1065 path: PathBuf::from("crates\\utils\\paths.rs"),
1066 row: None,
1067 column: None
1068 }
1069 );
1070
1071 assert_eq!(
1072 PathWithPosition::parse_str("C:\\Users\\someone\\test_file.rs"),
1073 PathWithPosition {
1074 path: PathBuf::from("C:\\Users\\someone\\test_file.rs"),
1075 row: None,
1076 column: None
1077 }
1078 );
1079 }
1080
/// Windows paths followed by a `:row[:column]` or `(row[,column])` suffix
/// must be split into the path and its position.
#[test]
#[cfg(target_os = "windows")]
fn path_with_position_parse_windows_path_with_suffix() {
    // Parses `input` and compares the result against the expected parts.
    let check = |input: &str, expected_path: &str, row, column| {
        assert_eq!(
            PathWithPosition::parse_str(input),
            PathWithPosition {
                path: PathBuf::from(expected_path),
                row,
                column,
            },
            "parsing {input:?}"
        );
    };

    check(
        "crates\\utils\\paths.rs:101",
        "crates\\utils\\paths.rs",
        Some(101),
        None,
    );
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1:20",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
        Some(1),
        Some(20),
    );
    check(
        "C:\\Users\\someone\\test_file.rs(1902,13)",
        "C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );

    // Trim off trailing `:`s for otherwise valid input.
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );

    // With more numeric segments than row and column, the leading ones stay
    // part of the path.
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:15:",
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902",
        Some(13),
        Some(15),
    );
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:::15:",
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902",
        Some(15),
        None,
    );

    // Parenthesized positions followed by a trailing `:`.
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs(1902,13):",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs(1902):",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
        Some(1902),
        None,
    );

    // The same suffix shapes on a plain drive-letter path.
    check(
        "C:\\Users\\someone\\test_file.rs:1902:13:",
        "C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );
    check(
        "C:\\Users\\someone\\test_file.rs(1902,13):",
        "C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );
    check(
        "C:\\Users\\someone\\test_file.rs(1902):",
        "C:\\Users\\someone\\test_file.rs",
        Some(1902),
        None,
    );

    // Forward slashes in the input compare equal to a backslash path.
    check(
        "crates/utils/paths.rs:101",
        "crates\\utils\\paths.rs",
        Some(101),
        None,
    );
}
1193
/// `compact` must abbreviate the home directory prefix to `~` on Linux,
/// FreeBSD and macOS, and leave the path untouched everywhere else.
#[test]
fn test_path_compact() {
    // Build `<home>/some_file.txt` one component at a time.
    let mut path = PathBuf::new();
    path.push(home_dir().to_string_lossy().to_string());
    path.push("some_file.txt".to_string());

    let compacted = path.compact();
    let expected = if cfg!(any(
        target_os = "linux",
        target_os = "freebsd",
        target_os = "macos"
    )) {
        Some("~/some_file.txt")
    } else {
        path.to_str()
    };

    assert_eq!(compacted.to_str(), expected);
}
1208
/// Regular files yield their last extension; dot-files yield everything
/// after the leading dot.
#[test]
fn test_extension_or_hidden_file_name() {
    let cases = [
        // No dots in name
        ("/a/b/c/file_name.rs", Some("rs")),
        // Single dot in name
        ("/a/b/c/file.name.rs", Some("rs")),
        // Multiple dots in name
        ("/a/b/c/long.file.name.rs", Some("rs")),
        // Hidden file, no extension
        ("/a/b/c/.gitignore", Some("gitignore")),
        // Hidden file, with extension
        ("/a/b/c/.eslintrc.js", Some("eslintrc.js")),
    ];

    for (raw, expected) in cases {
        let path = Path::new(raw);
        assert_eq!(
            path.extension_or_hidden_file_name(),
            expected,
            "path: {raw:?}"
        );
    }
}
1231
/// A directory named exactly `node_modules` must itself match the
/// `**/node_modules/**` glob, not only the entries beneath it.
#[test]
fn edge_of_glob() {
    let globs = ["**/node_modules/**".to_owned()];
    let path_matcher = PathMatcher::new(&globs).unwrap();

    let path = Path::new("/work/node_modules");
    let matched = path_matcher.is_match(path);
    assert!(matched, "Path matcher should match {path:?}");
}
1241
/// The `**/node_modules/**` glob must match a `node_modules` directory
/// nested deep inside an absolute path.
#[test]
fn project_search() {
    let globs = ["**/node_modules/**".to_owned()];
    let path_matcher = PathMatcher::new(&globs).unwrap();

    let path = Path::new("/Users/someonetoignore/work/zed/zed.dev/node_modules");
    let matched = path_matcher.is_match(path);
    assert!(matched, "Path matcher should match {path:?}");
}
1251
/// Both the plain and the `\\?\`-prefixed spelling of a Windows path must
/// render as the plain drive-letter form.
#[test]
#[cfg(target_os = "windows")]
fn test_sanitized_path() {
    let expected = "C:\\Users\\someone\\test_file.rs";
    let spellings = [
        "C:\\Users\\someone\\test_file.rs",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
    ];

    for raw in spellings {
        let path = Path::new(raw);
        let sanitized_path = SanitizedPath::new(path);
        assert_eq!(sanitized_path.to_string(), expected);
    }
}
1269
/// Checks the ordering of leading digit runs and that both iterators are
/// left positioned just past those digits.
#[test]
fn test_compare_numeric_segments() {
    // Runs the comparison on fresh peekable iterators, then checks that no
    // digit is left at the front of either one.
    fn compare(a: &str, b: &str) -> Ordering {
        let (mut a_iter, mut b_iter) = (a.chars().peekable(), b.chars().peekable());
        let ordering = compare_numeric_segments(&mut a_iter, &mut b_iter);

        assert!(
            !matches!(a_iter.next(), Some(c) if c.is_ascii_digit()),
            "Iterator a should have consumed all digits"
        );
        assert!(
            !matches!(b_iter.next(), Some(c) if c.is_ascii_digit()),
            "Iterator b should have consumed all digits"
        );

        ordering
    }

    let cases = [
        // Basic numeric comparisons
        ("0", "0", Ordering::Equal),
        ("1", "2", Ordering::Less),
        ("9", "10", Ordering::Less),
        ("10", "9", Ordering::Greater),
        ("99", "100", Ordering::Less),
        // Leading zeros
        ("0", "00", Ordering::Less),
        ("00", "0", Ordering::Greater),
        ("01", "1", Ordering::Greater),
        ("001", "1", Ordering::Greater),
        ("001", "01", Ordering::Greater),
        // Same value different representation
        ("000100", "100", Ordering::Greater),
        ("100", "0100", Ordering::Less),
        ("0100", "00100", Ordering::Less),
        // Large numbers
        ("9999999999", "10000000000", Ordering::Less),
        (
            "340282366920938463463374607431768211455", // u128::MAX
            "340282366920938463463374607431768211456",
            Ordering::Less,
        ),
        (
            "340282366920938463463374607431768211456", // > u128::MAX
            "340282366920938463463374607431768211455",
            Ordering::Greater,
        ),
    ];
    for (a, b, expected) in cases {
        assert_eq!(compare(a, b), expected, "comparing {a:?} with {b:?}");
    }

    // Iterator advancement verification: only the digit prefix is consumed.
    let mut a_iter = "123abc".chars().peekable();
    let mut b_iter = "456def".chars().peekable();

    compare_numeric_segments(&mut a_iter, &mut b_iter);

    let a_rest: String = a_iter.collect();
    let b_rest: String = b_iter.collect();
    assert_eq!(a_rest, "abc");
    assert_eq!(b_rest, "def");
}
1337
/// Broad smoke test for `natural_sort`: plain letters, letter case, digit
/// runs, punctuation, non-ASCII text and empty/whitespace-only inputs.
#[test]
fn test_natural_sort() {
    // Basic alphanumeric
    assert_eq!(natural_sort("a", "b"), Ordering::Less);
    assert_eq!(natural_sort("b", "a"), Ordering::Greater);
    assert_eq!(natural_sort("a", "a"), Ordering::Equal);

    // Case sensitivity
    assert_eq!(natural_sort("a", "A"), Ordering::Less);
    assert_eq!(natural_sort("A", "a"), Ordering::Greater);
    assert_eq!(natural_sort("aA", "aa"), Ordering::Greater);
    assert_eq!(natural_sort("aa", "aA"), Ordering::Less);

    // Numbers
    assert_eq!(natural_sort("1", "2"), Ordering::Less);
    assert_eq!(natural_sort("2", "10"), Ordering::Less);
    assert_eq!(natural_sort("02", "10"), Ordering::Less);
    assert_eq!(natural_sort("02", "2"), Ordering::Greater);

    // Mixed alphanumeric
    assert_eq!(natural_sort("a1", "a2"), Ordering::Less);
    assert_eq!(natural_sort("a2", "a10"), Ordering::Less);
    assert_eq!(natural_sort("a02", "a2"), Ordering::Greater);
    assert_eq!(natural_sort("a1b", "a1c"), Ordering::Less);

    // Multiple numeric segments
    assert_eq!(natural_sort("1a2", "1a10"), Ordering::Less);
    assert_eq!(natural_sort("1a10", "1a2"), Ordering::Greater);
    assert_eq!(natural_sort("2a1", "10a1"), Ordering::Less);

    // Special characters
    assert_eq!(natural_sort("a-1", "a-2"), Ordering::Less);
    assert_eq!(natural_sort("a_1", "a_2"), Ordering::Less);
    assert_eq!(natural_sort("a.1", "a.2"), Ordering::Less);

    // Unicode
    assert_eq!(natural_sort("文1", "文2"), Ordering::Less);
    assert_eq!(natural_sort("文2", "文10"), Ordering::Less);
    assert_eq!(natural_sort("🔤1", "🔤2"), Ordering::Less);

    // Empty and special cases
    assert_eq!(natural_sort("", ""), Ordering::Equal);
    assert_eq!(natural_sort("", "a"), Ordering::Less);
    assert_eq!(natural_sort("a", ""), Ordering::Greater);
    // One space versus two spaces: the shorter string is a strict prefix of
    // the longer one and must sort first. The second operand is built with
    // `repeat` so the differing lengths survive any whitespace normalization
    // of this file; two identical strings could never compare as `Less`.
    let two_spaces = " ".repeat(2);
    assert_eq!(natural_sort(" ", &two_spaces), Ordering::Less);

    // Mixed everything
    assert_eq!(natural_sort("File-1.txt", "File-2.txt"), Ordering::Less);
    assert_eq!(natural_sort("File-02.txt", "File-2.txt"), Ordering::Greater);
    assert_eq!(natural_sort("File-2.txt", "File-10.txt"), Ordering::Less);
    assert_eq!(natural_sort("File_A1", "File_A2"), Ordering::Less);
    assert_eq!(natural_sort("File_a1", "File_A1"), Ordering::Less);
}
1391
/// Table-driven checks for `compare_paths`. Each case is
/// `((path_a, a_is_file), (path_b, b_is_file), expected ordering)`.
#[test]
fn test_compare_paths() {
    let cases = [
        // Basic path comparison
        (("a", true), ("b", true), Ordering::Less),
        (("b", true), ("a", true), Ordering::Greater),
        (("a", true), ("a", true), Ordering::Equal),
        // Files vs Directories
        (("a", true), ("a", false), Ordering::Greater),
        (("a", false), ("a", true), Ordering::Less),
        (("b", false), ("a", true), Ordering::Less),
        // Extensions
        (("a.txt", true), ("a.md", true), Ordering::Greater),
        (("a.md", true), ("a.txt", true), Ordering::Less),
        (("a", true), ("a.txt", true), Ordering::Less),
        // Nested paths
        (("dir/a", true), ("dir/b", true), Ordering::Less),
        (("dir1/a", true), ("dir2/a", true), Ordering::Less),
        (("dir/sub/a", true), ("dir/a", true), Ordering::Less),
        // Case sensitivity in paths
        (("Dir/file", true), ("dir/file", true), Ordering::Greater),
        (("dir/File", true), ("dir/file", true), Ordering::Greater),
        (("dir/file", true), ("Dir/File", true), Ordering::Less),
        // Hidden files and special names
        ((".hidden", true), ("visible", true), Ordering::Less),
        (("_special", true), ("normal", true), Ordering::Less),
        ((".config", false), (".data", false), Ordering::Less),
        // Mixed numeric paths
        (("dir1/file", true), ("dir2/file", true), Ordering::Less),
        (("dir2/file", true), ("dir10/file", true), Ordering::Less),
        (("dir02/file", true), ("dir2/file", true), Ordering::Greater),
        // Root paths
        (("/a", true), ("/b", true), Ordering::Less),
        (("/", false), ("/a", true), Ordering::Less),
        // Complex real-world examples
        (
            ("project/src/main.rs", true),
            ("project/src/lib.rs", true),
            Ordering::Greater,
        ),
        (
            ("project/tests/test_1.rs", true),
            ("project/tests/test_2.rs", true),
            Ordering::Less,
        ),
        (
            ("project/v1.0.0/README.md", true),
            ("project/v1.10.0/README.md", true),
            Ordering::Less,
        ),
    ];

    for ((a, is_a_file), (b, is_b_file), expected) in cases {
        let actual = compare_paths((Path::new(a), is_a_file), (Path::new(b), is_b_file));
        assert_eq!(
            actual, expected,
            "comparing {a:?} (file: {is_a_file}) with {b:?} (file: {is_b_file})"
        );
    }
}
1477
/// Letter case only breaks ties: different letters are ordered regardless of
/// case, and for the same letter the lowercase form sorts first.
#[test]
fn test_natural_sort_case_sensitivity() {
    let cases = [
        // Same letter different case - lowercase should come first
        ("a", "A", Ordering::Less),
        ("A", "a", Ordering::Greater),
        ("a", "a", Ordering::Equal),
        ("A", "A", Ordering::Equal),
        // Mixed case strings
        ("aaa", "AAA", Ordering::Less),
        ("AAA", "aaa", Ordering::Greater),
        ("aAa", "AaA", Ordering::Less),
        // Different letters
        ("a", "b", Ordering::Less),
        ("A", "b", Ordering::Less),
        ("a", "B", Ordering::Less),
    ];

    for (left, right, expected) in cases {
        assert_eq!(
            natural_sort(left, right),
            expected,
            "comparing {left:?} with {right:?}"
        );
    }
}
1496
/// Digit runs are compared by numeric value, including values too large for
/// `u128`; leading zeros and letter case act as tie-breakers.
#[test]
fn test_natural_sort_with_numbers() {
    let cases = [
        // Basic number ordering
        ("file1", "file2", Ordering::Less),
        ("file2", "file10", Ordering::Less),
        ("file10", "file2", Ordering::Greater),
        // Numbers in different positions
        ("1file", "2file", Ordering::Less),
        ("file1text", "file2text", Ordering::Less),
        ("text1file", "text2file", Ordering::Less),
        // Multiple numbers in string
        ("file1-2", "file1-10", Ordering::Less),
        ("2-1file", "10-1file", Ordering::Less),
        // Leading zeros
        ("file002", "file2", Ordering::Greater),
        ("file002", "file10", Ordering::Less),
        // Very large numbers
        (
            "file999999999999999999999",
            "file999999999999999999998",
            Ordering::Greater,
        ),
        // u128 edge cases
        //
        // Numbers near u128::MAX (340,282,366,920,938,463,463,374,607,431,768,211,455)
        (
            "file340282366920938463463374607431768211454",
            "file340282366920938463463374607431768211455",
            Ordering::Less,
        ),
        // Equal length numbers that overflow u128
        (
            "file340282366920938463463374607431768211456",
            "file340282366920938463463374607431768211455",
            Ordering::Greater,
        ),
        // Different length numbers that overflow u128
        (
            "file3402823669209384634633746074317682114560",
            "file340282366920938463463374607431768211455",
            Ordering::Greater,
        ),
        // Leading zeros with numbers near u128::MAX
        (
            "file0340282366920938463463374607431768211455",
            "file340282366920938463463374607431768211455",
            Ordering::Greater,
        ),
        // Very large numbers with different lengths (both overflow u128)
        (
            "file999999999999999999999999999999999999999999999999",
            "file9999999999999999999999999999999999999999999999999",
            Ordering::Less,
        ),
        // Mixed case with numbers
        ("File1", "file2", Ordering::Greater),
        ("file1", "File2", Ordering::Less),
    ];

    for (left, right, expected) in cases {
        assert_eq!(
            natural_sort(left, right),
            expected,
            "comparing {left:?} with {right:?}"
        );
    }
}
1574
/// Corner cases for `natural_sort`: empty strings, punctuation, non-ASCII
/// characters, and digits mixed with punctuation.
#[test]
fn test_natural_sort_edge_cases() {
    // Asserts that `natural_sort(left, right)` yields `expected`.
    let check = |left: &str, right: &str, expected: Ordering| {
        assert_eq!(
            natural_sort(left, right),
            expected,
            "comparing {left:?} with {right:?}"
        );
    };

    // Empty strings
    check("", "", Ordering::Equal);
    check("", "a", Ordering::Less);
    check("a", "", Ordering::Greater);

    // Special characters
    check("file-1", "file_1", Ordering::Less);
    check("file.1", "file_1", Ordering::Less);
    check("file 1", "file_1", Ordering::Less);

    // Unicode characters (code points noted per case)
    // 9312 vs 9313
    check("file①", "file②", Ordering::Less);
    // 9321 vs 9313
    check("file⑩", "file②", Ordering::Greater);
    // 28450 vs 23383
    check("file漢", "file字", Ordering::Greater);

    // Mixed alphanumeric with special chars
    check("file-1a", "file-1b", Ordering::Less);
    check("file-1.2", "file-1.10", Ordering::Less);
    check("file-1.10", "file-1.2", Ordering::Greater);
}
1600}