1use anyhow::Context;
2use globset::{Glob, GlobSet, GlobSetBuilder};
3use regex::Regex;
4use serde::{Deserialize, Serialize};
5use std::cmp::Ordering;
6use std::fmt::{Display, Formatter};
7use std::mem;
8use std::path::StripPrefixError;
9use std::sync::{Arc, OnceLock};
10use std::{
11 ffi::OsStr,
12 path::{Path, PathBuf},
13 sync::LazyLock,
14};
15
16static HOME_DIR: OnceLock<PathBuf> = OnceLock::new();
17
18/// Returns the path to the user's home directory.
19pub fn home_dir() -> &'static PathBuf {
20 HOME_DIR.get_or_init(|| {
21 if cfg!(any(test, feature = "test-support")) {
22 if cfg!(target_os = "macos") {
23 PathBuf::from("/Users/zed")
24 } else if cfg!(target_os = "windows") {
25 PathBuf::from("C:\\Users\\zed")
26 } else {
27 PathBuf::from("/home/zed")
28 }
29 } else {
30 dirs::home_dir().expect("failed to determine home directory")
31 }
32 })
33}
34
/// Convenience methods for path-like values.
pub trait PathExt {
    /// Compacts a given file path by replacing the user's home directory
    /// prefix with a tilde (`~`).
    ///
    /// # Returns
    ///
    /// * A `PathBuf` containing the compacted file path. If the input path
    ///   does not have the user's home directory prefix, or if we are not on
    ///   Linux, FreeBSD or macOS, the original path is returned unchanged.
    fn compact(&self) -> PathBuf;

    /// Returns a file's extension or, if the file is hidden, its name without the leading dot
    fn extension_or_hidden_file_name(&self) -> Option<&str>;

    /// Builds `Self` from the raw bytes of a path.
    ///
    /// On Unix the bytes are wrapped as-is. On Windows the bytes must form a
    /// valid WTF-8 sequence.
    ///
    /// # Errors
    ///
    /// On Windows, returns an error if `bytes` is not valid WTF-8.
    fn try_from_bytes<'a>(bytes: &'a [u8]) -> anyhow::Result<Self>
    where
        Self: From<&'a Path>,
    {
        #[cfg(unix)]
        {
            use std::os::unix::prelude::OsStrExt;
            Ok(Self::from(Path::new(OsStr::from_bytes(bytes))))
        }
        #[cfg(windows)]
        {
            use tendril::fmt::{Format, WTF8};
            WTF8::validate(bytes)
                .then(|| {
                    // SAFETY: the bytes were just validated as a WTF-8 sequence.
                    Self::from(Path::new(unsafe {
                        OsStr::from_encoded_bytes_unchecked(bytes)
                    }))
                })
                .with_context(|| format!("Invalid WTF-8 sequence: {bytes:?}"))
        }
    }

    /// Converts a local path to one that can be used inside of WSL.
    /// Returns `None` if the path cannot be converted into a WSL one (network share).
    fn local_to_wsl(&self) -> Option<PathBuf>;

    /// Returns a file's "full" joined collection of extensions, in the case where a file does not
    /// just have a singular extension but instead has multiple (e.g File.tar.gz, Component.stories.tsx)
    ///
    /// Will provide back the extensions joined together such as tar.gz or stories.tsx
    fn multiple_extensions(&self) -> Option<String>;

    /// Try to make a shell-safe representation of the path.
    ///
    /// For Unix, the path is escaped to be safe for POSIX shells
    fn try_shell_safe(&self) -> anyhow::Result<String>;
}
87
88impl<T: AsRef<Path>> PathExt for T {
89 fn compact(&self) -> PathBuf {
90 if cfg!(any(target_os = "linux", target_os = "freebsd")) || cfg!(target_os = "macos") {
91 match self.as_ref().strip_prefix(home_dir().as_path()) {
92 Ok(relative_path) => {
93 let mut shortened_path = PathBuf::new();
94 shortened_path.push("~");
95 shortened_path.push(relative_path);
96 shortened_path
97 }
98 Err(_) => self.as_ref().to_path_buf(),
99 }
100 } else {
101 self.as_ref().to_path_buf()
102 }
103 }
104
105 fn extension_or_hidden_file_name(&self) -> Option<&str> {
106 let path = self.as_ref();
107 let file_name = path.file_name()?.to_str()?;
108 if file_name.starts_with('.') {
109 return file_name.strip_prefix('.');
110 }
111
112 path.extension()
113 .and_then(|e| e.to_str())
114 .or_else(|| path.file_stem()?.to_str())
115 }
116
117 fn local_to_wsl(&self) -> Option<PathBuf> {
118 let mut new_path = PathBuf::new();
119 for component in self.as_ref().components() {
120 match component {
121 std::path::Component::Prefix(prefix) => {
122 let drive_letter = prefix.as_os_str().to_string_lossy().to_lowercase();
123 let drive_letter = drive_letter.strip_suffix(':')?;
124
125 new_path.push(format!("/mnt/{}", drive_letter));
126 }
127 std::path::Component::RootDir => {}
128 _ => new_path.push(component),
129 }
130 }
131
132 Some(new_path)
133 }
134
135 fn multiple_extensions(&self) -> Option<String> {
136 let path = self.as_ref();
137 let file_name = path.file_name()?.to_str()?;
138
139 let parts: Vec<&str> = file_name
140 .split('.')
141 // Skip the part with the file name extension
142 .skip(1)
143 .collect();
144
145 if parts.len() < 2 {
146 return None;
147 }
148
149 Some(parts.join("."))
150 }
151
152 fn try_shell_safe(&self) -> anyhow::Result<String> {
153 #[cfg(target_os = "windows")]
154 {
155 Ok(self.as_ref().to_string_lossy().to_string())
156 }
157
158 #[cfg(not(target_os = "windows"))]
159 {
160 let path_str = self
161 .as_ref()
162 .to_str()
163 .with_context(|| "Path contains invalid UTF-8")?;
164
165 // As of writing, this can only be fail if the path contains a null byte, which shouldn't be possible
166 // but shlex has annotated the error as #[non_exhaustive] so we can't make it a compile error if other
167 // errors are introduced in the future :(
168 Ok(shlex::try_quote(path_str)?.into_owned())
169 }
170 }
171}
172
/// In memory, this is identical to `Path`. On non-Windows conversions to this type are no-ops. On
/// Windows, these conversions sanitize verbatim (UNC) paths by removing the `\\?\` prefix.
#[derive(Eq, PartialEq, Hash, Ord, PartialOrd)]
#[repr(transparent)]
pub struct SanitizedPath(Path);

impl SanitizedPath {
    /// Borrows `path` as a `SanitizedPath`, simplifying it first on Windows.
    pub fn new<T: AsRef<Path> + ?Sized>(path: &T) -> &Self {
        #[cfg(not(target_os = "windows"))]
        return Self::unchecked_new(path.as_ref());

        #[cfg(target_os = "windows")]
        return Self::unchecked_new(dunce::simplified(path.as_ref()));
    }

    /// Borrows `path` as a `SanitizedPath` without performing any sanitization.
    pub fn unchecked_new<T: AsRef<Path> + ?Sized>(path: &T) -> &Self {
        let path: &Path = path.as_ref();
        // SAFETY: `SanitizedPath` is `#[repr(transparent)]` over `Path`, so both types have
        // the same layout and a pointer to one is a valid pointer to the other.
        unsafe { &*(path as *const Path as *const Self) }
    }

    /// Converts an `Arc<Path>` into an `Arc<SanitizedPath>`.
    ///
    /// On non-Windows platforms this reuses the existing allocation; on Windows the path is
    /// sanitized and copied into a new allocation.
    pub fn from_arc(path: Arc<Path>) -> Arc<Self> {
        #[cfg(not(target_os = "windows"))]
        return Self::arc_from_path_unchecked(path);

        // TODO: could avoid allocating here if dunce::simplified results in the same path
        #[cfg(target_os = "windows")]
        return Self::new(&path).into();
    }

    /// Sanitizes `path` and copies it into a freshly allocated `Arc`.
    pub fn new_arc<T: AsRef<Path> + ?Sized>(path: &T) -> Arc<Self> {
        Self::new(path).into()
    }

    /// Converts an `Arc<SanitizedPath>` back into an `Arc<Path>`, reusing the allocation.
    pub fn cast_arc(path: Arc<Self>) -> Arc<Path> {
        // SAFETY: the pointer comes from `Arc::into_raw`, and `SanitizedPath` is
        // `#[repr(transparent)]` over `Path`, so the pointee has the same size and alignment.
        unsafe { Arc::from_raw(Arc::into_raw(path) as *const Path) }
    }

    /// Views a borrowed `Arc<SanitizedPath>` as a borrowed `Arc<Path>`.
    pub fn cast_arc_ref(path: &Arc<Self>) -> &Arc<Path> {
        // SAFETY: `Path` and `SanitizedPath` have the same repr and Drop impl. There is no
        // safe or raw-pointer API for re-typing a *borrowed* `Arc`, so this relies on
        // `Arc<SanitizedPath>` and `Arc<Path>` being laid out identically.
        unsafe { mem::transmute::<&Arc<Self>, &Arc<Path>>(path) }
    }

    /// Reinterprets an `Arc<Path>` as an `Arc<SanitizedPath>` without sanitizing or reallocating.
    fn arc_from_path_unchecked(path: Arc<Path>) -> Arc<Self> {
        // SAFETY: the pointer comes from `Arc::into_raw`, and `SanitizedPath` is
        // `#[repr(transparent)]` over `Path`, so the pointee has the same size and alignment.
        unsafe { Arc::from_raw(Arc::into_raw(path) as *const Self) }
    }

    /// Returns `true` if `prefix` is a component-wise prefix of this path.
    pub fn starts_with(&self, prefix: &Self) -> bool {
        self.0.starts_with(&prefix.0)
    }

    /// Returns the underlying `Path`.
    pub fn as_path(&self) -> &Path {
        &self.0
    }

    /// See [`Path::file_name`].
    pub fn file_name(&self) -> Option<&std::ffi::OsStr> {
        self.0.file_name()
    }

    /// See [`Path::extension`].
    pub fn extension(&self) -> Option<&std::ffi::OsStr> {
        self.0.extension()
    }

    /// See [`Path::join`]. The result is a plain `PathBuf`.
    pub fn join<P: AsRef<Path>>(&self, path: P) -> PathBuf {
        self.0.join(path)
    }

    /// See [`Path::parent`]. The parent is wrapped without being sanitized again.
    pub fn parent(&self) -> Option<&Self> {
        self.0.parent().map(|parent| Self::unchecked_new(parent))
    }

    /// See [`Path::strip_prefix`].
    pub fn strip_prefix(&self, base: &Self) -> Result<&Path, StripPrefixError> {
        self.0.strip_prefix(base.as_path())
    }

    /// See [`Path::to_str`].
    pub fn to_str(&self) -> Option<&str> {
        self.0.to_str()
    }

    /// Copies the path into an owned `PathBuf`.
    pub fn to_path_buf(&self) -> PathBuf {
        self.0.to_path_buf()
    }
}

impl std::fmt::Debug for SanitizedPath {
    fn fmt(&self, formatter: &mut Formatter<'_>) -> std::fmt::Result {
        std::fmt::Debug::fmt(&self.0, formatter)
    }
}

impl Display for SanitizedPath {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0.display())
    }
}

impl From<&SanitizedPath> for Arc<SanitizedPath> {
    /// Copies the already-sanitized path into a new `Arc`.
    fn from(sanitized_path: &SanitizedPath) -> Self {
        SanitizedPath::arc_from_path_unchecked(Arc::from(&sanitized_path.0))
    }
}

impl From<&SanitizedPath> for PathBuf {
    fn from(sanitized_path: &SanitizedPath) -> Self {
        sanitized_path.as_path().into()
    }
}

impl AsRef<Path> for SanitizedPath {
    fn as_ref(&self) -> &Path {
        &self.0
    }
}
285
286#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
287pub enum PathStyle {
288 Posix,
289 Windows,
290}
291
292impl PathStyle {
293 #[cfg(target_os = "windows")]
294 pub const fn local() -> Self {
295 PathStyle::Windows
296 }
297
298 #[cfg(not(target_os = "windows"))]
299 pub const fn local() -> Self {
300 PathStyle::Posix
301 }
302
303 #[inline]
304 pub fn separator(&self) -> &'static str {
305 match self {
306 PathStyle::Posix => "/",
307 PathStyle::Windows => "\\",
308 }
309 }
310
311 pub fn is_windows(&self) -> bool {
312 *self == PathStyle::Windows
313 }
314
315 pub fn join(self, left: impl AsRef<Path>, right: impl AsRef<Path>) -> Option<String> {
316 let right = right.as_ref().to_str()?;
317 if is_absolute(right, self) {
318 return None;
319 }
320 let left = left.as_ref().to_str()?;
321 if left.is_empty() {
322 Some(right.into())
323 } else {
324 Some(format!(
325 "{left}{}{right}",
326 if left.ends_with(self.separator()) {
327 ""
328 } else {
329 self.separator()
330 }
331 ))
332 }
333 }
334
335 pub fn split(self, path_like: &str) -> (Option<&str>, &str) {
336 let Some(pos) = path_like.rfind(self.separator()) else {
337 return (None, path_like);
338 };
339 let filename_start = pos + self.separator().len();
340 (
341 Some(&path_like[..filename_start]),
342 &path_like[filename_start..],
343 )
344 }
345}
346
347#[derive(Debug, Clone)]
348pub struct RemotePathBuf {
349 style: PathStyle,
350 string: String,
351}
352
353impl RemotePathBuf {
354 pub fn new(string: String, style: PathStyle) -> Self {
355 Self { style, string }
356 }
357
358 pub fn from_str(path: &str, style: PathStyle) -> Self {
359 Self::new(path.to_string(), style)
360 }
361
362 pub fn path_style(&self) -> PathStyle {
363 self.style
364 }
365
366 pub fn to_proto(self) -> String {
367 self.string
368 }
369}
370
371impl Display for RemotePathBuf {
372 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
373 write!(f, "{}", self.string)
374 }
375}
376
377pub fn is_absolute(path_like: &str, path_style: PathStyle) -> bool {
378 path_like.starts_with('/')
379 || path_style == PathStyle::Windows
380 && (path_like.starts_with('\\')
381 || path_like
382 .chars()
383 .next()
384 .is_some_and(|c| c.is_ascii_alphabetic())
385 && path_like[1..]
386 .strip_prefix(':')
387 .is_some_and(|path| path.starts_with('/') || path.starts_with('\\')))
388}
389
/// A delimiter to use in `path_query:row_number:column_number` strings parsing.
pub const FILE_ROW_COLUMN_DELIMITER: char = ':';

/// Verbose-mode regex that splits a file name from a trailing row/column suffix.
///
/// It has three top-level alternatives, tried in order: `name:(row,col)`-style,
/// `name(row,col)`-style, and `name:row:col`-style. Every branch contains exactly
/// three capture groups (file name, row, column); the empty `()` groups stand in
/// for a missing row or column so that each branch exposes the same group count.
const ROW_COL_CAPTURE_REGEX: &str = r"(?xs)
 ([^\(]+)\:(?:
 \((\d+)[,:](\d+)\) # filename:(row,column), filename:(row:column)
 |
 \((\d+)\)() # filename:(row)
 )
 |
 ([^\(]+)(?:
 \((\d+)[,:](\d+)\) # filename(row,column), filename(row:column)
 |
 \((\d+)\)() # filename(row)
 )
 |
 (.+?)(?:
 \:+(\d+)\:(\d+)\:*$ # filename:row:column
 |
 \:+(\d+)\:*()$ # filename:row
 |
 \:+()()$
 )";
413
/// A representation of a path-like string with optional row and column numbers.
/// Matching values example: `te`, `test.rs:22`, `te:22:5`, `test.c(22)`, `test.c(22,5)`, etc.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)]
pub struct PathWithPosition {
    /// The path with any position suffix removed.
    pub path: PathBuf,
    /// The row parsed from the suffix, if there was one.
    pub row: Option<u32>,
    // Absent if row is absent.
    pub column: Option<u32>,
}
423
impl PathWithPosition {
    /// Returns a PathWithPosition from a path.
    pub fn from_path(path: PathBuf) -> Self {
        Self {
            path,
            row: None,
            column: None,
        }
    }

    /// Parses a string that possibly has `:row:column` or `(row, column)` suffix.
    /// Parenthesis format is used by [MSBuild](https://learn.microsoft.com/en-us/visualstudio/msbuild/msbuild-diagnostic-format-for-tasks) compatible tools
    /// Ignores trailing `:`s, so `test.rs:22:` is parsed as `test.rs:22`.
    /// If the suffix parsing fails, the whole string is parsed as a path.
    ///
    /// Be mindful that `test_file:10:1:` is a valid posix filename.
    /// `PathWithPosition` assumes that the ending position-like suffix is **not** part of the filename.
    ///
    /// # Examples
    ///
    /// ```
    /// # use util::paths::PathWithPosition;
    /// # use std::path::PathBuf;
    /// assert_eq!(PathWithPosition::parse_str("test_file"), PathWithPosition {
    ///     path: PathBuf::from("test_file"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file:10"), PathWithPosition {
    ///     path: PathBuf::from("test_file"),
    ///     row: Some(10),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1:2"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: Some(2),
    /// });
    /// ```
    ///
    /// # Expected parsing results when encountering ill-formatted inputs.
    /// ```
    /// # use util::paths::PathWithPosition;
    /// # use std::path::PathBuf;
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:a"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs:a"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:a:b"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs:a:b"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: None,
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs::1"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1::"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs::1:2"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs"),
    ///     row: Some(1),
    ///     column: Some(2),
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1::2"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs:1"),
    ///     row: Some(2),
    ///     column: None,
    /// });
    /// assert_eq!(PathWithPosition::parse_str("test_file.rs:1:2:3"), PathWithPosition {
    ///     path: PathBuf::from("test_file.rs:1"),
    ///     row: Some(2),
    ///     column: Some(3),
    /// });
    /// ```
    pub fn parse_str(s: &str) -> Self {
        // Only the final path component is inspected for a position suffix.
        let trimmed = s.trim();
        let path = Path::new(trimmed);
        let maybe_file_name_with_row_col = path.file_name().unwrap_or_default().to_string_lossy();
        if maybe_file_name_with_row_col.is_empty() {
            // NOTE(review): this returns the untrimmed input `s`, not `trimmed` —
            // presumably intentional; confirm.
            return Self {
                path: Path::new(s).to_path_buf(),
                row: None,
                column: None,
            };
        }

        // Let's avoid repeated init cost on this. It is subject to thread contention, but
        // so far this code isn't called from multiple hot paths. Getting contention here
        // in the future seems unlikely.
        static SUFFIX_RE: LazyLock<Regex> =
            LazyLock::new(|| Regex::new(ROW_COL_CAPTURE_REGEX).unwrap());
        match SUFFIX_RE
            .captures(&maybe_file_name_with_row_col)
            .map(|caps| caps.extract())
        {
            Some((_, [file_name, maybe_row, maybe_column])) => {
                // Empty captures (a missing row or column) fail to parse and become `None`.
                let row = maybe_row.parse::<u32>().ok();
                let column = maybe_column.parse::<u32>().ok();

                // Whatever follows the captured file name is treated as the suffix, and
                // that many bytes are cut off the end of the trimmed input.
                let suffix_length = maybe_file_name_with_row_col.len() - file_name.len();
                let path_without_suffix = &trimmed[..trimmed.len() - suffix_length];

                Self {
                    path: Path::new(path_without_suffix).to_path_buf(),
                    row,
                    column,
                }
            }
            None => {
                // The `ROW_COL_CAPTURE_REGEX` deals with separated digits only,
                // but in reality there could be `foo/bar.py:22:in` inputs which we want to match too.
                // The regex mentioned is not very extendable with "digit or random string" checks, so do this here instead.
                let delimiter = ':';
                // Split off at most the last two `:`-separated parts, then restore
                // left-to-right order: path, row candidate, column candidate.
                // NOTE(review): this branch splits the untrimmed `s` — confirm that is intended.
                let mut path_parts = s
                    .rsplitn(3, delimiter)
                    .collect::<Vec<_>>()
                    .into_iter()
                    .rev()
                    .fuse();
                let mut path_string = path_parts.next().expect("rsplitn should have the rest of the string as its last parameter that we reversed").to_owned();
                let mut row = None;
                let mut column = None;
                if let Some(maybe_row) = path_parts.next() {
                    if let Ok(parsed_row) = maybe_row.parse::<u32>() {
                        row = Some(parsed_row);
                        // A column candidate that is not a number is dropped.
                        if let Some(parsed_column) = path_parts
                            .next()
                            .and_then(|maybe_col| maybe_col.parse::<u32>().ok())
                        {
                            column = Some(parsed_column);
                        }
                    } else {
                        // Not a row after all: glue it back onto the path.
                        path_string.push(delimiter);
                        path_string.push_str(maybe_row);
                    }
                }
                // Any part that was not consumed above still belongs to the path.
                for split in path_parts {
                    path_string.push(delimiter);
                    path_string.push_str(split);
                }

                Self {
                    path: PathBuf::from(path_string),
                    row,
                    column,
                }
            }
        }
    }

    /// Applies `mapping` to the path while keeping the row and column unchanged.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `mapping`, if any.
    pub fn map_path<E>(
        self,
        mapping: impl FnOnce(PathBuf) -> Result<PathBuf, E>,
    ) -> Result<PathWithPosition, E> {
        Ok(PathWithPosition {
            path: mapping(self.path)?,
            row: self.row,
            column: self.column,
        })
    }

    /// Formats the value as `path`, `path:row` or `path:row:column`, rendering the
    /// path with `path_to_string`. The column is only written when a row is present.
    pub fn to_string(&self, path_to_string: impl Fn(&PathBuf) -> String) -> String {
        let path_string = path_to_string(&self.path);
        if let Some(row) = self.row {
            if let Some(column) = self.column {
                format!("{path_string}:{row}:{column}")
            } else {
                format!("{path_string}:{row}")
            }
        } else {
            path_string
        }
    }
}
619
/// Matches paths against a set of glob patterns.
#[derive(Clone, Debug)]
pub struct PathMatcher {
    /// The pattern strings the matcher was built from; equality of matchers is
    /// decided on these alone.
    sources: Vec<String>,
    /// The compiled set of all patterns.
    glob: GlobSet,
    /// Determines which separator is appended when retrying a match with a trailing separator.
    path_style: PathStyle,
}
626
627// impl std::fmt::Display for PathMatcher {
628// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
629// self.sources.fmt(f)
630// }
631// }
632
633impl PartialEq for PathMatcher {
634 fn eq(&self, other: &Self) -> bool {
635 self.sources.eq(&other.sources)
636 }
637}
638
639impl Eq for PathMatcher {}
640
641impl PathMatcher {
642 pub fn new(
643 globs: impl IntoIterator<Item = impl AsRef<str>>,
644 path_style: PathStyle,
645 ) -> Result<Self, globset::Error> {
646 let globs = globs
647 .into_iter()
648 .map(|as_str| Glob::new(as_str.as_ref()))
649 .collect::<Result<Vec<_>, _>>()?;
650 let sources = globs.iter().map(|glob| glob.glob().to_owned()).collect();
651 let mut glob_builder = GlobSetBuilder::new();
652 for single_glob in globs {
653 glob_builder.add(single_glob);
654 }
655 let glob = glob_builder.build()?;
656 Ok(PathMatcher {
657 glob,
658 sources,
659 path_style,
660 })
661 }
662
663 pub fn sources(&self) -> &[String] {
664 &self.sources
665 }
666
667 pub fn is_match<P: AsRef<Path>>(&self, other: P) -> bool {
668 let other_path = other.as_ref();
669 self.sources.iter().any(|source| {
670 let as_bytes = other_path.as_os_str().as_encoded_bytes();
671 as_bytes.starts_with(source.as_bytes()) || as_bytes.ends_with(source.as_bytes())
672 }) || self.glob.is_match(other_path)
673 || self.check_with_end_separator(other_path)
674 }
675
676 fn check_with_end_separator(&self, path: &Path) -> bool {
677 let path_str = path.to_string_lossy();
678 let separator = self.path_style.separator();
679 if path_str.ends_with(separator) {
680 false
681 } else {
682 self.glob.is_match(path_str.to_string() + separator)
683 }
684 }
685}
686
687impl Default for PathMatcher {
688 fn default() -> Self {
689 Self {
690 path_style: PathStyle::local(),
691 glob: GlobSet::empty(),
692 sources: vec![],
693 }
694 }
695}
696
/// Orders two characters case-insensitively (ASCII), breaking ties between the
/// same letter in different cases by putting the lowercase form first.
fn compare_chars(a: char, b: char) -> Ordering {
    a.to_ascii_lowercase()
        .cmp(&b.to_ascii_lowercase())
        // Same letter: `false < true`, so swapping the operands sorts lowercase first.
        .then_with(|| b.is_ascii_lowercase().cmp(&a.is_ascii_lowercase()))
}
712
/// Compares two sequences of consecutive digits for natural sorting.
///
/// This function is a core component of natural sorting that handles numeric comparison
/// in a way that feels natural to humans. It extracts and compares consecutive ASCII digit
/// sequences from two iterators, handling leading zeros and numbers of any length.
///
/// # Behavior
///
/// The function implements the following comparison rules:
/// 1. Different numeric values: Compares by actual numeric value (e.g., "2" < "10")
/// 2. Leading zeros: When values are equal, longer sequence wins (e.g., "002" > "2")
/// 3. Large numbers: Compared exactly, digit by digit, with no size limit
///
/// # Examples
///
/// ```text
/// "1" vs "2" -> Less (different values)
/// "2" vs "10" -> Less (numeric comparison)
/// "002" vs "2" -> Greater (leading zeros)
/// "10" vs "010" -> Less (leading zeros)
/// "999..." vs "1000..." -> Less (large number comparison)
/// ```
///
/// # Implementation Details
///
/// 1. Extracts consecutive digits into strings
/// 2. Strips leading zeros to obtain the significant digits
/// 3. Fewer significant digits means a smaller number
/// 4. With the same number of significant digits, lexicographic order equals numeric order
/// 5. Equal values are ordered by the original length (more leading zeros is greater)
///
/// The function advances both iterators past their respective numeric sequences,
/// regardless of the comparison result.
fn compare_numeric_segments<I>(
    a_iter: &mut std::iter::Peekable<I>,
    b_iter: &mut std::iter::Peekable<I>,
) -> Ordering
where
    I: Iterator<Item = char>,
{
    // Consume each run of ASCII digits, leaving the iterators at the first non-digit.
    let a_digits: String = std::iter::from_fn(|| a_iter.next_if(char::is_ascii_digit)).collect();
    let b_digits: String = std::iter::from_fn(|| b_iter.next_if(char::is_ascii_digit)).collect();

    // Comparing significant digits by length and then lexicographically is an exact
    // numeric comparison for arbitrarily long numbers, so no integer parsing (and no
    // overflow fallback) is needed.
    let a_significant = a_digits.trim_start_matches('0');
    let b_significant = b_digits.trim_start_matches('0');

    a_significant
        .len()
        .cmp(&b_significant.len())
        .then_with(|| a_significant.cmp(b_significant))
        // Same value: the sequence with more leading zeros is greater.
        .then_with(|| a_digits.len().cmp(&b_digits.len()))
}
801
802/// Performs natural sorting comparison between two strings.
803///
804/// Natural sorting is an ordering that handles numeric sequences in a way that matches human expectations.
805/// For example, "file2" comes before "file10" (unlike standard lexicographic sorting).
806///
807/// # Characteristics
808///
809/// * Case-sensitive with lowercase priority: When comparing same letters, lowercase comes before uppercase
810/// * Numbers are compared by numeric value, not character by character
811/// * Leading zeros affect ordering when numeric values are equal
812/// * Can handle numbers larger than u128::MAX (falls back to string comparison)
813///
814/// # Algorithm
815///
816/// The function works by:
817/// 1. Processing strings character by character
818/// 2. When encountering digits, treating consecutive digits as a single number
819/// 3. Comparing numbers by their numeric value rather than lexicographically
820/// 4. For non-numeric characters, using case-sensitive comparison with lowercase priority
821fn natural_sort(a: &str, b: &str) -> Ordering {
822 let mut a_iter = a.chars().peekable();
823 let mut b_iter = b.chars().peekable();
824
825 loop {
826 match (a_iter.peek(), b_iter.peek()) {
827 (None, None) => return Ordering::Equal,
828 (None, _) => return Ordering::Less,
829 (_, None) => return Ordering::Greater,
830 (Some(&a_char), Some(&b_char)) => {
831 if a_char.is_ascii_digit() && b_char.is_ascii_digit() {
832 match compare_numeric_segments(&mut a_iter, &mut b_iter) {
833 Ordering::Equal => continue,
834 ordering => return ordering,
835 }
836 } else {
837 match compare_chars(a_char, b_char) {
838 Ordering::Equal => {
839 a_iter.next();
840 b_iter.next();
841 }
842 ordering => return ordering,
843 }
844 }
845 }
846 }
847 }
848}
849
850pub fn compare_paths(
851 (path_a, a_is_file): (&Path, bool),
852 (path_b, b_is_file): (&Path, bool),
853) -> Ordering {
854 let mut components_a = path_a.components().peekable();
855 let mut components_b = path_b.components().peekable();
856
857 loop {
858 match (components_a.next(), components_b.next()) {
859 (Some(component_a), Some(component_b)) => {
860 let a_is_file = components_a.peek().is_none() && a_is_file;
861 let b_is_file = components_b.peek().is_none() && b_is_file;
862
863 let ordering = a_is_file.cmp(&b_is_file).then_with(|| {
864 let path_a = Path::new(component_a.as_os_str());
865 let path_string_a = if a_is_file {
866 path_a.file_stem()
867 } else {
868 path_a.file_name()
869 }
870 .map(|s| s.to_string_lossy());
871
872 let path_b = Path::new(component_b.as_os_str());
873 let path_string_b = if b_is_file {
874 path_b.file_stem()
875 } else {
876 path_b.file_name()
877 }
878 .map(|s| s.to_string_lossy());
879
880 let compare_components = match (path_string_a, path_string_b) {
881 (Some(a), Some(b)) => natural_sort(&a, &b),
882 (Some(_), None) => Ordering::Greater,
883 (None, Some(_)) => Ordering::Less,
884 (None, None) => Ordering::Equal,
885 };
886
887 compare_components.then_with(|| {
888 if a_is_file && b_is_file {
889 let ext_a = path_a.extension().unwrap_or_default();
890 let ext_b = path_b.extension().unwrap_or_default();
891 ext_a.cmp(ext_b)
892 } else {
893 Ordering::Equal
894 }
895 })
896 });
897
898 if !ordering.is_eq() {
899 return ordering;
900 }
901 }
902 (Some(_), None) => break Ordering::Greater,
903 (None, Some(_)) => break Ordering::Less,
904 (None, None) => break Ordering::Equal,
905 }
906 }
907}
908
909#[cfg(test)]
910mod tests {
911 use super::*;
912 use util_macros::perf;
913
914 #[perf]
915 fn compare_paths_with_dots() {
916 let mut paths = vec![
917 (Path::new("test_dirs"), false),
918 (Path::new("test_dirs/1.46"), false),
919 (Path::new("test_dirs/1.46/bar_1"), true),
920 (Path::new("test_dirs/1.46/bar_2"), true),
921 (Path::new("test_dirs/1.45"), false),
922 (Path::new("test_dirs/1.45/foo_2"), true),
923 (Path::new("test_dirs/1.45/foo_1"), true),
924 ];
925 paths.sort_by(|&a, &b| compare_paths(a, b));
926 assert_eq!(
927 paths,
928 vec![
929 (Path::new("test_dirs"), false),
930 (Path::new("test_dirs/1.45"), false),
931 (Path::new("test_dirs/1.45/foo_1"), true),
932 (Path::new("test_dirs/1.45/foo_2"), true),
933 (Path::new("test_dirs/1.46"), false),
934 (Path::new("test_dirs/1.46/bar_1"), true),
935 (Path::new("test_dirs/1.46/bar_2"), true),
936 ]
937 );
938 let mut paths = vec![
939 (Path::new("root1/one.txt"), true),
940 (Path::new("root1/one.two.txt"), true),
941 ];
942 paths.sort_by(|&a, &b| compare_paths(a, b));
943 assert_eq!(
944 paths,
945 vec![
946 (Path::new("root1/one.txt"), true),
947 (Path::new("root1/one.two.txt"), true),
948 ]
949 );
950 }
951
952 #[perf]
953 fn compare_paths_with_same_name_different_extensions() {
954 let mut paths = vec![
955 (Path::new("test_dirs/file.rs"), true),
956 (Path::new("test_dirs/file.txt"), true),
957 (Path::new("test_dirs/file.md"), true),
958 (Path::new("test_dirs/file"), true),
959 (Path::new("test_dirs/file.a"), true),
960 ];
961 paths.sort_by(|&a, &b| compare_paths(a, b));
962 assert_eq!(
963 paths,
964 vec![
965 (Path::new("test_dirs/file"), true),
966 (Path::new("test_dirs/file.a"), true),
967 (Path::new("test_dirs/file.md"), true),
968 (Path::new("test_dirs/file.rs"), true),
969 (Path::new("test_dirs/file.txt"), true),
970 ]
971 );
972 }
973
974 #[perf]
975 fn compare_paths_case_semi_sensitive() {
976 let mut paths = vec![
977 (Path::new("test_DIRS"), false),
978 (Path::new("test_DIRS/foo_1"), true),
979 (Path::new("test_DIRS/foo_2"), true),
980 (Path::new("test_DIRS/bar"), true),
981 (Path::new("test_DIRS/BAR"), true),
982 (Path::new("test_dirs"), false),
983 (Path::new("test_dirs/foo_1"), true),
984 (Path::new("test_dirs/foo_2"), true),
985 (Path::new("test_dirs/bar"), true),
986 (Path::new("test_dirs/BAR"), true),
987 ];
988 paths.sort_by(|&a, &b| compare_paths(a, b));
989 assert_eq!(
990 paths,
991 vec![
992 (Path::new("test_dirs"), false),
993 (Path::new("test_dirs/bar"), true),
994 (Path::new("test_dirs/BAR"), true),
995 (Path::new("test_dirs/foo_1"), true),
996 (Path::new("test_dirs/foo_2"), true),
997 (Path::new("test_DIRS"), false),
998 (Path::new("test_DIRS/bar"), true),
999 (Path::new("test_DIRS/BAR"), true),
1000 (Path::new("test_DIRS/foo_1"), true),
1001 (Path::new("test_DIRS/foo_2"), true),
1002 ]
1003 );
1004 }
1005
1006 #[perf]
1007 fn path_with_position_parse_posix_path() {
1008 // Test POSIX filename edge cases
1009 // Read more at https://en.wikipedia.org/wiki/Filename
1010 assert_eq!(
1011 PathWithPosition::parse_str("test_file"),
1012 PathWithPosition {
1013 path: PathBuf::from("test_file"),
1014 row: None,
1015 column: None
1016 }
1017 );
1018
1019 assert_eq!(
1020 PathWithPosition::parse_str("a:bc:.zip:1"),
1021 PathWithPosition {
1022 path: PathBuf::from("a:bc:.zip"),
1023 row: Some(1),
1024 column: None
1025 }
1026 );
1027
1028 assert_eq!(
1029 PathWithPosition::parse_str("one.second.zip:1"),
1030 PathWithPosition {
1031 path: PathBuf::from("one.second.zip"),
1032 row: Some(1),
1033 column: None
1034 }
1035 );
1036
1037 // Trim off trailing `:`s for otherwise valid input.
1038 assert_eq!(
1039 PathWithPosition::parse_str("test_file:10:1:"),
1040 PathWithPosition {
1041 path: PathBuf::from("test_file"),
1042 row: Some(10),
1043 column: Some(1)
1044 }
1045 );
1046
1047 assert_eq!(
1048 PathWithPosition::parse_str("test_file.rs:"),
1049 PathWithPosition {
1050 path: PathBuf::from("test_file.rs"),
1051 row: None,
1052 column: None
1053 }
1054 );
1055
1056 assert_eq!(
1057 PathWithPosition::parse_str("test_file.rs:1:"),
1058 PathWithPosition {
1059 path: PathBuf::from("test_file.rs"),
1060 row: Some(1),
1061 column: None
1062 }
1063 );
1064
1065 assert_eq!(
1066 PathWithPosition::parse_str("ab\ncd"),
1067 PathWithPosition {
1068 path: PathBuf::from("ab\ncd"),
1069 row: None,
1070 column: None
1071 }
1072 );
1073
1074 assert_eq!(
1075 PathWithPosition::parse_str("👋\nab"),
1076 PathWithPosition {
1077 path: PathBuf::from("👋\nab"),
1078 row: None,
1079 column: None
1080 }
1081 );
1082
1083 assert_eq!(
1084 PathWithPosition::parse_str("Types.hs:(617,9)-(670,28):"),
1085 PathWithPosition {
1086 path: PathBuf::from("Types.hs"),
1087 row: Some(617),
1088 column: Some(9),
1089 }
1090 );
1091 }
1092
1093 #[perf]
1094 #[cfg(not(target_os = "windows"))]
1095 fn path_with_position_parse_posix_path_with_suffix() {
1096 assert_eq!(
1097 PathWithPosition::parse_str("foo/bar:34:in"),
1098 PathWithPosition {
1099 path: PathBuf::from("foo/bar"),
1100 row: Some(34),
1101 column: None,
1102 }
1103 );
1104 assert_eq!(
1105 PathWithPosition::parse_str("foo/bar.rs:1902:::15:"),
1106 PathWithPosition {
1107 path: PathBuf::from("foo/bar.rs:1902"),
1108 row: Some(15),
1109 column: None
1110 }
1111 );
1112
1113 assert_eq!(
1114 PathWithPosition::parse_str("app-editors:zed-0.143.6:20240710-201212.log:34:"),
1115 PathWithPosition {
1116 path: PathBuf::from("app-editors:zed-0.143.6:20240710-201212.log"),
1117 row: Some(34),
1118 column: None,
1119 }
1120 );
1121
1122 assert_eq!(
1123 PathWithPosition::parse_str("crates/file_finder/src/file_finder.rs:1902:13:"),
1124 PathWithPosition {
1125 path: PathBuf::from("crates/file_finder/src/file_finder.rs"),
1126 row: Some(1902),
1127 column: Some(13),
1128 }
1129 );
1130
1131 assert_eq!(
1132 PathWithPosition::parse_str("crate/utils/src/test:today.log:34"),
1133 PathWithPosition {
1134 path: PathBuf::from("crate/utils/src/test:today.log"),
1135 row: Some(34),
1136 column: None,
1137 }
1138 );
1139 assert_eq!(
1140 PathWithPosition::parse_str("/testing/out/src/file_finder.odin(7:15)"),
1141 PathWithPosition {
1142 path: PathBuf::from("/testing/out/src/file_finder.odin"),
1143 row: Some(7),
1144 column: Some(15),
1145 }
1146 );
1147 }
1148
// Checks that Windows paths without any position suffix are returned unchanged
// by `PathWithPosition::parse_str`, including a drive-letter colon.
#[perf]
#[cfg(target_os = "windows")]
fn path_with_position_parse_windows_path() {
    let suffixless_paths = [
        "crates\\utils\\paths.rs",
        "C:\\Users\\someone\\test_file.rs",
    ];

    for raw_path in suffixless_paths {
        assert_eq!(
            PathWithPosition::parse_str(raw_path),
            PathWithPosition {
                path: PathBuf::from(raw_path),
                row: None,
                column: None
            }
        );
    }
}
1170
// Checks how `PathWithPosition::parse_str` splits Windows paths (relative,
// drive-letter and `\\?\` verbatim forms) that carry a row/column suffix.
#[perf]
#[cfg(target_os = "windows")]
fn path_with_position_parse_windows_path_with_suffix() {
    // Parses `input` and compares the result with the expected path, row and column.
    let check = |input: &str, path: &str, row, column| {
        assert_eq!(
            PathWithPosition::parse_str(input),
            PathWithPosition {
                path: PathBuf::from(path),
                row,
                column,
            }
        );
    };

    check(
        "crates\\utils\\paths.rs:101",
        "crates\\utils\\paths.rs",
        Some(101),
        None,
    );
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1:20",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
        Some(1),
        Some(20),
    );
    check(
        "C:\\Users\\someone\\test_file.rs(1902,13)",
        "C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );

    // Trim off trailing `:`s for otherwise valid input.
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );

    // With three numeric segments, the first one stays in the path.
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:13:15:",
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902",
        Some(13),
        Some(15),
    );
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902:::15:",
        "\\\\?\\C:\\Users\\someone\\test_file.rs:1902",
        Some(15),
        None,
    );

    // Parenthesized suffixes followed by a trailing `:` on verbatim paths.
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs(1902,13):",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );
    check(
        "\\\\?\\C:\\Users\\someone\\test_file.rs(1902):",
        "\\\\?\\C:\\Users\\someone\\test_file.rs",
        Some(1902),
        None,
    );

    // The same suffix shapes on drive-letter paths.
    check(
        "C:\\Users\\someone\\test_file.rs:1902:13:",
        "C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );
    check(
        "C:\\Users\\someone\\test_file.rs(1902,13):",
        "C:\\Users\\someone\\test_file.rs",
        Some(1902),
        Some(13),
    );
    check(
        "C:\\Users\\someone\\test_file.rs(1902):",
        "C:\\Users\\someone\\test_file.rs",
        Some(1902),
        None,
    );

    // Forward-slash input is expected to equal the backslash-separated path.
    check(
        "crates/utils/paths.rs:101",
        "crates\\utils\\paths.rs",
        Some(101),
        None,
    );
}
1283
1284 #[perf]
1285 fn test_path_compact() {
1286 let path: PathBuf = [
1287 home_dir().to_string_lossy().into_owned(),
1288 "some_file.txt".to_string(),
1289 ]
1290 .iter()
1291 .collect();
1292 if cfg!(any(target_os = "linux", target_os = "freebsd")) || cfg!(target_os = "macos") {
1293 assert_eq!(path.compact().to_str(), Some("~/some_file.txt"));
1294 } else {
1295 assert_eq!(path.compact().to_str(), path.to_str());
1296 }
1297 }
1298
1299 #[perf]
1300 fn test_extension_or_hidden_file_name() {
1301 // No dots in name
1302 let path = Path::new("/a/b/c/file_name.rs");
1303 assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));
1304
1305 // Single dot in name
1306 let path = Path::new("/a/b/c/file.name.rs");
1307 assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));
1308
1309 // Multiple dots in name
1310 let path = Path::new("/a/b/c/long.file.name.rs");
1311 assert_eq!(path.extension_or_hidden_file_name(), Some("rs"));
1312
1313 // Hidden file, no extension
1314 let path = Path::new("/a/b/c/.gitignore");
1315 assert_eq!(path.extension_or_hidden_file_name(), Some("gitignore"));
1316
1317 // Hidden file, with extension
1318 let path = Path::new("/a/b/c/.eslintrc.js");
1319 assert_eq!(path.extension_or_hidden_file_name(), Some("eslintrc.js"));
1320 }
1321
1322 #[perf]
1323 fn edge_of_glob() {
1324 let path = Path::new("/work/node_modules");
1325 let path_matcher =
1326 PathMatcher::new(&["**/node_modules/**".to_owned()], PathStyle::Posix).unwrap();
1327 assert!(
1328 path_matcher.is_match(path),
1329 "Path matcher should match {path:?}"
1330 );
1331 }
1332
1333 #[perf]
1334 fn project_search() {
1335 let path = Path::new("/Users/someonetoignore/work/zed/zed.dev/node_modules");
1336 let path_matcher =
1337 PathMatcher::new(&["**/node_modules/**".to_owned()], PathStyle::Posix).unwrap();
1338 assert!(
1339 path_matcher.is_match(path),
1340 "Path matcher should match {path:?}"
1341 );
1342 }
1343
// Checks that `SanitizedPath` displays a plain drive-letter path unchanged and
// displays a `\\?\`-prefixed path without that prefix.
#[perf]
#[cfg(target_os = "windows")]
fn test_sanitized_path() {
    let cases = [
        (
            "C:\\Users\\someone\\test_file.rs",
            "C:\\Users\\someone\\test_file.rs",
        ),
        (
            "\\\\?\\C:\\Users\\someone\\test_file.rs",
            "C:\\Users\\someone\\test_file.rs",
        ),
    ];

    for (raw_path, expected) in cases {
        let sanitized = SanitizedPath::new(Path::new(raw_path));
        assert_eq!(sanitized.to_string(), expected);
    }
}
1361
1362 #[perf]
1363 fn test_compare_numeric_segments() {
1364 // Helper function to create peekable iterators and test
1365 fn compare(a: &str, b: &str) -> Ordering {
1366 let mut a_iter = a.chars().peekable();
1367 let mut b_iter = b.chars().peekable();
1368
1369 let result = compare_numeric_segments(&mut a_iter, &mut b_iter);
1370
1371 // Verify iterators advanced correctly
1372 assert!(
1373 !a_iter.next().is_some_and(|c| c.is_ascii_digit()),
1374 "Iterator a should have consumed all digits"
1375 );
1376 assert!(
1377 !b_iter.next().is_some_and(|c| c.is_ascii_digit()),
1378 "Iterator b should have consumed all digits"
1379 );
1380
1381 result
1382 }
1383
1384 // Basic numeric comparisons
1385 assert_eq!(compare("0", "0"), Ordering::Equal);
1386 assert_eq!(compare("1", "2"), Ordering::Less);
1387 assert_eq!(compare("9", "10"), Ordering::Less);
1388 assert_eq!(compare("10", "9"), Ordering::Greater);
1389 assert_eq!(compare("99", "100"), Ordering::Less);
1390
1391 // Leading zeros
1392 assert_eq!(compare("0", "00"), Ordering::Less);
1393 assert_eq!(compare("00", "0"), Ordering::Greater);
1394 assert_eq!(compare("01", "1"), Ordering::Greater);
1395 assert_eq!(compare("001", "1"), Ordering::Greater);
1396 assert_eq!(compare("001", "01"), Ordering::Greater);
1397
1398 // Same value different representation
1399 assert_eq!(compare("000100", "100"), Ordering::Greater);
1400 assert_eq!(compare("100", "0100"), Ordering::Less);
1401 assert_eq!(compare("0100", "00100"), Ordering::Less);
1402
1403 // Large numbers
1404 assert_eq!(compare("9999999999", "10000000000"), Ordering::Less);
1405 assert_eq!(
1406 compare(
1407 "340282366920938463463374607431768211455", // u128::MAX
1408 "340282366920938463463374607431768211456"
1409 ),
1410 Ordering::Less
1411 );
1412 assert_eq!(
1413 compare(
1414 "340282366920938463463374607431768211456", // > u128::MAX
1415 "340282366920938463463374607431768211455"
1416 ),
1417 Ordering::Greater
1418 );
1419
1420 // Iterator advancement verification
1421 let mut a_iter = "123abc".chars().peekable();
1422 let mut b_iter = "456def".chars().peekable();
1423
1424 compare_numeric_segments(&mut a_iter, &mut b_iter);
1425
1426 assert_eq!(a_iter.collect::<String>(), "abc");
1427 assert_eq!(b_iter.collect::<String>(), "def");
1428 }
1429
1430 #[perf]
1431 fn test_natural_sort() {
1432 // Basic alphanumeric
1433 assert_eq!(natural_sort("a", "b"), Ordering::Less);
1434 assert_eq!(natural_sort("b", "a"), Ordering::Greater);
1435 assert_eq!(natural_sort("a", "a"), Ordering::Equal);
1436
1437 // Case sensitivity
1438 assert_eq!(natural_sort("a", "A"), Ordering::Less);
1439 assert_eq!(natural_sort("A", "a"), Ordering::Greater);
1440 assert_eq!(natural_sort("aA", "aa"), Ordering::Greater);
1441 assert_eq!(natural_sort("aa", "aA"), Ordering::Less);
1442
1443 // Numbers
1444 assert_eq!(natural_sort("1", "2"), Ordering::Less);
1445 assert_eq!(natural_sort("2", "10"), Ordering::Less);
1446 assert_eq!(natural_sort("02", "10"), Ordering::Less);
1447 assert_eq!(natural_sort("02", "2"), Ordering::Greater);
1448
1449 // Mixed alphanumeric
1450 assert_eq!(natural_sort("a1", "a2"), Ordering::Less);
1451 assert_eq!(natural_sort("a2", "a10"), Ordering::Less);
1452 assert_eq!(natural_sort("a02", "a2"), Ordering::Greater);
1453 assert_eq!(natural_sort("a1b", "a1c"), Ordering::Less);
1454
1455 // Multiple numeric segments
1456 assert_eq!(natural_sort("1a2", "1a10"), Ordering::Less);
1457 assert_eq!(natural_sort("1a10", "1a2"), Ordering::Greater);
1458 assert_eq!(natural_sort("2a1", "10a1"), Ordering::Less);
1459
1460 // Special characters
1461 assert_eq!(natural_sort("a-1", "a-2"), Ordering::Less);
1462 assert_eq!(natural_sort("a_1", "a_2"), Ordering::Less);
1463 assert_eq!(natural_sort("a.1", "a.2"), Ordering::Less);
1464
1465 // Unicode
1466 assert_eq!(natural_sort("文1", "文2"), Ordering::Less);
1467 assert_eq!(natural_sort("文2", "文10"), Ordering::Less);
1468 assert_eq!(natural_sort("🔤1", "🔤2"), Ordering::Less);
1469
1470 // Empty and special cases
1471 assert_eq!(natural_sort("", ""), Ordering::Equal);
1472 assert_eq!(natural_sort("", "a"), Ordering::Less);
1473 assert_eq!(natural_sort("a", ""), Ordering::Greater);
1474 assert_eq!(natural_sort(" ", " "), Ordering::Less);
1475
1476 // Mixed everything
1477 assert_eq!(natural_sort("File-1.txt", "File-2.txt"), Ordering::Less);
1478 assert_eq!(natural_sort("File-02.txt", "File-2.txt"), Ordering::Greater);
1479 assert_eq!(natural_sort("File-2.txt", "File-10.txt"), Ordering::Less);
1480 assert_eq!(natural_sort("File_A1", "File_A2"), Ordering::Less);
1481 assert_eq!(natural_sort("File_a1", "File_A1"), Ordering::Less);
1482 }
1483
1484 #[perf]
1485 fn test_compare_paths() {
1486 // Helper function for cleaner tests
1487 fn compare(a: &str, is_a_file: bool, b: &str, is_b_file: bool) -> Ordering {
1488 compare_paths((Path::new(a), is_a_file), (Path::new(b), is_b_file))
1489 }
1490
1491 // Basic path comparison
1492 assert_eq!(compare("a", true, "b", true), Ordering::Less);
1493 assert_eq!(compare("b", true, "a", true), Ordering::Greater);
1494 assert_eq!(compare("a", true, "a", true), Ordering::Equal);
1495
1496 // Files vs Directories
1497 assert_eq!(compare("a", true, "a", false), Ordering::Greater);
1498 assert_eq!(compare("a", false, "a", true), Ordering::Less);
1499 assert_eq!(compare("b", false, "a", true), Ordering::Less);
1500
1501 // Extensions
1502 assert_eq!(compare("a.txt", true, "a.md", true), Ordering::Greater);
1503 assert_eq!(compare("a.md", true, "a.txt", true), Ordering::Less);
1504 assert_eq!(compare("a", true, "a.txt", true), Ordering::Less);
1505
1506 // Nested paths
1507 assert_eq!(compare("dir/a", true, "dir/b", true), Ordering::Less);
1508 assert_eq!(compare("dir1/a", true, "dir2/a", true), Ordering::Less);
1509 assert_eq!(compare("dir/sub/a", true, "dir/a", true), Ordering::Less);
1510
1511 // Case sensitivity in paths
1512 assert_eq!(
1513 compare("Dir/file", true, "dir/file", true),
1514 Ordering::Greater
1515 );
1516 assert_eq!(
1517 compare("dir/File", true, "dir/file", true),
1518 Ordering::Greater
1519 );
1520 assert_eq!(compare("dir/file", true, "Dir/File", true), Ordering::Less);
1521
1522 // Hidden files and special names
1523 assert_eq!(compare(".hidden", true, "visible", true), Ordering::Less);
1524 assert_eq!(compare("_special", true, "normal", true), Ordering::Less);
1525 assert_eq!(compare(".config", false, ".data", false), Ordering::Less);
1526
1527 // Mixed numeric paths
1528 assert_eq!(
1529 compare("dir1/file", true, "dir2/file", true),
1530 Ordering::Less
1531 );
1532 assert_eq!(
1533 compare("dir2/file", true, "dir10/file", true),
1534 Ordering::Less
1535 );
1536 assert_eq!(
1537 compare("dir02/file", true, "dir2/file", true),
1538 Ordering::Greater
1539 );
1540
1541 // Root paths
1542 assert_eq!(compare("/a", true, "/b", true), Ordering::Less);
1543 assert_eq!(compare("/", false, "/a", true), Ordering::Less);
1544
1545 // Complex real-world examples
1546 assert_eq!(
1547 compare("project/src/main.rs", true, "project/src/lib.rs", true),
1548 Ordering::Greater
1549 );
1550 assert_eq!(
1551 compare(
1552 "project/tests/test_1.rs",
1553 true,
1554 "project/tests/test_2.rs",
1555 true
1556 ),
1557 Ordering::Less
1558 );
1559 assert_eq!(
1560 compare(
1561 "project/v1.0.0/README.md",
1562 true,
1563 "project/v1.10.0/README.md",
1564 true
1565 ),
1566 Ordering::Less
1567 );
1568 }
1569
1570 #[perf]
1571 fn test_natural_sort_case_sensitivity() {
1572 std::thread::sleep(std::time::Duration::from_millis(100));
1573 // Same letter different case - lowercase should come first
1574 assert_eq!(natural_sort("a", "A"), Ordering::Less);
1575 assert_eq!(natural_sort("A", "a"), Ordering::Greater);
1576 assert_eq!(natural_sort("a", "a"), Ordering::Equal);
1577 assert_eq!(natural_sort("A", "A"), Ordering::Equal);
1578
1579 // Mixed case strings
1580 assert_eq!(natural_sort("aaa", "AAA"), Ordering::Less);
1581 assert_eq!(natural_sort("AAA", "aaa"), Ordering::Greater);
1582 assert_eq!(natural_sort("aAa", "AaA"), Ordering::Less);
1583
1584 // Different letters
1585 assert_eq!(natural_sort("a", "b"), Ordering::Less);
1586 assert_eq!(natural_sort("A", "b"), Ordering::Less);
1587 assert_eq!(natural_sort("a", "B"), Ordering::Less);
1588 }
1589
1590 #[perf]
1591 fn test_natural_sort_with_numbers() {
1592 // Basic number ordering
1593 assert_eq!(natural_sort("file1", "file2"), Ordering::Less);
1594 assert_eq!(natural_sort("file2", "file10"), Ordering::Less);
1595 assert_eq!(natural_sort("file10", "file2"), Ordering::Greater);
1596
1597 // Numbers in different positions
1598 assert_eq!(natural_sort("1file", "2file"), Ordering::Less);
1599 assert_eq!(natural_sort("file1text", "file2text"), Ordering::Less);
1600 assert_eq!(natural_sort("text1file", "text2file"), Ordering::Less);
1601
1602 // Multiple numbers in string
1603 assert_eq!(natural_sort("file1-2", "file1-10"), Ordering::Less);
1604 assert_eq!(natural_sort("2-1file", "10-1file"), Ordering::Less);
1605
1606 // Leading zeros
1607 assert_eq!(natural_sort("file002", "file2"), Ordering::Greater);
1608 assert_eq!(natural_sort("file002", "file10"), Ordering::Less);
1609
1610 // Very large numbers
1611 assert_eq!(
1612 natural_sort("file999999999999999999999", "file999999999999999999998"),
1613 Ordering::Greater
1614 );
1615
1616 // u128 edge cases
1617
1618 // Numbers near u128::MAX (340,282,366,920,938,463,463,374,607,431,768,211,455)
1619 assert_eq!(
1620 natural_sort(
1621 "file340282366920938463463374607431768211454",
1622 "file340282366920938463463374607431768211455"
1623 ),
1624 Ordering::Less
1625 );
1626
1627 // Equal length numbers that overflow u128
1628 assert_eq!(
1629 natural_sort(
1630 "file340282366920938463463374607431768211456",
1631 "file340282366920938463463374607431768211455"
1632 ),
1633 Ordering::Greater
1634 );
1635
1636 // Different length numbers that overflow u128
1637 assert_eq!(
1638 natural_sort(
1639 "file3402823669209384634633746074317682114560",
1640 "file340282366920938463463374607431768211455"
1641 ),
1642 Ordering::Greater
1643 );
1644
1645 // Leading zeros with numbers near u128::MAX
1646 assert_eq!(
1647 natural_sort(
1648 "file0340282366920938463463374607431768211455",
1649 "file340282366920938463463374607431768211455"
1650 ),
1651 Ordering::Greater
1652 );
1653
1654 // Very large numbers with different lengths (both overflow u128)
1655 assert_eq!(
1656 natural_sort(
1657 "file999999999999999999999999999999999999999999999999",
1658 "file9999999999999999999999999999999999999999999999999"
1659 ),
1660 Ordering::Less
1661 );
1662
1663 // Mixed case with numbers
1664 assert_eq!(natural_sort("File1", "file2"), Ordering::Greater);
1665 assert_eq!(natural_sort("file1", "File2"), Ordering::Less);
1666 }
1667
1668 #[perf]
1669 fn test_natural_sort_edge_cases() {
1670 // Empty strings
1671 assert_eq!(natural_sort("", ""), Ordering::Equal);
1672 assert_eq!(natural_sort("", "a"), Ordering::Less);
1673 assert_eq!(natural_sort("a", ""), Ordering::Greater);
1674
1675 // Special characters
1676 assert_eq!(natural_sort("file-1", "file_1"), Ordering::Less);
1677 assert_eq!(natural_sort("file.1", "file_1"), Ordering::Less);
1678 assert_eq!(natural_sort("file 1", "file_1"), Ordering::Less);
1679
1680 // Unicode characters
1681 // 9312 vs 9313
1682 assert_eq!(natural_sort("file①", "file②"), Ordering::Less);
1683 // 9321 vs 9313
1684 assert_eq!(natural_sort("file⑩", "file②"), Ordering::Greater);
1685 // 28450 vs 23383
1686 assert_eq!(natural_sort("file漢", "file字"), Ordering::Greater);
1687
1688 // Mixed alphanumeric with special chars
1689 assert_eq!(natural_sort("file-1a", "file-1b"), Ordering::Less);
1690 assert_eq!(natural_sort("file-1.2", "file-1.10"), Ordering::Less);
1691 assert_eq!(natural_sort("file-1.10", "file-1.2"), Ordering::Greater);
1692 }
1693}