util.rs

   1pub mod arc_cow;
   2pub mod archive;
   3pub mod command;
   4pub mod fs;
   5pub mod markdown;
   6pub mod paths;
   7pub mod serde;
   8pub mod size;
   9#[cfg(any(test, feature = "test-support"))]
  10pub mod test;
  11pub mod time;
  12
  13use anyhow::Result;
  14use futures::Future;
  15use itertools::Either;
  16use regex::Regex;
  17use std::num::NonZeroU32;
  18use std::sync::{LazyLock, OnceLock};
  19use std::{
  20    borrow::Cow,
  21    cmp::{self, Ordering},
  22    env,
  23    ops::{AddAssign, Range, RangeInclusive},
  24    panic::Location,
  25    pin::Pin,
  26    task::{Context, Poll},
  27    time::Instant,
  28};
  29use unicase::UniCase;
  30
  31#[cfg(unix)]
  32use anyhow::Context as _;
  33
  34pub use take_until::*;
  35#[cfg(any(test, feature = "test-support"))]
  36pub use util_macros::{line_endings, separator, uri};
  37
  38#[macro_export]
  39macro_rules! debug_panic {
  40    ( $($fmt_arg:tt)* ) => {
  41        if cfg!(debug_assertions) {
  42            panic!( $($fmt_arg)* );
  43        } else {
  44            let backtrace = std::backtrace::Backtrace::capture();
  45            log::error!("{}\n{:?}", format_args!($($fmt_arg)*), backtrace);
  46        }
  47    };
  48}
  49
/// Turns a Unix-style path literal into its Windows form.
///
/// This is the Windows definition of the macro: it prepends `C:` to the literal and passes the
/// literal through `util::separator!` so that every `/` separator is replaced with `\`.
/// On non-Windows platforms the macro returns the path literal as is.
///
/// # Examples
/// ```rust
/// use util::path;
///
/// let path = path!("/Users/user/file.txt");
/// #[cfg(target_os = "windows")]
/// assert_eq!(path, "C:\\Users\\user\\file.txt");
/// #[cfg(not(target_os = "windows"))]
/// assert_eq!(path, "/Users/user/file.txt");
/// ```
#[cfg(all(any(test, feature = "test-support"), target_os = "windows"))]
#[macro_export]
macro_rules! path {
    ($path:literal) => {
        concat!("C:", util::separator!($path))
    };
}
  71
/// Turns a Unix-style path literal into the form used by the current platform.
///
/// This is the non-Windows definition of the macro: it expands to the path literal unchanged.
/// On Windows the macro instead prepends `C:` to the literal and replaces every `/` separator
/// with `\`.
///
/// # Examples
/// ```rust
/// use util::path;
///
/// let path = path!("/Users/user/file.txt");
/// #[cfg(target_os = "windows")]
/// assert_eq!(path, "C:\\Users\\user\\file.txt");
/// #[cfg(not(target_os = "windows"))]
/// assert_eq!(path, "/Users/user/file.txt");
/// ```
#[cfg(all(any(test, feature = "test-support"), not(target_os = "windows")))]
#[macro_export]
macro_rules! path {
    ($path:literal) => {
        $path
    };
}
  93
  94pub fn truncate(s: &str, max_chars: usize) -> &str {
  95    match s.char_indices().nth(max_chars) {
  96        None => s,
  97        Some((idx, _)) => &s[..idx],
  98    }
  99}
 100
 101/// Removes characters from the end of the string if its length is greater than `max_chars` and
 102/// appends "..." to the string. Returns string unchanged if its length is smaller than max_chars.
 103pub fn truncate_and_trailoff(s: &str, max_chars: usize) -> String {
 104    debug_assert!(max_chars >= 5);
 105
 106    // If the string's byte length is <= max_chars, walking the string can be skipped since the
 107    // number of chars is <= the number of bytes.
 108    if s.len() <= max_chars {
 109        return s.to_string();
 110    }
 111    let truncation_ix = s.char_indices().map(|(i, _)| i).nth(max_chars);
 112    match truncation_ix {
 113        Some(index) => s[..index].to_string() + "…",
 114        _ => s.to_string(),
 115    }
 116}
 117
 118/// Removes characters from the front of the string if its length is greater than `max_chars` and
 119/// prepends the string with "...". Returns string unchanged if its length is smaller than max_chars.
 120pub fn truncate_and_remove_front(s: &str, max_chars: usize) -> String {
 121    debug_assert!(max_chars >= 5);
 122
 123    // If the string's byte length is <= max_chars, walking the string can be skipped since the
 124    // number of chars is <= the number of bytes.
 125    if s.len() <= max_chars {
 126        return s.to_string();
 127    }
 128    let suffix_char_length = max_chars.saturating_sub(1);
 129    let truncation_ix = s
 130        .char_indices()
 131        .map(|(i, _)| i)
 132        .nth_back(suffix_char_length);
 133    match truncation_ix {
 134        Some(index) if index > 0 => "…".to_string() + &s[index..],
 135        _ => s.to_string(),
 136    }
 137}
 138
 139/// Takes only `max_lines` from the string and, if there were more than `max_lines-1`, appends a
 140/// a newline and "..." to the string, so that `max_lines` are returned.
 141/// Returns string unchanged if its length is smaller than max_lines.
 142pub fn truncate_lines_and_trailoff(s: &str, max_lines: usize) -> String {
 143    let mut lines = s.lines().take(max_lines).collect::<Vec<_>>();
 144    if lines.len() > max_lines - 1 {
 145        lines.pop();
 146        lines.join("\n") + "\n…"
 147    } else {
 148        lines.join("\n")
 149    }
 150}
 151
 152/// Truncates the string at a character boundary, such that the result is less than `max_bytes` in
 153/// length.
 154pub fn truncate_to_byte_limit(s: &str, max_bytes: usize) -> &str {
 155    if s.len() < max_bytes {
 156        return s;
 157    }
 158
 159    for i in (0..max_bytes).rev() {
 160        if s.is_char_boundary(i) {
 161            return &s[..i];
 162        }
 163    }
 164
 165    ""
 166}
 167
 168/// Takes a prefix of complete lines which fit within the byte limit. If the first line is longer
 169/// than the limit, truncates at a character boundary.
 170pub fn truncate_lines_to_byte_limit(s: &str, max_bytes: usize) -> &str {
 171    if s.len() < max_bytes {
 172        return s;
 173    }
 174
 175    for i in (0..max_bytes).rev() {
 176        if s.is_char_boundary(i) {
 177            if s.as_bytes()[i] == b'\n' {
 178                // Since the i-th character is \n, valid to slice at i + 1.
 179                return &s[..i + 1];
 180            }
 181        }
 182    }
 183
 184    truncate_to_byte_limit(s, max_bytes)
 185}
 186
 187fn char_len_with_expanded_tabs(offset: usize, text: &str, tab_size: NonZeroU32) -> usize {
 188    let tab_size = tab_size.get() as usize;
 189    let mut width = offset;
 190
 191    for ch in text.chars() {
 192        width += if ch == '\t' {
 193            tab_size - (width % tab_size)
 194        } else {
 195            1
 196        };
 197    }
 198
 199    width - offset
 200}
 201
/// Tokenizes a string into runs of text that should stick together, or that is whitespace.
///
/// Tokens are produced through the `Iterator` implementation, which consumes `input` from the
/// front as it goes.
struct WordBreakingTokenizer<'a> {
    /// The part of the text that has not been turned into tokens yet.
    input: &'a str,
}

impl<'a> WordBreakingTokenizer<'a> {
    /// Creates a tokenizer that will start at the beginning of `input`.
    fn new(input: &'a str) -> Self {
        Self { input }
    }
}
 212
 213fn is_char_ideographic(ch: char) -> bool {
 214    use unicode_script::Script::*;
 215    use unicode_script::UnicodeScript;
 216    matches!(ch.script(), Han | Tangut | Yi)
 217}
 218
 219fn is_grapheme_ideographic(text: &str) -> bool {
 220    text.chars().any(is_char_ideographic)
 221}
 222
 223fn is_grapheme_whitespace(text: &str) -> bool {
 224    text.chars().any(|x| x.is_whitespace())
 225}
 226
 227fn should_stay_with_preceding_ideograph(text: &str) -> bool {
 228    text.chars().next().map_or(false, |ch| {
 229        matches!(ch, '。' | '、' | '，' | '？' | '！' | '：' | '；' | '…')
 230    })
 231}
 232
/// A single token produced by `WordBreakingTokenizer`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum WordBreakToken<'a> {
    /// A run of non-whitespace text, with the number of graphemes it contains.
    Word { token: &'a str, grapheme_len: usize },
    /// A run of whitespace that is not a single line feed, with the number of graphemes it
    /// contains.
    InlineWhitespace { token: &'a str, grapheme_len: usize },
    /// A token consisting of exactly one `\n`.
    Newline,
}
 239
impl<'a> Iterator for WordBreakingTokenizer<'a> {
    /// Yields a span, the count of graphemes in the token, and whether it was
    /// whitespace. Note that it also breaks at word boundaries.
    type Item = WordBreakToken<'a>;

    /// Splits the next token off the front of `self.input`.
    ///
    /// Returns `None` once the input is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        use unicode_segmentation::UnicodeSegmentation;
        if self.input.is_empty() {
            return None;
        }

        let mut iter = self.input.graphemes(true).peekable();
        // Byte length and grapheme count of the token being built.
        let mut offset = 0;
        let mut grapheme_len = 0;
        if let Some(first_grapheme) = iter.next() {
            // The first grapheme decides what kind of token this is; later graphemes are only
            // added while they are of the same kind.
            let is_newline = first_grapheme == "\n";
            let is_whitespace = is_grapheme_whitespace(first_grapheme);
            offset += first_grapheme.len();
            grapheme_len += 1;
            if is_grapheme_ideographic(first_grapheme) && !is_whitespace {
                // An ideograph forms a token by itself, optionally together with one directly
                // following punctuation mark.
                if let Some(grapheme) = iter.peek().copied() {
                    if should_stay_with_preceding_ideograph(grapheme) {
                        offset += grapheme.len();
                        grapheme_len += 1;
                    }
                }
            } else {
                // NOTE(review): the word-bound indices below are relative to
                // `self.input[offset..]`, while `offset` is relative to `self.input`. After
                // `peek()`, `words.next()` returns the same index-0 element, so the
                // `i == offset` check further down appears never to be true — confirm whether
                // breaking at word boundaries is actually intended here.
                let mut words = self.input[offset..].split_word_bound_indices().peekable();
                let mut next_word_bound = words.peek().copied();
                if next_word_bound.map_or(false, |(i, _)| i == 0) {
                    next_word_bound = words.next();
                }
                while let Some(grapheme) = iter.peek().copied() {
                    if next_word_bound.map_or(false, |(i, _)| i == offset) {
                        break;
                    };
                    // Stop as soon as the next grapheme differs from the first one in being
                    // whitespace or in being a line feed.
                    if is_grapheme_whitespace(grapheme) != is_whitespace
                        || (grapheme == "\n") != is_newline
                    {
                        break;
                    };
                    offset += grapheme.len();
                    grapheme_len += 1;
                    iter.next();
                }
            }
            // Hand out the token and keep the remainder for the next call.
            let token = &self.input[..offset];
            self.input = &self.input[offset..];
            if token == "\n" {
                Some(WordBreakToken::Newline)
            } else if is_whitespace {
                Some(WordBreakToken::InlineWhitespace {
                    token,
                    grapheme_len,
                })
            } else {
                Some(WordBreakToken::Word {
                    token,
                    grapheme_len,
                })
            }
        } else {
            None
        }
    }
}
 306
 307pub fn wrap_with_prefix(
 308    line_prefix: String,
 309    unwrapped_text: String,
 310    wrap_column: usize,
 311    tab_size: NonZeroU32,
 312    preserve_existing_whitespace: bool,
 313) -> String {
 314    let line_prefix_len = char_len_with_expanded_tabs(0, &line_prefix, tab_size);
 315    let mut wrapped_text = String::new();
 316    let mut current_line = line_prefix.clone();
 317
 318    let tokenizer = WordBreakingTokenizer::new(&unwrapped_text);
 319    let mut current_line_len = line_prefix_len;
 320    let mut in_whitespace = false;
 321    for token in tokenizer {
 322        let have_preceding_whitespace = in_whitespace;
 323        match token {
 324            WordBreakToken::Word {
 325                token,
 326                grapheme_len,
 327            } => {
 328                in_whitespace = false;
 329                if current_line_len + grapheme_len > wrap_column
 330                    && current_line_len != line_prefix_len
 331                {
 332                    wrapped_text.push_str(current_line.trim_end());
 333                    wrapped_text.push('\n');
 334                    current_line.truncate(line_prefix.len());
 335                    current_line_len = line_prefix_len;
 336                }
 337                current_line.push_str(token);
 338                current_line_len += grapheme_len;
 339            }
 340            WordBreakToken::InlineWhitespace {
 341                mut token,
 342                mut grapheme_len,
 343            } => {
 344                in_whitespace = true;
 345                if have_preceding_whitespace && !preserve_existing_whitespace {
 346                    continue;
 347                }
 348                if !preserve_existing_whitespace {
 349                    token = " ";
 350                    grapheme_len = 1;
 351                }
 352                if current_line_len + grapheme_len > wrap_column {
 353                    wrapped_text.push_str(current_line.trim_end());
 354                    wrapped_text.push('\n');
 355                    current_line.truncate(line_prefix.len());
 356                    current_line_len = line_prefix_len;
 357                } else if current_line_len != line_prefix_len || preserve_existing_whitespace {
 358                    current_line.push_str(token);
 359                    current_line_len += grapheme_len;
 360                }
 361            }
 362            WordBreakToken::Newline => {
 363                in_whitespace = true;
 364                if preserve_existing_whitespace {
 365                    wrapped_text.push_str(current_line.trim_end());
 366                    wrapped_text.push('\n');
 367                    current_line.truncate(line_prefix.len());
 368                    current_line_len = line_prefix_len;
 369                } else if have_preceding_whitespace {
 370                    continue;
 371                } else if current_line_len + 1 > wrap_column && current_line_len != line_prefix_len
 372                {
 373                    wrapped_text.push_str(current_line.trim_end());
 374                    wrapped_text.push('\n');
 375                    current_line.truncate(line_prefix.len());
 376                    current_line_len = line_prefix_len;
 377                } else if current_line_len != line_prefix_len {
 378                    current_line.push(' ');
 379                    current_line_len += 1;
 380                }
 381            }
 382        }
 383    }
 384
 385    if !current_line.is_empty() {
 386        wrapped_text.push_str(&current_line);
 387    }
 388    wrapped_text
 389}
 390
 391pub fn post_inc<T: From<u8> + AddAssign<T> + Copy>(value: &mut T) -> T {
 392    let prev = *value;
 393    *value += T::from(1);
 394    prev
 395}
 396
 397/// Extend a sorted vector with a sorted sequence of items, maintaining the vector's sort order and
 398/// enforcing a maximum length. This also de-duplicates items. Sort the items according to the given callback. Before calling this,
 399/// both `vec` and `new_items` should already be sorted according to the `cmp` comparator.
 400pub fn extend_sorted<T, I, F>(vec: &mut Vec<T>, new_items: I, limit: usize, mut cmp: F)
 401where
 402    I: IntoIterator<Item = T>,
 403    F: FnMut(&T, &T) -> Ordering,
 404{
 405    let mut start_index = 0;
 406    for new_item in new_items {
 407        if let Err(i) = vec[start_index..].binary_search_by(|m| cmp(m, &new_item)) {
 408            let index = start_index + i;
 409            if vec.len() < limit {
 410                vec.insert(index, new_item);
 411            } else if index < vec.len() {
 412                vec.pop();
 413                vec.insert(index, new_item);
 414            }
 415            start_index = index;
 416        }
 417    }
 418}
 419
 420pub fn truncate_to_bottom_n_sorted_by<T, F>(items: &mut Vec<T>, limit: usize, compare: &F)
 421where
 422    F: Fn(&T, &T) -> Ordering,
 423{
 424    if limit == 0 {
 425        items.truncate(0);
 426    }
 427    if items.len() <= limit {
 428        items.sort_by(compare);
 429        return;
 430    }
 431    // When limit is near to items.len() it may be more efficient to sort the whole list and
 432    // truncate, rather than always doing selection first as is done below. It's hard to analyze
 433    // where the threshold for this should be since the quickselect style algorithm used by
 434    // `select_nth_unstable_by` makes the prefix partially sorted, and so its work is not wasted -
 435    // the expected number of comparisons needed by `sort_by` is less than it is for some arbitrary
 436    // unsorted input.
 437    items.select_nth_unstable_by(limit, compare);
 438    items.truncate(limit);
 439    items.sort_by(compare);
 440}
 441
 442#[cfg(unix)]
 443fn load_shell_from_passwd() -> Result<()> {
 444    let buflen = match unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) } {
 445        n if n < 0 => 1024,
 446        n => n as usize,
 447    };
 448    let mut buffer = Vec::with_capacity(buflen);
 449
 450    let mut pwd: std::mem::MaybeUninit<libc::passwd> = std::mem::MaybeUninit::uninit();
 451    let mut result: *mut libc::passwd = std::ptr::null_mut();
 452
 453    let uid = unsafe { libc::getuid() };
 454    let status = unsafe {
 455        libc::getpwuid_r(
 456            uid,
 457            pwd.as_mut_ptr(),
 458            buffer.as_mut_ptr() as *mut libc::c_char,
 459            buflen,
 460            &mut result,
 461        )
 462    };
 463    let entry = unsafe { pwd.assume_init() };
 464
 465    anyhow::ensure!(
 466        status == 0,
 467        "call to getpwuid_r failed. uid: {}, status: {}",
 468        uid,
 469        status
 470    );
 471    anyhow::ensure!(!result.is_null(), "passwd entry for uid {} not found", uid);
 472    anyhow::ensure!(
 473        entry.pw_uid == uid,
 474        "passwd entry has different uid ({}) than getuid ({}) returned",
 475        entry.pw_uid,
 476        uid,
 477    );
 478
 479    let shell = unsafe { std::ffi::CStr::from_ptr(entry.pw_shell).to_str().unwrap() };
 480    if env::var("SHELL").map_or(true, |shell_env| shell_env != shell) {
 481        log::info!(
 482            "updating SHELL environment variable to value from passwd entry: {:?}",
 483            shell,
 484        );
 485        unsafe { env::set_var("SHELL", shell) };
 486    }
 487
 488    Ok(())
 489}
 490
 491#[cfg(unix)]
 492pub fn load_login_shell_environment() -> Result<()> {
 493    load_shell_from_passwd().log_err();
 494
 495    let marker = "ZED_LOGIN_SHELL_START";
 496    let shell = env::var("SHELL").context(
 497        "SHELL environment variable is not assigned so we can't source login environment variables",
 498    )?;
 499
 500    // If possible, we want to `cd` in the user's `$HOME` to trigger programs
 501    // such as direnv, asdf, mise, ... to adjust the PATH. These tools often hook
 502    // into shell's `cd` command (and hooks) to manipulate env.
 503    // We do this so that we get the env a user would have when spawning a shell
 504    // in home directory.
 505    let shell_cmd_prefix = std::env::var_os("HOME")
 506        .and_then(|home| home.into_string().ok())
 507        .map(|home| format!("cd '{home}';"));
 508
 509    let shell_cmd = format!(
 510        "{}printf '%s' {marker}; /usr/bin/env;",
 511        shell_cmd_prefix.as_deref().unwrap_or("")
 512    );
 513
 514    let output = set_pre_exec_to_start_new_session(
 515        std::process::Command::new(&shell).args(["-l", "-i", "-c", &shell_cmd]),
 516    )
 517    .output()
 518    .context("failed to spawn login shell to source login environment variables")?;
 519    anyhow::ensure!(output.status.success(), "login shell exited with error");
 520
 521    let stdout = String::from_utf8_lossy(&output.stdout);
 522
 523    if let Some(env_output_start) = stdout.find(marker) {
 524        let env_output = &stdout[env_output_start + marker.len()..];
 525
 526        parse_env_output(env_output, |key, value| unsafe { env::set_var(key, value) });
 527
 528        log::info!(
 529            "set environment variables from shell:{}, path:{}",
 530            shell,
 531            env::var("PATH").unwrap_or_default(),
 532        );
 533    }
 534
 535    Ok(())
 536}
 537
/// Configures the process to start a new session, to prevent interactive shells from taking control
/// of the terminal.
///
/// On non-Windows targets this registers a `pre_exec` hook on `command` that calls
/// `libc::setsid`; the return value of that call is ignored. On Windows nothing is registered.
/// In both cases the same `command` is returned so that calls can be chained.
///
/// For more details: https://registerspill.thorstenball.com/p/how-to-lose-control-of-your-shell
pub fn set_pre_exec_to_start_new_session(
    command: &mut std::process::Command,
) -> &mut std::process::Command {
    // SAFETY: code in pre_exec should be signal safe; the hook below only calls `libc::setsid`.
    // https://man7.org/linux/man-pages/man7/signal-safety.7.html
    #[cfg(not(target_os = "windows"))]
    unsafe {
        use std::os::unix::process::CommandExt;
        command.pre_exec(|| {
            libc::setsid();
            Ok(())
        });
    };
    command
}
 557
 558/// Parse the result of calling `usr/bin/env` with no arguments
 559pub fn parse_env_output(env: &str, mut f: impl FnMut(String, String)) {
 560    let mut current_key: Option<String> = None;
 561    let mut current_value: Option<String> = None;
 562
 563    for line in env.split_terminator('\n') {
 564        if let Some(separator_index) = line.find('=') {
 565            if !line[..separator_index].is_empty() {
 566                if let Some((key, value)) = Option::zip(current_key.take(), current_value.take()) {
 567                    f(key, value)
 568                }
 569                current_key = Some(line[..separator_index].to_string());
 570                current_value = Some(line[separator_index + 1..].to_string());
 571                continue;
 572            };
 573        }
 574        if let Some(value) = current_value.as_mut() {
 575            value.push('\n');
 576            value.push_str(line);
 577        }
 578    }
 579    if let Some((key, value)) = Option::zip(current_key.take(), current_value.take()) {
 580        f(key, value)
 581    }
 582}
 583
 584pub fn merge_json_lenient_value_into(
 585    source: serde_json_lenient::Value,
 586    target: &mut serde_json_lenient::Value,
 587) {
 588    match (source, target) {
 589        (serde_json_lenient::Value::Object(source), serde_json_lenient::Value::Object(target)) => {
 590            for (key, value) in source {
 591                if let Some(target) = target.get_mut(&key) {
 592                    merge_json_lenient_value_into(value, target);
 593                } else {
 594                    target.insert(key, value);
 595                }
 596            }
 597        }
 598
 599        (serde_json_lenient::Value::Array(source), serde_json_lenient::Value::Array(target)) => {
 600            for value in source {
 601                target.push(value);
 602            }
 603        }
 604
 605        (source, target) => *target = source,
 606    }
 607}
 608
 609pub fn merge_json_value_into(source: serde_json::Value, target: &mut serde_json::Value) {
 610    use serde_json::Value;
 611
 612    match (source, target) {
 613        (Value::Object(source), Value::Object(target)) => {
 614            for (key, value) in source {
 615                if let Some(target) = target.get_mut(&key) {
 616                    merge_json_value_into(value, target);
 617                } else {
 618                    target.insert(key, value);
 619                }
 620            }
 621        }
 622
 623        (Value::Array(source), Value::Array(target)) => {
 624            for value in source {
 625                target.push(value);
 626            }
 627        }
 628
 629        (source, target) => *target = source,
 630    }
 631}
 632
 633pub fn merge_non_null_json_value_into(source: serde_json::Value, target: &mut serde_json::Value) {
 634    use serde_json::Value;
 635    if let Value::Object(source_object) = source {
 636        let target_object = if let Value::Object(target) = target {
 637            target
 638        } else {
 639            *target = Value::Object(Default::default());
 640            target.as_object_mut().unwrap()
 641        };
 642        for (key, value) in source_object {
 643            if let Some(target) = target_object.get_mut(&key) {
 644                merge_non_null_json_value_into(value, target);
 645            } else if !value.is_null() {
 646                target_object.insert(key, value);
 647            }
 648        }
 649    } else if !source.is_null() {
 650        *target = source
 651    }
 652}
 653
 654pub fn measure<R>(label: &str, f: impl FnOnce() -> R) -> R {
 655    static ZED_MEASUREMENTS: OnceLock<bool> = OnceLock::new();
 656    let zed_measurements = ZED_MEASUREMENTS.get_or_init(|| {
 657        env::var("ZED_MEASUREMENTS")
 658            .map(|measurements| measurements == "1" || measurements == "true")
 659            .unwrap_or(false)
 660    });
 661
 662    if *zed_measurements {
 663        let start = Instant::now();
 664        let result = f();
 665        let elapsed = start.elapsed();
 666        eprintln!("{}: {:?}", label, elapsed);
 667        result
 668    } else {
 669        f()
 670    }
 671}
 672
 673pub fn expanded_and_wrapped_usize_range(
 674    range: Range<usize>,
 675    additional_before: usize,
 676    additional_after: usize,
 677    wrap_length: usize,
 678) -> impl Iterator<Item = usize> {
 679    let start_wraps = range.start < additional_before;
 680    let end_wraps = wrap_length < range.end + additional_after;
 681    if start_wraps && end_wraps {
 682        Either::Left(0..wrap_length)
 683    } else if start_wraps {
 684        let wrapped_start = (range.start + wrap_length).saturating_sub(additional_before);
 685        if wrapped_start <= range.end {
 686            Either::Left(0..wrap_length)
 687        } else {
 688            Either::Right((0..range.end + additional_after).chain(wrapped_start..wrap_length))
 689        }
 690    } else if end_wraps {
 691        let wrapped_end = range.end + additional_after - wrap_length;
 692        if range.start <= wrapped_end {
 693            Either::Left(0..wrap_length)
 694        } else {
 695            Either::Right((0..wrapped_end).chain(range.start - additional_before..wrap_length))
 696        }
 697    } else {
 698        Either::Left((range.start - additional_before)..(range.end + additional_after))
 699    }
 700}
 701
 702/// Yields `[i, i + 1, i - 1, i + 2, ..]`, each modulo `wrap_length` and bounded by
 703/// `additional_before` and `additional_after`. If the wrapping causes overlap, duplicates are not
 704/// emitted. If wrap_length is 0, nothing is yielded.
 705pub fn wrapped_usize_outward_from(
 706    start: usize,
 707    additional_before: usize,
 708    additional_after: usize,
 709    wrap_length: usize,
 710) -> impl Iterator<Item = usize> {
 711    let mut count = 0;
 712    let mut after_offset = 1;
 713    let mut before_offset = 1;
 714
 715    std::iter::from_fn(move || {
 716        count += 1;
 717        if count > wrap_length {
 718            None
 719        } else if count == 1 {
 720            Some(start % wrap_length)
 721        } else if after_offset <= additional_after && after_offset <= before_offset {
 722            let value = (start + after_offset) % wrap_length;
 723            after_offset += 1;
 724            Some(value)
 725        } else if before_offset <= additional_before {
 726            let value = (start + wrap_length - before_offset) % wrap_length;
 727            before_offset += 1;
 728            Some(value)
 729        } else if after_offset <= additional_after {
 730            let value = (start + after_offset) % wrap_length;
 731            after_offset += 1;
 732            Some(value)
 733        } else {
 734            None
 735        }
 736    })
 737}
 738
 739#[cfg(target_os = "windows")]
 740pub fn get_windows_system_shell() -> String {
 741    use std::path::PathBuf;
 742
 743    fn find_pwsh_in_programfiles(find_alternate: bool, find_preview: bool) -> Option<PathBuf> {
 744        #[cfg(target_pointer_width = "64")]
 745        let env_var = if find_alternate {
 746            "ProgramFiles(x86)"
 747        } else {
 748            "ProgramFiles"
 749        };
 750
 751        #[cfg(target_pointer_width = "32")]
 752        let env_var = if find_alternate {
 753            "ProgramW6432"
 754        } else {
 755            "ProgramFiles"
 756        };
 757
 758        let install_base_dir = PathBuf::from(std::env::var_os(env_var)?).join("PowerShell");
 759        install_base_dir
 760            .read_dir()
 761            .ok()?
 762            .filter_map(Result::ok)
 763            .filter(|entry| matches!(entry.file_type(), Ok(ft) if ft.is_dir()))
 764            .filter_map(|entry| {
 765                let dir_name = entry.file_name();
 766                let dir_name = dir_name.to_string_lossy();
 767
 768                let version = if find_preview {
 769                    let dash_index = dir_name.find('-')?;
 770                    if &dir_name[dash_index + 1..] != "preview" {
 771                        return None;
 772                    };
 773                    dir_name[..dash_index].parse::<u32>().ok()?
 774                } else {
 775                    dir_name.parse::<u32>().ok()?
 776                };
 777
 778                let exe_path = entry.path().join("pwsh.exe");
 779                if exe_path.exists() {
 780                    Some((version, exe_path))
 781                } else {
 782                    None
 783                }
 784            })
 785            .max_by_key(|(version, _)| *version)
 786            .map(|(_, path)| path)
 787    }
 788
 789    fn find_pwsh_in_msix(find_preview: bool) -> Option<PathBuf> {
 790        let msix_app_dir =
 791            PathBuf::from(std::env::var_os("LOCALAPPDATA")?).join("Microsoft\\WindowsApps");
 792        if !msix_app_dir.exists() {
 793            return None;
 794        }
 795
 796        let prefix = if find_preview {
 797            "Microsoft.PowerShellPreview_"
 798        } else {
 799            "Microsoft.PowerShell_"
 800        };
 801        msix_app_dir
 802            .read_dir()
 803            .ok()?
 804            .filter_map(|entry| {
 805                let entry = entry.ok()?;
 806                if !matches!(entry.file_type(), Ok(ft) if ft.is_dir()) {
 807                    return None;
 808                }
 809
 810                if !entry.file_name().to_string_lossy().starts_with(prefix) {
 811                    return None;
 812                }
 813
 814                let exe_path = entry.path().join("pwsh.exe");
 815                exe_path.exists().then_some(exe_path)
 816            })
 817            .next()
 818    }
 819
 820    fn find_pwsh_in_scoop() -> Option<PathBuf> {
 821        let pwsh_exe =
 822            PathBuf::from(std::env::var_os("USERPROFILE")?).join("scoop\\shims\\pwsh.exe");
 823        pwsh_exe.exists().then_some(pwsh_exe)
 824    }
 825
 826    static SYSTEM_SHELL: LazyLock<String> = LazyLock::new(|| {
 827        find_pwsh_in_programfiles(false, false)
 828            .or_else(|| find_pwsh_in_programfiles(true, false))
 829            .or_else(|| find_pwsh_in_msix(false))
 830            .or_else(|| find_pwsh_in_programfiles(false, true))
 831            .or_else(|| find_pwsh_in_msix(true))
 832            .or_else(|| find_pwsh_in_programfiles(true, true))
 833            .or_else(find_pwsh_in_scoop)
 834            .map(|p| p.to_string_lossy().to_string())
 835            .unwrap_or("powershell.exe".to_string())
 836    });
 837
 838    (*SYSTEM_SHELL).clone()
 839}
 840
/// Extension methods for results whose error should be logged or converted rather than
/// propagated as is.
pub trait ResultExt<E> {
    /// The success type of the result.
    type Ok;

    /// Converts the result into an `Option`, logging the error at `Error` level if there is one.
    fn log_err(self) -> Option<Self::Ok>;
    /// Assert that this result should never be an error in development or tests.
    fn debug_assert_ok(self, reason: &str) -> Self;
    /// Converts the result into an `Option`, logging the error at `Warn` level if there is one.
    fn warn_on_err(self) -> Option<Self::Ok>;
    /// Converts the result into an `Option`, logging the error at the given `level` if there is
    /// one.
    fn log_with_level(self, level: log::Level) -> Option<Self::Ok>;
    /// Converts the error type of the result into `anyhow::Error`.
    fn anyhow(self) -> anyhow::Result<Self::Ok>
    where
        E: Into<anyhow::Error>;
}
 853
 854impl<T, E> ResultExt<E> for Result<T, E>
 855where
 856    E: std::fmt::Debug,
 857{
 858    type Ok = T;
 859
 860    #[track_caller]
 861    fn log_err(self) -> Option<T> {
 862        self.log_with_level(log::Level::Error)
 863    }
 864
 865    #[track_caller]
 866    fn debug_assert_ok(self, reason: &str) -> Self {
 867        if let Err(error) = &self {
 868            debug_panic!("{reason} - {error:?}");
 869        }
 870        self
 871    }
 872
 873    #[track_caller]
 874    fn warn_on_err(self) -> Option<T> {
 875        self.log_with_level(log::Level::Warn)
 876    }
 877
 878    #[track_caller]
 879    fn log_with_level(self, level: log::Level) -> Option<T> {
 880        match self {
 881            Ok(value) => Some(value),
 882            Err(error) => {
 883                log_error_with_caller(*Location::caller(), error, level);
 884                None
 885            }
 886        }
 887    }
 888
 889    fn anyhow(self) -> anyhow::Result<T>
 890    where
 891        E: Into<anyhow::Error>,
 892    {
 893        self.map_err(Into::into)
 894    }
 895}
 896
 897fn log_error_with_caller<E>(caller: core::panic::Location<'_>, error: E, level: log::Level)
 898where
 899    E: std::fmt::Debug,
 900{
 901    #[cfg(not(target_os = "windows"))]
 902    let file = caller.file();
 903    #[cfg(target_os = "windows")]
 904    let file = caller.file().replace('\\', "/");
 905    // In this codebase, the first segment of the file path is
 906    // the 'crates' folder, followed by the crate name.
 907    let target = file.split('/').nth(1);
 908
 909    log::logger().log(
 910        &log::Record::builder()
 911            .target(target.unwrap_or(""))
 912            .module_path(target)
 913            .args(format_args!("{:?}", error))
 914            .file(Some(caller.file()))
 915            .line(Some(caller.line()))
 916            .level(level)
 917            .build(),
 918    );
 919}
 920
 921pub fn log_err<E: std::fmt::Debug>(error: &E) {
 922    log_error_with_caller(*Location::caller(), error, log::Level::Warn);
 923}
 924
/// Adapters for futures that resolve to a `Result`.
pub trait TryFutureExt {
    /// Wraps the future in a [`LogErrorFuture`]. The blanket implementation in this file
    /// logs at `Error` level, attributed to the caller of this method.
    fn log_err(self) -> LogErrorFuture<Self>
    where
        Self: Sized;

    /// Like `log_err`, but the error is attributed to the explicitly supplied `location`.
    fn log_tracked_err(self, location: core::panic::Location<'static>) -> LogErrorFuture<Self>
    where
        Self: Sized;

    /// Wraps the future in a [`LogErrorFuture`]. The blanket implementation in this file
    /// logs at `Warn` level, attributed to the caller of this method.
    fn warn_on_err(self) -> LogErrorFuture<Self>
    where
        Self: Sized;
    /// Wraps the future in an [`UnwrapFuture`], which unwraps the result once it is ready.
    fn unwrap(self) -> UnwrapFuture<Self>
    where
        Self: Sized;
}
 941
 942impl<F, T, E> TryFutureExt for F
 943where
 944    F: Future<Output = Result<T, E>>,
 945    E: std::fmt::Debug,
 946{
 947    #[track_caller]
 948    fn log_err(self) -> LogErrorFuture<Self>
 949    where
 950        Self: Sized,
 951    {
 952        let location = Location::caller();
 953        LogErrorFuture(self, log::Level::Error, *location)
 954    }
 955
 956    fn log_tracked_err(self, location: core::panic::Location<'static>) -> LogErrorFuture<Self>
 957    where
 958        Self: Sized,
 959    {
 960        LogErrorFuture(self, log::Level::Error, location)
 961    }
 962
 963    #[track_caller]
 964    fn warn_on_err(self) -> LogErrorFuture<Self>
 965    where
 966        Self: Sized,
 967    {
 968        let location = Location::caller();
 969        LogErrorFuture(self, log::Level::Warn, *location)
 970    }
 971
 972    fn unwrap(self) -> UnwrapFuture<Self>
 973    where
 974        Self: Sized,
 975    {
 976        UnwrapFuture(self)
 977    }
 978}
 979
 980#[must_use]
 981pub struct LogErrorFuture<F>(F, log::Level, core::panic::Location<'static>);
 982
 983impl<F, T, E> Future for LogErrorFuture<F>
 984where
 985    F: Future<Output = Result<T, E>>,
 986    E: std::fmt::Debug,
 987{
 988    type Output = Option<T>;
 989
 990    fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
 991        let level = self.1;
 992        let location = self.2;
 993        let inner = unsafe { Pin::new_unchecked(&mut self.get_unchecked_mut().0) };
 994        match inner.poll(cx) {
 995            Poll::Ready(output) => Poll::Ready(match output {
 996                Ok(output) => Some(output),
 997                Err(error) => {
 998                    log_error_with_caller(location, error, level);
 999                    None
1000                }
1001            }),
1002            Poll::Pending => Poll::Pending,
1003        }
1004    }
1005}
1006
/// Future adapter that resolves to the `Ok` value of the wrapped future and panics
/// (via `Result::unwrap`) if the wrapped future resolves to an `Err`.
pub struct UnwrapFuture<F>(F);

impl<F, T, E> Future for UnwrapFuture<F>
where
    F: Future<Output = Result<T, E>>,
    E: std::fmt::Debug,
{
    type Output = T;

    fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
        // SAFETY: this only projects the pin onto field 0; the wrapped future is polled
        // in place and is not moved out of `self` here.
        let wrapped = unsafe { self.map_unchecked_mut(|this| &mut this.0) };
        wrapped.poll(cx).map(|outcome| outcome.unwrap())
    }
}
1024
/// Guard that runs the stored closure when it is dropped, unless [`Deferred::abort`]
/// removed the closure first.
pub struct Deferred<F: FnOnce()>(Option<F>);

impl<F: FnOnce()> Deferred<F> {
    /// Drop without running the deferred function.
    pub fn abort(mut self) {
        // Clearing the slot drops the closure uncalled; the `Drop` impl then finds nothing.
        self.0 = None;
    }
}

impl<F: FnOnce()> Drop for Deferred<F> {
    fn drop(&mut self) {
        let Some(callback) = self.0.take() else {
            return;
        };
        callback()
    }
}

/// Run the given function when the returned value is dropped (unless it has been
/// cancelled with [`Deferred::abort`]).
#[must_use]
pub fn defer<F: FnOnce()>(f: F) -> Deferred<F> {
    let pending = Some(f);
    Deferred(pending)
}
1047
/// Random text generation helpers, compiled only for tests / the `test-support` feature.
#[cfg(any(test, feature = "test-support"))]
mod rng {
    use rand::{Rng, seq::SliceRandom};
    /// Iterator yielding random characters drawn from the wrapped random number generator.
    pub struct RandomCharIter<T: Rng> {
        rng: T,
        // When set, only lowercase ASCII letters and newlines are produced.
        simple_text: bool,
    }

    impl<T: Rng> RandomCharIter<T> {
        /// Creates an iterator over `rng`. Simple-text mode is enabled when the
        /// `SIMPLE_TEXT` environment variable is set to a non-empty value.
        pub fn new(rng: T) -> Self {
            Self {
                rng,
                simple_text: std::env::var("SIMPLE_TEXT").map_or(false, |v| !v.is_empty()),
            }
        }

        /// Forces simple-text mode regardless of the environment.
        pub fn with_simple_text(mut self) -> Self {
            self.simple_text = true;
            self
        }
    }

    impl<T: Rng> Iterator for RandomCharIter<T> {
        type Item = char;

        fn next(&mut self) -> Option<Self::Item> {
            if self.simple_text {
                // Draws below 5 out of 0..100 become a newline, everything else a letter.
                return if self.rng.gen_range(0..100) < 5 {
                    Some('\n')
                } else {
                    Some(self.rng.gen_range(b'a'..b'z' + 1).into())
                };
            }

            // One draw from 0..100 selects the character class; a second draw picks
            // the concrete character within that class.
            match self.rng.gen_range(0..100) {
                // whitespace
                0..=19 => [' ', '\n', '\r', '\t'].choose(&mut self.rng).copied(),
                // two-byte greek letters
                20..=32 => char::from_u32(self.rng.gen_range(('α' as u32)..('ω' as u32 + 1))),
                // three-byte characters
                33..=45 => ['✋', '✅', '❌', '❎', '⭐']
                    .choose(&mut self.rng)
                    .copied(),
                // four-byte characters
                46..=58 => ['🍐', '🏀', '🍗', '🎉'].choose(&mut self.rng).copied(),
                // ascii letters
                _ => Some(self.rng.gen_range(b'a'..b'z' + 1).into()),
            }
        }
    }
}
1099#[cfg(any(test, feature = "test-support"))]
1100pub use rng::RandomCharIter;
1101/// Get an embedded file as a string.
1102pub fn asset_str<A: rust_embed::RustEmbed>(path: &str) -> Cow<'static, str> {
1103    match A::get(path).expect(path).data {
1104        Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes).unwrap()),
1105        Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).unwrap()),
1106    }
1107}
1108
/// Wraps a block in a closure that is called on the spot, so that `?` can be used inside
/// functions which themselves do not return an `Option` or `Result`.
///
/// Three forms are accepted: a plain block, an `async` block, and an `async move` block.
/// For the async forms the closure returns the async block's future.
#[macro_export]
macro_rules! maybe {
    ($body:block) => {
        (|| $body)()
    };
    (async $body:block) => {
        (|| async $body)()
    };
    (async move $body:block) => {
        (|| async move $body)()
    };
}
1125
/// Helper operations shared by `Range<T>` and `RangeInclusive<T>`.
pub trait RangeExt<T> {
    /// Returns a range with the same two bounds, ordered so that the smaller comes first.
    fn sorted(&self) -> Self;
    /// Returns `start..=end` built from this range's two bounds.
    fn to_inclusive(&self) -> RangeInclusive<T>;
    /// Returns whether this range and the half-open range `other` intersect.
    fn overlaps(&self, other: &Range<T>) -> bool;
    /// Returns whether both bounds of `other` lie within this range's bounds
    /// (comparing bounds inclusively).
    fn contains_inclusive(&self, other: &Range<T>) -> bool;
}

impl<T: Ord + Clone> RangeExt<T> for Range<T> {
    fn sorted(&self) -> Self {
        let (low, high) = match self.start.cmp(&self.end) {
            Ordering::Greater => (&self.end, &self.start),
            Ordering::Less | Ordering::Equal => (&self.start, &self.end),
        };
        low.clone()..high.clone()
    }

    fn to_inclusive(&self) -> RangeInclusive<T> {
        let first = self.start.clone();
        let last = self.end.clone();
        first..=last
    }

    fn overlaps(&self, other: &Range<T>) -> bool {
        // Disjoint exactly when one range begins at or after the other's exclusive end.
        !(self.start >= other.end || other.start >= self.end)
    }

    fn contains_inclusive(&self, other: &Range<T>) -> bool {
        other.start >= self.start && self.end >= other.end
    }
}

impl<T: Ord + Clone> RangeExt<T> for RangeInclusive<T> {
    fn sorted(&self) -> Self {
        let (low, high) = match self.start().cmp(self.end()) {
            Ordering::Greater => (self.end(), self.start()),
            Ordering::Less | Ordering::Equal => (self.start(), self.end()),
        };
        low.clone()..=high.clone()
    }

    fn to_inclusive(&self) -> RangeInclusive<T> {
        Clone::clone(self)
    }

    fn overlaps(&self, other: &Range<T>) -> bool {
        // `other.end` is exclusive while `self.end()` is inclusive, hence `>=` vs `>`.
        !(*self.start() >= other.end || other.start > *self.end())
    }

    fn contains_inclusive(&self, other: &Range<T>) -> bool {
        other.start >= *self.start() && *self.end() >= other.end
    }
}
1168
/// A way to sort strings with starting numbers numerically first, falling back to alphanumeric one,
/// case-insensitive.
///
/// This is useful for turning regular alphanumerically sorted sequences as `1-abc, 10, 11-def, .., 2, 21-abc`
/// into `1-abc, 2, 10, 11-def, .., 21-abc`
///
/// The first field is the parsed numeric prefix (if any); the second is the text that
/// follows it.
#[derive(Debug, PartialEq, Eq)]
pub struct NumericPrefixWithSuffix<'a>(Option<u64>, &'a str);

impl<'a> NumericPrefixWithSuffix<'a> {
    /// Splits `str` into its leading run of ASCII digits, parsed as a `u64`, and the rest.
    ///
    /// When there is no leading digit, or the digit run does not fit into a `u64`, the
    /// prefix is `None` and the *entire* input is kept as the suffix. Keeping the digits
    /// in the overflow case matters: otherwise two different over-long numbers followed
    /// by the same text would compare (and test) as equal.
    pub fn from_numeric_prefixed_str(str: &'a str) -> Self {
        // ASCII digits are single bytes, so the digit count is also a valid byte offset.
        let digits_len = str.bytes().take_while(u8::is_ascii_digit).count();
        let (prefix, remainder) = str.split_at(digits_len);

        match prefix.parse::<u64>() {
            Ok(number) => Self(Some(number), remainder),
            Err(_) => Self(None, str),
        }
    }
}
1186
1187/// When dealing with equality, we need to consider the case of the strings to achieve strict equality
1188/// to handle cases like "a" < "A" instead of "a" == "A".
1189impl Ord for NumericPrefixWithSuffix<'_> {
1190    fn cmp(&self, other: &Self) -> Ordering {
1191        match (self.0, other.0) {
1192            (None, None) => UniCase::new(self.1)
1193                .cmp(&UniCase::new(other.1))
1194                .then_with(|| self.1.cmp(other.1).reverse()),
1195            (None, Some(_)) => Ordering::Greater,
1196            (Some(_), None) => Ordering::Less,
1197            (Some(a), Some(b)) => a.cmp(&b).then_with(|| {
1198                UniCase::new(self.1)
1199                    .cmp(&UniCase::new(other.1))
1200                    .then_with(|| self.1.cmp(other.1).reverse())
1201            }),
1202        }
1203    }
1204}
1205
1206impl PartialOrd for NumericPrefixWithSuffix<'_> {
1207    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1208        Some(self.cmp(other))
1209    }
1210}
1211
/// Capitalizes the first character of a string.
///
/// Returns a new `String` in which the first character of the input has been replaced by
/// its uppercase form; every following character is copied unchanged. An empty input
/// yields an empty string.
///
/// # Examples
///
/// ```
/// use util::capitalize;
///
/// assert_eq!(capitalize("hello"), "Hello");
/// assert_eq!(capitalize("WORLD"), "WORLD");
/// assert_eq!(capitalize(""), "");
/// ```
pub fn capitalize(str: &str) -> String {
    let mut rest = str.chars();
    let Some(initial) = rest.next() else {
        return String::new();
    };

    // Uppercasing a single character may produce more than one character.
    let mut capitalized: String = initial.to_uppercase().collect();
    capitalized.push_str(rest.as_str());
    capitalized
}
1233
1234fn emoji_regex() -> &'static Regex {
1235    static EMOJI_REGEX: LazyLock<Regex> =
1236        LazyLock::new(|| Regex::new("(\\p{Emoji}|\u{200D})").unwrap());
1237    &EMOJI_REGEX
1238}
1239
1240/// Returns true if the given string consists of emojis only.
1241/// E.g. "👨‍👩‍👧‍👧👋" will return true, but "👋!" will return false.
1242pub fn word_consists_of_emojis(s: &str) -> bool {
1243    let mut prev_end = 0;
1244    for capture in emoji_regex().find_iter(s) {
1245        if capture.start() != prev_end {
1246            return false;
1247        }
1248        prev_end = capture.end();
1249    }
1250    prev_end == s.len()
1251}
1252
/// Similar to `str::split`, but also provides byte-offset ranges of the results. Unlike
/// `str::split`, this is not generic on pattern types and does not return an `Iterator`.
///
/// Characters for which `pat` returns true act as separators. Empty pieces (between
/// adjacent separators, or at either end of the string) are not included in the result.
pub fn split_str_with_ranges(s: &str, pat: impl Fn(char) -> bool) -> Vec<(Range<usize>, &str)> {
    // Byte spans of every separator, followed by an empty sentinel span at the end of the
    // string so that the trailing piece is flushed by the same loop.
    let separators = s
        .char_indices()
        .filter(|&(_, ch)| pat(ch))
        .map(|(offset, ch)| (offset, offset + ch.len_utf8()))
        .chain(std::iter::once((s.len(), s.len())));

    let mut pieces = Vec::new();
    let mut piece_start = 0;
    for (separator_start, separator_end) in separators {
        if separator_start > piece_start {
            pieces.push((
                piece_start..separator_start,
                &s[piece_start..separator_start],
            ));
        }
        piece_start = separator_end;
    }
    pieces
}
1274
/// Free-function shorthand for `Default::default()`; the type is inferred from context.
pub fn default<D: Default>() -> D {
    D::default()
}
1278
/// Returns the shell to use on this system.
///
/// On Windows this is whatever `get_windows_system_shell` returns. Elsewhere it is the
/// value of the `SHELL` environment variable, or `/bin/sh` when that variable cannot be
/// read.
pub fn get_system_shell() -> String {
    #[cfg(target_os = "windows")]
    {
        get_windows_system_shell()
    }

    #[cfg(not(target_os = "windows"))]
    {
        match std::env::var("SHELL") {
            Ok(shell) => shell,
            Err(_) => "/bin/sh".to_string(),
        }
    }
}
1290
1291#[derive(Debug)]
1292pub enum ConnectionResult<O> {
1293    Timeout,
1294    ConnectionReset,
1295    Result(anyhow::Result<O>),
1296}
1297
1298impl<O> ConnectionResult<O> {
1299    pub fn into_response(self) -> anyhow::Result<O> {
1300        match self {
1301            ConnectionResult::Timeout => anyhow::bail!("Request timed out"),
1302            ConnectionResult::ConnectionReset => anyhow::bail!("Server reset the connection"),
1303            ConnectionResult::Result(r) => r,
1304        }
1305    }
1306}
1307
1308impl<O> From<anyhow::Result<O>> for ConnectionResult<O> {
1309    fn from(result: anyhow::Result<O>) -> Self {
1310        ConnectionResult::Result(result)
1311    }
1312}
1313
1314#[cfg(test)]
1315mod tests {
1316    use super::*;
1317
    /// Exercises `extend_sorted` with a descending comparator (`b.cmp(a)`).
    // NOTE(review): the third argument looks like a cap on the resulting length (the
    // expected vectors hold 5 and 8 elements) — confirm against `extend_sorted` itself.
    #[test]
    fn test_extend_sorted() {
        let mut vec = vec![];

        // Six candidates go in, only the first five are expected to remain.
        extend_sorted(&mut vec, vec![21, 17, 13, 8, 1, 0], 5, |a, b| b.cmp(a));
        assert_eq!(vec, &[21, 17, 13, 8, 1]);

        // 17 and 8 are already present and are expected to appear only once.
        extend_sorted(&mut vec, vec![101, 19, 17, 8, 2], 8, |a, b| b.cmp(a));
        assert_eq!(vec, &[101, 21, 19, 17, 13, 8, 2, 1]);

        // The smallest values are expected to fall off the end.
        extend_sorted(&mut vec, vec![1000, 19, 17, 9, 5], 8, |a, b| b.cmp(a));
        assert_eq!(vec, &[1000, 101, 21, 19, 17, 13, 9, 8]);
    }
1331
    /// Checks `truncate_to_bottom_n_sorted_by` for limits above, equal to and below the
    /// input length, including a limit of zero.
    #[test]
    fn test_truncate_to_bottom_n_sorted_by() {
        // Limit larger than the input: everything is kept, in sorted order.
        let mut vec: Vec<u32> = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 10, &u32::cmp);
        assert_eq!(vec, &[1, 2, 3, 4, 5]);

        // Limit equal to the input length.
        vec = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 5, &u32::cmp);
        assert_eq!(vec, &[1, 2, 3, 4, 5]);

        // Limit smaller than the input: only the smallest values remain.
        vec = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 4, &u32::cmp);
        assert_eq!(vec, &[1, 2, 3, 4]);

        vec = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 1, &u32::cmp);
        assert_eq!(vec, &[1]);

        // A limit of zero empties the vector.
        vec = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 0, &u32::cmp);
        assert!(vec.is_empty());
    }
1354
1355    #[test]
1356    fn test_iife() {
1357        fn option_returning_function() -> Option<()> {
1358            None
1359        }
1360
1361        let foo = maybe!({
1362            option_returning_function()?;
1363            Some(())
1364        });
1365
1366        assert_eq!(foo, None);
1367    }
1368
    /// Checks `truncate_and_trailoff` around the length limit, for ASCII and for
    /// multi-byte characters: input at or under the limit is returned as is, longer input
    /// is cut and gets a trailing `…`.
    #[test]
    fn test_truncate_and_trailoff() {
        assert_eq!(truncate_and_trailoff("", 5), "");
        assert_eq!(truncate_and_trailoff("aaaaaa", 7), "aaaaaa");
        assert_eq!(truncate_and_trailoff("aaaaaa", 6), "aaaaaa");
        assert_eq!(truncate_and_trailoff("aaaaaa", 5), "aaaaa…");
        assert_eq!(truncate_and_trailoff("èèèèèè", 7), "èèèèèè");
        assert_eq!(truncate_and_trailoff("èèèèèè", 6), "èèèèèè");
        assert_eq!(truncate_and_trailoff("èèèèèè", 5), "èèèèè…");
    }
1379
    /// Checks `truncate_and_remove_front` around the length limit, for ASCII and for
    /// multi-byte characters: input at or under the limit is returned as is, longer input
    /// loses its front and gets a leading `…`.
    #[test]
    fn test_truncate_and_remove_front() {
        assert_eq!(truncate_and_remove_front("", 5), "");
        assert_eq!(truncate_and_remove_front("aaaaaa", 7), "aaaaaa");
        assert_eq!(truncate_and_remove_front("aaaaaa", 6), "aaaaaa");
        assert_eq!(truncate_and_remove_front("aaaaaa", 5), "…aaaaa");
        assert_eq!(truncate_and_remove_front("èèèèèè", 7), "èèèèèè");
        assert_eq!(truncate_and_remove_front("èèèèèè", 6), "èèèèèè");
        assert_eq!(truncate_and_remove_front("èèèèèè", 5), "…èèèèè");
    }
1390
1391    #[test]
1392    fn test_numeric_prefix_str_method() {
1393        let target = "1a";
1394        assert_eq!(
1395            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
1396            NumericPrefixWithSuffix(Some(1), "a")
1397        );
1398
1399        let target = "12ab";
1400        assert_eq!(
1401            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
1402            NumericPrefixWithSuffix(Some(12), "ab")
1403        );
1404
1405        let target = "12_ab";
1406        assert_eq!(
1407            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
1408            NumericPrefixWithSuffix(Some(12), "_ab")
1409        );
1410
1411        let target = "1_2ab";
1412        assert_eq!(
1413            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
1414            NumericPrefixWithSuffix(Some(1), "_2ab")
1415        );
1416
1417        let target = "1.2";
1418        assert_eq!(
1419            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
1420            NumericPrefixWithSuffix(Some(1), ".2")
1421        );
1422
1423        let target = "1.2_a";
1424        assert_eq!(
1425            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
1426            NumericPrefixWithSuffix(Some(1), ".2_a")
1427        );
1428
1429        let target = "12.2_a";
1430        assert_eq!(
1431            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
1432            NumericPrefixWithSuffix(Some(12), ".2_a")
1433        );
1434
1435        let target = "12a.2_a";
1436        assert_eq!(
1437            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
1438            NumericPrefixWithSuffix(Some(12), "a.2_a")
1439        );
1440    }
1441
1442    #[test]
1443    fn test_numeric_prefix_with_suffix() {
1444        let mut sorted = vec!["1-abc", "10", "11def", "2", "21-abc"];
1445        sorted.sort_by_key(|s| NumericPrefixWithSuffix::from_numeric_prefixed_str(s));
1446        assert_eq!(sorted, ["1-abc", "2", "10", "11def", "21-abc"]);
1447
1448        for numeric_prefix_less in ["numeric_prefix_less", "aaa", "~™£"] {
1449            assert_eq!(
1450                NumericPrefixWithSuffix::from_numeric_prefixed_str(numeric_prefix_less),
1451                NumericPrefixWithSuffix(None, numeric_prefix_less),
1452                "String without numeric prefix `{numeric_prefix_less}` should not be converted into NumericPrefixWithSuffix"
1453            )
1454        }
1455    }
1456
1457    #[test]
1458    fn test_word_consists_of_emojis() {
1459        let words_to_test = vec![
1460            ("👨‍👩‍👧‍👧👋🥒", true),
1461            ("👋", true),
1462            ("!👋", false),
1463            ("👋!", false),
1464            ("👋 ", false),
1465            (" 👋", false),
1466            ("Test", false),
1467        ];
1468
1469        for (text, expected_result) in words_to_test {
1470            assert_eq!(word_consists_of_emojis(text), expected_result);
1471        }
1472    }
1473
    /// Checks `truncate_lines_and_trailoff` on a three-line text with limits of 2, 3 and 4.
    #[test]
    fn test_truncate_lines_and_trailoff() {
        let text = r#"Line 1
Line 2
Line 3"#;

        // With a limit of 2, the `…` marker takes the place of the second line.
        assert_eq!(
            truncate_lines_and_trailoff(text, 2),
            r#"Line 1
…"#
        );

        // With a limit of 3, the third line is still replaced by the marker.
        assert_eq!(
            truncate_lines_and_trailoff(text, 3),
            r#"Line 1
Line 2
…"#
        );

        // With a limit of 4, the text is returned untouched.
        assert_eq!(
            truncate_lines_and_trailoff(text, 4),
            r#"Line 1
Line 2
Line 3"#
        );
    }
1500
    /// Checks `expanded_and_wrapped_usize_range` for expansions that stay in bounds and
    /// for expansions that wrap around at either or both ends.
    // NOTE(review): the arguments appear to be (range, expand-before, expand-after,
    // wrap length) — confirm against the function's definition.
    #[test]
    fn test_expanded_and_wrapped_usize_range() {
        // Neither wrap
        assert_eq!(
            expanded_and_wrapped_usize_range(2..4, 1, 1, 8).collect::<Vec<usize>>(),
            (1..5).collect::<Vec<usize>>()
        );
        // Start wraps
        assert_eq!(
            expanded_and_wrapped_usize_range(2..4, 3, 1, 8).collect::<Vec<usize>>(),
            ((0..5).chain(7..8)).collect::<Vec<usize>>()
        );
        // Start wraps all the way around
        assert_eq!(
            expanded_and_wrapped_usize_range(2..4, 5, 1, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
        // Start wraps all the way around and past 0
        assert_eq!(
            expanded_and_wrapped_usize_range(2..4, 10, 1, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
        // End wraps
        assert_eq!(
            expanded_and_wrapped_usize_range(3..5, 1, 4, 8).collect::<Vec<usize>>(),
            (0..1).chain(2..8).collect::<Vec<usize>>()
        );
        // End wraps all the way around
        assert_eq!(
            expanded_and_wrapped_usize_range(3..5, 1, 5, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
        // End wraps all the way around and past the end
        assert_eq!(
            expanded_and_wrapped_usize_range(3..5, 1, 10, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
        // Both start and end wrap
        assert_eq!(
            expanded_and_wrapped_usize_range(3..5, 4, 4, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
    }
1544
    /// Checks the order in which `wrapped_usize_outward_from` yields indices: the expected
    /// sequences start at the given index and alternate between the next higher and next
    /// lower index, wrapping around at the bounds.
    // NOTE(review): the arguments appear to be (start, count before, count after,
    // wrap length) — confirm against the function's definition.
    #[test]
    fn test_wrapped_usize_outward_from() {
        // No wrapping
        assert_eq!(
            wrapped_usize_outward_from(4, 2, 2, 10).collect::<Vec<usize>>(),
            vec![4, 5, 3, 6, 2]
        );
        // Wrapping at end
        assert_eq!(
            wrapped_usize_outward_from(8, 2, 3, 10).collect::<Vec<usize>>(),
            vec![8, 9, 7, 0, 6, 1]
        );
        // Wrapping at start
        assert_eq!(
            wrapped_usize_outward_from(1, 3, 2, 10).collect::<Vec<usize>>(),
            vec![1, 2, 0, 3, 9, 8]
        );
        // All values wrap around
        assert_eq!(
            wrapped_usize_outward_from(5, 10, 10, 8).collect::<Vec<usize>>(),
            vec![5, 6, 4, 7, 3, 0, 2, 1]
        );
        // None before / after
        assert_eq!(
            wrapped_usize_outward_from(3, 0, 0, 8).collect::<Vec<usize>>(),
            vec![3]
        );
        // Starting point already wrapped
        assert_eq!(
            wrapped_usize_outward_from(15, 2, 2, 10).collect::<Vec<usize>>(),
            vec![5, 6, 4, 7, 3]
        );
        // wrap_length of 0
        assert_eq!(
            wrapped_usize_outward_from(4, 2, 2, 0).collect::<Vec<usize>>(),
            Vec::<usize>::new()
        );
    }
1583
    /// Checks `truncate_lines_to_byte_limit`: the expected outputs end on a line boundary
    /// whenever a newline fits within the limit, and are cut inside the first line otherwise.
    #[test]
    fn test_truncate_lines_to_byte_limit() {
        let text = "Line 1\nLine 2\nLine 3\nLine 4";

        // Limit that includes all lines
        assert_eq!(truncate_lines_to_byte_limit(text, 100), text);

        // Exactly the first line
        assert_eq!(truncate_lines_to_byte_limit(text, 7), "Line 1\n");

        // Limit between lines
        assert_eq!(truncate_lines_to_byte_limit(text, 13), "Line 1\n");
        assert_eq!(truncate_lines_to_byte_limit(text, 20), "Line 1\nLine 2\n");

        // Limit before first newline
        assert_eq!(truncate_lines_to_byte_limit(text, 6), "Line ");

        // Test with non-ASCII characters
        let text_utf8 = "Line 1\nLíne 2\nLine 3";
        assert_eq!(
            truncate_lines_to_byte_limit(text_utf8, 15),
            "Line 1\nLíne 2\n"
        );
    }
1608
    /// Checks `char_len_with_expanded_tabs` for strings with tabs at the start, middle and
    /// end, for two tab sizes, and for a non-zero first argument.
    // NOTE(review): the first argument looks like a starting column offset — confirm
    // against the function's definition.
    #[test]
    fn test_string_size_with_expanded_tabs() {
        // Shorthand for building the non-zero tab size.
        let nz = |val| NonZeroU32::new(val).unwrap();
        assert_eq!(char_len_with_expanded_tabs(0, "", nz(4)), 0);
        assert_eq!(char_len_with_expanded_tabs(0, "hello", nz(4)), 5);
        assert_eq!(char_len_with_expanded_tabs(0, "\thello", nz(4)), 9);
        assert_eq!(char_len_with_expanded_tabs(0, "abc\tab", nz(4)), 6);
        assert_eq!(char_len_with_expanded_tabs(0, "hello\t", nz(4)), 8);
        assert_eq!(char_len_with_expanded_tabs(0, "\t\t", nz(8)), 16);
        assert_eq!(char_len_with_expanded_tabs(0, "x\t", nz(8)), 8);
        assert_eq!(char_len_with_expanded_tabs(7, "x\t", nz(8)), 9);
    }
1621
    /// Table-driven check of `WordBreakingTokenizer`: each entry pairs an input string with
    /// the exact token sequence the tokenizer is expected to yield for it.
    #[test]
    fn test_word_breaking_tokenizer() {
        let tests: &[(&str, &[WordBreakToken<'static>])] = &[
            ("", &[]),
            ("  ", &[whitespace("  ", 2)]),
            ("Ʒ", &[word("Ʒ", 1)]),
            ("Ǽ", &[word("Ǽ", 1)]),
            ("⋑", &[word("⋑", 1)]),
            ("⋑⋑", &[word("⋑⋑", 2)]),
            (
                "原理，进而",
                &[word("原", 1), word("理，", 2), word("进", 1), word("而", 1)],
            ),
            (
                "hello world",
                &[word("hello", 5), whitespace(" ", 1), word("world", 5)],
            ),
            (
                "hello, world",
                &[word("hello,", 6), whitespace(" ", 1), word("world", 5)],
            ),
            (
                "  hello world",
                &[
                    whitespace("  ", 2),
                    word("hello", 5),
                    whitespace(" ", 1),
                    word("world", 5),
                ],
            ),
            (
                "这是什么 \n 钢笔",
                &[
                    word("这", 1),
                    word("是", 1),
                    word("什", 1),
                    word("么", 1),
                    whitespace(" ", 1),
                    newline(),
                    whitespace(" ", 1),
                    word("钢", 1),
                    word("笔", 1),
                ],
            ),
            (" mutton", &[whitespace(" ", 1), word("mutton", 6)]),
        ];

        // Shorthand constructor for the expected `Word` tokens in the table above.
        fn word(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
            WordBreakToken::Word {
                token,
                grapheme_len,
            }
        }

        // Shorthand constructor for the expected `InlineWhitespace` tokens.
        fn whitespace(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
            WordBreakToken::InlineWhitespace {
                token,
                grapheme_len,
            }
        }

        // Shorthand constructor for the expected `Newline` token.
        fn newline() -> WordBreakToken<'static> {
            WordBreakToken::Newline
        }

        for (input, result) in tests {
            assert_eq!(
                WordBreakingTokenizer::new(input)
                    .collect::<Vec<_>>()
                    .as_slice(),
                *result,
            );
        }
    }
1696
    /// Checks `wrap_with_prefix` on an unbreakable word, on tab-led input, on input with an
    /// embedded newline, and on CJK text.
    // NOTE(review): the arguments appear to be (line prefix, text, wrap column, tab size,
    // and a boolean flag whose meaning is not visible here) — confirm against the
    // function's definition.
    #[test]
    fn test_wrap_with_prefix() {
        // A single word longer than the limit is expected to stay on one line.
        assert_eq!(
            wrap_with_prefix(
                "# ".to_string(),
                "abcdefg".to_string(),
                4,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "# abcdefg"
        );
        // The leading tab is not present in the expected output.
        assert_eq!(
            wrap_with_prefix(
                "".to_string(),
                "\thello world".to_string(),
                8,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "hello\nworld"
        );
        // The embedded newline is expected to be re-flowed, and every output line
        // carries the prefix.
        assert_eq!(
            wrap_with_prefix(
                "// ".to_string(),
                "xx \nyy zz aa bb cc".to_string(),
                12,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "// xx yy zz\n// aa bb cc"
        );
        // CJK text is expected to break between individual characters.
        assert_eq!(
            wrap_with_prefix(
                String::new(),
                "这是什么 \n 钢笔".to_string(),
                3,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "这是什\n么 钢\n笔"
        );
    }
1740
1741    #[test]
1742    fn test_split_with_ranges() {
1743        let input = "hi";
1744        let result = split_str_with_ranges(input, |c| c == ' ');
1745
1746        assert_eq!(result.len(), 1);
1747        assert_eq!(result[0], (0..2, "hi"));
1748
1749        let input = "héllo🦀world";
1750        let result = split_str_with_ranges(input, |c| c == '🦀');
1751
1752        assert_eq!(result.len(), 2);
1753        assert_eq!(result[0], (0..6, "héllo")); // 'é' is 2 bytes
1754        assert_eq!(result[1], (10..15, "world")); // '🦀' is 4 bytes
1755    }
1756}