1pub mod arc_cow;
2pub mod archive;
3pub mod command;
4pub mod fs;
5pub mod markdown;
6pub mod paths;
7pub mod serde;
8pub mod size;
9#[cfg(any(test, feature = "test-support"))]
10pub mod test;
11pub mod time;
12
13use anyhow::Result;
14use futures::Future;
15use itertools::Either;
16use regex::Regex;
17use std::num::NonZeroU32;
18use std::sync::{LazyLock, OnceLock};
19use std::{
20 borrow::Cow,
21 cmp::{self, Ordering},
22 env,
23 ops::{AddAssign, Range, RangeInclusive},
24 panic::Location,
25 pin::Pin,
26 task::{Context, Poll},
27 time::Instant,
28};
29use unicase::UniCase;
30
31#[cfg(unix)]
32use anyhow::Context as _;
33
34pub use take_until::*;
35#[cfg(any(test, feature = "test-support"))]
36pub use util_macros::{line_endings, separator, uri};
37
/// Panics when `debug_assertions` are enabled; otherwise logs the message at
/// error level together with a captured backtrace and keeps running.
///
/// Takes the same arguments as [`panic!`].
#[macro_export]
macro_rules! debug_panic {
    ( $($fmt_arg:tt)* ) => {
        if cfg!(debug_assertions) {
            panic!( $($fmt_arg)* );
        } else {
            // Without debug assertions: record the failure instead of panicking.
            let backtrace = std::backtrace::Backtrace::capture();
            log::error!("{}\n{:?}", format_args!($($fmt_arg)*), backtrace);
        }
    };
}
49
/// A macro that adds "C:" to the beginning of a path literal on Windows and replaces
/// every `/` separator with `\`.
/// On non-Windows platforms the path literal is returned as is.
///
/// This is the Windows definition; it is only available in tests or with the
/// `test-support` feature.
///
/// # Examples
/// ```rust
/// use util::path;
///
/// let path = path!("/Users/user/file.txt");
/// #[cfg(target_os = "windows")]
/// assert_eq!(path, "C:\\Users\\user\\file.txt");
/// #[cfg(not(target_os = "windows"))]
/// assert_eq!(path, "/Users/user/file.txt");
/// ```
#[cfg(all(any(test, feature = "test-support"), target_os = "windows"))]
#[macro_export]
macro_rules! path {
    ($path:literal) => {
        concat!("C:", util::separator!($path))
    };
}
71
/// A macro that adds "C:" to the beginning of a path literal on Windows and replaces
/// every `/` separator with `\`.
/// On non-Windows platforms the path literal is returned as is.
///
/// This is the non-Windows definition, which expands to the literal unchanged; it is
/// only available in tests or with the `test-support` feature.
///
/// # Examples
/// ```rust
/// use util::path;
///
/// let path = path!("/Users/user/file.txt");
/// #[cfg(target_os = "windows")]
/// assert_eq!(path, "C:\\Users\\user\\file.txt");
/// #[cfg(not(target_os = "windows"))]
/// assert_eq!(path, "/Users/user/file.txt");
/// ```
#[cfg(all(any(test, feature = "test-support"), not(target_os = "windows")))]
#[macro_export]
macro_rules! path {
    ($path:literal) => {
        $path
    };
}
93
/// Returns the longest prefix of `s` that contains at most `max_chars` characters.
///
/// The cut always lands on a `char` boundary. When `s` has `max_chars` characters
/// or fewer it is returned unchanged.
pub fn truncate(s: &str, max_chars: usize) -> &str {
    s.char_indices()
        .nth(max_chars)
        .map_or(s, |(byte_ix, _)| &s[..byte_ix])
}
100
/// Keeps the first `max_chars` characters of `s` and appends an ellipsis ("…") when
/// anything was cut off. Strings with at most `max_chars` characters are returned
/// unchanged.
///
/// Debug builds assert that `max_chars >= 5`.
pub fn truncate_and_trailoff(s: &str, max_chars: usize) -> String {
    debug_assert!(max_chars >= 5);

    // A string never has more chars than bytes, so a short byte length means there
    // is nothing to cut and the char walk can be skipped.
    if s.len() <= max_chars {
        return s.to_string();
    }

    match s.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}…", &s[..cut]),
        None => s.to_string(),
    }
}
117
/// Keeps the last `max_chars` characters of `s` and prepends an ellipsis ("…") when
/// anything was cut off the front. Strings with at most `max_chars` characters are
/// returned unchanged.
///
/// Debug builds assert that `max_chars >= 5`.
pub fn truncate_and_remove_front(s: &str, max_chars: usize) -> String {
    debug_assert!(max_chars >= 5);

    // A string never has more chars than bytes, so a short byte length means there
    // is nothing to cut and the char walk can be skipped.
    if s.len() <= max_chars {
        return s.to_string();
    }

    // Byte index of the char that starts the `max_chars`-long suffix, if `s` is
    // long enough to have one.
    let suffix_start = s
        .char_indices()
        .rev()
        .nth(max_chars.saturating_sub(1))
        .map(|(byte_ix, _)| byte_ix);

    match suffix_start {
        Some(start) if start > 0 => format!("…{}", &s[start..]),
        _ => s.to_string(),
    }
}
138
/// Limits `s` to `max_lines` lines.
///
/// If `s` has at least `max_lines` lines, the first `max_lines - 1` lines are kept and
/// a final line containing only "…" is appended, so exactly `max_lines` lines are
/// returned. If `s` has fewer lines it is returned with its lines joined by `\n`.
///
/// `max_lines == 0` yields an empty string.
pub fn truncate_lines_and_trailoff(s: &str, max_lines: usize) -> String {
    let mut lines = s.lines().take(max_lines).collect::<Vec<_>>();
    // `saturating_sub` avoids the `0 - 1` underflow (a panic in debug builds) when
    // `max_lines` is zero; in that case `lines` is empty and "" is returned.
    if lines.len() > max_lines.saturating_sub(1) {
        // Give up the last kept line to make room for the ellipsis line.
        lines.pop();
        lines.join("\n") + "\n…"
    } else {
        lines.join("\n")
    }
}
151
/// Returns the longest prefix of `s` that is strictly shorter than `max_bytes` bytes
/// and ends on a character boundary.
///
/// `s` is returned unchanged when it is already shorter than `max_bytes`.
pub fn truncate_to_byte_limit(s: &str, max_bytes: usize) -> &str {
    if s.len() < max_bytes {
        return s;
    }

    // Walk back from the largest allowed length to the nearest char boundary.
    (0..max_bytes)
        .rev()
        .find(|&end| s.is_char_boundary(end))
        .map_or("", |end| &s[..end])
}
167
168/// Takes a prefix of complete lines which fit within the byte limit. If the first line is longer
169/// than the limit, truncates at a character boundary.
170pub fn truncate_lines_to_byte_limit(s: &str, max_bytes: usize) -> &str {
171 if s.len() < max_bytes {
172 return s;
173 }
174
175 for i in (0..max_bytes).rev() {
176 if s.is_char_boundary(i) {
177 if s.as_bytes()[i] == b'\n' {
178 // Since the i-th character is \n, valid to slice at i + 1.
179 return &s[..i + 1];
180 }
181 }
182 }
183
184 truncate_to_byte_limit(s, max_bytes)
185}
186
/// Returns the number of columns `text` occupies when it starts at column `offset`,
/// counting every non-tab character as one column and every tab as advancing to the
/// next multiple of `tab_size`.
fn char_len_with_expanded_tabs(offset: usize, text: &str, tab_size: NonZeroU32) -> usize {
    let tab_width = tab_size.get() as usize;

    let end_column = text.chars().fold(offset, |column, ch| match ch {
        // A tab fills the rest of the current tab stop.
        '\t' => column + (tab_width - column % tab_width),
        _ => column + 1,
    });

    end_column - offset
}
201
/// Tokenizes a string into runs of text that should stick together, or that is whitespace.
///
/// Tokens are produced by this type's `Iterator` implementation, which consumes
/// `input` from the front.
struct WordBreakingTokenizer<'a> {
    /// The part of the original string that has not been tokenized yet.
    input: &'a str,
}

impl<'a> WordBreakingTokenizer<'a> {
    /// Creates a tokenizer over the whole of `input`.
    fn new(input: &'a str) -> Self {
        Self { input }
    }
}
212
213fn is_char_ideographic(ch: char) -> bool {
214 use unicode_script::Script::*;
215 use unicode_script::UnicodeScript;
216 matches!(ch.script(), Han | Tangut | Yi)
217}
218
219fn is_grapheme_ideographic(text: &str) -> bool {
220 text.chars().any(is_char_ideographic)
221}
222
/// Returns `true` when at least one character of the grapheme `text` is whitespace.
fn is_grapheme_whitespace(text: &str) -> bool {
    text.chars().any(char::is_whitespace)
}
226
/// Returns `true` when the grapheme `text` starts with punctuation that should not be
/// separated from an ideograph directly before it.
///
/// Both the ASCII and the fullwidth forms of `, ? ! : ;` are accepted, alongside the
/// ideographic full stop, the ideographic comma and the ellipsis.
fn should_stay_with_preceding_ideograph(text: &str) -> bool {
    text.chars().next().is_some_and(|ch| {
        matches!(
            ch,
            '。' | '、' | ',' | '?' | '!' | ':' | ';' | '…'
                | '\u{FF0C}' // fullwidth comma
                | '\u{FF1F}' // fullwidth question mark
                | '\u{FF01}' // fullwidth exclamation mark
                | '\u{FF1A}' // fullwidth colon
                | '\u{FF1B}' // fullwidth semicolon
        )
    })
}
232
/// A token produced by `WordBreakingTokenizer`.
#[derive(PartialEq, Eq, Debug, Clone, Copy)]
enum WordBreakToken<'a> {
    /// A run of non-whitespace text; `grapheme_len` is the number of graphemes in `token`.
    Word { token: &'a str, grapheme_len: usize },
    /// A run of whitespace other than a lone `"\n"`; `grapheme_len` is the number of
    /// graphemes in `token`.
    InlineWhitespace { token: &'a str, grapheme_len: usize },
    /// A token consisting of exactly `"\n"`.
    Newline,
}
239
impl<'a> Iterator for WordBreakingTokenizer<'a> {
    /// Yields a span, the count of graphemes in the token, and whether it was
    /// whitespace. Note that it also breaks at word boundaries.
    type Item = WordBreakToken<'a>;

    /// Splits the next token off the front of the remaining input.
    ///
    /// The first grapheme decides the token's kind. An ideographic grapheme forms a
    /// token on its own, optionally followed by one punctuation grapheme accepted by
    /// `should_stay_with_preceding_ideograph`. Any other grapheme starts a run that is
    /// extended while the following graphemes share its whitespace-ness and
    /// newline-ness. Returns `None` once the input is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        use unicode_segmentation::UnicodeSegmentation;
        if self.input.is_empty() {
            return None;
        }

        let mut iter = self.input.graphemes(true).peekable();
        // Byte length and grapheme count of the token built so far.
        let mut offset = 0;
        let mut grapheme_len = 0;
        if let Some(first_grapheme) = iter.next() {
            let is_newline = first_grapheme == "\n";
            let is_whitespace = is_grapheme_whitespace(first_grapheme);
            offset += first_grapheme.len();
            grapheme_len += 1;
            if is_grapheme_ideographic(first_grapheme) && !is_whitespace {
                // An ideograph is a token by itself, plus at most one trailing
                // punctuation grapheme.
                if let Some(grapheme) = iter.peek().copied() {
                    if should_stay_with_preceding_ideograph(grapheme) {
                        offset += grapheme.len();
                        grapheme_len += 1;
                    }
                }
            } else {
                // NOTE(review): `peek()` followed by `next()` returns the same first
                // item, and these indices are relative to `self.input[offset..]` while
                // `offset` below is relative to `self.input`. As written the `i == offset`
                // check does not look like it can fire -- confirm the intended behavior.
                let mut words = self.input[offset..].split_word_bound_indices().peekable();
                let mut next_word_bound = words.peek().copied();
                if next_word_bound.map_or(false, |(i, _)| i == 0) {
                    next_word_bound = words.next();
                }
                while let Some(grapheme) = iter.peek().copied() {
                    if next_word_bound.map_or(false, |(i, _)| i == offset) {
                        break;
                    };
                    // Stop as soon as the run would mix whitespace with non-whitespace,
                    // or a newline with anything else.
                    if is_grapheme_whitespace(grapheme) != is_whitespace
                        || (grapheme == "\n") != is_newline
                    {
                        break;
                    };
                    offset += grapheme.len();
                    grapheme_len += 1;
                    iter.next();
                }
            }
            // Split the token off the front and keep the rest for the next call.
            let token = &self.input[..offset];
            self.input = &self.input[offset..];
            if token == "\n" {
                Some(WordBreakToken::Newline)
            } else if is_whitespace {
                Some(WordBreakToken::InlineWhitespace {
                    token,
                    grapheme_len,
                })
            } else {
                Some(WordBreakToken::Word {
                    token,
                    grapheme_len,
                })
            }
        } else {
            None
        }
    }
}
306
307pub fn wrap_with_prefix(
308 line_prefix: String,
309 unwrapped_text: String,
310 wrap_column: usize,
311 tab_size: NonZeroU32,
312 preserve_existing_whitespace: bool,
313) -> String {
314 let line_prefix_len = char_len_with_expanded_tabs(0, &line_prefix, tab_size);
315 let mut wrapped_text = String::new();
316 let mut current_line = line_prefix.clone();
317
318 let tokenizer = WordBreakingTokenizer::new(&unwrapped_text);
319 let mut current_line_len = line_prefix_len;
320 let mut in_whitespace = false;
321 for token in tokenizer {
322 let have_preceding_whitespace = in_whitespace;
323 match token {
324 WordBreakToken::Word {
325 token,
326 grapheme_len,
327 } => {
328 in_whitespace = false;
329 if current_line_len + grapheme_len > wrap_column
330 && current_line_len != line_prefix_len
331 {
332 wrapped_text.push_str(current_line.trim_end());
333 wrapped_text.push('\n');
334 current_line.truncate(line_prefix.len());
335 current_line_len = line_prefix_len;
336 }
337 current_line.push_str(token);
338 current_line_len += grapheme_len;
339 }
340 WordBreakToken::InlineWhitespace {
341 mut token,
342 mut grapheme_len,
343 } => {
344 in_whitespace = true;
345 if have_preceding_whitespace && !preserve_existing_whitespace {
346 continue;
347 }
348 if !preserve_existing_whitespace {
349 token = " ";
350 grapheme_len = 1;
351 }
352 if current_line_len + grapheme_len > wrap_column {
353 wrapped_text.push_str(current_line.trim_end());
354 wrapped_text.push('\n');
355 current_line.truncate(line_prefix.len());
356 current_line_len = line_prefix_len;
357 } else if current_line_len != line_prefix_len || preserve_existing_whitespace {
358 current_line.push_str(token);
359 current_line_len += grapheme_len;
360 }
361 }
362 WordBreakToken::Newline => {
363 in_whitespace = true;
364 if preserve_existing_whitespace {
365 wrapped_text.push_str(current_line.trim_end());
366 wrapped_text.push('\n');
367 current_line.truncate(line_prefix.len());
368 current_line_len = line_prefix_len;
369 } else if have_preceding_whitespace {
370 continue;
371 } else if current_line_len + 1 > wrap_column && current_line_len != line_prefix_len
372 {
373 wrapped_text.push_str(current_line.trim_end());
374 wrapped_text.push('\n');
375 current_line.truncate(line_prefix.len());
376 current_line_len = line_prefix_len;
377 } else if current_line_len != line_prefix_len {
378 current_line.push(' ');
379 current_line_len += 1;
380 }
381 }
382 }
383 }
384
385 if !current_line.is_empty() {
386 wrapped_text.push_str(¤t_line);
387 }
388 wrapped_text
389}
390
/// Increments `*value` by one and returns the value it held before the increment,
/// like the postfix `value++` operator of C-family languages.
pub fn post_inc<T: From<u8> + AddAssign<T> + Copy>(value: &mut T) -> T {
    let previous = *value;
    let one = T::from(1u8);
    *value += one;
    previous
}
396
/// Merges a sorted sequence of items into a sorted vector, keeping the vector sorted,
/// skipping items that compare equal to an existing element, and never growing the
/// vector beyond `limit` elements.
///
/// Both `vec` and `new_items` must already be sorted according to `cmp`. Once the
/// vector is full, inserting an item drops the current last element to make room;
/// items that would sort after the last element are discarded.
pub fn extend_sorted<T, I, F>(vec: &mut Vec<T>, new_items: I, limit: usize, mut cmp: F)
where
    I: IntoIterator<Item = T>,
    F: FnMut(&T, &T) -> Ordering,
{
    // Because `new_items` is sorted, each search can start where the last insert landed.
    let mut search_from = 0;
    for candidate in new_items {
        let Err(offset) =
            vec[search_from..].binary_search_by(|existing| cmp(existing, &candidate))
        else {
            // An equal element is already present.
            continue;
        };

        let insert_at = search_from + offset;
        if vec.len() >= limit {
            if insert_at < vec.len() {
                vec.pop();
                vec.insert(insert_at, candidate);
            }
        } else {
            vec.insert(insert_at, candidate);
        }
        search_from = insert_at;
    }
}
419
/// Reduces `items` to its `limit` smallest elements according to `compare` and leaves
/// them sorted in ascending order. If `items` has `limit` elements or fewer, it is
/// only sorted.
pub fn truncate_to_bottom_n_sorted_by<T, F>(items: &mut Vec<T>, limit: usize, compare: &F)
where
    F: Fn(&T, &T) -> Ordering,
{
    if limit == 0 {
        items.clear();
    }

    if limit < items.len() {
        // When limit is near to items.len() it may be more efficient to sort the whole list
        // and truncate, rather than always doing selection first as is done here. It's hard to
        // analyze where the threshold for this should be since the quickselect style algorithm
        // used by `select_nth_unstable_by` makes the prefix partially sorted, and so its work
        // is not wasted - the expected number of comparisons needed by `sort_by` is less than
        // it is for some arbitrary unsorted input.
        items.select_nth_unstable_by(limit, compare);
        items.truncate(limit);
    }

    items.sort_by(compare);
}
441
442#[cfg(unix)]
443fn load_shell_from_passwd() -> Result<()> {
444 let buflen = match unsafe { libc::sysconf(libc::_SC_GETPW_R_SIZE_MAX) } {
445 n if n < 0 => 1024,
446 n => n as usize,
447 };
448 let mut buffer = Vec::with_capacity(buflen);
449
450 let mut pwd: std::mem::MaybeUninit<libc::passwd> = std::mem::MaybeUninit::uninit();
451 let mut result: *mut libc::passwd = std::ptr::null_mut();
452
453 let uid = unsafe { libc::getuid() };
454 let status = unsafe {
455 libc::getpwuid_r(
456 uid,
457 pwd.as_mut_ptr(),
458 buffer.as_mut_ptr() as *mut libc::c_char,
459 buflen,
460 &mut result,
461 )
462 };
463 let entry = unsafe { pwd.assume_init() };
464
465 anyhow::ensure!(
466 status == 0,
467 "call to getpwuid_r failed. uid: {}, status: {}",
468 uid,
469 status
470 );
471 anyhow::ensure!(!result.is_null(), "passwd entry for uid {} not found", uid);
472 anyhow::ensure!(
473 entry.pw_uid == uid,
474 "passwd entry has different uid ({}) than getuid ({}) returned",
475 entry.pw_uid,
476 uid,
477 );
478
479 let shell = unsafe { std::ffi::CStr::from_ptr(entry.pw_shell).to_str().unwrap() };
480 if env::var("SHELL").map_or(true, |shell_env| shell_env != shell) {
481 log::info!(
482 "updating SHELL environment variable to value from passwd entry: {:?}",
483 shell,
484 );
485 unsafe { env::set_var("SHELL", shell) };
486 }
487
488 Ok(())
489}
490
491#[cfg(unix)]
492pub fn load_login_shell_environment() -> Result<()> {
493 load_shell_from_passwd().log_err();
494
495 let marker = "ZED_LOGIN_SHELL_START";
496 let shell = env::var("SHELL").context(
497 "SHELL environment variable is not assigned so we can't source login environment variables",
498 )?;
499
500 // If possible, we want to `cd` in the user's `$HOME` to trigger programs
501 // such as direnv, asdf, mise, ... to adjust the PATH. These tools often hook
502 // into shell's `cd` command (and hooks) to manipulate env.
503 // We do this so that we get the env a user would have when spawning a shell
504 // in home directory.
505 let shell_cmd_prefix = std::env::var_os("HOME")
506 .and_then(|home| home.into_string().ok())
507 .map(|home| format!("cd '{home}';"));
508
509 let shell_cmd = format!(
510 "{}printf '%s' {marker}; /usr/bin/env;",
511 shell_cmd_prefix.as_deref().unwrap_or("")
512 );
513
514 let output = set_pre_exec_to_start_new_session(
515 std::process::Command::new(&shell).args(["-l", "-i", "-c", &shell_cmd]),
516 )
517 .output()
518 .context("failed to spawn login shell to source login environment variables")?;
519 anyhow::ensure!(output.status.success(), "login shell exited with error");
520
521 let stdout = String::from_utf8_lossy(&output.stdout);
522
523 if let Some(env_output_start) = stdout.find(marker) {
524 let env_output = &stdout[env_output_start + marker.len()..];
525
526 parse_env_output(env_output, |key, value| unsafe { env::set_var(key, value) });
527
528 log::info!(
529 "set environment variables from shell:{}, path:{}",
530 shell,
531 env::var("PATH").unwrap_or_default(),
532 );
533 }
534
535 Ok(())
536}
537
538/// Configures the process to start a new session, to prevent interactive shells from taking control
539/// of the terminal.
540///
541/// For more details: https://registerspill.thorstenball.com/p/how-to-lose-control-of-your-shell
542pub fn set_pre_exec_to_start_new_session(
543 command: &mut std::process::Command,
544) -> &mut std::process::Command {
545 // safety: code in pre_exec should be signal safe.
546 // https://man7.org/linux/man-pages/man7/signal-safety.7.html
547 #[cfg(not(target_os = "windows"))]
548 unsafe {
549 use std::os::unix::process::CommandExt;
550 command.pre_exec(|| {
551 libc::setsid();
552 Ok(())
553 });
554 };
555 command
556}
557
/// Parses the output of running `/usr/bin/env` with no arguments and calls `f` once
/// per variable with its name and value.
///
/// A line of the form `KEY=value` with a non-empty key starts a new variable. Any other
/// line is treated as a continuation of the previous variable's value and is appended
/// to it after a `\n`. Continuation lines that appear before the first variable are
/// ignored.
pub fn parse_env_output(env: &str, mut f: impl FnMut(String, String)) {
    // The variable currently being assembled; it is reported once the next one starts.
    let mut pending: Option<(String, String)> = None;

    for line in env.split_terminator('\n') {
        match line.split_once('=') {
            Some((key, value)) if !key.is_empty() => {
                let finished = pending.replace((key.to_string(), value.to_string()));
                if let Some((finished_key, finished_value)) = finished {
                    f(finished_key, finished_value);
                }
            }
            _ => {
                if let Some((_, value)) = pending.as_mut() {
                    value.push('\n');
                    value.push_str(line);
                }
            }
        }
    }

    if let Some((key, value)) = pending {
        f(key, value);
    }
}
583
584pub fn merge_json_lenient_value_into(
585 source: serde_json_lenient::Value,
586 target: &mut serde_json_lenient::Value,
587) {
588 match (source, target) {
589 (serde_json_lenient::Value::Object(source), serde_json_lenient::Value::Object(target)) => {
590 for (key, value) in source {
591 if let Some(target) = target.get_mut(&key) {
592 merge_json_lenient_value_into(value, target);
593 } else {
594 target.insert(key, value);
595 }
596 }
597 }
598
599 (serde_json_lenient::Value::Array(source), serde_json_lenient::Value::Array(target)) => {
600 for value in source {
601 target.push(value);
602 }
603 }
604
605 (source, target) => *target = source,
606 }
607}
608
609pub fn merge_json_value_into(source: serde_json::Value, target: &mut serde_json::Value) {
610 use serde_json::Value;
611
612 match (source, target) {
613 (Value::Object(source), Value::Object(target)) => {
614 for (key, value) in source {
615 if let Some(target) = target.get_mut(&key) {
616 merge_json_value_into(value, target);
617 } else {
618 target.insert(key, value);
619 }
620 }
621 }
622
623 (Value::Array(source), Value::Array(target)) => {
624 for value in source {
625 target.push(value);
626 }
627 }
628
629 (source, target) => *target = source,
630 }
631}
632
633pub fn merge_non_null_json_value_into(source: serde_json::Value, target: &mut serde_json::Value) {
634 use serde_json::Value;
635 if let Value::Object(source_object) = source {
636 let target_object = if let Value::Object(target) = target {
637 target
638 } else {
639 *target = Value::Object(Default::default());
640 target.as_object_mut().unwrap()
641 };
642 for (key, value) in source_object {
643 if let Some(target) = target_object.get_mut(&key) {
644 merge_non_null_json_value_into(value, target);
645 } else if !value.is_null() {
646 target_object.insert(key, value);
647 }
648 }
649 } else if !source.is_null() {
650 *target = source
651 }
652}
653
/// Runs `f` and returns its result. When the `ZED_MEASUREMENTS` environment variable
/// is `1` or `true`, the elapsed time is also printed to stderr, prefixed with `label`.
///
/// The environment variable is read once, on the first call.
pub fn measure<R>(label: &str, f: impl FnOnce() -> R) -> R {
    static ZED_MEASUREMENTS: OnceLock<bool> = OnceLock::new();
    let enabled = *ZED_MEASUREMENTS.get_or_init(|| {
        matches!(
            env::var("ZED_MEASUREMENTS").as_deref(),
            Ok("1") | Ok("true")
        )
    });

    if !enabled {
        return f();
    }

    let start = Instant::now();
    let result = f();
    let elapsed = start.elapsed();
    eprintln!("{}: {:?}", label, elapsed);
    result
}
672
673pub fn expanded_and_wrapped_usize_range(
674 range: Range<usize>,
675 additional_before: usize,
676 additional_after: usize,
677 wrap_length: usize,
678) -> impl Iterator<Item = usize> {
679 let start_wraps = range.start < additional_before;
680 let end_wraps = wrap_length < range.end + additional_after;
681 if start_wraps && end_wraps {
682 Either::Left(0..wrap_length)
683 } else if start_wraps {
684 let wrapped_start = (range.start + wrap_length).saturating_sub(additional_before);
685 if wrapped_start <= range.end {
686 Either::Left(0..wrap_length)
687 } else {
688 Either::Right((0..range.end + additional_after).chain(wrapped_start..wrap_length))
689 }
690 } else if end_wraps {
691 let wrapped_end = range.end + additional_after - wrap_length;
692 if range.start <= wrapped_end {
693 Either::Left(0..wrap_length)
694 } else {
695 Either::Right((0..wrapped_end).chain(range.start - additional_before..wrap_length))
696 }
697 } else {
698 Either::Left((range.start - additional_before)..(range.end + additional_after))
699 }
700}
701
/// Yields indices spreading outward from `start`: `[i, i + 1, i - 1, i + 2, ..]`, each
/// taken modulo `wrap_length`. At most `additional_after` indices after `start` and
/// `additional_before` indices before it are produced. No more than `wrap_length`
/// values are yielded in total, so wrapping never emits duplicates. If `wrap_length`
/// is 0, nothing is yielded.
pub fn wrapped_usize_outward_from(
    start: usize,
    additional_before: usize,
    additional_after: usize,
    wrap_length: usize,
) -> impl Iterator<Item = usize> {
    let mut emitted = 0;
    let mut next_after = 1;
    let mut next_before = 1;

    std::iter::from_fn(move || {
        emitted += 1;
        if emitted > wrap_length {
            return None;
        }
        if emitted == 1 {
            return Some(start % wrap_length);
        }

        let after_available = next_after <= additional_after;
        let before_available = next_before <= additional_before;
        // Step forward when it is the forward side's turn, or when the backward side
        // has nothing left to give.
        let step_after = after_available && (next_after <= next_before || !before_available);

        if step_after {
            let value = (start + next_after) % wrap_length;
            next_after += 1;
            Some(value)
        } else if before_available {
            let value = (start + wrap_length - next_before) % wrap_length;
            next_before += 1;
            Some(value)
        } else {
            None
        }
    })
}
738
/// Returns the shell to use on Windows: the path of a discovered `pwsh.exe`, or
/// `"powershell.exe"` when none is found.
///
/// The search runs once and the result is cached for the lifetime of the process.
/// Locations are tried in this order: the Program Files install, the alternate
/// Program Files install, the MSIX install, the Program Files preview install, the
/// MSIX preview install, the alternate Program Files preview install, and finally
/// the Scoop shim.
#[cfg(target_os = "windows")]
pub fn get_windows_system_shell() -> String {
    use std::path::PathBuf;

    /// Looks for `<base>\PowerShell\<version>\pwsh.exe` and returns the one with the
    /// highest numeric version directory. With `find_preview`, only directories named
    /// `<version>-preview` are considered. `find_alternate` selects the other
    /// Program Files environment variable for the current pointer width.
    fn find_pwsh_in_programfiles(find_alternate: bool, find_preview: bool) -> Option<PathBuf> {
        #[cfg(target_pointer_width = "64")]
        let env_var = if find_alternate {
            "ProgramFiles(x86)"
        } else {
            "ProgramFiles"
        };

        #[cfg(target_pointer_width = "32")]
        let env_var = if find_alternate {
            "ProgramW6432"
        } else {
            "ProgramFiles"
        };

        let install_base_dir = PathBuf::from(std::env::var_os(env_var)?).join("PowerShell");
        install_base_dir
            .read_dir()
            .ok()?
            .filter_map(Result::ok)
            .filter(|entry| matches!(entry.file_type(), Ok(ft) if ft.is_dir()))
            .filter_map(|entry| {
                let dir_name = entry.file_name();
                let dir_name = dir_name.to_string_lossy();

                // The directory name must be a number, optionally followed by "-preview"
                // when looking for preview builds.
                let version = if find_preview {
                    let dash_index = dir_name.find('-')?;
                    if &dir_name[dash_index + 1..] != "preview" {
                        return None;
                    };
                    dir_name[..dash_index].parse::<u32>().ok()?
                } else {
                    dir_name.parse::<u32>().ok()?
                };

                let exe_path = entry.path().join("pwsh.exe");
                if exe_path.exists() {
                    Some((version, exe_path))
                } else {
                    None
                }
            })
            .max_by_key(|(version, _)| *version)
            .map(|(_, path)| path)
    }

    /// Looks under `%LOCALAPPDATA%\Microsoft\WindowsApps` for the first directory whose
    /// name starts with the PowerShell (or PowerShell Preview) package prefix and
    /// contains `pwsh.exe`.
    fn find_pwsh_in_msix(find_preview: bool) -> Option<PathBuf> {
        let msix_app_dir =
            PathBuf::from(std::env::var_os("LOCALAPPDATA")?).join("Microsoft\\WindowsApps");
        if !msix_app_dir.exists() {
            return None;
        }

        let prefix = if find_preview {
            "Microsoft.PowerShellPreview_"
        } else {
            "Microsoft.PowerShell_"
        };
        msix_app_dir
            .read_dir()
            .ok()?
            .filter_map(|entry| {
                let entry = entry.ok()?;
                if !matches!(entry.file_type(), Ok(ft) if ft.is_dir()) {
                    return None;
                }

                if !entry.file_name().to_string_lossy().starts_with(prefix) {
                    return None;
                }

                let exe_path = entry.path().join("pwsh.exe");
                exe_path.exists().then_some(exe_path)
            })
            .next()
    }

    /// Returns `%USERPROFILE%\scoop\shims\pwsh.exe` if that file exists.
    fn find_pwsh_in_scoop() -> Option<PathBuf> {
        let pwsh_exe =
            PathBuf::from(std::env::var_os("USERPROFILE")?).join("scoop\\shims\\pwsh.exe");
        pwsh_exe.exists().then_some(pwsh_exe)
    }

    // Computed on first use; later calls return a clone of the cached string.
    static SYSTEM_SHELL: LazyLock<String> = LazyLock::new(|| {
        find_pwsh_in_programfiles(false, false)
            .or_else(|| find_pwsh_in_programfiles(true, false))
            .or_else(|| find_pwsh_in_msix(false))
            .or_else(|| find_pwsh_in_programfiles(false, true))
            .or_else(|| find_pwsh_in_msix(true))
            .or_else(|| find_pwsh_in_programfiles(true, true))
            .or_else(find_pwsh_in_scoop)
            .map(|p| p.to_string_lossy().to_string())
            .unwrap_or("powershell.exe".to_string())
    });

    (*SYSTEM_SHELL).clone()
}
840
/// Convenience methods for logging or converting the error of a result-like value.
pub trait ResultExt<E> {
    /// The success type carried by the result.
    type Ok;

    /// Returns the success value, or logs the error and returns `None`.
    fn log_err(self) -> Option<Self::Ok>;
    /// Assert that this result should never be an error in development or tests.
    fn debug_assert_ok(self, reason: &str) -> Self;
    /// Like [`ResultExt::log_err`], but intended for logging at warning level.
    fn warn_on_err(self) -> Option<Self::Ok>;
    /// Returns the success value, or logs the error at `level` and returns `None`.
    fn log_with_level(self, level: log::Level) -> Option<Self::Ok>;
    /// Converts the error into an [`anyhow::Error`].
    fn anyhow(self) -> anyhow::Result<Self::Ok>
    where
        E: Into<anyhow::Error>;
}
853
854impl<T, E> ResultExt<E> for Result<T, E>
855where
856 E: std::fmt::Debug,
857{
858 type Ok = T;
859
860 #[track_caller]
861 fn log_err(self) -> Option<T> {
862 self.log_with_level(log::Level::Error)
863 }
864
865 #[track_caller]
866 fn debug_assert_ok(self, reason: &str) -> Self {
867 if let Err(error) = &self {
868 debug_panic!("{reason} - {error:?}");
869 }
870 self
871 }
872
873 #[track_caller]
874 fn warn_on_err(self) -> Option<T> {
875 self.log_with_level(log::Level::Warn)
876 }
877
878 #[track_caller]
879 fn log_with_level(self, level: log::Level) -> Option<T> {
880 match self {
881 Ok(value) => Some(value),
882 Err(error) => {
883 log_error_with_caller(*Location::caller(), error, level);
884 None
885 }
886 }
887 }
888
889 fn anyhow(self) -> anyhow::Result<T>
890 where
891 E: Into<anyhow::Error>,
892 {
893 self.map_err(Into::into)
894 }
895}
896
/// Emits a log record for `error` (formatted with `{:?}`) at `level`, reporting
/// `caller` as the record's file and line.
///
/// The record's target and module path are set to the second `/`-separated segment of
/// the caller's file path, or to an empty target when there is no such segment.
fn log_error_with_caller<E>(caller: core::panic::Location<'_>, error: E, level: log::Level)
where
    E: std::fmt::Debug,
{
    #[cfg(not(target_os = "windows"))]
    let file = caller.file();
    // Normalize separators so the split below works on Windows paths too.
    #[cfg(target_os = "windows")]
    let file = caller.file().replace('\\', "/");
    // In this codebase, the first segment of the file path is
    // the 'crates' folder, followed by the crate name.
    let target = file.split('/').nth(1);

    log::logger().log(
        &log::Record::builder()
            .target(target.unwrap_or(""))
            .module_path(target)
            .args(format_args!("{:?}", error))
            .file(Some(caller.file()))
            .line(Some(caller.line()))
            .level(level)
            .build(),
    );
}
920
921pub fn log_err<E: std::fmt::Debug>(error: &E) {
922 log_error_with_caller(*Location::caller(), error, log::Level::Warn);
923}
924
/// Adapters for futures that resolve to a `Result`.
pub trait TryFutureExt {
    /// Wraps the future so that an error is logged and the output becomes `None`.
    fn log_err(self) -> LogErrorFuture<Self>
    where
        Self: Sized;

    /// Like [`TryFutureExt::log_err`], but the error is attributed to the given `location`.
    fn log_tracked_err(self, location: core::panic::Location<'static>) -> LogErrorFuture<Self>
    where
        Self: Sized;

    /// Like [`TryFutureExt::log_err`], but intended for logging at warning level.
    fn warn_on_err(self) -> LogErrorFuture<Self>
    where
        Self: Sized;
    /// Wraps the future in an [`UnwrapFuture`].
    fn unwrap(self) -> UnwrapFuture<Self>
    where
        Self: Sized;
}
941
942impl<F, T, E> TryFutureExt for F
943where
944 F: Future<Output = Result<T, E>>,
945 E: std::fmt::Debug,
946{
947 #[track_caller]
948 fn log_err(self) -> LogErrorFuture<Self>
949 where
950 Self: Sized,
951 {
952 let location = Location::caller();
953 LogErrorFuture(self, log::Level::Error, *location)
954 }
955
956 fn log_tracked_err(self, location: core::panic::Location<'static>) -> LogErrorFuture<Self>
957 where
958 Self: Sized,
959 {
960 LogErrorFuture(self, log::Level::Error, location)
961 }
962
963 #[track_caller]
964 fn warn_on_err(self) -> LogErrorFuture<Self>
965 where
966 Self: Sized,
967 {
968 let location = Location::caller();
969 LogErrorFuture(self, log::Level::Warn, *location)
970 }
971
972 fn unwrap(self) -> UnwrapFuture<Self>
973 where
974 Self: Sized,
975 {
976 UnwrapFuture(self)
977 }
978}
979
/// A future wrapping another future whose output is a `Result`: a success resolves to
/// `Some(value)`, an error is logged and resolves to `None`.
///
/// The fields are the wrapped future, the log level to use, and the source location
/// the log record is attributed to.
#[must_use]
pub struct LogErrorFuture<F>(F, log::Level, core::panic::Location<'static>);
982
983impl<F, T, E> Future for LogErrorFuture<F>
984where
985 F: Future<Output = Result<T, E>>,
986 E: std::fmt::Debug,
987{
988 type Output = Option<T>;
989
990 fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
991 let level = self.1;
992 let location = self.2;
993 let inner = unsafe { Pin::new_unchecked(&mut self.get_unchecked_mut().0) };
994 match inner.poll(cx) {
995 Poll::Ready(output) => Poll::Ready(match output {
996 Ok(output) => Some(output),
997 Err(error) => {
998 log_error_with_caller(location, error, level);
999 None
1000 }
1001 }),
1002 Poll::Pending => Poll::Pending,
1003 }
1004 }
1005}
1006
/// A future wrapping another future whose output is a `Result`; it resolves to the
/// `Ok` value and panics if the wrapped future resolves to an `Err`.
pub struct UnwrapFuture<F>(F);

impl<F, T, E> Future for UnwrapFuture<F>
where
    F: Future<Output = Result<T, E>>,
    E: std::fmt::Debug,
{
    type Output = T;

    /// Polls the wrapped future and unwraps its result once it is ready.
    fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
        // SAFETY: the inner future is only ever reached through this pinned projection
        // and is never moved out of `self`.
        let inner = unsafe { self.map_unchecked_mut(|this| &mut this.0) };
        inner.poll(cx).map(|result| result.unwrap())
    }
}
1024
/// A guard that runs a closure when it is dropped, unless [`Deferred::abort`] was
/// called first. Create one with [`defer`].
pub struct Deferred<F: FnOnce()>(Option<F>);

impl<F: FnOnce()> Deferred<F> {
    /// Drop without running the deferred function.
    pub fn abort(mut self) {
        // Discard the closure so that `Drop` finds nothing left to run.
        drop(self.0.take());
    }
}

impl<F: FnOnce()> Drop for Deferred<F> {
    fn drop(&mut self) {
        let Some(deferred) = self.0.take() else {
            return;
        };
        deferred();
    }
}

/// Run the given function when the returned value is dropped (unless it's cancelled).
#[must_use]
pub fn defer<F: FnOnce()>(f: F) -> Deferred<F> {
    let pending = Some(f);
    Deferred(pending)
}
1047
/// Random text generation for tests.
#[cfg(any(test, feature = "test-support"))]
mod rng {
    use rand::{Rng, seq::SliceRandom};

    /// An endless iterator of random characters drawn from `rng`.
    ///
    /// In "simple text" mode only lowercase ASCII letters and newlines are produced;
    /// otherwise the output mixes whitespace, multi-byte characters and ASCII letters.
    pub struct RandomCharIter<T: Rng> {
        rng: T,
        simple_text: bool,
    }

    impl<T: Rng> RandomCharIter<T> {
        /// Creates the iterator. Simple text mode is enabled when the `SIMPLE_TEXT`
        /// environment variable is set to a non-empty value.
        pub fn new(rng: T) -> Self {
            Self {
                rng,
                simple_text: std::env::var("SIMPLE_TEXT").map_or(false, |v| !v.is_empty()),
            }
        }

        /// Forces simple text mode regardless of the environment.
        pub fn with_simple_text(mut self) -> Self {
            self.simple_text = true;
            self
        }
    }

    impl<T: Rng> Iterator for RandomCharIter<T> {
        type Item = char;

        fn next(&mut self) -> Option<Self::Item> {
            if self.simple_text {
                // 5% newlines, otherwise a lowercase ASCII letter.
                return if self.rng.gen_range(0..100) < 5 {
                    Some('\n')
                } else {
                    Some(self.rng.gen_range(b'a'..b'z' + 1).into())
                };
            }

            match self.rng.gen_range(0..100) {
                // whitespace
                0..=19 => [' ', '\n', '\r', '\t'].choose(&mut self.rng).copied(),
                // two-byte greek letters
                20..=32 => char::from_u32(self.rng.gen_range(('α' as u32)..('ω' as u32 + 1))),
                // three-byte characters
                33..=45 => ['✋', '✅', '❌', '❎', '⭐']
                    .choose(&mut self.rng)
                    .copied(),
                // four-byte characters
                46..=58 => ['🍐', '🏀', '🍗', '🎉'].choose(&mut self.rng).copied(),
                // ascii letters
                _ => Some(self.rng.gen_range(b'a'..b'z' + 1).into()),
            }
        }
    }
}
1099#[cfg(any(test, feature = "test-support"))]
1100pub use rng::RandomCharIter;
1101/// Get an embedded file as a string.
1102pub fn asset_str<A: rust_embed::RustEmbed>(path: &str) -> Cow<'static, str> {
1103 match A::get(path).expect(path).data {
1104 Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes).unwrap()),
1105 Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes).unwrap()),
1106 }
1107}
1108
/// Wraps a block in a closure that is called on the spot, so the `?` operator can be
/// used inside functions that do not themselves return an `Option` or `Result`.
///
/// Three forms are accepted: a plain block, an `async` block, and an `async move`
/// block. The async forms evaluate to the future produced by the block.
#[macro_export]
macro_rules! maybe {
    ($body:block) => {
        (|| $body)()
    };
    (async $body:block) => {
        (|| async $body)()
    };
    (async move $body:block) => {
        (|| async move $body)()
    };
}
1125
/// Convenience operations shared by [`Range`] and [`RangeInclusive`].
pub trait RangeExt<T> {
    /// Returns a copy whose bounds are swapped, if necessary, so that start <= end.
    fn sorted(&self) -> Self;
    /// Returns an inclusive range built from the same start and end values.
    fn to_inclusive(&self) -> RangeInclusive<T>;
    /// Returns whether this range shares at least one position with `other`.
    fn overlaps(&self, other: &Range<T>) -> bool;
    /// Returns whether `other`'s start and end both lie within this range's bounds,
    /// with this range's end counted as allowed.
    fn contains_inclusive(&self, other: &Range<T>) -> bool;
}

impl<T: Ord + Clone> RangeExt<T> for Range<T> {
    fn sorted(&self) -> Self {
        let (low, high) = if self.end < self.start {
            (&self.end, &self.start)
        } else {
            (&self.start, &self.end)
        };
        low.clone()..high.clone()
    }

    fn to_inclusive(&self) -> RangeInclusive<T> {
        // The end value is kept as-is, so the result also contains `end` itself.
        RangeInclusive::new(self.start.clone(), self.end.clone())
    }

    fn overlaps(&self, other: &Range<T>) -> bool {
        // Both ends are exclusive, so touching ranges do not overlap.
        let starts_before_other_ends = self.start < other.end;
        let other_starts_before_end = other.start < self.end;
        starts_before_other_ends && other_starts_before_end
    }

    fn contains_inclusive(&self, other: &Range<T>) -> bool {
        let start_inside = self.start <= other.start;
        let end_inside = other.end <= self.end;
        start_inside && end_inside
    }
}

impl<T: Ord + Clone> RangeExt<T> for RangeInclusive<T> {
    fn sorted(&self) -> Self {
        let (start, end) = (self.start(), self.end());
        if end < start {
            end.clone()..=start.clone()
        } else {
            start.clone()..=end.clone()
        }
    }

    fn to_inclusive(&self) -> RangeInclusive<T> {
        Clone::clone(self)
    }

    fn overlaps(&self, other: &Range<T>) -> bool {
        // Our end is inclusive, so `other` may start exactly on it and still overlap.
        let starts_before_other_ends = self.start() < &other.end;
        let other_starts_by_end = &other.start <= self.end();
        starts_before_other_ends && other_starts_by_end
    }

    fn contains_inclusive(&self, other: &Range<T>) -> bool {
        let start_inside = self.start() <= &other.start;
        let end_inside = &other.end <= self.end();
        start_inside && end_inside
    }
}
1168
/// A way to sort strings with starting numbers numerically first, falling back to alphanumeric one,
/// case-insensitive.
///
/// This is useful for turning regular alphanumerically sorted sequences as `1-abc, 10, 11-def, .., 2, 21-abc`
/// into `1-abc, 2, 10, 11-def, .., 21-abc`
///
/// The first field is the parsed numeric prefix (if any); the second is the text that
/// follows it.
#[derive(Debug, PartialEq, Eq)]
pub struct NumericPrefixWithSuffix<'a>(Option<u64>, &'a str);

impl<'a> NumericPrefixWithSuffix<'a> {
    /// Splits `str` into its leading run of ASCII digits and the remaining text.
    ///
    /// When there is no leading digit, or the digits do not fit into a `u64`, the
    /// result has no numeric prefix and keeps the *entire* input as its text. Keeping
    /// the digits in that case matters: dropping them would make e.g.
    /// `"99999999999999999999abc"` indistinguishable from `"abc"`.
    pub fn from_numeric_prefixed_str(str: &'a str) -> Self {
        // ASCII digits are one byte each, so the digit count is also a valid byte offset.
        let digits_len = str.bytes().take_while(u8::is_ascii_digit).count();
        let (prefix, remainder) = str.split_at(digits_len);

        match prefix.parse() {
            Ok(number) => Self(Some(number), remainder),
            // Empty or overflowing prefix: treat the whole string as non-numeric.
            Err(_) => Self(None, str),
        }
    }
}
1186
1187/// When dealing with equality, we need to consider the case of the strings to achieve strict equality
1188/// to handle cases like "a" < "A" instead of "a" == "A".
1189impl Ord for NumericPrefixWithSuffix<'_> {
1190 fn cmp(&self, other: &Self) -> Ordering {
1191 match (self.0, other.0) {
1192 (None, None) => UniCase::new(self.1)
1193 .cmp(&UniCase::new(other.1))
1194 .then_with(|| self.1.cmp(other.1).reverse()),
1195 (None, Some(_)) => Ordering::Greater,
1196 (Some(_), None) => Ordering::Less,
1197 (Some(a), Some(b)) => a.cmp(&b).then_with(|| {
1198 UniCase::new(self.1)
1199 .cmp(&UniCase::new(other.1))
1200 .then_with(|| self.1.cmp(other.1).reverse())
1201 }),
1202 }
1203 }
1204}
1205
1206impl PartialOrd for NumericPrefixWithSuffix<'_> {
1207 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
1208 Some(self.cmp(other))
1209 }
1210}
1211
/// Returns a copy of `str` with its first character converted to uppercase.
///
/// Only the first character is touched; the rest of the string is copied unchanged.
/// An empty input yields an empty `String`.
///
/// # Examples
///
/// ```
/// use util::capitalize;
///
/// assert_eq!(capitalize("hello"), "Hello");
/// assert_eq!(capitalize("WORLD"), "WORLD");
/// assert_eq!(capitalize(""), "");
/// ```
pub fn capitalize(str: &str) -> String {
    let mut rest = str.chars();
    let Some(first) = rest.next() else {
        return String::new();
    };

    // Uppercasing a single character may expand to several characters.
    let mut capitalized: String = first.to_uppercase().collect();
    capitalized.push_str(rest.as_str());
    capitalized
}
1233
1234fn emoji_regex() -> &'static Regex {
1235 static EMOJI_REGEX: LazyLock<Regex> =
1236 LazyLock::new(|| Regex::new("(\\p{Emoji}|\u{200D})").unwrap());
1237 &EMOJI_REGEX
1238}
1239
1240/// Returns true if the given string consists of emojis only.
1241/// E.g. "👨👩👧👧👋" will return true, but "👋!" will return false.
1242pub fn word_consists_of_emojis(s: &str) -> bool {
1243 let mut prev_end = 0;
1244 for capture in emoji_regex().find_iter(s) {
1245 if capture.start() != prev_end {
1246 return false;
1247 }
1248 prev_end = capture.end();
1249 }
1250 prev_end == s.len()
1251}
1252
/// Splits `s` at every character for which `pat` returns true, returning each piece
/// together with its byte-offset range in `s`.
///
/// Similar to `str::split`, but not generic over pattern types, and it returns a
/// `Vec` rather than an `Iterator`. Empty pieces (between adjacent separators, or at
/// either end of the string) are left out of the result.
pub fn split_str_with_ranges(s: &str, pat: impl Fn(char) -> bool) -> Vec<(Range<usize>, &str)> {
    // Byte spans (start, end) of every separator character, in order.
    let separators = s
        .char_indices()
        .filter(|&(_, ch)| pat(ch))
        .map(|(offset, ch)| (offset, offset + ch.len_utf8()));

    let mut pieces = Vec::new();
    let mut piece_start = 0;
    for (separator_start, separator_end) in separators {
        if separator_start > piece_start {
            pieces.push((
                piece_start..separator_start,
                &s[piece_start..separator_start],
            ));
        }
        piece_start = separator_end;
    }

    // Whatever follows the last separator forms the final piece.
    if piece_start < s.len() {
        pieces.push((piece_start..s.len(), &s[piece_start..]));
    }

    pieces
}
1274
/// Shorthand for `Default::default()`, with the type taken from the call site.
pub fn default<D: Default>() -> D {
    D::default()
}
1278
/// Returns the shell to use on this system.
///
/// On Windows this delegates to `get_windows_system_shell`. Elsewhere it is the value
/// of the `SHELL` environment variable, or `/bin/sh` when that variable cannot be read.
pub fn get_system_shell() -> String {
    #[cfg(target_os = "windows")]
    {
        get_windows_system_shell()
    }

    #[cfg(not(target_os = "windows"))]
    {
        match std::env::var("SHELL") {
            Ok(shell) => shell,
            Err(_) => "/bin/sh".to_string(),
        }
    }
}
1290
1291#[derive(Debug)]
1292pub enum ConnectionResult<O> {
1293 Timeout,
1294 ConnectionReset,
1295 Result(anyhow::Result<O>),
1296}
1297
1298impl<O> ConnectionResult<O> {
1299 pub fn into_response(self) -> anyhow::Result<O> {
1300 match self {
1301 ConnectionResult::Timeout => anyhow::bail!("Request timed out"),
1302 ConnectionResult::ConnectionReset => anyhow::bail!("Server reset the connection"),
1303 ConnectionResult::Result(r) => r,
1304 }
1305 }
1306}
1307
1308impl<O> From<anyhow::Result<O>> for ConnectionResult<O> {
1309 fn from(result: anyhow::Result<O>) -> Self {
1310 ConnectionResult::Result(result)
1311 }
1312}
1313
1314#[cfg(test)]
1315mod tests {
1316 use super::*;
1317
    // Repeatedly merges descending batches into a descending vector via `extend_sorted`
    // and checks that the result stays ordered and is capped at the given limit.
    #[test]
    fn test_extend_sorted() {
        let mut vec = vec![];

        // Limit of 5: the sixth (smallest) item is not kept.
        extend_sorted(&mut vec, vec![21, 17, 13, 8, 1, 0], 5, |a, b| b.cmp(a));
        assert_eq!(vec, &[21, 17, 13, 8, 1]);

        extend_sorted(&mut vec, vec![101, 19, 17, 8, 2], 8, |a, b| b.cmp(a));
        assert_eq!(vec, &[101, 21, 19, 17, 13, 8, 2, 1]);

        extend_sorted(&mut vec, vec![1000, 19, 17, 9, 5], 8, |a, b| b.cmp(a));
        assert_eq!(vec, &[1000, 101, 21, 19, 17, 13, 9, 8]);
    }
1331
    // Checks `truncate_to_bottom_n_sorted_by` with limits above, equal to and below
    // the vector length, including a limit of zero.
    #[test]
    fn test_truncate_to_bottom_n_sorted_by() {
        let mut vec: Vec<u32> = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 10, &u32::cmp);
        assert_eq!(vec, &[1, 2, 3, 4, 5]);

        vec = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 5, &u32::cmp);
        assert_eq!(vec, &[1, 2, 3, 4, 5]);

        vec = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 4, &u32::cmp);
        assert_eq!(vec, &[1, 2, 3, 4]);

        vec = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 1, &u32::cmp);
        assert_eq!(vec, &[1]);

        vec = vec![5, 2, 3, 4, 1];
        truncate_to_bottom_n_sorted_by(&mut vec, 0, &u32::cmp);
        assert!(vec.is_empty());
    }
1354
    // Checks that `?` inside a `maybe!` block short-circuits the block itself rather
    // than the enclosing function.
    #[test]
    fn test_iife() {
        fn option_returning_function() -> Option<()> {
            None
        }

        let foo = maybe!({
            option_returning_function()?;
            Some(())
        });

        assert_eq!(foo, None);
    }
1368
    // Checks that `truncate_and_trailoff` leaves short strings alone and appends `…`
    // after truncating, for both ASCII and multi-byte characters.
    #[test]
    fn test_truncate_and_trailoff() {
        assert_eq!(truncate_and_trailoff("", 5), "");
        assert_eq!(truncate_and_trailoff("aaaaaa", 7), "aaaaaa");
        assert_eq!(truncate_and_trailoff("aaaaaa", 6), "aaaaaa");
        assert_eq!(truncate_and_trailoff("aaaaaa", 5), "aaaaa…");
        assert_eq!(truncate_and_trailoff("èèèèèè", 7), "èèèèèè");
        assert_eq!(truncate_and_trailoff("èèèèèè", 6), "èèèèèè");
        assert_eq!(truncate_and_trailoff("èèèèèè", 5), "èèèèè…");
    }
1379
    // Checks that `truncate_and_remove_front` leaves short strings alone and prepends
    // `…` after truncating, for both ASCII and multi-byte characters.
    #[test]
    fn test_truncate_and_remove_front() {
        assert_eq!(truncate_and_remove_front("", 5), "");
        assert_eq!(truncate_and_remove_front("aaaaaa", 7), "aaaaaa");
        assert_eq!(truncate_and_remove_front("aaaaaa", 6), "aaaaaa");
        assert_eq!(truncate_and_remove_front("aaaaaa", 5), "…aaaaa");
        assert_eq!(truncate_and_remove_front("èèèèèè", 7), "èèèèèè");
        assert_eq!(truncate_and_remove_front("èèèèèè", 6), "èèèèèè");
        assert_eq!(truncate_and_remove_front("èèèèèè", 5), "…èèèèè");
    }
1390
    // Checks how `from_numeric_prefixed_str` splits inputs: only the leading run of
    // ASCII digits becomes the number; everything from the first non-digit onwards
    // (including later digits) stays in the suffix.
    #[test]
    fn test_numeric_prefix_str_method() {
        let target = "1a";
        assert_eq!(
            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
            NumericPrefixWithSuffix(Some(1), "a")
        );

        let target = "12ab";
        assert_eq!(
            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
            NumericPrefixWithSuffix(Some(12), "ab")
        );

        let target = "12_ab";
        assert_eq!(
            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
            NumericPrefixWithSuffix(Some(12), "_ab")
        );

        let target = "1_2ab";
        assert_eq!(
            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
            NumericPrefixWithSuffix(Some(1), "_2ab")
        );

        let target = "1.2";
        assert_eq!(
            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
            NumericPrefixWithSuffix(Some(1), ".2")
        );

        let target = "1.2_a";
        assert_eq!(
            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
            NumericPrefixWithSuffix(Some(1), ".2_a")
        );

        let target = "12.2_a";
        assert_eq!(
            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
            NumericPrefixWithSuffix(Some(12), ".2_a")
        );

        let target = "12a.2_a";
        assert_eq!(
            NumericPrefixWithSuffix::from_numeric_prefixed_str(target),
            NumericPrefixWithSuffix(Some(12), "a.2_a")
        );
    }
1441
    // Checks that sorting by `NumericPrefixWithSuffix` orders numeric prefixes by value,
    // and that strings without a leading digit are parsed with no numeric prefix.
    #[test]
    fn test_numeric_prefix_with_suffix() {
        let mut sorted = vec!["1-abc", "10", "11def", "2", "21-abc"];
        sorted.sort_by_key(|s| NumericPrefixWithSuffix::from_numeric_prefixed_str(s));
        assert_eq!(sorted, ["1-abc", "2", "10", "11def", "21-abc"]);

        for numeric_prefix_less in ["numeric_prefix_less", "aaa", "~™£"] {
            assert_eq!(
                NumericPrefixWithSuffix::from_numeric_prefixed_str(numeric_prefix_less),
                NumericPrefixWithSuffix(None, numeric_prefix_less),
                "String without numeric prefix `{numeric_prefix_less}` should not be converted into NumericPrefixWithSuffix"
            )
        }
    }
1456
    // Table-driven check of `word_consists_of_emojis`: emoji-only strings pass, while
    // any leading or trailing non-emoji character (punctuation, space, letters) fails.
    #[test]
    fn test_word_consists_of_emojis() {
        let words_to_test = vec![
            ("👨👩👧👧👋🥒", true),
            ("👋", true),
            ("!👋", false),
            ("👋!", false),
            ("👋 ", false),
            (" 👋", false),
            ("Test", false),
        ];

        for (text, expected_result) in words_to_test {
            assert_eq!(word_consists_of_emojis(text), expected_result);
        }
    }
1473
    // Checks `truncate_lines_and_trailoff` on a three-line text with limits of 2, 3
    // and 4; the raw strings below are column-sensitive and must stay unindented.
    #[test]
    fn test_truncate_lines_and_trailoff() {
        let text = r#"Line 1
Line 2
Line 3"#;

        assert_eq!(
            truncate_lines_and_trailoff(text, 2),
            r#"Line 1
…"#
        );

        assert_eq!(
            truncate_lines_and_trailoff(text, 3),
            r#"Line 1
Line 2
…"#
        );

        // A limit of 4 leaves the three-line text untouched.
        assert_eq!(
            truncate_lines_and_trailoff(text, 4),
            r#"Line 1
Line 2
Line 3"#
        );
    }
1500
    // Checks `expanded_and_wrapped_usize_range` for expansions that stay in bounds,
    // wrap at the start, wrap at the end, or wrap all the way around (wrap length 8).
    #[test]
    fn test_expanded_and_wrapped_usize_range() {
        // Neither wrap
        assert_eq!(
            expanded_and_wrapped_usize_range(2..4, 1, 1, 8).collect::<Vec<usize>>(),
            (1..5).collect::<Vec<usize>>()
        );
        // Start wraps
        assert_eq!(
            expanded_and_wrapped_usize_range(2..4, 3, 1, 8).collect::<Vec<usize>>(),
            ((0..5).chain(7..8)).collect::<Vec<usize>>()
        );
        // Start wraps all the way around
        assert_eq!(
            expanded_and_wrapped_usize_range(2..4, 5, 1, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
        // Start wraps all the way around and past 0
        assert_eq!(
            expanded_and_wrapped_usize_range(2..4, 10, 1, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
        // End wraps
        assert_eq!(
            expanded_and_wrapped_usize_range(3..5, 1, 4, 8).collect::<Vec<usize>>(),
            (0..1).chain(2..8).collect::<Vec<usize>>()
        );
        // End wraps all the way around
        assert_eq!(
            expanded_and_wrapped_usize_range(3..5, 1, 5, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
        // End wraps all the way around and past the end
        assert_eq!(
            expanded_and_wrapped_usize_range(3..5, 1, 10, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
        // Both start and end wrap
        assert_eq!(
            expanded_and_wrapped_usize_range(3..5, 4, 4, 8).collect::<Vec<usize>>(),
            (0..8).collect::<Vec<usize>>()
        );
    }
1544
    // Checks the order in which `wrapped_usize_outward_from` yields indices: the
    // expected vectors alternate between values after and before the starting point,
    // wrapping at either end, and are empty for a wrap length of 0.
    #[test]
    fn test_wrapped_usize_outward_from() {
        // No wrapping
        assert_eq!(
            wrapped_usize_outward_from(4, 2, 2, 10).collect::<Vec<usize>>(),
            vec![4, 5, 3, 6, 2]
        );
        // Wrapping at end
        assert_eq!(
            wrapped_usize_outward_from(8, 2, 3, 10).collect::<Vec<usize>>(),
            vec![8, 9, 7, 0, 6, 1]
        );
        // Wrapping at start
        assert_eq!(
            wrapped_usize_outward_from(1, 3, 2, 10).collect::<Vec<usize>>(),
            vec![1, 2, 0, 3, 9, 8]
        );
        // All values wrap around
        assert_eq!(
            wrapped_usize_outward_from(5, 10, 10, 8).collect::<Vec<usize>>(),
            vec![5, 6, 4, 7, 3, 0, 2, 1]
        );
        // None before / after
        assert_eq!(
            wrapped_usize_outward_from(3, 0, 0, 8).collect::<Vec<usize>>(),
            vec![3]
        );
        // Starting point already wrapped
        assert_eq!(
            wrapped_usize_outward_from(15, 2, 2, 10).collect::<Vec<usize>>(),
            vec![5, 6, 4, 7, 3]
        );
        // wrap_length of 0
        assert_eq!(
            wrapped_usize_outward_from(4, 2, 2, 0).collect::<Vec<usize>>(),
            Vec::<usize>::new()
        );
    }
1583
    // Checks `truncate_lines_to_byte_limit` for limits that cover the whole text, fall
    // on or between line ends, fall inside the first line, and follow a multi-byte
    // character.
    #[test]
    fn test_truncate_lines_to_byte_limit() {
        let text = "Line 1\nLine 2\nLine 3\nLine 4";

        // Limit that includes all lines
        assert_eq!(truncate_lines_to_byte_limit(text, 100), text);

        // Exactly the first line
        assert_eq!(truncate_lines_to_byte_limit(text, 7), "Line 1\n");

        // Limit between lines
        assert_eq!(truncate_lines_to_byte_limit(text, 13), "Line 1\n");
        assert_eq!(truncate_lines_to_byte_limit(text, 20), "Line 1\nLine 2\n");

        // Limit before first newline
        assert_eq!(truncate_lines_to_byte_limit(text, 6), "Line ");

        // Test with non-ASCII characters
        let text_utf8 = "Line 1\nLíne 2\nLine 3";
        assert_eq!(
            truncate_lines_to_byte_limit(text_utf8, 15),
            "Line 1\nLíne 2\n"
        );
    }
1608
    // Checks `char_len_with_expanded_tabs` for strings with tabs at the start, middle
    // and end, with tab sizes of 4 and 8 and a non-zero first argument.
    #[test]
    fn test_string_size_with_expanded_tabs() {
        // Shorthand for building the `NonZeroU32` tab size.
        let nz = |val| NonZeroU32::new(val).unwrap();
        assert_eq!(char_len_with_expanded_tabs(0, "", nz(4)), 0);
        assert_eq!(char_len_with_expanded_tabs(0, "hello", nz(4)), 5);
        assert_eq!(char_len_with_expanded_tabs(0, "\thello", nz(4)), 9);
        assert_eq!(char_len_with_expanded_tabs(0, "abc\tab", nz(4)), 6);
        assert_eq!(char_len_with_expanded_tabs(0, "hello\t", nz(4)), 8);
        assert_eq!(char_len_with_expanded_tabs(0, "\t\t", nz(8)), 16);
        assert_eq!(char_len_with_expanded_tabs(0, "x\t", nz(8)), 8);
        assert_eq!(char_len_with_expanded_tabs(7, "x\t", nz(8)), 9);
    }
1621
1622 #[test]
1623 fn test_word_breaking_tokenizer() {
1624 let tests: &[(&str, &[WordBreakToken<'static>])] = &[
1625 ("", &[]),
1626 (" ", &[whitespace(" ", 2)]),
1627 ("Ʒ", &[word("Ʒ", 1)]),
1628 ("Ǽ", &[word("Ǽ", 1)]),
1629 ("⋑", &[word("⋑", 1)]),
1630 ("⋑⋑", &[word("⋑⋑", 2)]),
1631 (
1632 "原理,进而",
1633 &[word("原", 1), word("理,", 2), word("进", 1), word("而", 1)],
1634 ),
1635 (
1636 "hello world",
1637 &[word("hello", 5), whitespace(" ", 1), word("world", 5)],
1638 ),
1639 (
1640 "hello, world",
1641 &[word("hello,", 6), whitespace(" ", 1), word("world", 5)],
1642 ),
1643 (
1644 " hello world",
1645 &[
1646 whitespace(" ", 2),
1647 word("hello", 5),
1648 whitespace(" ", 1),
1649 word("world", 5),
1650 ],
1651 ),
1652 (
1653 "这是什么 \n 钢笔",
1654 &[
1655 word("这", 1),
1656 word("是", 1),
1657 word("什", 1),
1658 word("么", 1),
1659 whitespace(" ", 1),
1660 newline(),
1661 whitespace(" ", 1),
1662 word("钢", 1),
1663 word("笔", 1),
1664 ],
1665 ),
1666 (" mutton", &[whitespace(" ", 1), word("mutton", 6)]),
1667 ];
1668
1669 fn word(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
1670 WordBreakToken::Word {
1671 token,
1672 grapheme_len,
1673 }
1674 }
1675
1676 fn whitespace(token: &'static str, grapheme_len: usize) -> WordBreakToken<'static> {
1677 WordBreakToken::InlineWhitespace {
1678 token,
1679 grapheme_len,
1680 }
1681 }
1682
1683 fn newline() -> WordBreakToken<'static> {
1684 WordBreakToken::Newline
1685 }
1686
1687 for (input, result) in tests {
1688 assert_eq!(
1689 WordBreakingTokenizer::new(input)
1690 .collect::<Vec<_>>()
1691 .as_slice(),
1692 *result,
1693 );
1694 }
1695 }
1696
    // Checks `wrap_with_prefix` on a word longer than the wrap width, on tab-led text,
    // on text with an embedded newline under a `// ` prefix, and on CJK text.
    // NOTE(review): the literals are whitespace-sensitive — confirm the spacing inside
    // them against the intended inputs before editing.
    #[test]
    fn test_wrap_with_prefix() {
        assert_eq!(
            wrap_with_prefix(
                "# ".to_string(),
                "abcdefg".to_string(),
                4,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "# abcdefg"
        );
        assert_eq!(
            wrap_with_prefix(
                "".to_string(),
                "\thello world".to_string(),
                8,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "hello\nworld"
        );
        assert_eq!(
            wrap_with_prefix(
                "// ".to_string(),
                "xx \nyy zz aa bb cc".to_string(),
                12,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "// xx yy zz\n// aa bb cc"
        );
        assert_eq!(
            wrap_with_prefix(
                String::new(),
                "这是什么 \n 钢笔".to_string(),
                3,
                NonZeroU32::new(4).unwrap(),
                false,
            ),
            "这是什\n么 钢\n笔"
        );
    }
1740
    // Checks `split_str_with_ranges` on an input with no separator and on an input
    // with multi-byte characters, verifying that the ranges are byte offsets.
    #[test]
    fn test_split_with_ranges() {
        let input = "hi";
        let result = split_str_with_ranges(input, |c| c == ' ');

        assert_eq!(result.len(), 1);
        assert_eq!(result[0], (0..2, "hi"));

        let input = "héllo🦀world";
        let result = split_str_with_ranges(input, |c| c == '🦀');

        assert_eq!(result.len(), 2);
        assert_eq!(result[0], (0..6, "héllo")); // 'é' is 2 bytes
        assert_eq!(result[1], (10..15, "world")); // '🦀' is 4 bytes
    }
1756}