1use std::{collections::HashMap, ops::Range};
2
/// Removes every occurrence of the given `markers` from `marked_text` and records where each
/// one was found.
///
/// Returns the text with all marker characters stripped, together with a map from marker
/// character to the positions at which it appeared. Each position is the byte length of the
/// stripped text at the moment the marker was encountered, i.e. a byte offset into the
/// returned string. Markers that never occur in the input have no entry in the map.
pub fn marked_text_by(
    marked_text: &str,
    markers: Vec<char>,
) -> (String, HashMap<char, Vec<usize>>) {
    let mut offsets_by_marker = HashMap::<char, Vec<usize>>::new();
    let mut stripped = String::new();

    for c in marked_text.chars() {
        // Ordinary characters are copied through untouched.
        if !markers.contains(&c) {
            stripped.push(c);
            continue;
        }

        // A marker contributes no text; it only pins the current end of the output.
        offsets_by_marker.entry(c).or_default().push(stripped.len());
    }

    (stripped, offsets_by_marker)
}
21
22pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
23 let (unmarked_text, mut markers) = marked_text_by(marked_text, vec!['|']);
24 (unmarked_text, markers.remove(&'|').unwrap_or_default())
25}
26
27pub fn marked_text_ranges_by(
28 marked_text: &str,
29 delimiters: Vec<(char, char)>,
30) -> (String, HashMap<(char, char), Vec<Range<usize>>>) {
31 let all_markers = delimiters
32 .iter()
33 .flat_map(|(start, end)| [*start, *end])
34 .collect();
35 let (unmarked_text, mut markers) = marked_text_by(marked_text, all_markers);
36 let range_lookup = delimiters
37 .into_iter()
38 .map(|(start_marker, end_marker)| {
39 let starts = markers.remove(&start_marker).unwrap_or_default();
40 let ends = markers.remove(&end_marker).unwrap_or_default();
41 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
42
43 let ranges = starts
44 .into_iter()
45 .zip(ends)
46 .map(|(start, end)| {
47 assert!(end >= start, "marked ranges must be disjoint");
48 start..end
49 })
50 .collect::<Vec<Range<usize>>>();
51 ((start_marker, end_marker), ranges)
52 })
53 .collect();
54
55 (unmarked_text, range_lookup)
56}
57
58// Returns ranges delimited by (), [], and <> ranges. Ranges using the same markers
59// must not be overlapping. May also include | for empty ranges
60pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
61 let (range_marked_text, empty_offsets) = marked_text(full_marked_text);
62 let (unmarked, range_lookup) =
63 marked_text_ranges_by(&range_marked_text, vec![('[', ']'), ('(', ')'), ('<', '>')]);
64 let mut combined_ranges: Vec<_> = range_lookup
65 .into_values()
66 .flatten()
67 .chain(empty_offsets.into_iter().map(|offset| offset..offset))
68 .collect();
69
70 combined_ranges.sort_by_key(|range| range.start);
71 (unmarked, combined_ranges)
72}