1use std::{collections::HashMap, ops::Range};
2
/// Strips the given marker characters out of `marked_text`.
///
/// Returns the text with every marker removed, together with a map from each
/// marker that actually occurred to the positions where it was found. Positions
/// are byte offsets into the returned (unmarked) text, listed in order of
/// appearance. Markers that never occur have no entry in the map.
pub fn marked_text_by(
    marked_text: &str,
    markers: Vec<char>,
) -> (String, HashMap<char, Vec<usize>>) {
    let mut offsets_by_marker: HashMap<char, Vec<usize>> = HashMap::new();
    let mut stripped = String::new();

    for ch in marked_text.chars() {
        // Ordinary characters are copied through untouched.
        if !markers.contains(&ch) {
            stripped.push(ch);
            continue;
        }

        // A marker contributes no text; it only records where the output
        // currently ends.
        offsets_by_marker
            .entry(ch)
            .or_default()
            .push(stripped.len());
    }

    (stripped, offsets_by_marker)
}
21
22pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
23 let (unmarked_text, mut markers) = marked_text_by(marked_text, vec!['|']);
24 (unmarked_text, markers.remove(&'|').unwrap_or_default())
25}
26
/// Describes how ranges are marked inside a piece of marked text.
///
/// Used as the lookup key in the map returned by `marked_text_ranges_by`.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum TextRangeMarker {
    /// A single marker character; each occurrence yields an empty range
    /// (`offset..offset`).
    Empty(char),
    /// A start marker and an end marker; each pair yields `start..end`.
    Range(char, char),
    /// A start marker and an end marker; each pair yields the reversed range
    /// `end..start`.
    ReverseRange(char, char),
}
33
34impl TextRangeMarker {
35 fn markers(&self) -> Vec<char> {
36 match self {
37 Self::Empty(m) => vec![*m],
38 Self::Range(l, r) => vec![*l, *r],
39 Self::ReverseRange(l, r) => vec![*l, *r],
40 }
41 }
42}
43
44impl From<char> for TextRangeMarker {
45 fn from(marker: char) -> Self {
46 Self::Empty(marker)
47 }
48}
49
50impl From<(char, char)> for TextRangeMarker {
51 fn from((left_marker, right_marker): (char, char)) -> Self {
52 Self::Range(left_marker, right_marker)
53 }
54}
55
56pub fn marked_text_ranges_by(
57 marked_text: &str,
58 markers: Vec<TextRangeMarker>,
59) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
60 let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
61
62 let (unmarked_text, mut marker_offsets) = marked_text_by(marked_text, all_markers);
63 let range_lookup = markers
64 .into_iter()
65 .map(|marker| match marker {
66 TextRangeMarker::Empty(empty_marker_char) => {
67 let ranges = marker_offsets
68 .remove(&empty_marker_char)
69 .unwrap_or_default()
70 .into_iter()
71 .map(|empty_index| empty_index..empty_index)
72 .collect::<Vec<Range<usize>>>();
73 (marker, ranges)
74 }
75 TextRangeMarker::Range(start_marker, end_marker) => {
76 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
77 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
78 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
79
80 let ranges = starts
81 .into_iter()
82 .zip(ends)
83 .map(|(start, end)| {
84 assert!(end >= start, "marked ranges must be disjoint");
85 start..end
86 })
87 .collect::<Vec<Range<usize>>>();
88 (marker, ranges)
89 }
90 TextRangeMarker::ReverseRange(start_marker, end_marker) => {
91 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
92 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
93 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
94
95 let ranges = starts
96 .into_iter()
97 .zip(ends)
98 .map(|(start, end)| {
99 assert!(end >= start, "marked ranges must be disjoint");
100 end..start
101 })
102 .collect::<Vec<Range<usize>>>();
103 (marker, ranges)
104 }
105 })
106 .collect();
107
108 (unmarked_text, range_lookup)
109}
110
111// Returns ranges delimited by (), [], and <> ranges. Ranges using the same markers
112// must not be overlapping. May also include | for empty ranges
113pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
114 let (unmarked, range_lookup) = marked_text_ranges_by(
115 &full_marked_text,
116 vec![
117 '|'.into(),
118 ('[', ']').into(),
119 ('(', ')').into(),
120 ('<', '>').into(),
121 ],
122 );
123 let mut combined_ranges: Vec<_> = range_lookup.into_values().flatten().collect();
124
125 combined_ranges.sort_by_key(|range| range.start);
126 (unmarked, combined_ranges)
127}