1use anyhow::{anyhow, Result};
2use std::{cmp::Ordering, collections::HashMap, ops::Range};
3
/// Strips every character listed in `markers` out of `marked_text`.
///
/// Returns the text with the marker characters removed, together with a map
/// from each marker character that actually occurred to the positions where
/// it was found. Positions are byte offsets into the returned (unmarked)
/// text, recorded in the order the markers appear. Marker characters that
/// never occur have no entry in the map.
pub fn marked_text_by(
    marked_text: &str,
    markers: Vec<char>,
) -> (String, HashMap<char, Vec<usize>>) {
    let mut offsets_by_marker = HashMap::<char, Vec<usize>>::new();
    let mut plain_text = String::new();

    marked_text.chars().for_each(|c| {
        if !markers.contains(&c) {
            // Ordinary character: it becomes part of the output text.
            plain_text.push(c);
            return;
        }
        // Marker character: remember where it sat, measured in the output text.
        offsets_by_marker.entry(c).or_default().push(plain_text.len());
    });

    (plain_text, offsets_by_marker)
}
22
23pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
24 let (unmarked_text, mut markers) = marked_text_by(marked_text, vec!['|']);
25 (unmarked_text, markers.remove(&'|').unwrap_or_default())
26}
27
/// Describes which character(s) delimit a kind of range in marked text.
///
/// `Debug` is derived so that markers (and maps keyed by them) can be used in
/// `assert_eq!` and printed in failure messages.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum TextRangeMarker {
    /// A single marker character; each occurrence yields an empty range at
    /// its position.
    Empty(char),
    /// A start/end marker pair; each pair yields `start..end`.
    Range(char, char),
    /// A start/end marker pair whose resulting range is flipped to
    /// `end..start`.
    ReverseRange(char, char),
}
34
35impl TextRangeMarker {
36 fn markers(&self) -> Vec<char> {
37 match self {
38 Self::Empty(m) => vec![*m],
39 Self::Range(l, r) => vec![*l, *r],
40 Self::ReverseRange(l, r) => vec![*l, *r],
41 }
42 }
43}
44
45impl From<char> for TextRangeMarker {
46 fn from(marker: char) -> Self {
47 Self::Empty(marker)
48 }
49}
50
51impl From<(char, char)> for TextRangeMarker {
52 fn from((left_marker, right_marker): (char, char)) -> Self {
53 Self::Range(left_marker, right_marker)
54 }
55}
56
57pub fn marked_text_ranges_by(
58 marked_text: &str,
59 markers: Vec<TextRangeMarker>,
60) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
61 let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
62
63 let (unmarked_text, mut marker_offsets) = marked_text_by(marked_text, all_markers);
64 let range_lookup = markers
65 .into_iter()
66 .map(|marker| match marker {
67 TextRangeMarker::Empty(empty_marker_char) => {
68 let ranges = marker_offsets
69 .remove(&empty_marker_char)
70 .unwrap_or_default()
71 .into_iter()
72 .map(|empty_index| empty_index..empty_index)
73 .collect::<Vec<Range<usize>>>();
74 (marker, ranges)
75 }
76 TextRangeMarker::Range(start_marker, end_marker) => {
77 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
78 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
79 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
80
81 let ranges = starts
82 .into_iter()
83 .zip(ends)
84 .map(|(start, end)| {
85 assert!(end >= start, "marked ranges must be disjoint");
86 start..end
87 })
88 .collect::<Vec<Range<usize>>>();
89 (marker, ranges)
90 }
91 TextRangeMarker::ReverseRange(start_marker, end_marker) => {
92 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
93 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
94 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
95
96 let ranges = starts
97 .into_iter()
98 .zip(ends)
99 .map(|(start, end)| {
100 assert!(end >= start, "marked ranges must be disjoint");
101 end..start
102 })
103 .collect::<Vec<Range<usize>>>();
104 (marker, ranges)
105 }
106 })
107 .collect();
108
109 (unmarked_text, range_lookup)
110}
111
112// Returns ranges delimited by (), [], and <> ranges. Ranges using the same markers
113// must not be overlapping. May also include | for empty ranges
114pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
115 let (unmarked, range_lookup) = marked_text_ranges_by(
116 &full_marked_text,
117 vec![
118 '|'.into(),
119 ('[', ']').into(),
120 ('(', ')').into(),
121 ('<', '>').into(),
122 ],
123 );
124 let mut combined_ranges: Vec<_> = range_lookup.into_values().flatten().collect();
125
126 combined_ranges.sort_by_key(|range| range.start);
127 (unmarked, combined_ranges)
128}
129
130///
131pub fn parse_marked_text(
132 input_text: &str,
133 indicate_cursors: bool,
134) -> Result<(String, Vec<Range<usize>>)> {
135 let mut output_text = String::with_capacity(input_text.len());
136 let mut ranges = Vec::new();
137 let mut prev_input_ix = 0;
138 let mut current_range_start = None;
139 let mut current_range_cursor = None;
140
141 for (input_ix, marker) in input_text.match_indices(&['«', '»', 'ˇ']) {
142 output_text.push_str(&input_text[prev_input_ix..input_ix]);
143 let output_len = output_text.len();
144 let len = marker.len();
145 prev_input_ix = input_ix + len;
146
147 match marker {
148 "ˇ" => {
149 if current_range_start.is_some() {
150 if current_range_cursor.is_some() {
151 Err(anyhow!("duplicate point marker 'ˇ' at index {input_ix}"))?;
152 } else {
153 current_range_cursor = Some(output_len);
154 }
155 } else {
156 ranges.push(output_len..output_len);
157 }
158 }
159 "«" => {
160 if current_range_start.is_some() {
161 Err(anyhow!(
162 "unexpected range start marker '«' at index {input_ix}"
163 ))?;
164 }
165 current_range_start = Some(output_len);
166 }
167 "»" => {
168 let current_range_start = current_range_start.take().ok_or_else(|| {
169 anyhow!("unexpected range end marker '»' at index {input_ix}")
170 })?;
171
172 let mut reversed = false;
173 if let Some(current_range_cursor) = current_range_cursor.take() {
174 if current_range_cursor == current_range_start {
175 reversed = true;
176 } else if current_range_cursor != output_len {
177 Err(anyhow!("unexpected 'ˇ' marker in the middle of a range"))?;
178 }
179 } else if indicate_cursors {
180 Err(anyhow!("missing 'ˇ' marker to indicate range direction"))?;
181 }
182
183 ranges.push(if reversed {
184 output_len..current_range_start
185 } else {
186 current_range_start..output_len
187 });
188 }
189 _ => unreachable!(),
190 }
191 }
192
193 output_text.push_str(&input_text[prev_input_ix..]);
194 Ok((output_text, ranges))
195}
196
/// Inserts `«`, `»` and `ˇ` markers into `output_text` for the given ranges.
///
/// `ranges` hold byte offsets into `output_text`. They are processed from
/// last to first so that an insertion never shifts an offset that is still
/// to be used; for that to hold, the ranges must be ordered by position and
/// must not overlap.
///
/// With `indicate_cursors == true`:
/// * a forward range (`start < end`) becomes `«…ˇ»`,
/// * an empty range becomes `ˇ`,
/// * a reversed range (`start > end`) becomes `«ˇ…»`.
///
/// With `indicate_cursors == false` every range is rendered as `«…»`
/// regardless of its direction (an empty range becomes `«»`).
///
/// # Panics
///
/// Panics if an offset is out of bounds or does not lie on a `char`
/// boundary of `output_text`.
pub fn generate_marked_text(
    output_text: &str,
    ranges: &[Range<usize>],
    indicate_cursors: bool,
) -> String {
    let mut marked_text = output_text.to_string();
    for range in ranges.iter().rev() {
        if indicate_cursors {
            // Within one range, always insert at the higher offset first so
            // the lower offset is still valid afterwards.
            match range.start.cmp(&range.end) {
                Ordering::Less => {
                    marked_text.insert_str(range.end, "ˇ»");
                    marked_text.insert_str(range.start, "«");
                }
                Ordering::Equal => {
                    marked_text.insert_str(range.start, "ˇ");
                }
                Ordering::Greater => {
                    marked_text.insert_str(range.start, "»");
                    marked_text.insert_str(range.end, "«ˇ");
                }
            }
        } else {
            // Direction is not shown here, so normalize reversed ranges.
            // Inserting at `range.end` first when it is the lower offset
            // would shift `range.start` and misplace (or split a character
            // with) the second marker.
            let (low, high) = if range.start <= range.end {
                (range.start, range.end)
            } else {
                (range.end, range.start)
            };
            marked_text.insert_str(high, "»");
            marked_text.insert_str(low, "«");
        }
    }
    marked_text
}
225
#[cfg(test)]
mod tests {
    use super::{generate_marked_text, parse_marked_text};

    /// Parses a string containing reversed, forward and empty ranges, checks
    /// the extracted text and offsets, and verifies that regenerating the
    /// markers reproduces the input exactly.
    #[test]
    fn test_marked_text() {
        const MARKED: &str = "one «ˇtwo» «threeˇ» «ˇfour» fiveˇ six";

        let (text, ranges) = parse_marked_text(MARKED, true).unwrap();

        assert_eq!(text, "one two three four five six");
        assert_eq!(ranges, [7..4, 8..13, 18..14, 23..23]);

        let regenerated = generate_marked_text(&text, &ranges, true);
        assert_eq!(regenerated, MARKED);
    }
}