1use collections::HashMap;
2use std::{cmp::Ordering, ops::Range};
3
/// Strips single-character position markers out of `marked_text` and reports
/// where each of them was found.
///
/// Every character listed in `markers` is removed from the text. The returned
/// map groups, per marker character, the byte offsets (into the returned,
/// marker-free string) at which that marker occurred, in order of appearance.
/// Marker characters that never occur in `marked_text` get no entry in the map.
pub fn marked_text_offsets_by(
    marked_text: &str,
    markers: Vec<char>,
) -> (String, HashMap<char, Vec<usize>>) {
    let mut offsets_by_marker: HashMap<char, Vec<usize>> = Default::default();
    let mut plain_text = String::new();

    for ch in marked_text.chars() {
        if !markers.contains(&ch) {
            plain_text.push(ch);
            continue;
        }

        // The marker itself is never copied, so the current byte length of the
        // output is exactly the position the marker points at.
        offsets_by_marker
            .entry(ch)
            .or_default()
            .push(plain_text.len());
    }

    (plain_text, offsets_by_marker)
}
24
25/// Construct a string and a list of ranges within that string using a single
26/// string containing embedded range markers, using arbitrary characters as
27/// range markers. By using multiple different range markers, you can construct
28/// ranges that overlap each other.
29///
30/// The returned ranges will be grouped by their range marking characters.
31pub fn marked_text_ranges_by(
32 marked_text: &str,
33 markers: Vec<TextRangeMarker>,
34) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
35 let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
36
37 let (unmarked_text, mut marker_offsets) = marked_text_offsets_by(marked_text, all_markers);
38 let range_lookup = markers
39 .into_iter()
40 .map(|marker| {
41 (
42 marker.clone(),
43 match marker {
44 TextRangeMarker::Empty(empty_marker_char) => marker_offsets
45 .remove(&empty_marker_char)
46 .unwrap_or_default()
47 .into_iter()
48 .map(|empty_index| empty_index..empty_index)
49 .collect::<Vec<Range<usize>>>(),
50 TextRangeMarker::Range(start_marker, end_marker) => {
51 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
52 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
53 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
54 starts
55 .into_iter()
56 .zip(ends)
57 .map(|(start, end)| {
58 assert!(end >= start, "marked ranges must be disjoint");
59 start..end
60 })
61 .collect::<Vec<Range<usize>>>()
62 }
63 TextRangeMarker::ReverseRange(start_marker, end_marker) => {
64 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
65 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
66 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
67 starts
68 .into_iter()
69 .zip(ends)
70 .map(|(start, end)| {
71 assert!(end >= start, "marked ranges must be disjoint");
72 end..start
73 })
74 .collect::<Vec<Range<usize>>>()
75 }
76 },
77 )
78 })
79 .collect();
80
81 (unmarked_text, range_lookup)
82}
83
/// Construct a string and a list of ranges within that string using a single
/// string containing embedded range markers. The characters used to mark the
/// ranges are as follows:
///
/// 1. To mark a range of text, surround it with the `«` and `»` angle brackets,
///    which can be typed on a US keyboard with the `alt-|` and `alt-shift-|` keys.
///
///    ```text
///    foo «selected text» bar
///    ```
///
/// 2. To mark a single position in the text, use the `ˇ` caron,
///    which can be typed on a US keyboard with the `alt-shift-t` key.
///
///    ```text
///    the cursors are hereˇ and hereˇ.
///    ```
///
/// 3. To mark a range whose direction is meaningful (like a selection),
///    put a caron character beside one of its bounds, on the inside:
///
///    ```text
///    one «ˇreversed» selection and one «forwardˇ» selection
///    ```
///
///    A caron next to the opening bracket produces a reversed range, i.e. one
///    whose `start` is greater than its `end`.
///
/// Any • characters in the input string will be replaced with spaces. This makes
/// it easier to test cases with trailing spaces, which tend to get trimmed from the
/// source code.
///
/// The returned ranges are byte offsets into the returned (marker-free) string,
/// in the order in which they were completed in the input.
///
/// # Panics
///
/// Panics on malformed input:
/// * a `«` inside a range that is already open,
/// * a `»` without a matching `«`,
/// * a `«` that is never closed by a `»`,
/// * more than one `ˇ` inside a range, or a `ˇ` that is not adjacent to one of
///   the range's bounds,
/// * a range without a `ˇ` when `ranges_are_directed` is `true`.
///
/// Indices mentioned in panic messages are byte indices into the marked text
/// after the `•` replacement.
#[track_caller]
pub fn marked_text_ranges(
    marked_text: &str,
    ranges_are_directed: bool,
) -> (String, Vec<Range<usize>>) {
    let mut unmarked_text = String::with_capacity(marked_text.len());
    let mut ranges = Vec::new();
    let mut prev_marked_ix = 0;
    // For the range currently being parsed: the offset of its start in the
    // unmarked text, and the index of its `«` in the marked text (kept only
    // for error reporting).
    let mut current_range_start: Option<(usize, usize)> = None;
    let mut current_range_cursor = None;

    let marked_text = marked_text.replace('•', " ");
    for (marked_ix, marker) in marked_text.match_indices(&['«', '»', 'ˇ']) {
        // Copy the plain text between the previous marker and this one.
        unmarked_text.push_str(&marked_text[prev_marked_ix..marked_ix]);
        let unmarked_len = unmarked_text.len();
        let len = marker.len();
        prev_marked_ix = marked_ix + len;

        match marker {
            "ˇ" => {
                if current_range_start.is_some() {
                    // Inside a range the caron only records the direction; it
                    // is validated once the range is closed.
                    if current_range_cursor.is_some() {
                        panic!("duplicate point marker 'ˇ' at index {marked_ix}");
                    }

                    current_range_cursor = Some(unmarked_len);
                } else {
                    // Outside a range the caron is a standalone cursor.
                    ranges.push(unmarked_len..unmarked_len);
                }
            }
            "«" => {
                if current_range_start.is_some() {
                    panic!("unexpected range start marker '«' at index {marked_ix}");
                }
                current_range_start = Some((unmarked_len, marked_ix));
            }
            "»" => {
                let range_start = if let Some((start, _)) = current_range_start.take() {
                    start
                } else {
                    panic!("unexpected range end marker '»' at index {marked_ix}");
                };

                let mut reversed = false;
                if let Some(cursor) = current_range_cursor.take() {
                    if cursor == range_start {
                        reversed = true;
                    } else if cursor != unmarked_len {
                        panic!("unexpected 'ˇ' marker in the middle of a range");
                    }
                } else if ranges_are_directed {
                    panic!("missing 'ˇ' marker to indicate range direction");
                }

                ranges.push(if reversed {
                    unmarked_len..range_start
                } else {
                    range_start..unmarked_len
                });
            }
            _ => unreachable!(),
        }
    }

    // A range that was opened but never closed would otherwise vanish silently
    // (together with any cursor placed inside it), hiding a typo in the input.
    if let Some((_, start_ix)) = current_range_start {
        panic!("unterminated range start marker '«' at index {start_ix}");
    }

    unmarked_text.push_str(&marked_text[prev_marked_ix..]);
    (unmarked_text, ranges)
}
179
180#[track_caller]
181pub fn marked_text_offsets(marked_text: &str) -> (String, Vec<usize>) {
182 let (text, ranges) = marked_text_ranges(marked_text, false);
183 (
184 text,
185 ranges
186 .into_iter()
187 .map(|range| {
188 assert_eq!(range.start, range.end);
189 range.start
190 })
191 .collect(),
192 )
193}
194
/// Produces a marked-text string from plain text and a list of ranges: the
/// inverse of parsing marked text.
///
/// * An empty range (`start == end`) is rendered as a `ˇ` at that offset.
/// * A non-empty range is surrounded by `«` and `»`.
/// * When `indicate_cursors` is `true`, a `ˇ` is additionally placed inside the
///   brackets next to the range's `end`: before `»` for a forward range
///   (`start < end`) and after `«` for a reversed range (`start > end`).
/// * When `indicate_cursors` is `false`, the direction is not rendered; a
///   reversed range is bracketed exactly like the equivalent forward range.
///
/// Offsets are byte offsets into `unmarked_text`. Markers are inserted from the
/// last range to the first so that an insertion never shifts an offset that is
/// still to be used; this relies on `ranges` being ordered by position and not
/// overlapping.
///
/// # Panics
///
/// Panics if an offset is past the end of the text or not on a `char`
/// boundary (see [`String::insert_str`]).
pub fn generate_marked_text(
    unmarked_text: &str,
    ranges: &[Range<usize>],
    indicate_cursors: bool,
) -> String {
    let mut marked_text = unmarked_text.to_string();
    for range in ranges.iter().rev() {
        let (low, high, open, close) = match range.start.cmp(&range.end) {
            Ordering::Equal => {
                marked_text.insert(range.start, 'ˇ');
                continue;
            }
            Ordering::Less => {
                let close = if indicate_cursors { "ˇ»" } else { "»" };
                (range.start, range.end, "«", close)
            }
            Ordering::Greater => {
                let open = if indicate_cursors { "«ˇ" } else { "«" };
                (range.end, range.start, open, "»")
            }
        };

        // Always insert at the higher offset first: inserting at the lower one
        // would shift the text the higher offset refers to.
        marked_text.insert_str(high, close);
        marked_text.insert_str(low, open);
    }
    marked_text
}
230
/// Describes which characters mark a family of ranges in a marked-text string
/// and how the marked positions are turned into ranges.
///
/// Used as the lookup key of the map returned by `marked_text_ranges_by`.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum TextRangeMarker {
    /// A single marker character; every occurrence yields an empty range
    /// (`offset..offset`) at its position.
    Empty(char),
    /// A start and an end marker character; the n-th start is paired with the
    /// n-th end and yields `start..end`.
    Range(char, char),
    /// A start and an end marker character, paired like [`Self::Range`], but
    /// yielding the reversed range `end..start`.
    ReverseRange(char, char),
}
237
238impl TextRangeMarker {
239 fn markers(&self) -> Vec<char> {
240 match self {
241 Self::Empty(m) => vec![*m],
242 Self::Range(l, r) => vec![*l, *r],
243 Self::ReverseRange(l, r) => vec![*l, *r],
244 }
245 }
246}
247
248impl From<char> for TextRangeMarker {
249 fn from(marker: char) -> Self {
250 Self::Empty(marker)
251 }
252}
253
254impl From<(char, char)> for TextRangeMarker {
255 fn from((left_marker, right_marker): (char, char)) -> Self {
256 Self::Range(left_marker, right_marker)
257 }
258}
259
#[cfg(test)]
mod tests {
    use super::{generate_marked_text, marked_text_ranges};

    /// Parses a string containing reversed, forward and empty selections, then
    /// checks that regenerating the marked text reproduces the input.
    // Reversed selections are deliberately written as `start > end` ranges.
    #[allow(clippy::reversed_empty_ranges)]
    #[test]
    fn test_marked_text() {
        let marked = "one «ˇtwo» «threeˇ» «ˇfour» fiveˇ six";
        let expected_ranges: Vec<std::ops::Range<usize>> = vec![7..4, 8..13, 18..14, 23..23];

        let (text, ranges) = marked_text_ranges(marked, true);

        assert_eq!(text, "one two three four five six");
        assert_eq!(ranges, expected_ranges);
        assert_eq!(generate_marked_text(&text, &ranges, true), marked);
    }
}