1use std::{cmp::Ordering, collections::HashMap, ops::Range};
2
/// Strip the given marker characters out of `marked_text` and report where
/// each of them occurred.
///
/// Returns the text with every character listed in `markers` removed, plus a
/// map from each marker character that actually occurred to the byte offsets
/// (within the returned text) at which it occurred, in order of appearance.
/// Markers that never occur in `marked_text` have no entry in the map.
pub fn marked_text_offsets_by(
    marked_text: &str,
    markers: Vec<char>,
) -> (String, HashMap<char, Vec<usize>>) {
    let mut offsets_by_marker = HashMap::<char, Vec<usize>>::new();
    let mut text = String::new();

    for ch in marked_text.chars() {
        if !markers.contains(&ch) {
            text.push(ch);
            continue;
        }

        // The marker itself is dropped; its position is the length of the
        // text accumulated so far.
        offsets_by_marker.entry(ch).or_default().push(text.len());
    }

    (text, offsets_by_marker)
}
23
24/// Construct a string and a list of ranges within that string using a single
25/// string containing embedded range markers, using arbitrary characters as
26/// range markers. By using multiple different range markers, you can construct
27/// ranges that overlap each other.
28///
29/// The returned ranges will be grouped by their range marking characters.
30pub fn marked_text_ranges_by(
31 marked_text: &str,
32 markers: Vec<TextRangeMarker>,
33) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
34 let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
35
36 let (unmarked_text, mut marker_offsets) = marked_text_offsets_by(marked_text, all_markers);
37 let range_lookup = markers
38 .into_iter()
39 .map(|marker| {
40 (
41 marker.clone(),
42 match marker {
43 TextRangeMarker::Empty(empty_marker_char) => marker_offsets
44 .remove(&empty_marker_char)
45 .unwrap_or_default()
46 .into_iter()
47 .map(|empty_index| empty_index..empty_index)
48 .collect::<Vec<Range<usize>>>(),
49 TextRangeMarker::Range(start_marker, end_marker) => {
50 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
51 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
52 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
53 starts
54 .into_iter()
55 .zip(ends)
56 .map(|(start, end)| {
57 assert!(end >= start, "marked ranges must be disjoint");
58 start..end
59 })
60 .collect::<Vec<Range<usize>>>()
61 }
62 TextRangeMarker::ReverseRange(start_marker, end_marker) => {
63 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
64 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
65 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
66 starts
67 .into_iter()
68 .zip(ends)
69 .map(|(start, end)| {
70 assert!(end >= start, "marked ranges must be disjoint");
71 end..start
72 })
73 .collect::<Vec<Range<usize>>>()
74 }
75 },
76 )
77 })
78 .collect();
79
80 (unmarked_text, range_lookup)
81}
82
/// Construct a string and a list of ranges within that string using a single
/// string containing embedded range markers. The characters used to mark the
/// ranges are as follows:
///
/// 1. To mark a range of text, surround it with the `«` and `»` angle brackets,
///    which can be typed on a US keyboard with the `alt-|` and `alt-shift-|` keys.
///
///    ```text
///    foo «selected text» bar
///    ```
///
/// 2. To mark a single position in the text, use the `ˇ` caron,
///    which can be typed on a US keyboard with the `alt-shift-t` key.
///
///    ```text
///    the cursors are hereˇ and hereˇ.
///    ```
///
/// 3. To mark a range whose direction is meaningful (like a selection),
///    put a caron character beside one of its bounds, on the inside:
///
///    ```text
///    one «ˇreversed» selection and one «forwardˇ» selection
///    ```
///
/// Any • characters in the input string will be replaced with spaces. This makes
/// it easier to test cases with trailing spaces, which tend to get trimmed from the
/// source code.
///
/// The returned ranges are byte offsets into the returned (unmarked) string,
/// listed in the order in which their closing marker (or lone `ˇ`) appears.
/// A range whose `ˇ` sits directly after its `«` is returned reversed
/// (`end..start`), whether or not `ranges_are_directed` is set.
///
/// # Panics
///
/// Panics on malformed input:
///
/// - a `«` inside a range that is still open, or a `»` with no open range;
/// - a second `ˇ` inside one range, or a `ˇ` that is not adjacent to either
///   bound of its range;
/// - `ranges_are_directed` is `true` and a range contains no `ˇ`;
/// - the text ends while a range opened with `«` is still unterminated.
pub fn marked_text_ranges(
    marked_text: &str,
    ranges_are_directed: bool,
) -> (String, Vec<Range<usize>>) {
    let mut unmarked_text = String::with_capacity(marked_text.len());
    let mut ranges = Vec::new();
    let mut prev_marked_ix = 0;
    let mut current_range_start = None;
    let mut current_range_cursor = None;

    let marked_text = marked_text.replace("•", " ");
    for (marked_ix, marker) in marked_text.match_indices(&['«', '»', 'ˇ']) {
        // Copy the plain text between the previous marker and this one, then
        // skip over the marker itself.
        unmarked_text.push_str(&marked_text[prev_marked_ix..marked_ix]);
        let unmarked_len = unmarked_text.len();
        prev_marked_ix = marked_ix + marker.len();

        match marker {
            "ˇ" => {
                if current_range_start.is_some() {
                    // Inside a range the caron only records the direction;
                    // it is validated when the range is closed.
                    if current_range_cursor.is_some() {
                        panic!("duplicate point marker 'ˇ' at index {marked_ix}");
                    }
                    current_range_cursor = Some(unmarked_len);
                } else {
                    ranges.push(unmarked_len..unmarked_len);
                }
            }
            "«" => {
                if current_range_start.is_some() {
                    panic!("unexpected range start marker '«' at index {marked_ix}");
                }
                current_range_start = Some(unmarked_len);
            }
            "»" => {
                let range_start = match current_range_start.take() {
                    Some(start) => start,
                    None => panic!("unexpected range end marker '»' at index {marked_ix}"),
                };

                let mut reversed = false;
                if let Some(cursor) = current_range_cursor.take() {
                    if cursor == range_start {
                        reversed = true;
                    } else if cursor != unmarked_len {
                        panic!("unexpected 'ˇ' marker in the middle of a range");
                    }
                } else if ranges_are_directed {
                    panic!("missing 'ˇ' marker to indicate range direction");
                }

                ranges.push(if reversed {
                    unmarked_len..range_start
                } else {
                    range_start..unmarked_len
                });
            }
            _ => unreachable!(),
        }
    }

    // A range that was opened but never closed would otherwise be dropped
    // silently, hiding a typo in the marked text.
    if let Some(start) = current_range_start {
        panic!("missing range end marker '»' for range starting at offset {start}");
    }

    unmarked_text.push_str(&marked_text[prev_marked_ix..]);
    (unmarked_text, ranges)
}
177
178pub fn marked_text_offsets(marked_text: &str) -> (String, Vec<usize>) {
179 let (text, ranges) = marked_text_ranges(marked_text, false);
180 (
181 text,
182 ranges
183 .into_iter()
184 .map(|range| {
185 assert_eq!(range.start, range.end);
186 range.start
187 })
188 .collect(),
189 )
190}
191
/// Render `ranges` back into `unmarked_text` as embedded markers.
///
/// When `indicate_cursors` is `true`, an empty range becomes a lone `ˇ`, a
/// forward range becomes `«…ˇ»` and a reversed range (`start > end`) becomes
/// `«ˇ…»`. When it is `false`, every range is rendered as `«…»` regardless of
/// its direction (an empty range becomes `«»`).
///
/// Range bounds are byte offsets into `unmarked_text`. Ranges are expected to
/// be listed in ascending, non-overlapping order: they are applied from last
/// to first so that inserting markers does not shift offsets that are still
/// to be used.
///
/// # Panics
///
/// Panics if a range bound lies outside the text or does not fall on a `char`
/// boundary.
pub fn generate_marked_text(
    unmarked_text: &str,
    ranges: &[Range<usize>],
    indicate_cursors: bool,
) -> String {
    let mut marked_text = unmarked_text.to_string();
    for range in ranges.iter().rev() {
        if indicate_cursors {
            // Within a single range the larger offset is always edited first,
            // so the smaller offset is still valid afterwards.
            match range.start.cmp(&range.end) {
                Ordering::Less => {
                    marked_text.insert_str(range.end, "ˇ»");
                    marked_text.insert(range.start, '«');
                }
                Ordering::Equal => {
                    marked_text.insert(range.start, 'ˇ');
                }
                Ordering::Greater => {
                    marked_text.insert(range.start, '»');
                    marked_text.insert_str(range.end, "«ˇ");
                }
            }
        } else {
            // Direction is not rendered in this mode, so normalize reversed
            // ranges first. Inserting at the smaller offset before the larger
            // one would shift the larger one and misplace the marker (or land
            // inside the multi-byte marker that was just inserted).
            let start = range.start.min(range.end);
            let end = range.start.max(range.end);
            marked_text.insert(end, '»');
            marked_text.insert(start, '«');
        }
    }
    marked_text
}
220
/// The marker characters that delimit one kind of range when parsing marked
/// text with custom markers.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum TextRangeMarker {
    /// A single character that marks an empty range at its position.
    Empty(char),
    /// A start and an end character that mark a range emitted as `start..end`.
    Range(char, char),
    /// A start and an end character that mark a range emitted with its bounds
    /// swapped, i.e. as `end..start`.
    ReverseRange(char, char),
}
227
228impl TextRangeMarker {
229 fn markers(&self) -> Vec<char> {
230 match self {
231 Self::Empty(m) => vec![*m],
232 Self::Range(l, r) => vec![*l, *r],
233 Self::ReverseRange(l, r) => vec![*l, *r],
234 }
235 }
236}
237
238impl From<char> for TextRangeMarker {
239 fn from(marker: char) -> Self {
240 Self::Empty(marker)
241 }
242}
243
244impl From<(char, char)> for TextRangeMarker {
245 fn from((left_marker, right_marker): (char, char)) -> Self {
246 Self::Range(left_marker, right_marker)
247 }
248}
249
#[cfg(test)]
mod tests {
    use super::{generate_marked_text, marked_text_ranges};

    /// Parses a string containing reversed, forward and point markers, checks
    /// the extracted text and ranges, and verifies that regenerating the
    /// marked text reproduces the input.
    #[test]
    fn test_marked_text() {
        let marked = "one «ˇtwo» «threeˇ» «ˇfour» fiveˇ six";
        let (text, ranges) = marked_text_ranges(marked, true);

        assert_eq!(text, "one two three four five six");
        assert_eq!(ranges, vec![7..4, 8..13, 18..14, 23..23]);

        assert_eq!(generate_marked_text(&text, &ranges, true), marked);
    }
}