marked_text.rs

  1use collections::HashMap;
  2use std::{cmp::Ordering, ops::Range};
  3
  4/// Construct a string and a list of offsets within that string using a single
  5/// string containing embedded position markers.
  6pub fn marked_text_offsets_by(
  7    marked_text: &str,
  8    markers: Vec<char>,
  9) -> (String, HashMap<char, Vec<usize>>) {
 10    let mut extracted_markers: HashMap<char, Vec<usize>> = Default::default();
 11    let mut unmarked_text = String::new();
 12
 13    for char in marked_text.chars() {
 14        if markers.contains(&char) {
 15            let char_offsets = extracted_markers.entry(char).or_default();
 16            char_offsets.push(unmarked_text.len());
 17        } else {
 18            unmarked_text.push(char);
 19        }
 20    }
 21
 22    (unmarked_text, extracted_markers)
 23}
 24
 25/// Construct a string and a list of ranges within that string using a single
 26/// string containing embedded range markers, using arbitrary characters as
 27/// range markers. By using multiple different range markers, you can construct
 28/// ranges that overlap each other.
 29///
 30/// The returned ranges will be grouped by their range marking characters.
 31pub fn marked_text_ranges_by(
 32    marked_text: &str,
 33    markers: Vec<TextRangeMarker>,
 34) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
 35    let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
 36
 37    let (unmarked_text, mut marker_offsets) = marked_text_offsets_by(marked_text, all_markers);
 38    let range_lookup = markers
 39        .into_iter()
 40        .map(|marker| {
 41            (
 42                marker.clone(),
 43                match marker {
 44                    TextRangeMarker::Empty(empty_marker_char) => marker_offsets
 45                        .remove(&empty_marker_char)
 46                        .unwrap_or_default()
 47                        .into_iter()
 48                        .map(|empty_index| empty_index..empty_index)
 49                        .collect::<Vec<Range<usize>>>(),
 50                    TextRangeMarker::Range(start_marker, end_marker) => {
 51                        let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 52                        let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 53                        assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 54                        starts
 55                            .into_iter()
 56                            .zip(ends)
 57                            .map(|(start, end)| {
 58                                assert!(end >= start, "marked ranges must be disjoint");
 59                                start..end
 60                            })
 61                            .collect::<Vec<Range<usize>>>()
 62                    }
 63                    TextRangeMarker::ReverseRange(start_marker, end_marker) => {
 64                        let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 65                        let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 66                        assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 67                        starts
 68                            .into_iter()
 69                            .zip(ends)
 70                            .map(|(start, end)| {
 71                                assert!(end >= start, "marked ranges must be disjoint");
 72                                end..start
 73                            })
 74                            .collect::<Vec<Range<usize>>>()
 75                    }
 76                },
 77            )
 78        })
 79        .collect();
 80
 81    (unmarked_text, range_lookup)
 82}
 83
 84/// Construct a string and a list of ranges within that string using a single
 85/// string containing embedded range markers. The characters used to mark the
 86/// ranges are as follows:
 87///
 88/// 1. To mark a range of text, surround it with the `«` and `»` angle brackets,
 89///    which can be typed on a US keyboard with the `alt-|` and `alt-shift-|` keys.
 90///
 91///    ```text
 92///    foo «selected text» bar
 93///    ```
 94///
 95/// 2. To mark a single position in the text, use the `ˇ` caron,
 96///    which can be typed on a US keyboard with the `alt-shift-t` key.
 97///
 98///    ```text
 99///    the cursors are hereˇ and hereˇ.
100///    ```
101///
102/// 3. To mark a range whose direction is meaningful (like a selection),
103///    put a caron character beside one of its bounds, on the inside:
104///
105///    ```text
106///    one «ˇreversed» selection and one «forwardˇ» selection
107///    ```
108///
109/// Any • characters in the input string will be replaced with spaces. This makes
110/// it easier to test cases with trailing spaces, which tend to get trimmed from the
111/// source code.
112#[track_caller]
113pub fn marked_text_ranges(
114    marked_text: &str,
115    ranges_are_directed: bool,
116) -> (String, Vec<Range<usize>>) {
117    let mut unmarked_text = String::with_capacity(marked_text.len());
118    let mut ranges = Vec::new();
119    let mut prev_marked_ix = 0;
120    let mut current_range_start = None;
121    let mut current_range_cursor = None;
122
123    let marked_text = marked_text.replace('•', " ");
124    for (marked_ix, marker) in marked_text.match_indices(&['«', '»', 'ˇ']) {
125        unmarked_text.push_str(&marked_text[prev_marked_ix..marked_ix]);
126        let unmarked_len = unmarked_text.len();
127        let len = marker.len();
128        prev_marked_ix = marked_ix + len;
129
130        match marker {
131            "ˇ" => {
132                if current_range_start.is_some() {
133                    if current_range_cursor.is_some() {
134                        panic!("duplicate point marker 'ˇ' at index {marked_ix}");
135                    }
136
137                    current_range_cursor = Some(unmarked_len);
138                } else {
139                    ranges.push(unmarked_len..unmarked_len);
140                }
141            }
142            "«" => {
143                if current_range_start.is_some() {
144                    panic!("unexpected range start marker '«' at index {marked_ix}");
145                }
146                current_range_start = Some(unmarked_len);
147            }
148            "»" => {
149                let current_range_start = if let Some(start) = current_range_start.take() {
150                    start
151                } else {
152                    panic!("unexpected range end marker '»' at index {marked_ix}");
153                };
154
155                let mut reversed = false;
156                if let Some(current_range_cursor) = current_range_cursor.take() {
157                    if current_range_cursor == current_range_start {
158                        reversed = true;
159                    } else if current_range_cursor != unmarked_len {
160                        panic!("unexpected 'ˇ' marker in the middle of a range");
161                    }
162                } else if ranges_are_directed {
163                    panic!("missing 'ˇ' marker to indicate range direction");
164                }
165
166                ranges.push(if reversed {
167                    unmarked_len..current_range_start
168                } else {
169                    current_range_start..unmarked_len
170                });
171            }
172            _ => unreachable!(),
173        }
174    }
175
176    unmarked_text.push_str(&marked_text[prev_marked_ix..]);
177    (unmarked_text, ranges)
178}
179
180#[track_caller]
181pub fn marked_text_offsets(marked_text: &str) -> (String, Vec<usize>) {
182    let (text, ranges) = marked_text_ranges(marked_text, false);
183    (
184        text,
185        ranges
186            .into_iter()
187            .map(|range| {
188                assert_eq!(range.start, range.end);
189                range.start
190            })
191            .collect(),
192    )
193}
194
/// Produce marked text from plain text and a list of ranges, the inverse of
/// parsing marked text into ranges.
///
/// - An empty range (`start == end`) is rendered as a `ˇ` at that offset.
/// - When `indicate_cursors` is true, a non-empty range is wrapped in `«` and
///   `»` with a `ˇ` on the inside next to `range.end`: `«textˇ»` for a forward
///   range and `«ˇtext»` for a reversed one (`start > end`).
/// - When `indicate_cursors` is false, a non-empty range is rendered as
///   `«text»` regardless of its direction.
///
/// Range bounds are byte offsets into `unmarked_text`. Markers are inserted
/// from the last range to the first so that earlier offsets stay valid, which
/// means `ranges` is expected to be sorted in ascending order and
/// non-overlapping.
///
/// # Panics
///
/// Panics if a range bound is past the end of `unmarked_text` or does not lie
/// on a `char` boundary.
pub fn generate_marked_text(
    unmarked_text: &str,
    ranges: &[Range<usize>],
    indicate_cursors: bool,
) -> String {
    let mut marked_text = unmarked_text.to_string();
    for range in ranges.iter().rev() {
        match (range.start.cmp(&range.end), indicate_cursors) {
            (Ordering::Equal, _) => {
                marked_text.insert(range.start, 'ˇ');
            }
            // Within one range, always insert at the higher offset first so the
            // lower offset is still valid for the second insertion.
            (Ordering::Less, true) => {
                marked_text.insert_str(range.end, "ˇ»");
                marked_text.insert(range.start, '«');
            }
            (Ordering::Greater, true) => {
                marked_text.insert(range.start, '»');
                marked_text.insert_str(range.end, "«ˇ");
            }
            (_, false) => {
                // Direction is not rendered here, but a reversed range must
                // still be ordered first: inserting at the lower bound before
                // the higher one would shift the text and misplace (or panic
                // on) the second marker.
                let (low, high) = if range.start <= range.end {
                    (range.start, range.end)
                } else {
                    (range.end, range.start)
                };
                marked_text.insert(high, '»');
                marked_text.insert(low, '«');
            }
        }
    }
    marked_text
}
230
/// Describes the character(s) that delimit one kind of range in marked text
/// passed to [`marked_text_ranges_by`].
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum TextRangeMarker {
    /// A single character marking an empty range (`offset..offset`).
    Empty(char),
    /// A start and an end character marking a range that runs from the start
    /// marker to the end marker.
    Range(char, char),
    /// A start and an end character marking a range that is returned
    /// reversed, i.e. running from the end marker back to the start marker.
    ReverseRange(char, char),
}

impl TextRangeMarker {
    /// Returns every character this marker uses, start character first.
    fn markers(&self) -> Vec<char> {
        match self {
            Self::Empty(marker) => vec![*marker],
            Self::Range(start, end) | Self::ReverseRange(start, end) => vec![*start, *end],
        }
    }
}

/// A lone character is treated as an empty-range marker.
impl From<char> for TextRangeMarker {
    fn from(marker: char) -> Self {
        Self::Empty(marker)
    }
}

/// A pair of characters is treated as a (forward) range marker.
impl From<(char, char)> for TextRangeMarker {
    fn from((left_marker, right_marker): (char, char)) -> Self {
        Self::Range(left_marker, right_marker)
    }
}
259
#[cfg(test)]
mod tests {
    use super::{generate_marked_text, marked_text_ranges};

    /// Parses directed marked text, checks the extracted text and ranges, and
    /// verifies that regenerating marked text reproduces the input.
    // Reversed selections are deliberately written as ranges with start > end.
    #[allow(clippy::reversed_empty_ranges)]
    #[test]
    fn test_marked_text() {
        let input = "one «ˇtwo» «threeˇ» «ˇfour» fiveˇ six";
        let (text, ranges) = marked_text_ranges(input, true);

        assert_eq!(text, "one two three four five six");
        assert_eq!(ranges, vec![7..4, 8..13, 18..14, 23..23]);

        let regenerated = generate_marked_text(&text, &ranges, true);
        assert_eq!(regenerated, input);
    }
}