marked_text.rs

  1use std::{cmp::Ordering, collections::HashMap, ops::Range};
  2
  3/// Construct a string and a list of offsets within that string using a single
  4/// string containing embedded position markers.
  5pub fn marked_text_offsets_by(
  6    marked_text: &str,
  7    markers: Vec<char>,
  8) -> (String, HashMap<char, Vec<usize>>) {
  9    let mut extracted_markers: HashMap<char, Vec<usize>> = Default::default();
 10    let mut unmarked_text = String::new();
 11
 12    for char in marked_text.chars() {
 13        if markers.contains(&char) {
 14            let char_offsets = extracted_markers.entry(char).or_insert(Vec::new());
 15            char_offsets.push(unmarked_text.len());
 16        } else {
 17            unmarked_text.push(char);
 18        }
 19    }
 20
 21    (unmarked_text, extracted_markers)
 22}
 23
 24/// Construct a string and a list of ranges within that string using a single
 25/// string containing embedded range markers, using arbitrary characters as
 26/// range markers. By using multiple different range markers, you can construct
 27/// ranges that overlap each other.
 28///
 29/// The returned ranges will be grouped by their range marking characters.
 30pub fn marked_text_ranges_by(
 31    marked_text: &str,
 32    markers: Vec<TextRangeMarker>,
 33) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
 34    let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
 35
 36    let (unmarked_text, mut marker_offsets) = marked_text_offsets_by(marked_text, all_markers);
 37    let range_lookup = markers
 38        .into_iter()
 39        .map(|marker| {
 40            (
 41                marker.clone(),
 42                match marker {
 43                    TextRangeMarker::Empty(empty_marker_char) => marker_offsets
 44                        .remove(&empty_marker_char)
 45                        .unwrap_or_default()
 46                        .into_iter()
 47                        .map(|empty_index| empty_index..empty_index)
 48                        .collect::<Vec<Range<usize>>>(),
 49                    TextRangeMarker::Range(start_marker, end_marker) => {
 50                        let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 51                        let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 52                        assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 53                        starts
 54                            .into_iter()
 55                            .zip(ends)
 56                            .map(|(start, end)| {
 57                                assert!(end >= start, "marked ranges must be disjoint");
 58                                start..end
 59                            })
 60                            .collect::<Vec<Range<usize>>>()
 61                    }
 62                    TextRangeMarker::ReverseRange(start_marker, end_marker) => {
 63                        let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 64                        let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 65                        assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 66                        starts
 67                            .into_iter()
 68                            .zip(ends)
 69                            .map(|(start, end)| {
 70                                assert!(end >= start, "marked ranges must be disjoint");
 71                                end..start
 72                            })
 73                            .collect::<Vec<Range<usize>>>()
 74                    }
 75                },
 76            )
 77        })
 78        .collect();
 79
 80    (unmarked_text, range_lookup)
 81}
 82
 83/// Construct a string and a list of ranges within that string using a single
 84/// string containing embedded range markers. The characters used to mark the
 85/// ranges are as follows:
 86///
 87/// 1. To mark a range of text, surround it with the `«` and `»` angle brackets,
 88///    which can be typed on a US keyboard with the `alt-|` and `alt-shift-|` keys.
 89///
 90///    ```text
 91///    foo «selected text» bar
 92///    ```
 93///
 94/// 2. To mark a single position in the text, use the `ˇ` caron,
 95///    which can be typed on a US keyboard with the `alt-shift-t` key.
 96///
 97///    ```text
 98///    the cursors are hereˇ and hereˇ.
 99///    ```
100///
101/// 3. To mark a range whose direction is meaningful (like a selection),
102///    put a caron character beside one of its bounds, on the inside:
103///
104///    ```text
105///    one «ˇreversed» selection and one «forwardˇ» selection
106///    ```
107///
108/// Any • characters in the input string will be replaced with spaces. This makes
109/// it easier to test cases with trailing spaces, which tend to get trimmed from the
110/// source code.
111pub fn marked_text_ranges(
112    marked_text: &str,
113    ranges_are_directed: bool,
114) -> (String, Vec<Range<usize>>) {
115    let mut unmarked_text = String::with_capacity(marked_text.len());
116    let mut ranges = Vec::new();
117    let mut prev_marked_ix = 0;
118    let mut current_range_start = None;
119    let mut current_range_cursor = None;
120
121    let marked_text = marked_text.replace("", " ");
122    for (marked_ix, marker) in marked_text.match_indices(&['«', '»', 'ˇ']) {
123        unmarked_text.push_str(&marked_text[prev_marked_ix..marked_ix]);
124        let unmarked_len = unmarked_text.len();
125        let len = marker.len();
126        prev_marked_ix = marked_ix + len;
127
128        match marker {
129            "ˇ" => {
130                if current_range_start.is_some() {
131                    if current_range_cursor.is_some() {
132                        panic!("duplicate point marker 'ˇ' at index {marked_ix}");
133                    } else {
134                        current_range_cursor = Some(unmarked_len);
135                    }
136                } else {
137                    ranges.push(unmarked_len..unmarked_len);
138                }
139            }
140            "«" => {
141                if current_range_start.is_some() {
142                    panic!("unexpected range start marker '«' at index {marked_ix}");
143                }
144                current_range_start = Some(unmarked_len);
145            }
146            "»" => {
147                let current_range_start = if let Some(start) = current_range_start.take() {
148                    start
149                } else {
150                    panic!("unexpected range end marker '»' at index {marked_ix}");
151                };
152
153                let mut reversed = false;
154                if let Some(current_range_cursor) = current_range_cursor.take() {
155                    if current_range_cursor == current_range_start {
156                        reversed = true;
157                    } else if current_range_cursor != unmarked_len {
158                        panic!("unexpected 'ˇ' marker in the middle of a range");
159                    }
160                } else if ranges_are_directed {
161                    panic!("missing 'ˇ' marker to indicate range direction");
162                }
163
164                ranges.push(if reversed {
165                    unmarked_len..current_range_start
166                } else {
167                    current_range_start..unmarked_len
168                });
169            }
170            _ => unreachable!(),
171        }
172    }
173
174    unmarked_text.push_str(&marked_text[prev_marked_ix..]);
175    (unmarked_text, ranges)
176}
177
178pub fn marked_text_offsets(marked_text: &str) -> (String, Vec<usize>) {
179    let (text, ranges) = marked_text_ranges(marked_text, false);
180    (
181        text,
182        ranges
183            .into_iter()
184            .map(|range| {
185                assert_eq!(range.start, range.end);
186                range.start
187            })
188            .collect(),
189    )
190}
191
/// Render `ranges` back into `unmarked_text` as embedded markers.
///
/// `ranges` are byte ranges into `unmarked_text`. They are processed from last
/// to first so that inserting markers for one range does not shift the offsets
/// of the ranges still to be processed; this relies on `ranges` being ordered
/// by position and not overlapping.
///
/// When `indicate_cursors` is `true`, a `ˇ` is placed at each range's `end`
/// (its "head"): `«…ˇ»` for forward ranges, `«ˇ…»` for reversed ranges
/// (`start > end`), and a lone `ˇ` for empty ranges. When it is `false`, every
/// range is rendered as `«…»` regardless of direction.
///
/// # Panics
///
/// Panics if a range bound is past the end of the text or does not lie on a
/// `char` boundary.
pub fn generate_marked_text(
    unmarked_text: &str,
    ranges: &[Range<usize>],
    indicate_cursors: bool,
) -> String {
    let mut marked_text = unmarked_text.to_string();
    for range in ranges.iter().rev() {
        // Within a single range, always insert at the larger offset first so
        // the smaller offset is still valid for the second insertion.
        if indicate_cursors {
            match range.start.cmp(&range.end) {
                Ordering::Less => {
                    marked_text.insert_str(range.end, "ˇ»");
                    marked_text.insert(range.start, '«');
                }
                Ordering::Equal => {
                    marked_text.insert(range.start, 'ˇ');
                }
                Ordering::Greater => {
                    marked_text.insert(range.start, '»');
                    marked_text.insert_str(range.end, "«ˇ");
                }
            }
        } else {
            // Direction is not rendered here, so normalize reversed ranges.
            // Inserting at the raw `end` then `start` of a reversed range would
            // use a stale offset for the second insert.
            let (low, high) = if range.start <= range.end {
                (range.start, range.end)
            } else {
                (range.end, range.start)
            };
            marked_text.insert(high, '»');
            marked_text.insert(low, '«');
        }
    }
    marked_text
}
220
/// The character or character pair that identifies one kind of marker in
/// marked text.
///
/// `Debug` is derived so that values (and maps keyed by them) can be used in
/// `assert_eq!` and `{:?}` output.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum TextRangeMarker {
    /// A single character marking a position, yielding an empty range.
    Empty(char),
    /// A start character and an end character delimiting a `start..end` range.
    Range(char, char),
    /// A start character and an end character delimiting a range that is
    /// reported with its bounds swapped (`end..start`).
    ReverseRange(char, char),
}
227
228impl TextRangeMarker {
229    fn markers(&self) -> Vec<char> {
230        match self {
231            Self::Empty(m) => vec![*m],
232            Self::Range(l, r) => vec![*l, *r],
233            Self::ReverseRange(l, r) => vec![*l, *r],
234        }
235    }
236}
237
238impl From<char> for TextRangeMarker {
239    fn from(marker: char) -> Self {
240        Self::Empty(marker)
241    }
242}
243
244impl From<(char, char)> for TextRangeMarker {
245    fn from((left_marker, right_marker): (char, char)) -> Self {
246        Self::Range(left_marker, right_marker)
247    }
248}
249
#[cfg(test)]
mod tests {
    use super::{generate_marked_text, marked_text_ranges};

    /// Parses a string containing reversed, forward and empty selections, then
    /// checks that regenerating markers from the result reproduces the input.
    #[test]
    fn test_marked_text() {
        let marked = "one «ˇtwo» «threeˇ» «ˇfour» fiveˇ six";
        let (unmarked, parsed) = marked_text_ranges(marked, true);

        assert_eq!(unmarked, "one two three four five six");
        assert_eq!(parsed.len(), 4);
        // Caron next to `«`: reversed range.
        assert_eq!(parsed[0], 7..4);
        // Caron next to `»`: forward range.
        assert_eq!(parsed[1], 8..13);
        assert_eq!(parsed[2], 18..14);
        // Caron outside any range: empty range.
        assert_eq!(parsed[3], 23..23);

        let regenerated = generate_marked_text(&unmarked, &parsed, true);
        assert_eq!(regenerated, "one «ˇtwo» «threeˇ» «ˇfour» fiveˇ six");
    }
}