marked_text.rs

  1use anyhow::{anyhow, Result};
  2use std::{cmp::Ordering, collections::HashMap, ops::Range};
  3
  4pub fn marked_text_by(
  5    marked_text: &str,
  6    markers: Vec<char>,
  7) -> (String, HashMap<char, Vec<usize>>) {
  8    let mut extracted_markers: HashMap<char, Vec<usize>> = Default::default();
  9    let mut unmarked_text = String::new();
 10
 11    for char in marked_text.chars() {
 12        if markers.contains(&char) {
 13            let char_offsets = extracted_markers.entry(char).or_insert(Vec::new());
 14            char_offsets.push(unmarked_text.len());
 15        } else {
 16            unmarked_text.push(char);
 17        }
 18    }
 19
 20    (unmarked_text, extracted_markers)
 21}
 22
 23pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
 24    let (unmarked_text, mut markers) = marked_text_by(marked_text, vec!['|']);
 25    (unmarked_text, markers.remove(&'|').unwrap_or_default())
 26}
 27
 28#[derive(Clone, Eq, PartialEq, Hash)]
 29pub enum TextRangeMarker {
 30    Empty(char),
 31    Range(char, char),
 32    ReverseRange(char, char),
 33}
 34
 35impl TextRangeMarker {
 36    fn markers(&self) -> Vec<char> {
 37        match self {
 38            Self::Empty(m) => vec![*m],
 39            Self::Range(l, r) => vec![*l, *r],
 40            Self::ReverseRange(l, r) => vec![*l, *r],
 41        }
 42    }
 43}
 44
 45impl From<char> for TextRangeMarker {
 46    fn from(marker: char) -> Self {
 47        Self::Empty(marker)
 48    }
 49}
 50
 51impl From<(char, char)> for TextRangeMarker {
 52    fn from((left_marker, right_marker): (char, char)) -> Self {
 53        Self::Range(left_marker, right_marker)
 54    }
 55}
 56
 57pub fn marked_text_ranges_by(
 58    marked_text: &str,
 59    markers: Vec<TextRangeMarker>,
 60) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
 61    let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
 62
 63    let (unmarked_text, mut marker_offsets) = marked_text_by(marked_text, all_markers);
 64    let range_lookup = markers
 65        .into_iter()
 66        .map(|marker| match marker {
 67            TextRangeMarker::Empty(empty_marker_char) => {
 68                let ranges = marker_offsets
 69                    .remove(&empty_marker_char)
 70                    .unwrap_or_default()
 71                    .into_iter()
 72                    .map(|empty_index| empty_index..empty_index)
 73                    .collect::<Vec<Range<usize>>>();
 74                (marker, ranges)
 75            }
 76            TextRangeMarker::Range(start_marker, end_marker) => {
 77                let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 78                let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 79                assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 80
 81                let ranges = starts
 82                    .into_iter()
 83                    .zip(ends)
 84                    .map(|(start, end)| {
 85                        assert!(end >= start, "marked ranges must be disjoint");
 86                        start..end
 87                    })
 88                    .collect::<Vec<Range<usize>>>();
 89                (marker, ranges)
 90            }
 91            TextRangeMarker::ReverseRange(start_marker, end_marker) => {
 92                let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 93                let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 94                assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 95
 96                let ranges = starts
 97                    .into_iter()
 98                    .zip(ends)
 99                    .map(|(start, end)| {
100                        assert!(end >= start, "marked ranges must be disjoint");
101                        end..start
102                    })
103                    .collect::<Vec<Range<usize>>>();
104                (marker, ranges)
105            }
106        })
107        .collect();
108
109    (unmarked_text, range_lookup)
110}
111
112// Returns ranges delimited by (), [], and <> ranges. Ranges using the same markers
113// must not be overlapping. May also include | for empty ranges
114pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
115    let (unmarked, range_lookup) = marked_text_ranges_by(
116        &full_marked_text,
117        vec![
118            '|'.into(),
119            ('[', ']').into(),
120            ('(', ')').into(),
121            ('<', '>').into(),
122        ],
123    );
124    let mut combined_ranges: Vec<_> = range_lookup.into_values().flatten().collect();
125
126    combined_ranges.sort_by_key(|range| range.start);
127    (unmarked, combined_ranges)
128}
129
130///
131pub fn parse_marked_text(
132    input_text: &str,
133    indicate_cursors: bool,
134) -> Result<(String, Vec<Range<usize>>)> {
135    let mut output_text = String::with_capacity(input_text.len());
136    let mut ranges = Vec::new();
137    let mut prev_input_ix = 0;
138    let mut current_range_start = None;
139    let mut current_range_cursor = None;
140
141    for (input_ix, marker) in input_text.match_indices(&['«', '»', 'ˇ']) {
142        output_text.push_str(&input_text[prev_input_ix..input_ix]);
143        let output_len = output_text.len();
144        let len = marker.len();
145        prev_input_ix = input_ix + len;
146
147        match marker {
148            "ˇ" => {
149                if current_range_start.is_some() {
150                    if current_range_cursor.is_some() {
151                        Err(anyhow!("duplicate point marker 'ˇ' at index {input_ix}"))?;
152                    } else {
153                        current_range_cursor = Some(output_len);
154                    }
155                } else {
156                    ranges.push(output_len..output_len);
157                }
158            }
159            "«" => {
160                if current_range_start.is_some() {
161                    Err(anyhow!(
162                        "unexpected range start marker '«' at index {input_ix}"
163                    ))?;
164                }
165                current_range_start = Some(output_len);
166            }
167            "»" => {
168                let current_range_start = current_range_start.take().ok_or_else(|| {
169                    anyhow!("unexpected range end marker '»' at index {input_ix}")
170                })?;
171
172                let mut reversed = false;
173                if let Some(current_range_cursor) = current_range_cursor.take() {
174                    if current_range_cursor == current_range_start {
175                        reversed = true;
176                    } else if current_range_cursor != output_len {
177                        Err(anyhow!("unexpected 'ˇ' marker in the middle of a range"))?;
178                    }
179                } else if indicate_cursors {
180                    Err(anyhow!("missing 'ˇ' marker to indicate range direction"))?;
181                }
182
183                ranges.push(if reversed {
184                    output_len..current_range_start
185                } else {
186                    current_range_start..output_len
187                });
188            }
189            _ => unreachable!(),
190        }
191    }
192
193    output_text.push_str(&input_text[prev_input_ix..]);
194    Ok((output_text, ranges))
195}
196
/// Inserts `«`, `»`, and `ˇ` markers into `output_text` to describe `ranges`.
///
/// With `indicate_cursors` set, a forward range is rendered as `«…ˇ»`, a
/// reversed range (`start > end`) as `«ˇ…»`, and an empty range as a lone `ˇ`.
/// Without it, every range is rendered as `«…»` regardless of its direction.
///
/// Range offsets are byte offsets into `output_text`. Ranges are processed
/// from last to first so that an insertion never shifts the offsets of ranges
/// still to be processed; this relies on `ranges` being listed in ascending,
/// non-overlapping order.
///
/// # Panics
///
/// Panics if an offset is past the end of the text or does not fall on a
/// `char` boundary.
pub fn generate_marked_text(
    output_text: &str,
    ranges: &[Range<usize>],
    indicate_cursors: bool,
) -> String {
    let mut marked_text = output_text.to_string();
    for range in ranges.iter().rev() {
        if indicate_cursors {
            // Within one range, always insert at the larger offset first so
            // the smaller offset is still valid for the second insertion.
            match range.start.cmp(&range.end) {
                Ordering::Less => {
                    marked_text.insert_str(range.end, "ˇ»");
                    marked_text.insert_str(range.start, "«");
                }
                Ordering::Equal => {
                    marked_text.insert_str(range.start, "ˇ");
                }
                Ordering::Greater => {
                    marked_text.insert_str(range.start, "»");
                    marked_text.insert_str(range.end, "«ˇ");
                }
            }
        } else {
            // Direction is not rendered in this mode, so normalize reversed
            // ranges: inserting at the smaller offset first would shift the
            // larger one and misplace (or split a character with) the marker.
            let lower = range.start.min(range.end);
            let upper = range.start.max(range.end);
            marked_text.insert_str(upper, "»");
            marked_text.insert_str(lower, "«");
        }
    }
    marked_text
}
225
#[cfg(test)]
mod tests {
    use super::{generate_marked_text, parse_marked_text};

    /// Parses a string containing reversed, forward, and empty selections, then
    /// checks that regenerating the markers reproduces the input exactly.
    #[test]
    fn test_marked_text() {
        let marked = "one «ˇtwo» «threeˇ» «ˇfour» fiveˇ six";
        let (text, ranges) = parse_marked_text(marked, true).unwrap();

        assert_eq!(text, "one two three four five six");

        let expected: Vec<std::ops::Range<usize>> = vec![7..4, 8..13, 18..14, 23..23];
        assert_eq!(ranges, expected);

        let regenerated = generate_marked_text(&text, &ranges, true);
        assert_eq!(regenerated, "one «ˇtwo» «threeˇ» «ˇfour» fiveˇ six");
    }
}