marked_text.rs

  1use std::{collections::HashMap, ops::Range};
  2
  3pub fn marked_text_by(
  4    marked_text: &str,
  5    markers: Vec<char>,
  6) -> (String, HashMap<char, Vec<usize>>) {
  7    let mut extracted_markers: HashMap<char, Vec<usize>> = Default::default();
  8    let mut unmarked_text = String::new();
  9
 10    for char in marked_text.chars() {
 11        if markers.contains(&char) {
 12            let char_offsets = extracted_markers.entry(char).or_insert(Vec::new());
 13            char_offsets.push(unmarked_text.len());
 14        } else {
 15            unmarked_text.push(char);
 16        }
 17    }
 18
 19    (unmarked_text, extracted_markers)
 20}
 21
 22pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
 23    let (unmarked_text, mut markers) = marked_text_by(marked_text, vec!['|']);
 24    (unmarked_text, markers.remove(&'|').unwrap_or_default())
 25}
 26
 27#[derive(Clone, Eq, PartialEq, Hash)]
 28pub enum TextRangeMarker {
 29    Empty(char),
 30    Range(char, char),
 31    ReverseRange(char, char),
 32}
 33
 34impl TextRangeMarker {
 35    fn markers(&self) -> Vec<char> {
 36        match self {
 37            Self::Empty(m) => vec![*m],
 38            Self::Range(l, r) => vec![*l, *r],
 39            Self::ReverseRange(l, r) => vec![*l, *r],
 40        }
 41    }
 42}
 43
 44impl From<char> for TextRangeMarker {
 45    fn from(marker: char) -> Self {
 46        Self::Empty(marker)
 47    }
 48}
 49
 50impl From<(char, char)> for TextRangeMarker {
 51    fn from((left_marker, right_marker): (char, char)) -> Self {
 52        Self::Range(left_marker, right_marker)
 53    }
 54}
 55
 56pub fn marked_text_ranges_by(
 57    marked_text: &str,
 58    markers: Vec<TextRangeMarker>,
 59) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
 60    let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
 61
 62    let (unmarked_text, mut marker_offsets) = marked_text_by(marked_text, all_markers);
 63    let range_lookup = markers
 64        .into_iter()
 65        .map(|marker| match marker {
 66            TextRangeMarker::Empty(empty_marker_char) => {
 67                let ranges = marker_offsets
 68                    .remove(&empty_marker_char)
 69                    .unwrap_or_default()
 70                    .into_iter()
 71                    .map(|empty_index| empty_index..empty_index)
 72                    .collect::<Vec<Range<usize>>>();
 73                (marker, ranges)
 74            }
 75            TextRangeMarker::Range(start_marker, end_marker) => {
 76                let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 77                let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 78                assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 79
 80                let ranges = starts
 81                    .into_iter()
 82                    .zip(ends)
 83                    .map(|(start, end)| {
 84                        assert!(end >= start, "marked ranges must be disjoint");
 85                        start..end
 86                    })
 87                    .collect::<Vec<Range<usize>>>();
 88                (marker, ranges)
 89            }
 90            TextRangeMarker::ReverseRange(start_marker, end_marker) => {
 91                let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 92                let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 93                assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 94
 95                let ranges = starts
 96                    .into_iter()
 97                    .zip(ends)
 98                    .map(|(start, end)| {
 99                        assert!(end >= start, "marked ranges must be disjoint");
100                        end..start
101                    })
102                    .collect::<Vec<Range<usize>>>();
103                (marker, ranges)
104            }
105        })
106        .collect();
107
108    (unmarked_text, range_lookup)
109}
110
111// Returns ranges delimited by (), [], and <> ranges. Ranges using the same markers
112// must not be overlapping. May also include | for empty ranges
113pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
114    let (unmarked, range_lookup) = marked_text_ranges_by(
115        &full_marked_text,
116        vec![
117            '|'.into(),
118            ('[', ']').into(),
119            ('(', ')').into(),
120            ('<', '>').into(),
121        ],
122    );
123    let mut combined_ranges: Vec<_> = range_lookup.into_values().flatten().collect();
124
125    combined_ranges.sort_by_key(|range| range.start);
126    (unmarked, combined_ranges)
127}