marked_text.rs

  1use std::{collections::HashMap, ops::Range};
  2
  3pub fn marked_text_by(
  4    marked_text: &str,
  5    markers: Vec<char>,
  6) -> (String, HashMap<char, Vec<usize>>) {
  7    let mut extracted_markers: HashMap<char, Vec<usize>> = Default::default();
  8    let mut unmarked_text = String::new();
  9
 10    for char in marked_text.chars() {
 11        if markers.contains(&char) {
 12            let char_offsets = extracted_markers.entry(char).or_insert(Vec::new());
 13            char_offsets.push(unmarked_text.len());
 14        } else {
 15            unmarked_text.push(char);
 16        }
 17    }
 18
 19    (unmarked_text, extracted_markers)
 20}
 21
 22pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
 23    let (unmarked_text, mut markers) = marked_text_by(marked_text, vec!['|']);
 24    (unmarked_text, markers.remove(&'|').unwrap_or_default())
 25}
 26
 27#[derive(Eq, PartialEq, Hash)]
 28pub enum TextRangeMarker {
 29    Empty(char),
 30    Range(char, char),
 31}
 32
 33impl TextRangeMarker {
 34    fn markers(&self) -> Vec<char> {
 35        match self {
 36            Self::Empty(m) => vec![*m],
 37            Self::Range(l, r) => vec![*l, *r],
 38        }
 39    }
 40}
 41
 42impl From<char> for TextRangeMarker {
 43    fn from(marker: char) -> Self {
 44        Self::Empty(marker)
 45    }
 46}
 47
 48impl From<(char, char)> for TextRangeMarker {
 49    fn from((left_marker, right_marker): (char, char)) -> Self {
 50        Self::Range(left_marker, right_marker)
 51    }
 52}
 53
 54pub fn marked_text_ranges_by(
 55    marked_text: &str,
 56    markers: Vec<TextRangeMarker>,
 57) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
 58    let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
 59
 60    let (unmarked_text, mut marker_offsets) = marked_text_by(marked_text, all_markers);
 61    let range_lookup = markers
 62        .into_iter()
 63        .map(|marker| match marker {
 64            TextRangeMarker::Empty(empty_marker_char) => {
 65                let ranges = marker_offsets
 66                    .remove(&empty_marker_char)
 67                    .unwrap_or_default()
 68                    .into_iter()
 69                    .map(|empty_index| empty_index..empty_index)
 70                    .collect::<Vec<Range<usize>>>();
 71                (marker, ranges)
 72            }
 73            TextRangeMarker::Range(start_marker, end_marker) => {
 74                let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
 75                let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
 76                assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
 77
 78                let ranges = starts
 79                    .into_iter()
 80                    .zip(ends)
 81                    .map(|(start, end)| {
 82                        assert!(end >= start, "marked ranges must be disjoint");
 83                        start..end
 84                    })
 85                    .collect::<Vec<Range<usize>>>();
 86                (marker, ranges)
 87            }
 88        })
 89        .collect();
 90
 91    (unmarked_text, range_lookup)
 92}
 93
 94// Returns ranges delimited by (), [], and <> ranges. Ranges using the same markers
 95// must not be overlapping. May also include | for empty ranges
 96pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
 97    let (unmarked, range_lookup) = marked_text_ranges_by(
 98        &full_marked_text,
 99        vec![
100            '|'.into(),
101            ('[', ']').into(),
102            ('(', ')').into(),
103            ('<', '>').into(),
104        ],
105    );
106    let mut combined_ranges: Vec<_> = range_lookup.into_values().flatten().collect();
107
108    combined_ranges.sort_by_key(|range| range.start);
109    (unmarked, combined_ranges)
110}