marked_text.rs

 1use std::{collections::HashMap, ops::Range};
 2
 3pub fn marked_text_by(
 4    marked_text: &str,
 5    markers: Vec<char>,
 6) -> (String, HashMap<char, Vec<usize>>) {
 7    let mut extracted_markers: HashMap<char, Vec<usize>> = Default::default();
 8    let mut unmarked_text = String::new();
 9
10    for char in marked_text.chars() {
11        if markers.contains(&char) {
12            let char_offsets = extracted_markers.entry(char).or_insert(Vec::new());
13            char_offsets.push(unmarked_text.len());
14        } else {
15            unmarked_text.push(char);
16        }
17    }
18
19    (unmarked_text, extracted_markers)
20}
21
22pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
23    let (unmarked_text, mut markers) = marked_text_by(marked_text, vec!['|']);
24    (unmarked_text, markers.remove(&'|').unwrap_or_default())
25}
26
27pub fn marked_text_ranges_by(
28    marked_text: &str,
29    delimiters: Vec<(char, char)>,
30) -> (String, HashMap<(char, char), Vec<Range<usize>>>) {
31    let all_markers = delimiters
32        .iter()
33        .flat_map(|(start, end)| [*start, *end])
34        .collect();
35    let (unmarked_text, mut markers) = marked_text_by(marked_text, all_markers);
36    let range_lookup = delimiters
37        .into_iter()
38        .map(|(start_marker, end_marker)| {
39            let starts = markers.remove(&start_marker).unwrap_or_default();
40            let ends = markers.remove(&end_marker).unwrap_or_default();
41            assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
42
43            let ranges = starts
44                .into_iter()
45                .zip(ends)
46                .map(|(start, end)| {
47                    assert!(end >= start, "marked ranges must be disjoint");
48                    start..end
49                })
50                .collect::<Vec<Range<usize>>>();
51            ((start_marker, end_marker), ranges)
52        })
53        .collect();
54
55    (unmarked_text, range_lookup)
56}
57
58// Returns ranges delimited by (), [], and <> ranges. Ranges using the same markers
59// must not be overlapping. May also include | for empty ranges
60pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
61    let (range_marked_text, empty_offsets) = marked_text(full_marked_text);
62    let (unmarked, range_lookup) =
63        marked_text_ranges_by(&range_marked_text, vec![('[', ']'), ('(', ')'), ('<', '>')]);
64    let mut combined_ranges: Vec<_> = range_lookup
65        .into_values()
66        .flatten()
67        .chain(empty_offsets.into_iter().map(|offset| offset..offset))
68        .collect();
69
70    combined_ranges.sort_by_key(|range| range.start);
71    (unmarked, combined_ranges)
72}