1use std::{collections::HashMap, ops::Range};
2
/// Strips every character listed in `markers` out of `marked_text` and records
/// where each one occurred.
///
/// Returns the text with the marker characters removed, together with a map from
/// each marker character that was actually encountered to the positions at which
/// it appeared. A position is the byte length of the unmarked text accumulated
/// before the marker, i.e. a byte offset into the returned string. Marker
/// characters that never occur in the input get no entry in the map.
pub fn marked_text_by(
    marked_text: &str,
    markers: Vec<char>,
) -> (String, HashMap<char, Vec<usize>>) {
    let mut offsets_by_marker = HashMap::new();
    let mut plain_text = String::new();

    for ch in marked_text.chars() {
        // Ordinary characters are copied through untouched.
        if !markers.contains(&ch) {
            plain_text.push(ch);
            continue;
        }

        // A marker contributes no text; remember the offset it sat at instead.
        offsets_by_marker
            .entry(ch)
            .or_insert_with(Vec::new)
            .push(plain_text.len());
    }

    (plain_text, offsets_by_marker)
}
21
22pub fn marked_text(marked_text: &str) -> (String, Vec<usize>) {
23 let (unmarked_text, mut markers) = marked_text_by(marked_text, vec!['|']);
24 (unmarked_text, markers.remove(&'|').unwrap_or_default())
25}
26
/// Describes how a range is marked inside annotated text.
///
/// `Debug` and `Clone` are derived so that markers, and the maps keyed by them,
/// can be printed in assertion failures and reused after being passed by value.
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub enum TextRangeMarker {
    /// A single marker character denoting an empty range at its position.
    Empty(char),
    /// A pair of marker characters: the first opens a range, the second closes it.
    Range(char, char),
}
32
33impl TextRangeMarker {
34 fn markers(&self) -> Vec<char> {
35 match self {
36 Self::Empty(m) => vec![*m],
37 Self::Range(l, r) => vec![*l, *r],
38 }
39 }
40}
41
42impl From<char> for TextRangeMarker {
43 fn from(marker: char) -> Self {
44 Self::Empty(marker)
45 }
46}
47
48impl From<(char, char)> for TextRangeMarker {
49 fn from((left_marker, right_marker): (char, char)) -> Self {
50 Self::Range(left_marker, right_marker)
51 }
52}
53
54pub fn marked_text_ranges_by(
55 marked_text: &str,
56 markers: Vec<TextRangeMarker>,
57) -> (String, HashMap<TextRangeMarker, Vec<Range<usize>>>) {
58 let all_markers = markers.iter().flat_map(|m| m.markers()).collect();
59
60 let (unmarked_text, mut marker_offsets) = marked_text_by(marked_text, all_markers);
61 let range_lookup = markers
62 .into_iter()
63 .map(|marker| match marker {
64 TextRangeMarker::Empty(empty_marker_char) => {
65 let ranges = marker_offsets
66 .remove(&empty_marker_char)
67 .unwrap_or_default()
68 .into_iter()
69 .map(|empty_index| empty_index..empty_index)
70 .collect::<Vec<Range<usize>>>();
71 (marker, ranges)
72 }
73 TextRangeMarker::Range(start_marker, end_marker) => {
74 let starts = marker_offsets.remove(&start_marker).unwrap_or_default();
75 let ends = marker_offsets.remove(&end_marker).unwrap_or_default();
76 assert_eq!(starts.len(), ends.len(), "marked ranges are unbalanced");
77
78 let ranges = starts
79 .into_iter()
80 .zip(ends)
81 .map(|(start, end)| {
82 assert!(end >= start, "marked ranges must be disjoint");
83 start..end
84 })
85 .collect::<Vec<Range<usize>>>();
86 (marker, ranges)
87 }
88 })
89 .collect();
90
91 (unmarked_text, range_lookup)
92}
93
94// Returns ranges delimited by (), [], and <> ranges. Ranges using the same markers
95// must not be overlapping. May also include | for empty ranges
96pub fn marked_text_ranges(full_marked_text: &str) -> (String, Vec<Range<usize>>) {
97 let (unmarked, range_lookup) = marked_text_ranges_by(
98 &full_marked_text,
99 vec![
100 '|'.into(),
101 ('[', ']').into(),
102 ('(', ')').into(),
103 ('<', '>').into(),
104 ],
105 );
106 let mut combined_ranges: Vec<_> = range_lookup.into_values().flatten().collect();
107
108 combined_ranges.sort_by_key(|range| range.start);
109 (unmarked, combined_ranges)
110}