@@ -61,24 +61,24 @@ impl StringMatch {
let mut positions = self.positions.iter().peekable();
iter::from_fn(move || {
if let Some(start) = positions.next().copied() {
- if start >= self.string.len() {
+ let Some(char_len) = self.char_len_at_index(start) else {
log::error!(
- "Invariant violation: Index {start} out of range in string {:?}",
+ "Invariant violation: Index {start} out of range or not on a utf-8 boundary in string {:?}",
self.string
);
return None;
- }- let mut end = start + self.char_len_at_index(start);
+ };
+ let mut end = start + char_len;
while let Some(next_start) = positions.peek() {
if end == **next_start {
- if end >= self.string.len() {
+ let Some(char_len) = self.char_len_at_index(end) else {
log::error!(
- "Invariant violation: Index {end} out of range in string {:?}",
+ "Invariant violation: Index {end} out of range or not on a utf-8 boundary in string {:?}",
self.string
);
return None;
- }- end += self.char_len_at_index(end);
+ };
+ end += char_len;
positions.next();
} else {
break;
@@ -91,8 +91,12 @@ impl StringMatch {
})
}
- fn char_len_at_index(&self, ix: usize) -> usize {- self.string[ix..].chars().next().unwrap().len_utf8()
+ /// Returns the byte length of the UTF-8 character starting at byte offset `ix`, or `None` if `ix`
+ /// is past the end of the string, exactly at its end (no character), or not on a char boundary.
+ fn char_len_at_index(&self, ix: usize) -> Option<usize> {
+ self.string
+ .get(ix..)
+ .and_then(|slice| slice.chars().next().map(|char| char.len_utf8()))
}
}