create_file_parser.rs

  1use std::sync::OnceLock;
  2
  3use regex::Regex;
  4use smallvec::SmallVec;
  5use util::debug_panic;
  6
/// Lazily compiled regex that locates the opening code fence.
/// Initialized on first use inside `CreateFileParser::push`.
static START_MARKER: OnceLock<Regex> = OnceLock::new();
/// Lazily compiled regex that locates the closing code fence at the end of the buffer.
/// Initialized on first use inside `CreateFileParser::push`.
static END_MARKER: OnceLock<Regex> = OnceLock::new();
  9
/// Events produced by [`CreateFileParser::push`].
#[derive(Debug)]
pub enum CreateFileParserEvent {
    /// A piece of extracted text. Concatenating the `chunk` values of all events,
    /// in order, yields the full parsed output.
    NewTextChunk { chunk: String },
}
 14
/// Incremental parser that extracts the text between an opening and a closing
/// code fence from input that is pushed chunk by chunk.
#[derive(Debug)]
pub struct CreateFileParser {
    /// Current position in the parsing state machine.
    state: ParserState,
    /// Input received so far that has been neither discarded nor emitted yet.
    buffer: String,
}
 20
/// States of the `CreateFileParser` state machine.
#[derive(Debug, PartialEq)]
enum ParserState {
    /// Initial state: no opening fence has been found in the buffer yet.
    Pending,
    /// The opening fence has been consumed; buffered text is being emitted.
    WithinText,
    /// End of input was signalled (`push(None)`); the remaining buffer is being flushed.
    Finishing,
    /// The final flush has happened; pushing further text is reported via `debug_panic!`.
    Finished,
}
 28
 29impl CreateFileParser {
 30    pub const fn new() -> Self {
 31        CreateFileParser {
 32            state: ParserState::Pending,
 33            buffer: String::new(),
 34        }
 35    }
 36
 37    pub fn push(&mut self, chunk: Option<&str>) -> SmallVec<[CreateFileParserEvent; 1]> {
 38        if chunk.is_none() {
 39            self.state = ParserState::Finishing;
 40        }
 41
 42        let chunk = chunk.unwrap_or_default();
 43
 44        self.buffer.push_str(chunk);
 45
 46        let mut edit_events = SmallVec::new();
 47        let start_marker_regex = START_MARKER.get_or_init(|| Regex::new(r"\n?```\S*\n").unwrap());
 48        let end_marker_regex = END_MARKER.get_or_init(|| Regex::new(r"(^|\n)```\s*$").unwrap());
 49        loop {
 50            match &mut self.state {
 51                ParserState::Pending => {
 52                    if let Some(m) = start_marker_regex.find(&self.buffer) {
 53                        self.buffer.drain(..m.end());
 54                        self.state = ParserState::WithinText;
 55                    } else {
 56                        break;
 57                    }
 58                }
 59                ParserState::WithinText => {
 60                    let text = self.buffer.trim_end_matches(&['`', '\n', ' ']);
 61                    let text_len = text.len();
 62
 63                    if text_len > 0 {
 64                        edit_events.push(CreateFileParserEvent::NewTextChunk {
 65                            chunk: self.buffer.drain(..text_len).collect(),
 66                        });
 67                    }
 68                    break;
 69                }
 70                ParserState::Finishing => {
 71                    if let Some(m) = end_marker_regex.find(&self.buffer) {
 72                        self.buffer.drain(m.start()..);
 73                    }
 74                    if !self.buffer.is_empty() {
 75                        if !self.buffer.ends_with('\n') {
 76                            self.buffer.push('\n');
 77                        }
 78                        edit_events.push(CreateFileParserEvent::NewTextChunk {
 79                            chunk: self.buffer.drain(..).collect(),
 80                        });
 81                    }
 82                    self.state = ParserState::Finished;
 83                    break;
 84                }
 85                ParserState::Finished => debug_panic!("Can't call parser after finishing"),
 86            }
 87        }
 88        edit_events
 89    }
 90}
 91
 92#[cfg(test)]
 93mod tests {
 94    use super::*;
 95    use indoc::indoc;
 96    use rand::prelude::*;
 97    use std::cmp;
 98
 99    #[gpui::test(iterations = 100)]
100    fn test_happy_path(mut rng: StdRng) {
101        let mut parser = CreateFileParser::new();
102        assert_eq!(
103            parse_random_chunks("```\nHello world\n```", &mut parser, &mut rng),
104            "Hello world".to_string()
105        );
106    }
107
108    #[gpui::test(iterations = 100)]
109    fn test_cut_prefix(mut rng: StdRng) {
110        let mut parser = CreateFileParser::new();
111        assert_eq!(
112            parse_random_chunks(
113                indoc! {"
114                    Let me write this file for you:
115
116                    ```
117                    Hello world
118                    ```
119
120                "},
121                &mut parser,
122                &mut rng
123            ),
124            "Hello world".to_string()
125        );
126    }
127
128    #[gpui::test(iterations = 100)]
129    fn test_language_name_on_fences(mut rng: StdRng) {
130        let mut parser = CreateFileParser::new();
131        assert_eq!(
132            parse_random_chunks(
133                indoc! {"
134                    ```rust
135                    Hello world
136                    ```
137
138                "},
139                &mut parser,
140                &mut rng
141            ),
142            "Hello world".to_string()
143        );
144    }
145
146    #[gpui::test(iterations = 100)]
147    fn test_leave_suffix(mut rng: StdRng) {
148        let mut parser = CreateFileParser::new();
149        assert_eq!(
150            parse_random_chunks(
151                indoc! {"
152                    Let me write this file for you:
153
154                    ```
155                    Hello world
156                    ```
157
158                    The end
159                "},
160                &mut parser,
161                &mut rng
162            ),
163            // This output is malformed, so we're doing our best effort
164            "Hello world\n```\n\nThe end\n".to_string()
165        );
166    }
167
168    #[gpui::test(iterations = 100)]
169    fn test_inner_fences(mut rng: StdRng) {
170        let mut parser = CreateFileParser::new();
171        assert_eq!(
172            parse_random_chunks(
173                indoc! {"
174                    Let me write this file for you:
175
176                    ```
177                    ```
178                    Hello world
179                    ```
180                    ```
181                "},
182                &mut parser,
183                &mut rng
184            ),
185            // This output is malformed, so we're doing our best effort
186            "```\nHello world\n```\n".to_string()
187        );
188    }
189
190    #[gpui::test(iterations = 10)]
191    fn test_empty_file(mut rng: StdRng) {
192        let mut parser = CreateFileParser::new();
193        assert_eq!(
194            parse_random_chunks(
195                indoc! {"
196                    ```
197                    ```
198                "},
199                &mut parser,
200                &mut rng
201            ),
202            "".to_string()
203        );
204    }
205
206    fn parse_random_chunks(input: &str, parser: &mut CreateFileParser, rng: &mut StdRng) -> String {
207        let chunk_count = rng.random_range(1..=cmp::min(input.len(), 50));
208        let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
209        chunk_indices.sort();
210        chunk_indices.push(input.len());
211
212        let chunk_indices = chunk_indices
213            .into_iter()
214            .map(Some)
215            .chain(vec![None])
216            .collect::<Vec<Option<usize>>>();
217
218        let mut edit = String::default();
219        let mut last_ix = 0;
220        for chunk_ix in chunk_indices {
221            let mut chunk = None;
222            if let Some(chunk_ix) = chunk_ix {
223                chunk = Some(&input[last_ix..chunk_ix]);
224                last_ix = chunk_ix;
225            }
226
227            for event in parser.push(chunk) {
228                match event {
229                    CreateFileParserEvent::NewTextChunk { chunk } => {
230                        edit.push_str(&chunk);
231                    }
232                }
233            }
234        }
235        edit
236    }
237}