create_file_parser.rs

  1use regex::Regex;
  2use smallvec::SmallVec;
  3use std::cell::LazyCell;
  4use util::debug_panic;
  5
  6const START_MARKER: LazyCell<Regex> = LazyCell::new(|| Regex::new(r"\n?```\S*\n").unwrap());
  7const END_MARKER: LazyCell<Regex> = LazyCell::new(|| Regex::new(r"(^|\n)```\s*$").unwrap());
  8
/// Events produced by [`CreateFileParser::push`].
#[derive(Debug)]
pub enum CreateFileParserEvent {
    /// A piece of file content extracted from the streamed input.
    /// Concatenating the `chunk`s of all events, in order, yields the parsed text.
    NewTextChunk { chunk: String },
}
 13
/// Incremental parser that extracts the contents of a fenced (```) code block
/// from input that arrives in arbitrary chunks.
#[derive(Debug)]
pub struct CreateFileParser {
    // Current phase of the parse; see `ParserState`.
    state: ParserState,
    // Input received so far that has been neither emitted nor discarded.
    buffer: String,
}
 19
/// Phases the parser moves through while consuming input.
#[derive(Debug, PartialEq)]
enum ParserState {
    // The opening fence has not been found yet; input is only buffered.
    Pending,
    // The opening fence was consumed; buffered text is emitted as it arrives.
    WithinText,
    // End of input was signalled (`push(None)`); the remaining buffer is flushed.
    Finishing,
    // The final flush is done; the parser must not be fed any more chunks.
    Finished,
}
 27
 28impl CreateFileParser {
 29    pub fn new() -> Self {
 30        CreateFileParser {
 31            state: ParserState::Pending,
 32            buffer: String::new(),
 33        }
 34    }
 35
 36    pub fn push(&mut self, chunk: Option<&str>) -> SmallVec<[CreateFileParserEvent; 1]> {
 37        if chunk.is_none() {
 38            self.state = ParserState::Finishing;
 39        }
 40
 41        let chunk = chunk.unwrap_or_default();
 42
 43        self.buffer.push_str(chunk);
 44
 45        let mut edit_events = SmallVec::new();
 46        loop {
 47            match &mut self.state {
 48                ParserState::Pending => {
 49                    if let Some(m) = START_MARKER.find(&self.buffer) {
 50                        self.buffer.drain(..m.end());
 51                        self.state = ParserState::WithinText;
 52                    } else {
 53                        break;
 54                    }
 55                }
 56                ParserState::WithinText => {
 57                    let text = self.buffer.trim_end_matches(&['`', '\n', ' ']);
 58                    let text_len = text.len();
 59
 60                    if text_len > 0 {
 61                        edit_events.push(CreateFileParserEvent::NewTextChunk {
 62                            chunk: self.buffer.drain(..text_len).collect(),
 63                        });
 64                    }
 65                    break;
 66                }
 67                ParserState::Finishing => {
 68                    if let Some(m) = END_MARKER.find(&self.buffer) {
 69                        self.buffer.drain(m.start()..);
 70                    }
 71                    if !self.buffer.is_empty() {
 72                        if !self.buffer.ends_with('\n') {
 73                            self.buffer.push('\n');
 74                        }
 75                        edit_events.push(CreateFileParserEvent::NewTextChunk {
 76                            chunk: self.buffer.drain(..).collect(),
 77                        });
 78                    }
 79                    self.state = ParserState::Finished;
 80                    break;
 81                }
 82                ParserState::Finished => debug_panic!("Can't call parser after finishing"),
 83            }
 84        }
 85        edit_events
 86    }
 87}
 88
 89#[cfg(test)]
 90mod tests {
 91    use super::*;
 92    use indoc::indoc;
 93    use rand::prelude::*;
 94    use std::cmp;
 95
 96    #[gpui::test(iterations = 100)]
 97    fn test_happy_path(mut rng: StdRng) {
 98        let mut parser = CreateFileParser::new();
 99        assert_eq!(
100            parse_random_chunks("```\nHello world\n```", &mut parser, &mut rng),
101            "Hello world".to_string()
102        );
103    }
104
105    #[gpui::test(iterations = 100)]
106    fn test_cut_prefix(mut rng: StdRng) {
107        let mut parser = CreateFileParser::new();
108        assert_eq!(
109            parse_random_chunks(
110                indoc! {"
111                    Let me write this file for you:
112
113                    ```
114                    Hello world
115                    ```
116
117                "},
118                &mut parser,
119                &mut rng
120            ),
121            "Hello world".to_string()
122        );
123    }
124
125    #[gpui::test(iterations = 100)]
126    fn test_language_name_on_fences(mut rng: StdRng) {
127        let mut parser = CreateFileParser::new();
128        assert_eq!(
129            parse_random_chunks(
130                indoc! {"
131                    ```rust
132                    Hello world
133                    ```
134
135                "},
136                &mut parser,
137                &mut rng
138            ),
139            "Hello world".to_string()
140        );
141    }
142
143    #[gpui::test(iterations = 100)]
144    fn test_leave_suffix(mut rng: StdRng) {
145        let mut parser = CreateFileParser::new();
146        assert_eq!(
147            parse_random_chunks(
148                indoc! {"
149                    Let me write this file for you:
150
151                    ```
152                    Hello world
153                    ```
154
155                    The end
156                "},
157                &mut parser,
158                &mut rng
159            ),
160            // This output is marlformed, so we're doing our best effort
161            "Hello world\n```\n\nThe end\n".to_string()
162        );
163    }
164
165    #[gpui::test(iterations = 100)]
166    fn test_inner_fences(mut rng: StdRng) {
167        let mut parser = CreateFileParser::new();
168        assert_eq!(
169            parse_random_chunks(
170                indoc! {"
171                    Let me write this file for you:
172
173                    ```
174                    ```
175                    Hello world
176                    ```
177                    ```
178                "},
179                &mut parser,
180                &mut rng
181            ),
182            // This output is marlformed, so we're doing our best effort
183            "```\nHello world\n```\n".to_string()
184        );
185    }
186
187    #[gpui::test(iterations = 10)]
188    fn test_empty_file(mut rng: StdRng) {
189        let mut parser = CreateFileParser::new();
190        assert_eq!(
191            parse_random_chunks(
192                indoc! {"
193                    ```
194                    ```
195                "},
196                &mut parser,
197                &mut rng
198            ),
199            "".to_string()
200        );
201    }
202
203    fn parse_random_chunks(input: &str, parser: &mut CreateFileParser, rng: &mut StdRng) -> String {
204        let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50));
205        let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
206        chunk_indices.sort();
207        chunk_indices.push(input.len());
208
209        let chunk_indices = chunk_indices
210            .into_iter()
211            .map(Some)
212            .chain(vec![None])
213            .collect::<Vec<Option<usize>>>();
214
215        let mut edit = String::default();
216        let mut last_ix = 0;
217        for chunk_ix in chunk_indices {
218            let mut chunk = None;
219            if let Some(chunk_ix) = chunk_ix {
220                chunk = Some(&input[last_ix..chunk_ix]);
221                last_ix = chunk_ix;
222            }
223
224            for event in parser.push(chunk) {
225                match event {
226                    CreateFileParserEvent::NewTextChunk { chunk } => {
227                        edit.push_str(&chunk);
228                    }
229                }
230            }
231        }
232        edit
233    }
234}