parser.rs

  1use crate::{
  2    CsvPreviewView,
  3    types::TableLikeContent,
  4    types::{LineNumber, TableCell},
  5};
  6use editor::Editor;
  7use gpui::{AppContext, Context, Entity, Subscription, Task};
  8use std::time::{Duration, Instant};
  9use text::BufferSnapshot;
 10use ui::{SharedString, table_row::TableRow};
 11
/// Cooldown that follows the end of a parse: a debounced re-parse is delayed until
/// this much time has elapsed since the previous parse finished.
pub(crate) const REPARSE_DEBOUNCE: Duration = Duration::from_millis(200);
 13
/// Pairs an editor entity with a subscription handle.
pub(crate) struct EditorState {
    /// Editor entity; the parsing code in this file reads CSV text from the
    /// `editor` field of `CsvPreviewView::active_editor_state`.
    // NOTE(review): `active_editor_state` is presumably an `EditorState` — confirm.
    pub editor: Entity<Editor>,
    /// Subscription handle stored next to the editor.
    // NOTE(review): presumably held only so the subscription stays alive for as long
    // as this state exists; it is never read in this file — confirm against the code
    // that constructs `EditorState`.
    pub _subscription: Subscription,
}
 18
 19impl CsvPreviewView {
 20    pub(crate) fn parse_csv_from_active_editor(
 21        &mut self,
 22        wait_for_debounce: bool,
 23        cx: &mut Context<Self>,
 24    ) {
 25        let editor = self.active_editor_state.editor.clone();
 26        self.parsing_task = Some(self.parse_csv_in_background(wait_for_debounce, editor, cx));
 27    }
 28
 29    fn parse_csv_in_background(
 30        &mut self,
 31        wait_for_debounce: bool,
 32        editor: Entity<Editor>,
 33        cx: &mut Context<Self>,
 34    ) -> Task<anyhow::Result<()>> {
 35        cx.spawn(async move |view, cx| {
 36            if wait_for_debounce {
 37                // Smart debouncing: check if cooldown period has already passed
 38                let now = Instant::now();
 39                let should_wait = view.update(cx, |view, _| {
 40                    if let Some(last_end) = view.last_parse_end_time {
 41                        let cooldown_until = last_end + REPARSE_DEBOUNCE;
 42                        if now < cooldown_until {
 43                            Some(cooldown_until - now)
 44                        } else {
 45                            None // Cooldown already passed, parse immediately
 46                        }
 47                    } else {
 48                        None // First parse, no debounce
 49                    }
 50                })?;
 51
 52                if let Some(wait_duration) = should_wait {
 53                    cx.background_executor().timer(wait_duration).await;
 54                }
 55            }
 56
 57            let buffer_snapshot = view.update(cx, |_, cx| {
 58                editor
 59                    .read(cx)
 60                    .buffer()
 61                    .read(cx)
 62                    .as_singleton()
 63                    .map(|b| b.read(cx).text_snapshot())
 64            })?;
 65
 66            let Some(buffer_snapshot) = buffer_snapshot else {
 67                return Ok(());
 68            };
 69
 70            let instant = Instant::now();
 71            let parsed_csv = cx
 72                .background_spawn(async move { from_buffer(&buffer_snapshot) })
 73                .await;
 74            let parse_duration = instant.elapsed();
 75            let parse_end_time: Instant = Instant::now();
 76            log::debug!("Parsed CSV in {}ms", parse_duration.as_millis());
 77            view.update(cx, move |view, cx| {
 78                view.performance_metrics
 79                    .timings
 80                    .insert("Parsing", (parse_duration, Instant::now()));
 81
 82                log::debug!("Parsed {} rows", parsed_csv.rows.len());
 83                // Update table width so it can be rendered properly
 84                let cols = parsed_csv.headers.cols();
 85                view.column_widths.replace(cx, cols + 1); // Add 1 for the line number column
 86
 87                view.engine.contents = parsed_csv;
 88                view.last_parse_end_time = Some(parse_end_time);
 89
 90                view.apply_filter_sort();
 91                cx.notify();
 92            })
 93        })
 94    }
 95}
 96
 97pub fn from_buffer(buffer_snapshot: &BufferSnapshot) -> TableLikeContent {
 98    let text = buffer_snapshot.text();
 99
100    if text.trim().is_empty() {
101        return TableLikeContent::default();
102    }
103
104    let (parsed_cells_with_positions, line_numbers) = parse_csv_with_positions(&text);
105    if parsed_cells_with_positions.is_empty() {
106        return TableLikeContent::default();
107    }
108    let raw_headers = parsed_cells_with_positions[0].clone();
109
110    // Calculating the longest row, as CSV might have less headers than max row width
111    let Some(max_number_of_cols) = parsed_cells_with_positions.iter().map(|r| r.len()).max() else {
112        return TableLikeContent::default();
113    };
114
115    // Convert to TableCell objects with buffer positions
116    let headers = create_table_row(&buffer_snapshot, max_number_of_cols, raw_headers);
117
118    let rows = parsed_cells_with_positions
119        .into_iter()
120        .skip(1)
121        .map(|row| create_table_row(&buffer_snapshot, max_number_of_cols, row))
122        .collect();
123
124    let row_line_numbers = line_numbers.into_iter().skip(1).collect();
125
126    TableLikeContent {
127        headers,
128        rows,
129        line_numbers: row_line_numbers,
130        number_of_cols: max_number_of_cols,
131    }
132}
133
134/// Parse CSV and track byte positions for each cell
135fn parse_csv_with_positions(
136    text: &str,
137) -> (
138    Vec<Vec<(SharedString, std::ops::Range<usize>)>>,
139    Vec<LineNumber>,
140) {
141    let mut rows = Vec::new();
142    let mut line_numbers = Vec::new();
143    let mut current_row: Vec<(SharedString, std::ops::Range<usize>)> = Vec::new();
144    let mut current_field = String::new();
145    let mut field_start_offset = 0;
146    let mut current_offset = 0;
147    let mut in_quotes = false;
148    let mut current_line = 1; // 1-based line numbering
149    let mut row_start_line = 1;
150    let mut chars = text.chars().peekable();
151
152    while let Some(ch) = chars.next() {
153        let char_byte_len = ch.len_utf8();
154
155        match ch {
156            '"' => {
157                if in_quotes {
158                    if chars.peek() == Some(&'"') {
159                        // Escaped quote
160                        chars.next();
161                        current_field.push('"');
162                        current_offset += 1; // Skip the second quote
163                    } else {
164                        // End of quoted field
165                        in_quotes = false;
166                    }
167                } else {
168                    // Start of quoted field
169                    in_quotes = true;
170                    if current_field.is_empty() {
171                        // Include the opening quote in the range
172                        field_start_offset = current_offset;
173                    }
174                }
175            }
176            ',' if !in_quotes => {
177                // Field separator
178                let field_end_offset = current_offset;
179                if current_field.is_empty() && !in_quotes {
180                    field_start_offset = current_offset;
181                }
182                current_row.push((
183                    current_field.clone().into(),
184                    field_start_offset..field_end_offset,
185                ));
186                current_field.clear();
187                field_start_offset = current_offset + char_byte_len;
188            }
189            '\n' => {
190                current_line += 1;
191                if !in_quotes {
192                    // Row separator (only when not inside quotes)
193                    let field_end_offset = current_offset;
194                    if current_field.is_empty() && current_row.is_empty() {
195                        field_start_offset = 0;
196                    }
197                    current_row.push((
198                        current_field.clone().into(),
199                        field_start_offset..field_end_offset,
200                    ));
201                    current_field.clear();
202
203                    // Only add non-empty rows
204                    if !current_row.is_empty()
205                        && !current_row.iter().all(|(field, _)| field.trim().is_empty())
206                    {
207                        rows.push(current_row);
208                        // Add line number info for this row
209                        let line_info = if row_start_line == current_line - 1 {
210                            LineNumber::Line(row_start_line)
211                        } else {
212                            LineNumber::LineRange(row_start_line, current_line - 1)
213                        };
214                        line_numbers.push(line_info);
215                    }
216                    current_row = Vec::new();
217                    row_start_line = current_line;
218                    field_start_offset = current_offset + char_byte_len;
219                } else {
220                    // Newline inside quotes - preserve it
221                    current_field.push(ch);
222                }
223            }
224            '\r' => {
225                if chars.peek() == Some(&'\n') {
226                    // Handle Windows line endings (\r\n): account for \r byte, let \n be handled next
227                    current_offset += char_byte_len;
228                    continue;
229                } else {
230                    // Standalone \r
231                    current_line += 1;
232                    if !in_quotes {
233                        // Row separator (only when not inside quotes)
234                        let field_end_offset = current_offset;
235                        current_row.push((
236                            current_field.clone().into(),
237                            field_start_offset..field_end_offset,
238                        ));
239                        current_field.clear();
240
241                        // Only add non-empty rows
242                        if !current_row.is_empty()
243                            && !current_row.iter().all(|(field, _)| field.trim().is_empty())
244                        {
245                            rows.push(current_row);
246                            // Add line number info for this row
247                            let line_info = if row_start_line == current_line - 1 {
248                                LineNumber::Line(row_start_line)
249                            } else {
250                                LineNumber::LineRange(row_start_line, current_line - 1)
251                            };
252                            line_numbers.push(line_info);
253                        }
254                        current_row = Vec::new();
255                        row_start_line = current_line;
256                        field_start_offset = current_offset + char_byte_len;
257                    } else {
258                        // \r inside quotes - preserve it
259                        current_field.push(ch);
260                    }
261                }
262            }
263            _ => {
264                if current_field.is_empty() && !in_quotes {
265                    field_start_offset = current_offset;
266                }
267                current_field.push(ch);
268            }
269        }
270
271        current_offset += char_byte_len;
272    }
273
274    // Add the last field and row if not empty
275    if !current_field.is_empty() || !current_row.is_empty() {
276        let field_end_offset = current_offset;
277        current_row.push((
278            current_field.clone().into(),
279            field_start_offset..field_end_offset,
280        ));
281    }
282    if !current_row.is_empty() && !current_row.iter().all(|(field, _)| field.trim().is_empty()) {
283        rows.push(current_row);
284        // Add line number info for the last row
285        let line_info = if row_start_line == current_line {
286            LineNumber::Line(row_start_line)
287        } else {
288            LineNumber::LineRange(row_start_line, current_line)
289        };
290        line_numbers.push(line_info);
291    }
292
293    (rows, line_numbers)
294}
295
296fn create_table_row(
297    buffer_snapshot: &BufferSnapshot,
298    max_number_of_cols: usize,
299    row: Vec<(SharedString, std::ops::Range<usize>)>,
300) -> TableRow<TableCell> {
301    let mut raw_row = row
302        .into_iter()
303        .map(|(content, range)| {
304            TableCell::from_buffer_position(content, range.start, range.end, &buffer_snapshot)
305        })
306        .collect::<Vec<_>>();
307
308    let append_elements = max_number_of_cols - raw_row.len();
309    if append_elements > 0 {
310        for _ in 0..append_elements {
311            raw_row.push(TableCell::Virtual);
312        }
313    }
314
315    TableRow::from_vec(raw_row, max_number_of_cols)
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that the text displayed by a table cell equals `$expected`.
    macro_rules! assert_cell_text {
        ($cell:expr, $expected:expr) => {
            assert_eq!($cell.display_value().unwrap().as_ref(), $expected)
        };
    }

    /// Plain unquoted input: the first row becomes the header, the rest data rows.
    #[test]
    fn test_csv_parsing_basic() {
        let csv_data = "Name,Age,City\nJohn,30,New York\nJane,25,Los Angeles";
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 3);
        assert_cell_text!(parsed.headers[0], "Name");
        assert_cell_text!(parsed.headers[1], "Age");
        assert_cell_text!(parsed.headers[2], "City");

        assert_eq!(parsed.rows.len(), 2);
        assert_cell_text!(parsed.rows[0][0], "John");
        assert_cell_text!(parsed.rows[0][1], "30");
        assert_cell_text!(parsed.rows[0][2], "New York");
    }

    /// Doubled quotes inside a quoted field are unescaped to a single quote.
    #[test]
    fn test_csv_parsing_with_quotes() {
        let csv_data = r#"Name,Description
"John Doe","A person with ""special"" characters"
Jane,"Simple name""#;
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 2);
        assert_eq!(parsed.rows.len(), 2);
        assert_cell_text!(
            parsed.rows[0][1],
            r#"A person with "special" characters"#
        );
    }

    /// Newlines inside quotes stay in the cell and widen the row's line range.
    #[test]
    fn test_csv_parsing_with_newlines_in_quotes() {
        let csv_data = "Name,Description,Status\n\"John\nDoe\",\"A person with\nmultiple lines\",Active\n\"Jane Smith\",\"Simple\",\"Also\nActive\"";
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 3);
        assert_cell_text!(parsed.headers[0], "Name");
        assert_cell_text!(parsed.headers[1], "Description");
        assert_cell_text!(parsed.headers[2], "Status");

        assert_eq!(parsed.rows.len(), 2);
        assert_cell_text!(parsed.rows[0][0], "John\nDoe");
        assert_cell_text!(parsed.rows[0][1], "A person with\nmultiple lines");
        assert_cell_text!(parsed.rows[0][2], "Active");

        assert_cell_text!(parsed.rows[1][0], "Jane Smith");
        assert_cell_text!(parsed.rows[1][1], "Simple");
        assert_cell_text!(parsed.rows[1][2], "Also\nActive");

        // Check line numbers
        assert_eq!(parsed.line_numbers.len(), 2);

        let LineNumber::LineRange(start, end) = &parsed.line_numbers[0] else {
            panic!("Expected LineRange for multiline row")
        };
        assert_eq!((start, end), (&2, &4));

        let LineNumber::LineRange(start, end) = &parsed.line_numbers[1] else {
            panic!("Expected LineRange for second multiline row")
        };
        assert_eq!((start, end), (&5, &6));
    }

    /// Empty input produces an empty table.
    #[test]
    fn test_empty_csv() {
        let parsed = TableLikeContent::from_str("".to_string());
        assert_eq!(parsed.headers.cols(), 0);
        assert!(parsed.rows.is_empty());
    }

    /// Ranges of quoted cells include the quotes while the content does not.
    #[test]
    fn test_csv_parsing_quote_offset_handling() {
        let csv_data = r#"first,"se,cond",third"#;
        let (parsed_cells, _) = parse_csv_with_positions(csv_data);

        assert_eq!(parsed_cells.len(), 1); // One row
        assert_eq!(parsed_cells[0].len(), 3); // Three cells

        let expected_cells = [
            // no quotes
            ("first", 0..5),
            // range includes the quotes, content is without them
            ("se,cond", 6..15),
            // no quotes
            ("third", 16..21),
        ];
        for ((content, range), (expected_content, expected_range)) in
            parsed_cells[0].iter().zip(expected_cells)
        {
            assert_eq!(content.as_ref(), expected_content);
            assert_eq!(*range, expected_range);
        }
    }

    /// Offsets stay correct across rows that mix quoted, escaped and plain cells.
    #[test]
    fn test_csv_parsing_complex_quotes() {
        let csv_data = r#"id,"name with spaces","description, with commas",status
1,"John Doe","A person with ""quotes"" and, commas",active
2,"Jane Smith","Simple description",inactive"#;
        let (parsed_cells, _) = parse_csv_with_positions(csv_data);

        assert_eq!(parsed_cells.len(), 3); // header + 2 rows

        // Check header row; ranges of quoted cells include the quotes
        let header_row = &parsed_cells[0];
        assert_eq!(header_row.len(), 4);
        let expected_header = [
            ("id", 0..2),
            ("name with spaces", 3..21),
            ("description, with commas", 22..48),
            ("status", 49..55),
        ];
        for ((content, range), (expected_content, expected_range)) in
            header_row.iter().zip(expected_header)
        {
            assert_eq!(content.as_ref(), expected_content);
            assert_eq!(*range, expected_range);
        }

        // Check first data row; content is stripped of the outer quotes but keeps
        // the unescaped inner quotes, and the range still covers the outer quotes
        let first_row = &parsed_cells[1];
        assert_eq!(first_row.len(), 4);
        let expected_first_row = [
            ("1", 56..57),
            ("John Doe", 58..68),
            (r#"A person with "quotes" and, commas"#, 69..107),
            ("active", 108..114),
        ];
        for ((content, range), (expected_content, expected_range)) in
            first_row.iter().zip(expected_first_row)
        {
            assert_eq!(content.as_ref(), expected_content);
            assert_eq!(*range, expected_range);
        }
    }
}
502
503impl TableLikeContent {
504    #[cfg(test)]
505    pub fn from_str(text: String) -> Self {
506        use text::{Buffer, BufferId, ReplicaId};
507
508        let buffer_id = BufferId::new(1).unwrap();
509        let buffer = Buffer::new(ReplicaId::LOCAL, buffer_id, text);
510        let snapshot = buffer.snapshot();
511        from_buffer(snapshot)
512    }
513}