parser.rs

  1use crate::{
  2    CsvPreviewView,
  3    types::TableLikeContent,
  4    types::{LineNumber, TableCell},
  5};
  6use editor::Editor;
  7use gpui::{AppContext, Context, Entity, Subscription, Task};
  8use std::time::{Duration, Instant};
  9use text::BufferSnapshot;
 10use ui::{SharedString, table_row::TableRow};
 11
/// Cooldown that follows the end of a parse: a debounced re-parse is delayed
/// until this much time has passed since the previous parse finished.
pub(crate) const REPARSE_DEBOUNCE: Duration = Duration::from_millis(200);
 13
/// An editor entity bundled with a subscription.
pub(crate) struct EditorState {
    /// Editor entity; `CsvPreviewView::parse_csv_from_active_editor` clones an
    /// `editor` field from `active_editor_state` (presumably this one) to read
    /// the buffer to parse.
    pub editor: Entity<Editor>,
    /// NOTE(review): never read in this file; presumably held only so the
    /// subscription stays alive as long as this state does — confirm.
    pub _subscription: Subscription,
}
 18
 19impl CsvPreviewView {
 20    pub(crate) fn parse_csv_from_active_editor(
 21        &mut self,
 22        wait_for_debounce: bool,
 23        cx: &mut Context<Self>,
 24    ) {
 25        let editor = self.active_editor_state.editor.clone();
 26        self.parsing_task = Some(self.parse_csv_in_background(wait_for_debounce, editor, cx));
 27    }
 28
 29    fn parse_csv_in_background(
 30        &mut self,
 31        wait_for_debounce: bool,
 32        editor: Entity<Editor>,
 33        cx: &mut Context<Self>,
 34    ) -> Task<anyhow::Result<()>> {
 35        cx.spawn(async move |view, cx| {
 36            if wait_for_debounce {
 37                // Smart debouncing: check if cooldown period has already passed
 38                let now = Instant::now();
 39                let should_wait = view.update(cx, |view, _| {
 40                    if let Some(last_end) = view.last_parse_end_time {
 41                        let cooldown_until = last_end + REPARSE_DEBOUNCE;
 42                        if now < cooldown_until {
 43                            Some(cooldown_until - now)
 44                        } else {
 45                            None // Cooldown already passed, parse immediately
 46                        }
 47                    } else {
 48                        None // First parse, no debounce
 49                    }
 50                })?;
 51
 52                if let Some(wait_duration) = should_wait {
 53                    cx.background_executor().timer(wait_duration).await;
 54                }
 55            }
 56
 57            let buffer_snapshot = view.update(cx, |_, cx| {
 58                editor
 59                    .read(cx)
 60                    .buffer()
 61                    .read(cx)
 62                    .as_singleton()
 63                    .map(|b| b.read(cx).text_snapshot())
 64            })?;
 65
 66            let Some(buffer_snapshot) = buffer_snapshot else {
 67                return Ok(());
 68            };
 69
 70            let instant = Instant::now();
 71            let parsed_csv = cx
 72                .background_spawn(async move { from_buffer(&buffer_snapshot) })
 73                .await;
 74            let parse_duration = instant.elapsed();
 75            let parse_end_time: Instant = Instant::now();
 76            log::debug!("Parsed CSV in {}ms", parse_duration.as_millis());
 77            view.update(cx, move |view, cx| {
 78                view.performance_metrics
 79                    .timings
 80                    .insert("Parsing", (parse_duration, Instant::now()));
 81
 82                log::debug!("Parsed {} rows", parsed_csv.rows.len());
 83                view.engine.contents = parsed_csv;
 84                view.sync_column_widths(cx);
 85                view.last_parse_end_time = Some(parse_end_time);
 86
 87                view.apply_filter_sort();
 88                cx.notify();
 89            })
 90        })
 91    }
 92}
 93
 94pub fn from_buffer(buffer_snapshot: &BufferSnapshot) -> TableLikeContent {
 95    let text = buffer_snapshot.text();
 96
 97    if text.trim().is_empty() {
 98        return TableLikeContent::default();
 99    }
100
101    let (parsed_cells_with_positions, line_numbers) = parse_csv_with_positions(&text);
102    if parsed_cells_with_positions.is_empty() {
103        return TableLikeContent::default();
104    }
105    let raw_headers = parsed_cells_with_positions[0].clone();
106
107    // Calculating the longest row, as CSV might have less headers than max row width
108    let Some(max_number_of_cols) = parsed_cells_with_positions.iter().map(|r| r.len()).max() else {
109        return TableLikeContent::default();
110    };
111
112    // Convert to TableCell objects with buffer positions
113    let headers = create_table_row(&buffer_snapshot, max_number_of_cols, raw_headers);
114
115    let rows = parsed_cells_with_positions
116        .into_iter()
117        .skip(1)
118        .map(|row| create_table_row(&buffer_snapshot, max_number_of_cols, row))
119        .collect();
120
121    let row_line_numbers = line_numbers.into_iter().skip(1).collect();
122
123    TableLikeContent {
124        headers,
125        rows,
126        line_numbers: row_line_numbers,
127        number_of_cols: max_number_of_cols,
128    }
129}
130
131/// Parse CSV and track byte positions for each cell
132fn parse_csv_with_positions(
133    text: &str,
134) -> (
135    Vec<Vec<(SharedString, std::ops::Range<usize>)>>,
136    Vec<LineNumber>,
137) {
138    let mut rows = Vec::new();
139    let mut line_numbers = Vec::new();
140    let mut current_row: Vec<(SharedString, std::ops::Range<usize>)> = Vec::new();
141    let mut current_field = String::new();
142    let mut field_start_offset = 0;
143    let mut current_offset = 0;
144    let mut in_quotes = false;
145    let mut current_line = 1; // 1-based line numbering
146    let mut row_start_line = 1;
147    let mut chars = text.chars().peekable();
148
149    while let Some(ch) = chars.next() {
150        let char_byte_len = ch.len_utf8();
151
152        match ch {
153            '"' => {
154                if in_quotes {
155                    if chars.peek() == Some(&'"') {
156                        // Escaped quote
157                        chars.next();
158                        current_field.push('"');
159                        current_offset += 1; // Skip the second quote
160                    } else {
161                        // End of quoted field
162                        in_quotes = false;
163                    }
164                } else {
165                    // Start of quoted field
166                    in_quotes = true;
167                    if current_field.is_empty() {
168                        // Include the opening quote in the range
169                        field_start_offset = current_offset;
170                    }
171                }
172            }
173            ',' if !in_quotes => {
174                // Field separator
175                let field_end_offset = current_offset;
176                if current_field.is_empty() && !in_quotes {
177                    field_start_offset = current_offset;
178                }
179                current_row.push((
180                    current_field.clone().into(),
181                    field_start_offset..field_end_offset,
182                ));
183                current_field.clear();
184                field_start_offset = current_offset + char_byte_len;
185            }
186            '\n' => {
187                current_line += 1;
188                if !in_quotes {
189                    // Row separator (only when not inside quotes)
190                    let field_end_offset = current_offset;
191                    if current_field.is_empty() && current_row.is_empty() {
192                        field_start_offset = 0;
193                    }
194                    current_row.push((
195                        current_field.clone().into(),
196                        field_start_offset..field_end_offset,
197                    ));
198                    current_field.clear();
199
200                    // Only add non-empty rows
201                    if !current_row.is_empty()
202                        && !current_row.iter().all(|(field, _)| field.trim().is_empty())
203                    {
204                        rows.push(current_row);
205                        // Add line number info for this row
206                        let line_info = if row_start_line == current_line - 1 {
207                            LineNumber::Line(row_start_line)
208                        } else {
209                            LineNumber::LineRange(row_start_line, current_line - 1)
210                        };
211                        line_numbers.push(line_info);
212                    }
213                    current_row = Vec::new();
214                    row_start_line = current_line;
215                    field_start_offset = current_offset + char_byte_len;
216                } else {
217                    // Newline inside quotes - preserve it
218                    current_field.push(ch);
219                }
220            }
221            '\r' => {
222                if chars.peek() == Some(&'\n') {
223                    // Handle Windows line endings (\r\n): account for \r byte, let \n be handled next
224                    current_offset += char_byte_len;
225                    continue;
226                } else {
227                    // Standalone \r
228                    current_line += 1;
229                    if !in_quotes {
230                        // Row separator (only when not inside quotes)
231                        let field_end_offset = current_offset;
232                        current_row.push((
233                            current_field.clone().into(),
234                            field_start_offset..field_end_offset,
235                        ));
236                        current_field.clear();
237
238                        // Only add non-empty rows
239                        if !current_row.is_empty()
240                            && !current_row.iter().all(|(field, _)| field.trim().is_empty())
241                        {
242                            rows.push(current_row);
243                            // Add line number info for this row
244                            let line_info = if row_start_line == current_line - 1 {
245                                LineNumber::Line(row_start_line)
246                            } else {
247                                LineNumber::LineRange(row_start_line, current_line - 1)
248                            };
249                            line_numbers.push(line_info);
250                        }
251                        current_row = Vec::new();
252                        row_start_line = current_line;
253                        field_start_offset = current_offset + char_byte_len;
254                    } else {
255                        // \r inside quotes - preserve it
256                        current_field.push(ch);
257                    }
258                }
259            }
260            _ => {
261                if current_field.is_empty() && !in_quotes {
262                    field_start_offset = current_offset;
263                }
264                current_field.push(ch);
265            }
266        }
267
268        current_offset += char_byte_len;
269    }
270
271    // Add the last field and row if not empty
272    if !current_field.is_empty() || !current_row.is_empty() {
273        let field_end_offset = current_offset;
274        current_row.push((
275            current_field.clone().into(),
276            field_start_offset..field_end_offset,
277        ));
278    }
279    if !current_row.is_empty() && !current_row.iter().all(|(field, _)| field.trim().is_empty()) {
280        rows.push(current_row);
281        // Add line number info for the last row
282        let line_info = if row_start_line == current_line {
283            LineNumber::Line(row_start_line)
284        } else {
285            LineNumber::LineRange(row_start_line, current_line)
286        };
287        line_numbers.push(line_info);
288    }
289
290    (rows, line_numbers)
291}
292
293fn create_table_row(
294    buffer_snapshot: &BufferSnapshot,
295    max_number_of_cols: usize,
296    row: Vec<(SharedString, std::ops::Range<usize>)>,
297) -> TableRow<TableCell> {
298    let mut raw_row = row
299        .into_iter()
300        .map(|(content, range)| {
301            TableCell::from_buffer_position(content, range.start, range.end, &buffer_snapshot)
302        })
303        .collect::<Vec<_>>();
304
305    let append_elements = max_number_of_cols - raw_row.len();
306    if append_elements > 0 {
307        for _ in 0..append_elements {
308            raw_row.push(TableCell::Virtual);
309        }
310    }
311
312    TableRow::from_vec(raw_row, max_number_of_cols)
313}
314
// Tests for the CSV parser. Those that go through `TableLikeContent::from_str`
// exercise the whole `from_buffer` pipeline; those that call
// `parse_csv_with_positions` directly check the raw cell contents and byte ranges.
#[cfg(test)]
mod tests {
    use super::*;

    /// Plain comma-separated input: the first row becomes the headers and the
    /// remaining rows become data rows.
    #[test]
    fn test_csv_parsing_basic() {
        let csv_data = "Name,Age,City\nJohn,30,New York\nJane,25,Los Angeles";
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 3);
        assert_eq!(parsed.headers[0].display_value().unwrap().as_ref(), "Name");
        assert_eq!(parsed.headers[1].display_value().unwrap().as_ref(), "Age");
        assert_eq!(parsed.headers[2].display_value().unwrap().as_ref(), "City");

        assert_eq!(parsed.rows.len(), 2);
        assert_eq!(parsed.rows[0][0].display_value().unwrap().as_ref(), "John");
        assert_eq!(parsed.rows[0][1].display_value().unwrap().as_ref(), "30");
        assert_eq!(
            parsed.rows[0][2].display_value().unwrap().as_ref(),
            "New York"
        );
    }

    /// Quoted cells: surrounding quotes are stripped and a doubled quote (`""`)
    /// inside a quoted cell is read as one literal quote.
    #[test]
    fn test_csv_parsing_with_quotes() {
        let csv_data = r#"Name,Description
"John Doe","A person with ""special"" characters"
Jane,"Simple name""#;
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 2);
        assert_eq!(parsed.rows.len(), 2);
        assert_eq!(
            parsed.rows[0][1].display_value().unwrap().as_ref(),
            r#"A person with "special" characters"#
        );
    }

    /// Newlines inside quoted cells stay in the cell content instead of ending
    /// the row, and such rows report a `LineRange` spanning all their lines.
    #[test]
    fn test_csv_parsing_with_newlines_in_quotes() {
        let csv_data = "Name,Description,Status\n\"John\nDoe\",\"A person with\nmultiple lines\",Active\n\"Jane Smith\",\"Simple\",\"Also\nActive\"";
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 3);
        assert_eq!(parsed.headers[0].display_value().unwrap().as_ref(), "Name");
        assert_eq!(
            parsed.headers[1].display_value().unwrap().as_ref(),
            "Description"
        );
        assert_eq!(
            parsed.headers[2].display_value().unwrap().as_ref(),
            "Status"
        );

        assert_eq!(parsed.rows.len(), 2);
        assert_eq!(
            parsed.rows[0][0].display_value().unwrap().as_ref(),
            "John\nDoe"
        );
        assert_eq!(
            parsed.rows[0][1].display_value().unwrap().as_ref(),
            "A person with\nmultiple lines"
        );
        assert_eq!(
            parsed.rows[0][2].display_value().unwrap().as_ref(),
            "Active"
        );

        assert_eq!(
            parsed.rows[1][0].display_value().unwrap().as_ref(),
            "Jane Smith"
        );
        assert_eq!(
            parsed.rows[1][1].display_value().unwrap().as_ref(),
            "Simple"
        );
        assert_eq!(
            parsed.rows[1][2].display_value().unwrap().as_ref(),
            "Also\nActive"
        );

        // Check line numbers
        // The header occupies line 1 and is not part of `line_numbers`; the
        // first data row covers lines 2-4 and the second covers lines 5-6.
        assert_eq!(parsed.line_numbers.len(), 2);
        match &parsed.line_numbers[0] {
            LineNumber::LineRange(start, end) => {
                assert_eq!(start, &2);
                assert_eq!(end, &4);
            }
            _ => panic!("Expected LineRange for multiline row"),
        }
        match &parsed.line_numbers[1] {
            LineNumber::LineRange(start, end) => {
                assert_eq!(start, &5);
                assert_eq!(end, &6);
            }
            _ => panic!("Expected LineRange for second multiline row"),
        }
    }

    /// Empty input produces a table without columns or rows.
    #[test]
    fn test_empty_csv() {
        let parsed = TableLikeContent::from_str("".to_string());
        assert_eq!(parsed.headers.cols(), 0);
        assert!(parsed.rows.is_empty());
    }

    /// Byte ranges on a single row: a quoted cell's range includes its quotes
    /// while its content does not, and a comma inside quotes does not split
    /// the cell.
    #[test]
    fn test_csv_parsing_quote_offset_handling() {
        let csv_data = r#"first,"se,cond",third"#;
        let (parsed_cells, _) = parse_csv_with_positions(csv_data);

        assert_eq!(parsed_cells.len(), 1); // One row
        assert_eq!(parsed_cells[0].len(), 3); // Three cells

        // first: 0..5 (no quotes)
        let (content1, range1) = &parsed_cells[0][0];
        assert_eq!(content1.as_ref(), "first");
        assert_eq!(*range1, 0..5);

        // "se,cond": 6..15 (includes quotes in range, content without quotes)
        let (content2, range2) = &parsed_cells[0][1];
        assert_eq!(content2.as_ref(), "se,cond");
        assert_eq!(*range2, 6..15);

        // third: 16..21 (no quotes)
        let (content3, range3) = &parsed_cells[0][2];
        assert_eq!(content3.as_ref(), "third");
        assert_eq!(*range3, 16..21);
    }

    /// Byte ranges across several rows mixing quoted cells, embedded commas and
    /// escaped quotes; offsets keep counting across the row-ending newlines.
    #[test]
    fn test_csv_parsing_complex_quotes() {
        let csv_data = r#"id,"name with spaces","description, with commas",status
1,"John Doe","A person with ""quotes"" and, commas",active
2,"Jane Smith","Simple description",inactive"#;
        let (parsed_cells, _) = parse_csv_with_positions(csv_data);

        assert_eq!(parsed_cells.len(), 3); // header + 2 rows

        // Check header row
        let header_row = &parsed_cells[0];
        assert_eq!(header_row.len(), 4);

        // id: 0..2
        assert_eq!(header_row[0].0.as_ref(), "id");
        assert_eq!(header_row[0].1, 0..2);

        // "name with spaces": 3..21 (includes quotes)
        assert_eq!(header_row[1].0.as_ref(), "name with spaces");
        assert_eq!(header_row[1].1, 3..21);

        // "description, with commas": 22..48 (includes quotes)
        assert_eq!(header_row[2].0.as_ref(), "description, with commas");
        assert_eq!(header_row[2].1, 22..48);

        // status: 49..55
        assert_eq!(header_row[3].0.as_ref(), "status");
        assert_eq!(header_row[3].1, 49..55);

        // Check first data row
        let first_row = &parsed_cells[1];
        assert_eq!(first_row.len(), 4);

        // 1: 56..57
        assert_eq!(first_row[0].0.as_ref(), "1");
        assert_eq!(first_row[0].1, 56..57);

        // "John Doe": 58..68 (includes quotes)
        assert_eq!(first_row[1].0.as_ref(), "John Doe");
        assert_eq!(first_row[1].1, 58..68);

        // Content should be stripped of quotes but include escaped quotes
        assert_eq!(
            first_row[2].0.as_ref(),
            r#"A person with "quotes" and, commas"#
        );
        // The range should include the outer quotes: 69..107
        assert_eq!(first_row[2].1, 69..107);

        // active: 108..114
        assert_eq!(first_row[3].0.as_ref(), "active");
        assert_eq!(first_row[3].1, 108..114);
    }
}
499
500impl TableLikeContent {
501    #[cfg(test)]
502    pub fn from_str(text: String) -> Self {
503        use text::{Buffer, BufferId, ReplicaId};
504
505        let buffer_id = BufferId::new(1).unwrap();
506        let buffer = Buffer::new(ReplicaId::LOCAL, buffer_id, text);
507        let snapshot = buffer.snapshot();
508        from_buffer(snapshot)
509    }
510}