1use crate::{
2 CsvPreviewView,
3 types::TableLikeContent,
4 types::{LineNumber, TableCell},
5};
6use editor::Editor;
7use gpui::{AppContext, Context, Entity, Subscription, Task};
8use std::time::{Duration, Instant};
9use text::BufferSnapshot;
10use ui::{SharedString, table_row::TableRow};
11
/// Cooldown applied between the end of one parse and the start of the next debounced
/// re-parse: `parse_csv_in_background` waits out whatever part of this interval has not
/// yet elapsed since `last_parse_end_time`.
pub(crate) const REPARSE_DEBOUNCE: Duration = Duration::from_millis(200);
13
/// An editor entity bundled with a subscription that is stored next to it.
pub(crate) struct EditorState {
    /// Editor whose singleton buffer is snapshotted and parsed as CSV.
    pub editor: Entity<Editor>,
    /// Stored but not read in the code visible here.
    /// NOTE(review): presumably held so the editor subscription stays alive for as long
    /// as this state exists — confirm at the construction site.
    pub _subscription: Subscription,
}
18
19impl CsvPreviewView {
20 pub(crate) fn parse_csv_from_active_editor(
21 &mut self,
22 wait_for_debounce: bool,
23 cx: &mut Context<Self>,
24 ) {
25 let editor = self.active_editor_state.editor.clone();
26 self.parsing_task = Some(self.parse_csv_in_background(wait_for_debounce, editor, cx));
27 }
28
29 fn parse_csv_in_background(
30 &mut self,
31 wait_for_debounce: bool,
32 editor: Entity<Editor>,
33 cx: &mut Context<Self>,
34 ) -> Task<anyhow::Result<()>> {
35 cx.spawn(async move |view, cx| {
36 if wait_for_debounce {
37 // Smart debouncing: check if cooldown period has already passed
38 let now = Instant::now();
39 let should_wait = view.update(cx, |view, _| {
40 if let Some(last_end) = view.last_parse_end_time {
41 let cooldown_until = last_end + REPARSE_DEBOUNCE;
42 if now < cooldown_until {
43 Some(cooldown_until - now)
44 } else {
45 None // Cooldown already passed, parse immediately
46 }
47 } else {
48 None // First parse, no debounce
49 }
50 })?;
51
52 if let Some(wait_duration) = should_wait {
53 cx.background_executor().timer(wait_duration).await;
54 }
55 }
56
57 let buffer_snapshot = view.update(cx, |_, cx| {
58 editor
59 .read(cx)
60 .buffer()
61 .read(cx)
62 .as_singleton()
63 .map(|b| b.read(cx).text_snapshot())
64 })?;
65
66 let Some(buffer_snapshot) = buffer_snapshot else {
67 return Ok(());
68 };
69
70 let instant = Instant::now();
71 let parsed_csv = cx
72 .background_spawn(async move { from_buffer(&buffer_snapshot) })
73 .await;
74 let parse_duration = instant.elapsed();
75 let parse_end_time: Instant = Instant::now();
76 log::debug!("Parsed CSV in {}ms", parse_duration.as_millis());
77 view.update(cx, move |view, cx| {
78 view.performance_metrics
79 .timings
80 .insert("Parsing", (parse_duration, Instant::now()));
81
82 log::debug!("Parsed {} rows", parsed_csv.rows.len());
83 view.engine.contents = parsed_csv;
84 view.sync_column_widths(cx);
85 view.last_parse_end_time = Some(parse_end_time);
86
87 view.apply_filter_sort();
88 cx.notify();
89 })
90 })
91 }
92}
93
94pub fn from_buffer(buffer_snapshot: &BufferSnapshot) -> TableLikeContent {
95 let text = buffer_snapshot.text();
96
97 if text.trim().is_empty() {
98 return TableLikeContent::default();
99 }
100
101 let (parsed_cells_with_positions, line_numbers) = parse_csv_with_positions(&text);
102 if parsed_cells_with_positions.is_empty() {
103 return TableLikeContent::default();
104 }
105 let raw_headers = parsed_cells_with_positions[0].clone();
106
107 // Calculating the longest row, as CSV might have less headers than max row width
108 let Some(max_number_of_cols) = parsed_cells_with_positions.iter().map(|r| r.len()).max() else {
109 return TableLikeContent::default();
110 };
111
112 // Convert to TableCell objects with buffer positions
113 let headers = create_table_row(&buffer_snapshot, max_number_of_cols, raw_headers);
114
115 let rows = parsed_cells_with_positions
116 .into_iter()
117 .skip(1)
118 .map(|row| create_table_row(&buffer_snapshot, max_number_of_cols, row))
119 .collect();
120
121 let row_line_numbers = line_numbers.into_iter().skip(1).collect();
122
123 TableLikeContent {
124 headers,
125 rows,
126 line_numbers: row_line_numbers,
127 number_of_cols: max_number_of_cols,
128 }
129}
130
131/// Parse CSV and track byte positions for each cell
132fn parse_csv_with_positions(
133 text: &str,
134) -> (
135 Vec<Vec<(SharedString, std::ops::Range<usize>)>>,
136 Vec<LineNumber>,
137) {
138 let mut rows = Vec::new();
139 let mut line_numbers = Vec::new();
140 let mut current_row: Vec<(SharedString, std::ops::Range<usize>)> = Vec::new();
141 let mut current_field = String::new();
142 let mut field_start_offset = 0;
143 let mut current_offset = 0;
144 let mut in_quotes = false;
145 let mut current_line = 1; // 1-based line numbering
146 let mut row_start_line = 1;
147 let mut chars = text.chars().peekable();
148
149 while let Some(ch) = chars.next() {
150 let char_byte_len = ch.len_utf8();
151
152 match ch {
153 '"' => {
154 if in_quotes {
155 if chars.peek() == Some(&'"') {
156 // Escaped quote
157 chars.next();
158 current_field.push('"');
159 current_offset += 1; // Skip the second quote
160 } else {
161 // End of quoted field
162 in_quotes = false;
163 }
164 } else {
165 // Start of quoted field
166 in_quotes = true;
167 if current_field.is_empty() {
168 // Include the opening quote in the range
169 field_start_offset = current_offset;
170 }
171 }
172 }
173 ',' if !in_quotes => {
174 // Field separator
175 let field_end_offset = current_offset;
176 if current_field.is_empty() && !in_quotes {
177 field_start_offset = current_offset;
178 }
179 current_row.push((
180 current_field.clone().into(),
181 field_start_offset..field_end_offset,
182 ));
183 current_field.clear();
184 field_start_offset = current_offset + char_byte_len;
185 }
186 '\n' => {
187 current_line += 1;
188 if !in_quotes {
189 // Row separator (only when not inside quotes)
190 let field_end_offset = current_offset;
191 if current_field.is_empty() && current_row.is_empty() {
192 field_start_offset = 0;
193 }
194 current_row.push((
195 current_field.clone().into(),
196 field_start_offset..field_end_offset,
197 ));
198 current_field.clear();
199
200 // Only add non-empty rows
201 if !current_row.is_empty()
202 && !current_row.iter().all(|(field, _)| field.trim().is_empty())
203 {
204 rows.push(current_row);
205 // Add line number info for this row
206 let line_info = if row_start_line == current_line - 1 {
207 LineNumber::Line(row_start_line)
208 } else {
209 LineNumber::LineRange(row_start_line, current_line - 1)
210 };
211 line_numbers.push(line_info);
212 }
213 current_row = Vec::new();
214 row_start_line = current_line;
215 field_start_offset = current_offset + char_byte_len;
216 } else {
217 // Newline inside quotes - preserve it
218 current_field.push(ch);
219 }
220 }
221 '\r' => {
222 if chars.peek() == Some(&'\n') {
223 // Handle Windows line endings (\r\n): account for \r byte, let \n be handled next
224 current_offset += char_byte_len;
225 continue;
226 } else {
227 // Standalone \r
228 current_line += 1;
229 if !in_quotes {
230 // Row separator (only when not inside quotes)
231 let field_end_offset = current_offset;
232 current_row.push((
233 current_field.clone().into(),
234 field_start_offset..field_end_offset,
235 ));
236 current_field.clear();
237
238 // Only add non-empty rows
239 if !current_row.is_empty()
240 && !current_row.iter().all(|(field, _)| field.trim().is_empty())
241 {
242 rows.push(current_row);
243 // Add line number info for this row
244 let line_info = if row_start_line == current_line - 1 {
245 LineNumber::Line(row_start_line)
246 } else {
247 LineNumber::LineRange(row_start_line, current_line - 1)
248 };
249 line_numbers.push(line_info);
250 }
251 current_row = Vec::new();
252 row_start_line = current_line;
253 field_start_offset = current_offset + char_byte_len;
254 } else {
255 // \r inside quotes - preserve it
256 current_field.push(ch);
257 }
258 }
259 }
260 _ => {
261 if current_field.is_empty() && !in_quotes {
262 field_start_offset = current_offset;
263 }
264 current_field.push(ch);
265 }
266 }
267
268 current_offset += char_byte_len;
269 }
270
271 // Add the last field and row if not empty
272 if !current_field.is_empty() || !current_row.is_empty() {
273 let field_end_offset = current_offset;
274 current_row.push((
275 current_field.clone().into(),
276 field_start_offset..field_end_offset,
277 ));
278 }
279 if !current_row.is_empty() && !current_row.iter().all(|(field, _)| field.trim().is_empty()) {
280 rows.push(current_row);
281 // Add line number info for the last row
282 let line_info = if row_start_line == current_line {
283 LineNumber::Line(row_start_line)
284 } else {
285 LineNumber::LineRange(row_start_line, current_line)
286 };
287 line_numbers.push(line_info);
288 }
289
290 (rows, line_numbers)
291}
292
293fn create_table_row(
294 buffer_snapshot: &BufferSnapshot,
295 max_number_of_cols: usize,
296 row: Vec<(SharedString, std::ops::Range<usize>)>,
297) -> TableRow<TableCell> {
298 let mut raw_row = row
299 .into_iter()
300 .map(|(content, range)| {
301 TableCell::from_buffer_position(content, range.start, range.end, &buffer_snapshot)
302 })
303 .collect::<Vec<_>>();
304
305 let append_elements = max_number_of_cols - raw_row.len();
306 if append_elements > 0 {
307 for _ in 0..append_elements {
308 raw_row.push(TableCell::Virtual);
309 }
310 }
311
312 TableRow::from_vec(raw_row, max_number_of_cols)
313}
314
#[cfg(test)]
mod tests {
    use super::*;

    // Asserts that a table cell displays exactly the expected text.
    macro_rules! assert_cell {
        ($cell:expr, $expected:expr) => {
            assert_eq!($cell.display_value().unwrap().as_ref(), $expected)
        };
    }

    // Asserts both the content and the byte range of a raw `(content, range)` cell.
    macro_rules! assert_positioned {
        ($cell:expr, $content:expr, $range:expr) => {{
            let (content, range) = &$cell;
            assert_eq!(content.as_ref(), $content);
            assert_eq!(*range, $range);
        }};
    }

    /// Plain, unquoted CSV: header row plus two data rows.
    #[test]
    fn test_csv_parsing_basic() {
        let csv_data = "Name,Age,City\nJohn,30,New York\nJane,25,Los Angeles";
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 3);
        assert_cell!(parsed.headers[0], "Name");
        assert_cell!(parsed.headers[1], "Age");
        assert_cell!(parsed.headers[2], "City");

        assert_eq!(parsed.rows.len(), 2);
        assert_cell!(parsed.rows[0][0], "John");
        assert_cell!(parsed.rows[0][1], "30");
        assert_cell!(parsed.rows[0][2], "New York");
    }

    /// Quoted fields are unwrapped and doubled quotes collapse to a single quote.
    #[test]
    fn test_csv_parsing_with_quotes() {
        let csv_data = r#"Name,Description
"John Doe","A person with ""special"" characters"
Jane,"Simple name""#;
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 2);
        assert_eq!(parsed.rows.len(), 2);
        assert_cell!(parsed.rows[0][1], r#"A person with "special" characters"#);
    }

    /// Newlines inside quotes stay part of the cell and widen the row's line range.
    #[test]
    fn test_csv_parsing_with_newlines_in_quotes() {
        let csv_data = "Name,Description,Status\n\"John\nDoe\",\"A person with\nmultiple lines\",Active\n\"Jane Smith\",\"Simple\",\"Also\nActive\"";
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 3);
        assert_cell!(parsed.headers[0], "Name");
        assert_cell!(parsed.headers[1], "Description");
        assert_cell!(parsed.headers[2], "Status");

        assert_eq!(parsed.rows.len(), 2);
        assert_cell!(parsed.rows[0][0], "John\nDoe");
        assert_cell!(parsed.rows[0][1], "A person with\nmultiple lines");
        assert_cell!(parsed.rows[0][2], "Active");

        assert_cell!(parsed.rows[1][0], "Jane Smith");
        assert_cell!(parsed.rows[1][1], "Simple");
        assert_cell!(parsed.rows[1][2], "Also\nActive");

        // Check line numbers
        assert_eq!(parsed.line_numbers.len(), 2);

        let LineNumber::LineRange(start, end) = &parsed.line_numbers[0] else {
            panic!("Expected LineRange for multiline row");
        };
        assert_eq!(start, &2);
        assert_eq!(end, &4);

        let LineNumber::LineRange(start, end) = &parsed.line_numbers[1] else {
            panic!("Expected LineRange for second multiline row");
        };
        assert_eq!(start, &5);
        assert_eq!(end, &6);
    }

    /// Empty input yields an empty table.
    #[test]
    fn test_empty_csv() {
        let parsed = TableLikeContent::from_str("".to_string());
        assert_eq!(parsed.headers.cols(), 0);
        assert!(parsed.rows.is_empty());
    }

    /// Byte ranges include the enclosing quotes while the content does not.
    #[test]
    fn test_csv_parsing_quote_offset_handling() {
        let csv_data = r#"first,"se,cond",third"#;
        let (parsed_cells, _) = parse_csv_with_positions(csv_data);

        assert_eq!(parsed_cells.len(), 1); // One row
        assert_eq!(parsed_cells[0].len(), 3); // Three cells

        // first: 0..5 (no quotes)
        assert_positioned!(parsed_cells[0][0], "first", 0..5);
        // "se,cond": 6..15 (includes quotes in range, content without quotes)
        assert_positioned!(parsed_cells[0][1], "se,cond", 6..15);
        // third: 16..21 (no quotes)
        assert_positioned!(parsed_cells[0][2], "third", 16..21);
    }

    /// Mixed quoted/unquoted fields across several rows keep exact byte ranges.
    #[test]
    fn test_csv_parsing_complex_quotes() {
        let csv_data = r#"id,"name with spaces","description, with commas",status
1,"John Doe","A person with ""quotes"" and, commas",active
2,"Jane Smith","Simple description",inactive"#;
        let (parsed_cells, _) = parse_csv_with_positions(csv_data);

        assert_eq!(parsed_cells.len(), 3); // header + 2 rows

        // Check header row
        let header_row = &parsed_cells[0];
        assert_eq!(header_row.len(), 4);

        // id: 0..2
        assert_positioned!(header_row[0], "id", 0..2);
        // "name with spaces": 3..21 (includes quotes)
        assert_positioned!(header_row[1], "name with spaces", 3..21);
        // "description, with commas": 22..48 (includes quotes)
        assert_positioned!(header_row[2], "description, with commas", 22..48);
        // status: 49..55
        assert_positioned!(header_row[3], "status", 49..55);

        // Check first data row
        let first_row = &parsed_cells[1];
        assert_eq!(first_row.len(), 4);

        // 1: 56..57
        assert_positioned!(first_row[0], "1", 56..57);
        // "John Doe": 58..68 (includes quotes)
        assert_positioned!(first_row[1], "John Doe", 58..68);
        // Content is stripped of the outer quotes but keeps the escaped ones;
        // the range includes the outer quotes: 69..107
        assert_positioned!(
            first_row[2],
            r#"A person with "quotes" and, commas"#,
            69..107
        );
        // active: 108..114
        assert_positioned!(first_row[3], "active", 108..114);
    }
}
499
500impl TableLikeContent {
501 #[cfg(test)]
502 pub fn from_str(text: String) -> Self {
503 use text::{Buffer, BufferId, ReplicaId};
504
505 let buffer_id = BufferId::new(1).unwrap();
506 let buffer = Buffer::new(ReplicaId::LOCAL, buffer_id, text);
507 let snapshot = buffer.snapshot();
508 from_buffer(snapshot)
509 }
510}