1use crate::{
2 CsvPreviewView,
3 types::TableLikeContent,
4 types::{LineNumber, TableCell},
5};
6use editor::Editor;
7use gpui::{AppContext, Context, Entity, Subscription, Task};
8use std::time::{Duration, Instant};
9use text::BufferSnapshot;
10use ui::{SharedString, table_row::TableRow};
11
/// Cooldown measured from the end of the previous parse.
///
/// A debounced re-parse that is requested before this much time has elapsed
/// since the last parse finished waits out the remainder before it starts.
pub(crate) const REPARSE_DEBOUNCE: Duration = Duration::from_millis(200);
13
14pub(crate) struct EditorState {
15 pub editor: Entity<Editor>,
16 pub _subscription: Subscription,
17}
18
19impl CsvPreviewView {
20 pub(crate) fn parse_csv_from_active_editor(
21 &mut self,
22 wait_for_debounce: bool,
23 cx: &mut Context<Self>,
24 ) {
25 let editor = self.active_editor_state.editor.clone();
26 self.parsing_task = Some(self.parse_csv_in_background(wait_for_debounce, editor, cx));
27 }
28
29 fn parse_csv_in_background(
30 &mut self,
31 wait_for_debounce: bool,
32 editor: Entity<Editor>,
33 cx: &mut Context<Self>,
34 ) -> Task<anyhow::Result<()>> {
35 cx.spawn(async move |view, cx| {
36 if wait_for_debounce {
37 // Smart debouncing: check if cooldown period has already passed
38 let now = Instant::now();
39 let should_wait = view.update(cx, |view, _| {
40 if let Some(last_end) = view.last_parse_end_time {
41 let cooldown_until = last_end + REPARSE_DEBOUNCE;
42 if now < cooldown_until {
43 Some(cooldown_until - now)
44 } else {
45 None // Cooldown already passed, parse immediately
46 }
47 } else {
48 None // First parse, no debounce
49 }
50 })?;
51
52 if let Some(wait_duration) = should_wait {
53 cx.background_executor().timer(wait_duration).await;
54 }
55 }
56
57 let buffer_snapshot = view.update(cx, |_, cx| {
58 editor
59 .read(cx)
60 .buffer()
61 .read(cx)
62 .as_singleton()
63 .map(|b| b.read(cx).text_snapshot())
64 })?;
65
66 let Some(buffer_snapshot) = buffer_snapshot else {
67 return Ok(());
68 };
69
70 let instant = Instant::now();
71 let parsed_csv = cx
72 .background_spawn(async move { from_buffer(&buffer_snapshot) })
73 .await;
74 let parse_duration = instant.elapsed();
75 let parse_end_time: Instant = Instant::now();
76 log::debug!("Parsed CSV in {}ms", parse_duration.as_millis());
77 view.update(cx, move |view, cx| {
78 view.performance_metrics
79 .timings
80 .insert("Parsing", (parse_duration, Instant::now()));
81
82 log::debug!("Parsed {} rows", parsed_csv.rows.len());
83 // Update table width so it can be rendered properly
84 let cols = parsed_csv.headers.cols();
85 view.column_widths.replace(cx, cols + 1); // Add 1 for the line number column
86
87 view.engine.contents = parsed_csv;
88 view.last_parse_end_time = Some(parse_end_time);
89
90 view.apply_filter_sort();
91 cx.notify();
92 })
93 })
94 }
95}
96
97pub fn from_buffer(buffer_snapshot: &BufferSnapshot) -> TableLikeContent {
98 let text = buffer_snapshot.text();
99
100 if text.trim().is_empty() {
101 return TableLikeContent::default();
102 }
103
104 let (parsed_cells_with_positions, line_numbers) = parse_csv_with_positions(&text);
105 if parsed_cells_with_positions.is_empty() {
106 return TableLikeContent::default();
107 }
108 let raw_headers = parsed_cells_with_positions[0].clone();
109
110 // Calculating the longest row, as CSV might have less headers than max row width
111 let Some(max_number_of_cols) = parsed_cells_with_positions.iter().map(|r| r.len()).max() else {
112 return TableLikeContent::default();
113 };
114
115 // Convert to TableCell objects with buffer positions
116 let headers = create_table_row(&buffer_snapshot, max_number_of_cols, raw_headers);
117
118 let rows = parsed_cells_with_positions
119 .into_iter()
120 .skip(1)
121 .map(|row| create_table_row(&buffer_snapshot, max_number_of_cols, row))
122 .collect();
123
124 let row_line_numbers = line_numbers.into_iter().skip(1).collect();
125
126 TableLikeContent {
127 headers,
128 rows,
129 line_numbers: row_line_numbers,
130 number_of_cols: max_number_of_cols,
131 }
132}
133
134/// Parse CSV and track byte positions for each cell
135fn parse_csv_with_positions(
136 text: &str,
137) -> (
138 Vec<Vec<(SharedString, std::ops::Range<usize>)>>,
139 Vec<LineNumber>,
140) {
141 let mut rows = Vec::new();
142 let mut line_numbers = Vec::new();
143 let mut current_row: Vec<(SharedString, std::ops::Range<usize>)> = Vec::new();
144 let mut current_field = String::new();
145 let mut field_start_offset = 0;
146 let mut current_offset = 0;
147 let mut in_quotes = false;
148 let mut current_line = 1; // 1-based line numbering
149 let mut row_start_line = 1;
150 let mut chars = text.chars().peekable();
151
152 while let Some(ch) = chars.next() {
153 let char_byte_len = ch.len_utf8();
154
155 match ch {
156 '"' => {
157 if in_quotes {
158 if chars.peek() == Some(&'"') {
159 // Escaped quote
160 chars.next();
161 current_field.push('"');
162 current_offset += 1; // Skip the second quote
163 } else {
164 // End of quoted field
165 in_quotes = false;
166 }
167 } else {
168 // Start of quoted field
169 in_quotes = true;
170 if current_field.is_empty() {
171 // Include the opening quote in the range
172 field_start_offset = current_offset;
173 }
174 }
175 }
176 ',' if !in_quotes => {
177 // Field separator
178 let field_end_offset = current_offset;
179 if current_field.is_empty() && !in_quotes {
180 field_start_offset = current_offset;
181 }
182 current_row.push((
183 current_field.clone().into(),
184 field_start_offset..field_end_offset,
185 ));
186 current_field.clear();
187 field_start_offset = current_offset + char_byte_len;
188 }
189 '\n' => {
190 current_line += 1;
191 if !in_quotes {
192 // Row separator (only when not inside quotes)
193 let field_end_offset = current_offset;
194 if current_field.is_empty() && current_row.is_empty() {
195 field_start_offset = 0;
196 }
197 current_row.push((
198 current_field.clone().into(),
199 field_start_offset..field_end_offset,
200 ));
201 current_field.clear();
202
203 // Only add non-empty rows
204 if !current_row.is_empty()
205 && !current_row.iter().all(|(field, _)| field.trim().is_empty())
206 {
207 rows.push(current_row);
208 // Add line number info for this row
209 let line_info = if row_start_line == current_line - 1 {
210 LineNumber::Line(row_start_line)
211 } else {
212 LineNumber::LineRange(row_start_line, current_line - 1)
213 };
214 line_numbers.push(line_info);
215 }
216 current_row = Vec::new();
217 row_start_line = current_line;
218 field_start_offset = current_offset + char_byte_len;
219 } else {
220 // Newline inside quotes - preserve it
221 current_field.push(ch);
222 }
223 }
224 '\r' => {
225 if chars.peek() == Some(&'\n') {
226 // Handle Windows line endings (\r\n): account for \r byte, let \n be handled next
227 current_offset += char_byte_len;
228 continue;
229 } else {
230 // Standalone \r
231 current_line += 1;
232 if !in_quotes {
233 // Row separator (only when not inside quotes)
234 let field_end_offset = current_offset;
235 current_row.push((
236 current_field.clone().into(),
237 field_start_offset..field_end_offset,
238 ));
239 current_field.clear();
240
241 // Only add non-empty rows
242 if !current_row.is_empty()
243 && !current_row.iter().all(|(field, _)| field.trim().is_empty())
244 {
245 rows.push(current_row);
246 // Add line number info for this row
247 let line_info = if row_start_line == current_line - 1 {
248 LineNumber::Line(row_start_line)
249 } else {
250 LineNumber::LineRange(row_start_line, current_line - 1)
251 };
252 line_numbers.push(line_info);
253 }
254 current_row = Vec::new();
255 row_start_line = current_line;
256 field_start_offset = current_offset + char_byte_len;
257 } else {
258 // \r inside quotes - preserve it
259 current_field.push(ch);
260 }
261 }
262 }
263 _ => {
264 if current_field.is_empty() && !in_quotes {
265 field_start_offset = current_offset;
266 }
267 current_field.push(ch);
268 }
269 }
270
271 current_offset += char_byte_len;
272 }
273
274 // Add the last field and row if not empty
275 if !current_field.is_empty() || !current_row.is_empty() {
276 let field_end_offset = current_offset;
277 current_row.push((
278 current_field.clone().into(),
279 field_start_offset..field_end_offset,
280 ));
281 }
282 if !current_row.is_empty() && !current_row.iter().all(|(field, _)| field.trim().is_empty()) {
283 rows.push(current_row);
284 // Add line number info for the last row
285 let line_info = if row_start_line == current_line {
286 LineNumber::Line(row_start_line)
287 } else {
288 LineNumber::LineRange(row_start_line, current_line)
289 };
290 line_numbers.push(line_info);
291 }
292
293 (rows, line_numbers)
294}
295
296fn create_table_row(
297 buffer_snapshot: &BufferSnapshot,
298 max_number_of_cols: usize,
299 row: Vec<(SharedString, std::ops::Range<usize>)>,
300) -> TableRow<TableCell> {
301 let mut raw_row = row
302 .into_iter()
303 .map(|(content, range)| {
304 TableCell::from_buffer_position(content, range.start, range.end, &buffer_snapshot)
305 })
306 .collect::<Vec<_>>();
307
308 let append_elements = max_number_of_cols - raw_row.len();
309 if append_elements > 0 {
310 for _ in 0..append_elements {
311 raw_row.push(TableCell::Virtual);
312 }
313 }
314
315 TableRow::from_vec(raw_row, max_number_of_cols)
316}
317
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `cell` has a display value equal to `expected`.
    #[track_caller]
    fn assert_cell_text(cell: &TableCell, expected: &str) {
        assert_eq!(cell.display_value().unwrap().as_ref(), expected);
    }

    /// Asserts both the content and the byte range of a raw parsed cell.
    #[track_caller]
    fn assert_positioned_cell(
        cell: &(SharedString, std::ops::Range<usize>),
        expected_content: &str,
        expected_range: std::ops::Range<usize>,
    ) {
        assert_eq!(cell.0.as_ref(), expected_content);
        assert_eq!(cell.1, expected_range);
    }

    #[test]
    fn test_csv_parsing_basic() {
        let csv_data = "Name,Age,City\nJohn,30,New York\nJane,25,Los Angeles";
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 3);
        for (index, expected) in ["Name", "Age", "City"].into_iter().enumerate() {
            assert_cell_text(&parsed.headers[index], expected);
        }

        assert_eq!(parsed.rows.len(), 2);
        for (index, expected) in ["John", "30", "New York"].into_iter().enumerate() {
            assert_cell_text(&parsed.rows[0][index], expected);
        }
    }

    #[test]
    fn test_csv_parsing_with_quotes() {
        let csv_data = r#"Name,Description
"John Doe","A person with ""special"" characters"
Jane,"Simple name""#;
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 2);
        assert_eq!(parsed.rows.len(), 2);
        assert_cell_text(
            &parsed.rows[0][1],
            r#"A person with "special" characters"#,
        );
    }

    #[test]
    fn test_csv_parsing_with_newlines_in_quotes() {
        let csv_data = "Name,Description,Status\n\"John\nDoe\",\"A person with\nmultiple lines\",Active\n\"Jane Smith\",\"Simple\",\"Also\nActive\"";
        let parsed = TableLikeContent::from_str(csv_data.to_string());

        assert_eq!(parsed.headers.cols(), 3);
        for (index, expected) in ["Name", "Description", "Status"].into_iter().enumerate() {
            assert_cell_text(&parsed.headers[index], expected);
        }

        assert_eq!(parsed.rows.len(), 2);
        let expected_rows = [
            ["John\nDoe", "A person with\nmultiple lines", "Active"],
            ["Jane Smith", "Simple", "Also\nActive"],
        ];
        for (row_index, expected_row) in expected_rows.into_iter().enumerate() {
            for (col_index, expected) in expected_row.into_iter().enumerate() {
                assert_cell_text(&parsed.rows[row_index][col_index], expected);
            }
        }

        // Check line numbers
        assert_eq!(parsed.line_numbers.len(), 2);
        let LineNumber::LineRange(start, end) = &parsed.line_numbers[0] else {
            panic!("Expected LineRange for multiline row");
        };
        assert_eq!(start, &2);
        assert_eq!(end, &4);

        let LineNumber::LineRange(start, end) = &parsed.line_numbers[1] else {
            panic!("Expected LineRange for second multiline row");
        };
        assert_eq!(start, &5);
        assert_eq!(end, &6);
    }

    #[test]
    fn test_empty_csv() {
        let parsed = TableLikeContent::from_str("".to_string());
        assert_eq!(parsed.headers.cols(), 0);
        assert!(parsed.rows.is_empty());
    }

    #[test]
    fn test_csv_parsing_quote_offset_handling() {
        let csv_data = r#"first,"se,cond",third"#;
        let (parsed_cells, _) = parse_csv_with_positions(csv_data);

        assert_eq!(parsed_cells.len(), 1); // One row
        assert_eq!(parsed_cells[0].len(), 3); // Three cells

        let row = &parsed_cells[0];
        // Unquoted cell: range covers exactly the content.
        assert_positioned_cell(&row[0], "first", 0..5);
        // Quoted cell: range includes the quotes, content does not.
        assert_positioned_cell(&row[1], "se,cond", 6..15);
        // Unquoted cell after a quoted one.
        assert_positioned_cell(&row[2], "third", 16..21);
    }

    #[test]
    fn test_csv_parsing_complex_quotes() {
        let csv_data = r#"id,"name with spaces","description, with commas",status
1,"John Doe","A person with ""quotes"" and, commas",active
2,"Jane Smith","Simple description",inactive"#;
        let (parsed_cells, _) = parse_csv_with_positions(csv_data);

        assert_eq!(parsed_cells.len(), 3); // header + 2 rows

        // Header row: quoted cells keep their quotes inside the range.
        let header_row = &parsed_cells[0];
        assert_eq!(header_row.len(), 4);
        assert_positioned_cell(&header_row[0], "id", 0..2);
        assert_positioned_cell(&header_row[1], "name with spaces", 3..21);
        assert_positioned_cell(&header_row[2], "description, with commas", 22..48);
        assert_positioned_cell(&header_row[3], "status", 49..55);

        // First data row: escaped quotes are unescaped in the content while
        // the range still spans the raw, escaped text including outer quotes.
        let first_row = &parsed_cells[1];
        assert_eq!(first_row.len(), 4);
        assert_positioned_cell(&first_row[0], "1", 56..57);
        assert_positioned_cell(&first_row[1], "John Doe", 58..68);
        assert_positioned_cell(
            &first_row[2],
            r#"A person with "quotes" and, commas"#,
            69..107,
        );
        assert_positioned_cell(&first_row[3], "active", 108..114);
    }
}
502
503impl TableLikeContent {
504 #[cfg(test)]
505 pub fn from_str(text: String) -> Self {
506 use text::{Buffer, BufferId, ReplicaId};
507
508 let buffer_id = BufferId::new(1).unwrap();
509 let buffer = Buffer::new(ReplicaId::LOCAL, buffer_id, text);
510 let snapshot = buffer.snapshot();
511 from_buffer(snapshot)
512 }
513}