cursor_excerpt.rs

  1use language::{BufferSnapshot, Point};
  2use std::ops::Range;
  3use text::OffsetRangeExt as _;
  4use zeta_prompt::ExcerptRanges;
  5
  6/// Computes all range variants for a cursor position: editable ranges at 150, 180, and 350
  7/// token budgets, plus their corresponding context expansions. Returns the full excerpt range
  8/// (union of all context ranges) and the individual sub-ranges as Points.
  9pub fn compute_excerpt_ranges(
 10    position: Point,
 11    snapshot: &BufferSnapshot,
 12) -> (Range<Point>, Range<usize>, ExcerptRanges) {
 13    let editable_150 = compute_editable_range(snapshot, position, 150);
 14    let editable_180 = compute_editable_range(snapshot, position, 180);
 15    let editable_350 = compute_editable_range(snapshot, position, 350);
 16    let full_512 = compute_editable_range(snapshot, position, 512);
 17
 18    let editable_150_context_350 =
 19        expand_context_syntactically_then_linewise(snapshot, editable_150.clone(), 350);
 20    let editable_180_context_350 =
 21        expand_context_syntactically_then_linewise(snapshot, editable_180.clone(), 350);
 22    let editable_350_context_150 =
 23        expand_context_syntactically_then_linewise(snapshot, editable_350.clone(), 150);
 24
 25    let full_start_row = full_512
 26        .start
 27        .row
 28        .min(editable_150_context_350.start.row)
 29        .min(editable_180_context_350.start.row)
 30        .min(editable_350_context_150.start.row);
 31    let full_end_row = full_512
 32        .end
 33        .row
 34        .max(editable_150_context_350.end.row)
 35        .max(editable_180_context_350.end.row)
 36        .max(editable_350_context_150.end.row);
 37
 38    let full_context =
 39        Point::new(full_start_row, 0)..Point::new(full_end_row, snapshot.line_len(full_end_row));
 40
 41    let full_context_offset_range = full_context.to_offset(snapshot);
 42
 43    let to_offset = |range: &Range<Point>| -> Range<usize> {
 44        let start = range.start.to_offset(snapshot);
 45        let end = range.end.to_offset(snapshot);
 46        (start - full_context_offset_range.start)..(end - full_context_offset_range.start)
 47    };
 48
 49    let ranges = ExcerptRanges {
 50        editable_150: to_offset(&editable_150),
 51        editable_180: to_offset(&editable_180),
 52        editable_350: to_offset(&editable_350),
 53        editable_150_context_350: to_offset(&editable_150_context_350),
 54        editable_180_context_350: to_offset(&editable_180_context_350),
 55        editable_350_context_150: to_offset(&editable_350_context_150),
 56    };
 57
 58    (full_context, full_context_offset_range, ranges)
 59}
 60
 61pub fn editable_and_context_ranges_for_cursor_position(
 62    position: Point,
 63    snapshot: &BufferSnapshot,
 64    editable_region_token_limit: usize,
 65    context_token_limit: usize,
 66) -> (Range<Point>, Range<Point>) {
 67    let editable_range = compute_editable_range(snapshot, position, editable_region_token_limit);
 68
 69    let context_range = expand_context_syntactically_then_linewise(
 70        snapshot,
 71        editable_range.clone(),
 72        context_token_limit,
 73    );
 74
 75    (editable_range, context_range)
 76}
 77
 78/// Computes the editable range using a three-phase approach:
 79/// 1. Expand symmetrically from cursor (75% of budget)
 80/// 2. Expand to syntax boundaries
 81/// 3. Continue line-wise in the least-expanded direction
 82fn compute_editable_range(
 83    snapshot: &BufferSnapshot,
 84    cursor: Point,
 85    token_limit: usize,
 86) -> Range<Point> {
 87    // Phase 1: Expand symmetrically from cursor using 75% of budget.
 88    let initial_budget = (token_limit * 3) / 4;
 89    let (mut start_row, mut end_row, mut remaining_tokens) =
 90        expand_symmetric_from_cursor(snapshot, cursor.row, initial_budget);
 91
 92    // Add remaining budget from phase 1.
 93    remaining_tokens += token_limit.saturating_sub(initial_budget);
 94
 95    let original_start = start_row;
 96    let original_end = end_row;
 97
 98    // Phase 2: Expand to syntax boundaries that fit within budget.
 99    for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
100    {
101        let tokens_for_start = if boundary_start < start_row {
102            estimate_tokens_for_rows(snapshot, boundary_start, start_row)
103        } else {
104            0
105        };
106        let tokens_for_end = if boundary_end > end_row {
107            estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
108        } else {
109            0
110        };
111
112        let total_needed = tokens_for_start + tokens_for_end;
113
114        if total_needed <= remaining_tokens {
115            if boundary_start < start_row {
116                start_row = boundary_start;
117            }
118            if boundary_end > end_row {
119                end_row = boundary_end;
120            }
121            remaining_tokens = remaining_tokens.saturating_sub(total_needed);
122        } else {
123            break;
124        }
125    }
126
127    // Phase 3: Continue line-wise in the direction we expanded least during syntax phase.
128    let expanded_up = original_start.saturating_sub(start_row);
129    let expanded_down = end_row.saturating_sub(original_end);
130
131    (start_row, end_row, _) = expand_linewise_biased(
132        snapshot,
133        start_row,
134        end_row,
135        remaining_tokens,
136        expanded_up <= expanded_down, // prefer_up if we expanded less upward
137    );
138
139    let start = Point::new(start_row, 0);
140    let end = Point::new(end_row, snapshot.line_len(end_row));
141    start..end
142}
143
144/// Expands symmetrically from cursor, one line at a time, alternating down then up.
145/// Returns (start_row, end_row, remaining_tokens).
146fn expand_symmetric_from_cursor(
147    snapshot: &BufferSnapshot,
148    cursor_row: u32,
149    mut token_budget: usize,
150) -> (u32, u32, usize) {
151    let mut start_row = cursor_row;
152    let mut end_row = cursor_row;
153
154    // Account for the cursor's line.
155    let cursor_line_tokens = line_token_count(snapshot, cursor_row);
156    token_budget = token_budget.saturating_sub(cursor_line_tokens);
157
158    loop {
159        let can_expand_up = start_row > 0;
160        let can_expand_down = end_row < snapshot.max_point().row;
161
162        if token_budget == 0 || (!can_expand_up && !can_expand_down) {
163            break;
164        }
165
166        // Expand down first (slight forward bias for edit prediction).
167        if can_expand_down {
168            let next_row = end_row + 1;
169            let line_tokens = line_token_count(snapshot, next_row);
170            if line_tokens <= token_budget {
171                end_row = next_row;
172                token_budget = token_budget.saturating_sub(line_tokens);
173            } else {
174                break;
175            }
176        }
177
178        // Then expand up.
179        if can_expand_up && token_budget > 0 {
180            let next_row = start_row - 1;
181            let line_tokens = line_token_count(snapshot, next_row);
182            if line_tokens <= token_budget {
183                start_row = next_row;
184                token_budget = token_budget.saturating_sub(line_tokens);
185            } else {
186                break;
187            }
188        }
189    }
190
191    (start_row, end_row, token_budget)
192}
193
194/// Expands line-wise with a bias toward one direction.
195/// Returns (start_row, end_row, remaining_tokens).
196fn expand_linewise_biased(
197    snapshot: &BufferSnapshot,
198    mut start_row: u32,
199    mut end_row: u32,
200    mut remaining_tokens: usize,
201    prefer_up: bool,
202) -> (u32, u32, usize) {
203    loop {
204        let can_expand_up = start_row > 0;
205        let can_expand_down = end_row < snapshot.max_point().row;
206
207        if remaining_tokens == 0 || (!can_expand_up && !can_expand_down) {
208            break;
209        }
210
211        let mut expanded = false;
212
213        // Try preferred direction first.
214        if prefer_up {
215            if can_expand_up {
216                let next_row = start_row - 1;
217                let line_tokens = line_token_count(snapshot, next_row);
218                if line_tokens <= remaining_tokens {
219                    start_row = next_row;
220                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
221                    expanded = true;
222                }
223            }
224            if can_expand_down && remaining_tokens > 0 {
225                let next_row = end_row + 1;
226                let line_tokens = line_token_count(snapshot, next_row);
227                if line_tokens <= remaining_tokens {
228                    end_row = next_row;
229                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
230                    expanded = true;
231                }
232            }
233        } else {
234            if can_expand_down {
235                let next_row = end_row + 1;
236                let line_tokens = line_token_count(snapshot, next_row);
237                if line_tokens <= remaining_tokens {
238                    end_row = next_row;
239                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
240                    expanded = true;
241                }
242            }
243            if can_expand_up && remaining_tokens > 0 {
244                let next_row = start_row - 1;
245                let line_tokens = line_token_count(snapshot, next_row);
246                if line_tokens <= remaining_tokens {
247                    start_row = next_row;
248                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
249                    expanded = true;
250                }
251            }
252        }
253
254        if !expanded {
255            break;
256        }
257    }
258
259    (start_row, end_row, remaining_tokens)
260}
261
/// Assumed number of string bytes per token, used when limiting model input.
///
/// The value is deliberately low: dividing by a small number yields a higher token estimate,
/// so text sized with this guess errs on the side of staying under the real limit.
pub(crate) const BYTES_PER_TOKEN_GUESS: usize = 3;

/// Estimates how many tokens `bytes` bytes of text amount to.
///
/// This is `bytes / BYTES_PER_TOKEN_GUESS` using integer division, so the result is rounded
/// down and inputs shorter than one token's worth of bytes yield `0`.
pub fn guess_token_count(bytes: usize) -> usize {
    bytes / BYTES_PER_TOKEN_GUESS
}
269
270fn line_token_count(snapshot: &BufferSnapshot, row: u32) -> usize {
271    guess_token_count(snapshot.line_len(row) as usize).max(1)
272}
273
274/// Estimates token count for rows in range [start_row, end_row).
275fn estimate_tokens_for_rows(snapshot: &BufferSnapshot, start_row: u32, end_row: u32) -> usize {
276    let mut tokens = 0;
277    for row in start_row..end_row {
278        tokens += line_token_count(snapshot, row);
279    }
280    tokens
281}
282
283/// Returns an iterator of (start_row, end_row) for successively larger syntax nodes
284/// containing the given row range. Smallest containing node first.
285fn containing_syntax_boundaries(
286    snapshot: &BufferSnapshot,
287    start_row: u32,
288    end_row: u32,
289) -> impl Iterator<Item = (u32, u32)> {
290    let range = Point::new(start_row, 0)..Point::new(end_row, snapshot.line_len(end_row));
291    let mut current = snapshot.syntax_ancestor(range);
292    let mut last_rows: Option<(u32, u32)> = None;
293
294    std::iter::from_fn(move || {
295        while let Some(node) = current.take() {
296            let node_start_row = node.start_position().row as u32;
297            let node_end_row = node.end_position().row as u32;
298            let rows = (node_start_row, node_end_row);
299
300            current = node.parent();
301
302            // Skip nodes that don't extend beyond our range.
303            if node_start_row >= start_row && node_end_row <= end_row {
304                continue;
305            }
306
307            // Skip if same as last returned (some nodes have same span).
308            if last_rows == Some(rows) {
309                continue;
310            }
311
312            last_rows = Some(rows);
313            return Some(rows);
314        }
315        None
316    })
317}
318
319/// Expands context by first trying to reach syntax boundaries,
320/// then expanding line-wise only if no syntax expansion occurred.
321fn expand_context_syntactically_then_linewise(
322    snapshot: &BufferSnapshot,
323    editable_range: Range<Point>,
324    context_token_limit: usize,
325) -> Range<Point> {
326    let mut start_row = editable_range.start.row;
327    let mut end_row = editable_range.end.row;
328    let mut remaining_tokens = context_token_limit;
329    let mut did_syntax_expand = false;
330
331    // Phase 1: Try to expand to containing syntax boundaries, picking the largest that fits.
332    for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
333    {
334        let tokens_for_start = if boundary_start < start_row {
335            estimate_tokens_for_rows(snapshot, boundary_start, start_row)
336        } else {
337            0
338        };
339        let tokens_for_end = if boundary_end > end_row {
340            estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
341        } else {
342            0
343        };
344
345        let total_needed = tokens_for_start + tokens_for_end;
346
347        if total_needed <= remaining_tokens {
348            if boundary_start < start_row {
349                start_row = boundary_start;
350            }
351            if boundary_end > end_row {
352                end_row = boundary_end;
353            }
354            remaining_tokens = remaining_tokens.saturating_sub(total_needed);
355            did_syntax_expand = true;
356        } else {
357            break;
358        }
359    }
360
361    // Phase 2: Only expand line-wise if no syntax expansion occurred.
362    if !did_syntax_expand {
363        (start_row, end_row, _) =
364            expand_linewise_biased(snapshot, start_row, end_row, remaining_tokens, true);
365    }
366
367    let start = Point::new(start_row, 0);
368    let end = Point::new(end_row, snapshot.line_len(end_row));
369    start..end
370}
371
372use language::ToOffset as _;
373
#[cfg(test)]
mod tests {
    use super::*;
    use gpui::{App, AppContext};
    use indoc::indoc;
    use language::{Buffer, rust_lang};
    use util::test::{TextRangeMarker, marked_text_ranges_by};

    /// One table-driven scenario: marked-up source text plus the two token budgets to use.
    struct TestCase {
        name: &'static str,
        marked_text: &'static str,
        editable_token_limit: usize,
        context_token_limit: usize,
    }

    /// Checks `editable_and_context_ranges_for_cursor_position` against ranges that are
    /// marked directly in each scenario's source text.
    #[gpui::test]
    fn test_editable_and_context_ranges(cx: &mut App) {
        // Markers used in `marked_text`:
        // ˇ = cursor position
        // « » = expected editable range
        // [ ] = expected context range
        let cursor_marker: TextRangeMarker = 'ˇ'.into();
        let editable_marker: TextRangeMarker = ('«', '»').into();
        let context_marker: TextRangeMarker = ('[', ']').into();

        let test_cases = [
            TestCase {
                name: "cursor near end of function - expands to syntax boundaries",
                marked_text: indoc! {r#"
                    [fn first() {
                        let a = 1;
                        let b = 2;
                    }

                    fn foo() {
                    «    let x = 1;
                        let y = 2;
                        println!("{}", x + y);ˇ
                    }»]
                "#},
                // 18 tokens - expands symmetrically then to syntax boundaries
                editable_token_limit: 18,
                context_token_limit: 35,
            },
            TestCase {
                name: "cursor at function start - expands to syntax boundaries",
                marked_text: indoc! {r#"
                    [fn before() {
                    «    let a = 1;
                    }

                    fn foo() {ˇ
                        let x = 1;
                        let y = 2;
                        let z = 3;
                    }
                    »
                    fn after() {
                        let b = 2;
                    }]
                "#},
                // 25 tokens - expands symmetrically then to syntax boundaries
                editable_token_limit: 25,
                context_token_limit: 50,
            },
            TestCase {
                name: "tiny budget - just lines around cursor",
                marked_text: indoc! {r#"
                    fn outer() {
                    [    let line1 = 1;
                        let line2 = 2;
                    «    let line3 = 3;
                        let line4 = 4;ˇ»
                        let line5 = 5;
                        let line6 = 6;]
                        let line7 = 7;
                    }
                "#},
                // 12 tokens (~36 bytes): only the lines right around the cursor are editable
                editable_token_limit: 12,
                context_token_limit: 24,
            },
            TestCase {
                name: "small function fits entirely",
                marked_text: indoc! {r#"
                    [«fn foo() {
                        let x = 1;ˇ
                        let y = 2;
                    }»]
                "#},
                // Plenty of budget for this small function
                editable_token_limit: 30,
                context_token_limit: 60,
            },
            TestCase {
                name: "context extends beyond editable",
                marked_text: indoc! {r#"
                    [fn first() { let a = 1; }
                    «fn second() { let b = 2; }
                    fn third() { let c = 3; }ˇ
                    fn fourth() { let d = 4; }»
                    fn fifth() { let e = 5; }]
                "#},
                // Small editable, larger context
                editable_token_limit: 25,
                context_token_limit: 45,
            },
            // Scenarios for syntax-aware editable and context expansion
            TestCase {
                name: "cursor in first if-statement - expands to syntax boundaries",
                marked_text: indoc! {r#"
                    [«fn before() { }

                    fn process() {
                        if condition1 {
                            let a = 1;ˇ
                            let b = 2;
                        }
                        if condition2 {»
                            let c = 3;
                            let d = 4;
                        }
                        if condition3 {
                            let e = 5;
                            let f = 6;
                        }
                    }

                    fn after() { }]
                "#},
                // 35 tokens allows expansion to include function header and first two if blocks
                editable_token_limit: 35,
                // 60 tokens allows context to include the whole file
                context_token_limit: 60,
            },
            TestCase {
                name: "cursor in middle if-statement - expands to syntax boundaries",
                marked_text: indoc! {r#"
                    [fn before() { }

                    fn process() {
                        if condition1 {
                            let a = 1;
                    «        let b = 2;
                        }
                        if condition2 {
                            let c = 3;ˇ
                            let d = 4;
                        }
                        if condition3 {
                            let e = 5;»
                            let f = 6;
                        }
                    }

                    fn after() { }]
                "#},
                // 40 tokens allows expansion to surrounding if blocks
                editable_token_limit: 40,
                // 60 tokens allows context to include the whole file
                context_token_limit: 60,
            },
            TestCase {
                name: "cursor near bottom of long function - editable expands toward syntax, context reaches function",
                marked_text: indoc! {r#"
                    [fn other() { }

                    fn long_function() {
                        let line1 = 1;
                        let line2 = 2;
                        let line3 = 3;
                        let line4 = 4;
                        let line5 = 5;
                        let line6 = 6;
                    «    let line7 = 7;
                        let line8 = 8;
                        let line9 = 9;
                        let line10 = 10;ˇ
                        let line11 = 11;
                    }

                    fn another() { }»]
                "#},
                // 40 tokens for editable - allows several lines plus syntax expansion
                editable_token_limit: 40,
                // 55 tokens - enough for function but not whole file
                context_token_limit: 55,
            },
        ];

        for test_case in test_cases {
            let (text, mut ranges_by_marker) = marked_text_ranges_by(
                test_case.marked_text,
                vec![
                    cursor_marker.clone(),
                    editable_marker.clone(),
                    context_marker.clone(),
                ],
            );

            let cursor_ranges = ranges_by_marker
                .remove(&cursor_marker)
                .unwrap_or_default();
            let editable_ranges = ranges_by_marker
                .remove(&editable_marker)
                .unwrap_or_default();
            let context_ranges = ranges_by_marker
                .remove(&context_marker)
                .unwrap_or_default();
            assert_eq!(editable_ranges.len(), 1);
            assert_eq!(context_ranges.len(), 1);

            cx.new(|cx| {
                // `indoc!` leaves a trailing newline after the last marked line; drop it so
                // the buffer ends where the markers do.
                let text = text.trim_end_matches('\n');
                let buffer = Buffer::local(text, cx).with_language(rust_lang(), cx);
                let snapshot = buffer.snapshot();

                let to_points = |offsets: &Range<usize>| {
                    snapshot.offset_to_point(offsets.start)..snapshot.offset_to_point(offsets.end)
                };
                let cursor_point = snapshot.offset_to_point(cursor_ranges[0].start);
                let expected_editable = to_points(&editable_ranges[0]);
                let expected_context = to_points(&context_ranges[0]);

                let (actual_editable, actual_context) =
                    editable_and_context_ranges_for_cursor_position(
                        cursor_point,
                        &snapshot,
                        test_case.editable_token_limit,
                        test_case.context_token_limit,
                    );

                let excerpt = |range: &Range<Point>| -> String {
                    snapshot.text_for_range(range.start..range.end).collect()
                };

                let editable_ok = actual_editable == expected_editable;
                let context_ok = actual_context == expected_context;

                if !(editable_ok && context_ok) {
                    // Print both sides of every mismatch before failing, so the difference
                    // can be read straight from the test output.
                    println!("\n=== FAILED: {} ===", test_case.name);
                    if !editable_ok {
                        println!(
                            "\nExpected editable ({:?}..{:?}):",
                            expected_editable.start, expected_editable.end
                        );
                        println!("---\n{}---", excerpt(&expected_editable));
                        println!(
                            "\nActual editable ({:?}..{:?}):",
                            actual_editable.start, actual_editable.end
                        );
                        println!("---\n{}---", excerpt(&actual_editable));
                    }
                    if !context_ok {
                        println!(
                            "\nExpected context ({:?}..{:?}):",
                            expected_context.start, expected_context.end
                        );
                        println!("---\n{}---", excerpt(&expected_context));
                        println!(
                            "\nActual context ({:?}..{:?}):",
                            actual_context.start, actual_context.end
                        );
                        println!("---\n{}---", excerpt(&actual_context));
                    }
                    panic!("Test '{}' failed - see output above", test_case.name);
                }

                buffer
            });
        }
    }
}