cursor_excerpt.rs

  1use language::{BufferSnapshot, Point};
  2use std::ops::Range;
  3
  4pub fn editable_and_context_ranges_for_cursor_position(
  5    position: Point,
  6    snapshot: &BufferSnapshot,
  7    editable_region_token_limit: usize,
  8    context_token_limit: usize,
  9) -> (Range<Point>, Range<Point>) {
 10    let editable_range = compute_editable_range(snapshot, position, editable_region_token_limit);
 11
 12    let context_range = expand_context_syntactically_then_linewise(
 13        snapshot,
 14        editable_range.clone(),
 15        context_token_limit,
 16    );
 17
 18    (editable_range, context_range)
 19}
 20
 21/// Computes the editable range using a three-phase approach:
 22/// 1. Expand symmetrically from cursor (75% of budget)
 23/// 2. Expand to syntax boundaries
 24/// 3. Continue line-wise in the least-expanded direction
 25fn compute_editable_range(
 26    snapshot: &BufferSnapshot,
 27    cursor: Point,
 28    token_limit: usize,
 29) -> Range<Point> {
 30    // Phase 1: Expand symmetrically from cursor using 75% of budget.
 31    let initial_budget = (token_limit * 3) / 4;
 32    let (mut start_row, mut end_row, mut remaining_tokens) =
 33        expand_symmetric_from_cursor(snapshot, cursor.row, initial_budget);
 34
 35    // Add remaining budget from phase 1.
 36    remaining_tokens += token_limit.saturating_sub(initial_budget);
 37
 38    let original_start = start_row;
 39    let original_end = end_row;
 40
 41    // Phase 2: Expand to syntax boundaries that fit within budget.
 42    for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
 43    {
 44        let tokens_for_start = if boundary_start < start_row {
 45            estimate_tokens_for_rows(snapshot, boundary_start, start_row)
 46        } else {
 47            0
 48        };
 49        let tokens_for_end = if boundary_end > end_row {
 50            estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
 51        } else {
 52            0
 53        };
 54
 55        let total_needed = tokens_for_start + tokens_for_end;
 56
 57        if total_needed <= remaining_tokens {
 58            if boundary_start < start_row {
 59                start_row = boundary_start;
 60            }
 61            if boundary_end > end_row {
 62                end_row = boundary_end;
 63            }
 64            remaining_tokens = remaining_tokens.saturating_sub(total_needed);
 65        } else {
 66            break;
 67        }
 68    }
 69
 70    // Phase 3: Continue line-wise in the direction we expanded least during syntax phase.
 71    let expanded_up = original_start.saturating_sub(start_row);
 72    let expanded_down = end_row.saturating_sub(original_end);
 73
 74    (start_row, end_row, _) = expand_linewise_biased(
 75        snapshot,
 76        start_row,
 77        end_row,
 78        remaining_tokens,
 79        expanded_up <= expanded_down, // prefer_up if we expanded less upward
 80    );
 81
 82    let start = Point::new(start_row, 0);
 83    let end = Point::new(end_row, snapshot.line_len(end_row));
 84    start..end
 85}
 86
 87/// Expands symmetrically from cursor, one line at a time, alternating down then up.
 88/// Returns (start_row, end_row, remaining_tokens).
 89fn expand_symmetric_from_cursor(
 90    snapshot: &BufferSnapshot,
 91    cursor_row: u32,
 92    mut token_budget: usize,
 93) -> (u32, u32, usize) {
 94    let mut start_row = cursor_row;
 95    let mut end_row = cursor_row;
 96
 97    // Account for the cursor's line.
 98    let cursor_line_tokens = line_token_count(snapshot, cursor_row);
 99    token_budget = token_budget.saturating_sub(cursor_line_tokens);
100
101    loop {
102        let can_expand_up = start_row > 0;
103        let can_expand_down = end_row < snapshot.max_point().row;
104
105        if token_budget == 0 || (!can_expand_up && !can_expand_down) {
106            break;
107        }
108
109        // Expand down first (slight forward bias for edit prediction).
110        if can_expand_down {
111            let next_row = end_row + 1;
112            let line_tokens = line_token_count(snapshot, next_row);
113            if line_tokens <= token_budget {
114                end_row = next_row;
115                token_budget = token_budget.saturating_sub(line_tokens);
116            } else {
117                break;
118            }
119        }
120
121        // Then expand up.
122        if can_expand_up && token_budget > 0 {
123            let next_row = start_row - 1;
124            let line_tokens = line_token_count(snapshot, next_row);
125            if line_tokens <= token_budget {
126                start_row = next_row;
127                token_budget = token_budget.saturating_sub(line_tokens);
128            } else {
129                break;
130            }
131        }
132    }
133
134    (start_row, end_row, token_budget)
135}
136
137/// Expands line-wise with a bias toward one direction.
138/// Returns (start_row, end_row, remaining_tokens).
139fn expand_linewise_biased(
140    snapshot: &BufferSnapshot,
141    mut start_row: u32,
142    mut end_row: u32,
143    mut remaining_tokens: usize,
144    prefer_up: bool,
145) -> (u32, u32, usize) {
146    loop {
147        let can_expand_up = start_row > 0;
148        let can_expand_down = end_row < snapshot.max_point().row;
149
150        if remaining_tokens == 0 || (!can_expand_up && !can_expand_down) {
151            break;
152        }
153
154        let mut expanded = false;
155
156        // Try preferred direction first.
157        if prefer_up {
158            if can_expand_up {
159                let next_row = start_row - 1;
160                let line_tokens = line_token_count(snapshot, next_row);
161                if line_tokens <= remaining_tokens {
162                    start_row = next_row;
163                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
164                    expanded = true;
165                }
166            }
167            if can_expand_down && remaining_tokens > 0 {
168                let next_row = end_row + 1;
169                let line_tokens = line_token_count(snapshot, next_row);
170                if line_tokens <= remaining_tokens {
171                    end_row = next_row;
172                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
173                    expanded = true;
174                }
175            }
176        } else {
177            if can_expand_down {
178                let next_row = end_row + 1;
179                let line_tokens = line_token_count(snapshot, next_row);
180                if line_tokens <= remaining_tokens {
181                    end_row = next_row;
182                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
183                    expanded = true;
184                }
185            }
186            if can_expand_up && remaining_tokens > 0 {
187                let next_row = start_row - 1;
188                let line_tokens = line_token_count(snapshot, next_row);
189                if line_tokens <= remaining_tokens {
190                    start_row = next_row;
191                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
192                    expanded = true;
193                }
194            }
195        }
196
197        if !expanded {
198            break;
199        }
200    }
201
202    (start_row, end_row, remaining_tokens)
203}
204
205/// Typical number of string bytes per token for the purposes of limiting model input. This is
206/// intentionally low to err on the side of underestimating limits.
207pub(crate) const BYTES_PER_TOKEN_GUESS: usize = 3;
208
209pub fn guess_token_count(bytes: usize) -> usize {
210    bytes / BYTES_PER_TOKEN_GUESS
211}
212
213fn line_token_count(snapshot: &BufferSnapshot, row: u32) -> usize {
214    guess_token_count(snapshot.line_len(row) as usize).max(1)
215}
216
217/// Estimates token count for rows in range [start_row, end_row).
218fn estimate_tokens_for_rows(snapshot: &BufferSnapshot, start_row: u32, end_row: u32) -> usize {
219    let mut tokens = 0;
220    for row in start_row..end_row {
221        tokens += line_token_count(snapshot, row);
222    }
223    tokens
224}
225
226/// Returns an iterator of (start_row, end_row) for successively larger syntax nodes
227/// containing the given row range. Smallest containing node first.
228fn containing_syntax_boundaries(
229    snapshot: &BufferSnapshot,
230    start_row: u32,
231    end_row: u32,
232) -> impl Iterator<Item = (u32, u32)> {
233    let range = Point::new(start_row, 0)..Point::new(end_row, snapshot.line_len(end_row));
234    let mut current = snapshot.syntax_ancestor(range);
235    let mut last_rows: Option<(u32, u32)> = None;
236
237    std::iter::from_fn(move || {
238        while let Some(node) = current.take() {
239            let node_start_row = node.start_position().row as u32;
240            let node_end_row = node.end_position().row as u32;
241            let rows = (node_start_row, node_end_row);
242
243            current = node.parent();
244
245            // Skip nodes that don't extend beyond our range.
246            if node_start_row >= start_row && node_end_row <= end_row {
247                continue;
248            }
249
250            // Skip if same as last returned (some nodes have same span).
251            if last_rows == Some(rows) {
252                continue;
253            }
254
255            last_rows = Some(rows);
256            return Some(rows);
257        }
258        None
259    })
260}
261
262/// Expands context by first trying to reach syntax boundaries,
263/// then expanding line-wise only if no syntax expansion occurred.
264fn expand_context_syntactically_then_linewise(
265    snapshot: &BufferSnapshot,
266    editable_range: Range<Point>,
267    context_token_limit: usize,
268) -> Range<Point> {
269    let mut start_row = editable_range.start.row;
270    let mut end_row = editable_range.end.row;
271    let mut remaining_tokens = context_token_limit;
272    let mut did_syntax_expand = false;
273
274    // Phase 1: Try to expand to containing syntax boundaries, picking the largest that fits.
275    for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
276    {
277        let tokens_for_start = if boundary_start < start_row {
278            estimate_tokens_for_rows(snapshot, boundary_start, start_row)
279        } else {
280            0
281        };
282        let tokens_for_end = if boundary_end > end_row {
283            estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
284        } else {
285            0
286        };
287
288        let total_needed = tokens_for_start + tokens_for_end;
289
290        if total_needed <= remaining_tokens {
291            if boundary_start < start_row {
292                start_row = boundary_start;
293            }
294            if boundary_end > end_row {
295                end_row = boundary_end;
296            }
297            remaining_tokens = remaining_tokens.saturating_sub(total_needed);
298            did_syntax_expand = true;
299        } else {
300            break;
301        }
302    }
303
304    // Phase 2: Only expand line-wise if no syntax expansion occurred.
305    if !did_syntax_expand {
306        (start_row, end_row, _) =
307            expand_linewise_biased(snapshot, start_row, end_row, remaining_tokens, true);
308    }
309
310    let start = Point::new(start_row, 0);
311    let end = Point::new(end_row, snapshot.line_len(end_row));
312    start..end
313}
314
315#[cfg(test)]
316mod tests {
317    use super::*;
318    use gpui::{App, AppContext};
319    use indoc::indoc;
320    use language::{Buffer, rust_lang};
321    use util::test::{TextRangeMarker, marked_text_ranges_by};
322
323    struct TestCase {
324        name: &'static str,
325        marked_text: &'static str,
326        editable_token_limit: usize,
327        context_token_limit: usize,
328    }
329
330    #[gpui::test]
331    fn test_editable_and_context_ranges(cx: &mut App) {
332        // Markers:
333        // ˇ = cursor position
334        // « » = expected editable range
335        // [ ] = expected context range
336        let test_cases = vec![
337            TestCase {
338                name: "cursor near end of function - expands to syntax boundaries",
339                marked_text: indoc! {r#"
340                    [fn first() {
341                        let a = 1;
342                        let b = 2;
343                    }
344
345                    fn foo() {
346                    «    let x = 1;
347                        let y = 2;
348                        println!("{}", x + y);ˇ
349                    }»]
350                "#},
351                // 18 tokens - expands symmetrically then to syntax boundaries
352                editable_token_limit: 18,
353                context_token_limit: 35,
354            },
355            TestCase {
356                name: "cursor at function start - expands to syntax boundaries",
357                marked_text: indoc! {r#"
358                    [fn before() {
359                    «    let a = 1;
360                    }
361
362                    fn foo() {ˇ
363                        let x = 1;
364                        let y = 2;
365                        let z = 3;
366                    }
367                    »
368                    fn after() {
369                        let b = 2;
370                    }]
371                "#},
372                // 25 tokens - expands symmetrically then to syntax boundaries
373                editable_token_limit: 25,
374                context_token_limit: 50,
375            },
376            TestCase {
377                name: "tiny budget - just lines around cursor",
378                marked_text: indoc! {r#"
379                    fn outer() {
380                    [    let line1 = 1;
381                        let line2 = 2;
382                    «    let line3 = 3;
383                        let line4 = 4;ˇ»
384                        let line5 = 5;
385                        let line6 = 6;]
386                        let line7 = 7;
387                    }
388                "#},
389                // 12 tokens (~36 bytes) = just the cursor line with tiny budget
390                editable_token_limit: 12,
391                context_token_limit: 24,
392            },
393            TestCase {
394                name: "small function fits entirely",
395                marked_text: indoc! {r#"
396                    [«fn foo() {
397                        let x = 1;ˇ
398                        let y = 2;
399                    }»]
400                "#},
401                // Plenty of budget for this small function
402                editable_token_limit: 30,
403                context_token_limit: 60,
404            },
405            TestCase {
406                name: "context extends beyond editable",
407                marked_text: indoc! {r#"
408                    [fn first() { let a = 1; }
409                    «fn second() { let b = 2; }
410                    fn third() { let c = 3; }ˇ
411                    fn fourth() { let d = 4; }»
412                    fn fifth() { let e = 5; }]
413                "#},
414                // Small editable, larger context
415                editable_token_limit: 25,
416                context_token_limit: 45,
417            },
418            // Tests for syntax-aware editable and context expansion
419            TestCase {
420                name: "cursor in first if-statement - expands to syntax boundaries",
421                marked_text: indoc! {r#"
422                    [«fn before() { }
423
424                    fn process() {
425                        if condition1 {
426                            let a = 1;ˇ
427                            let b = 2;
428                        }
429                        if condition2 {»
430                            let c = 3;
431                            let d = 4;
432                        }
433                        if condition3 {
434                            let e = 5;
435                            let f = 6;
436                        }
437                    }
438
439                    fn after() { }]
440                "#},
441                // 35 tokens allows expansion to include function header and first two if blocks
442                editable_token_limit: 35,
443                // 60 tokens allows context to include the whole file
444                context_token_limit: 60,
445            },
446            TestCase {
447                name: "cursor in middle if-statement - expands to syntax boundaries",
448                marked_text: indoc! {r#"
449                    [fn before() { }
450
451                    fn process() {
452                        if condition1 {
453                            let a = 1;
454                    «        let b = 2;
455                        }
456                        if condition2 {
457                            let c = 3;ˇ
458                            let d = 4;
459                        }
460                        if condition3 {
461                            let e = 5;»
462                            let f = 6;
463                        }
464                    }
465
466                    fn after() { }]
467                "#},
468                // 40 tokens allows expansion to surrounding if blocks
469                editable_token_limit: 40,
470                // 60 tokens allows context to include the whole file
471                context_token_limit: 60,
472            },
473            TestCase {
474                name: "cursor near bottom of long function - editable expands toward syntax, context reaches function",
475                marked_text: indoc! {r#"
476                    [fn other() { }
477
478                    fn long_function() {
479                        let line1 = 1;
480                        let line2 = 2;
481                        let line3 = 3;
482                        let line4 = 4;
483                        let line5 = 5;
484                        let line6 = 6;
485                    «    let line7 = 7;
486                        let line8 = 8;
487                        let line9 = 9;
488                        let line10 = 10;ˇ
489                        let line11 = 11;
490                    }
491
492                    fn another() { }»]
493                "#},
494                // 40 tokens for editable - allows several lines plus syntax expansion
495                editable_token_limit: 40,
496                // 55 tokens - enough for function but not whole file
497                context_token_limit: 55,
498            },
499        ];
500
501        for test_case in test_cases {
502            let cursor_marker: TextRangeMarker = 'ˇ'.into();
503            let editable_marker: TextRangeMarker = ('«', '»').into();
504            let context_marker: TextRangeMarker = ('[', ']').into();
505
506            let (text, mut ranges) = marked_text_ranges_by(
507                test_case.marked_text,
508                vec![
509                    cursor_marker.clone(),
510                    editable_marker.clone(),
511                    context_marker.clone(),
512                ],
513            );
514
515            let cursor_ranges = ranges.remove(&cursor_marker).unwrap_or_default();
516            let expected_editable = ranges.remove(&editable_marker).unwrap_or_default();
517            let expected_context = ranges.remove(&context_marker).unwrap_or_default();
518            assert_eq!(expected_editable.len(), 1);
519            assert_eq!(expected_context.len(), 1);
520
521            cx.new(|cx| {
522                let text = text.trim_end_matches('\n');
523                let buffer = Buffer::local(text, cx).with_language(rust_lang(), cx);
524                let snapshot = buffer.snapshot();
525
526                let cursor_offset = cursor_ranges[0].start;
527                let cursor_point = snapshot.offset_to_point(cursor_offset);
528                let expected_editable_start = snapshot.offset_to_point(expected_editable[0].start);
529                let expected_editable_end = snapshot.offset_to_point(expected_editable[0].end);
530                let expected_context_start = snapshot.offset_to_point(expected_context[0].start);
531                let expected_context_end = snapshot.offset_to_point(expected_context[0].end);
532
533                let (actual_editable, actual_context) =
534                    editable_and_context_ranges_for_cursor_position(
535                        cursor_point,
536                        &snapshot,
537                        test_case.editable_token_limit,
538                        test_case.context_token_limit,
539                    );
540
541                let range_text = |start: Point, end: Point| -> String {
542                    snapshot.text_for_range(start..end).collect()
543                };
544
545                let editable_match = actual_editable.start == expected_editable_start
546                    && actual_editable.end == expected_editable_end;
547                let context_match = actual_context.start == expected_context_start
548                    && actual_context.end == expected_context_end;
549
550                if !editable_match || !context_match {
551                    println!("\n=== FAILED: {} ===", test_case.name);
552                    if !editable_match {
553                        println!(
554                            "\nExpected editable ({:?}..{:?}):",
555                            expected_editable_start, expected_editable_end
556                        );
557                        println!(
558                            "---\n{}---",
559                            range_text(expected_editable_start, expected_editable_end)
560                        );
561                        println!(
562                            "\nActual editable ({:?}..{:?}):",
563                            actual_editable.start, actual_editable.end
564                        );
565                        println!(
566                            "---\n{}---",
567                            range_text(actual_editable.start, actual_editable.end)
568                        );
569                    }
570                    if !context_match {
571                        println!(
572                            "\nExpected context ({:?}..{:?}):",
573                            expected_context_start, expected_context_end
574                        );
575                        println!(
576                            "---\n{}---",
577                            range_text(expected_context_start, expected_context_end)
578                        );
579                        println!(
580                            "\nActual context ({:?}..{:?}):",
581                            actual_context.start, actual_context.end
582                        );
583                        println!(
584                            "---\n{}---",
585                            range_text(actual_context.start, actual_context.end)
586                        );
587                    }
588                    panic!("Test '{}' failed - see output above", test_case.name);
589                }
590
591                buffer
592            });
593        }
594    }
595}