1use language::{BufferSnapshot, Point};
2use std::ops::Range;
3use text::OffsetRangeExt as _;
4use zeta_prompt::ExcerptRanges;
5
6/// Computes all range variants for a cursor position: editable ranges at 150, 180, and 350
7/// token budgets, plus their corresponding context expansions. Returns the full excerpt range
8/// (union of all context ranges) and the individual sub-ranges as Points.
9pub fn compute_excerpt_ranges(
10 position: Point,
11 snapshot: &BufferSnapshot,
12) -> (Range<Point>, Range<usize>, ExcerptRanges) {
13 let editable_150 = compute_editable_range(snapshot, position, 150);
14 let editable_180 = compute_editable_range(snapshot, position, 180);
15 let editable_350 = compute_editable_range(snapshot, position, 350);
16 let full_512 = compute_editable_range(snapshot, position, 512);
17
18 let editable_150_context_350 =
19 expand_context_syntactically_then_linewise(snapshot, editable_150.clone(), 350);
20 let editable_180_context_350 =
21 expand_context_syntactically_then_linewise(snapshot, editable_180.clone(), 350);
22 let editable_350_context_150 =
23 expand_context_syntactically_then_linewise(snapshot, editable_350.clone(), 150);
24
25 let full_start_row = full_512
26 .start
27 .row
28 .min(editable_150_context_350.start.row)
29 .min(editable_180_context_350.start.row)
30 .min(editable_350_context_150.start.row);
31 let full_end_row = full_512
32 .end
33 .row
34 .max(editable_150_context_350.end.row)
35 .max(editable_180_context_350.end.row)
36 .max(editable_350_context_150.end.row);
37
38 let full_context =
39 Point::new(full_start_row, 0)..Point::new(full_end_row, snapshot.line_len(full_end_row));
40
41 let full_context_offset_range = full_context.to_offset(snapshot);
42
43 let to_offset = |range: &Range<Point>| -> Range<usize> {
44 let start = range.start.to_offset(snapshot);
45 let end = range.end.to_offset(snapshot);
46 (start - full_context_offset_range.start)..(end - full_context_offset_range.start)
47 };
48
49 let ranges = ExcerptRanges {
50 editable_150: to_offset(&editable_150),
51 editable_180: to_offset(&editable_180),
52 editable_350: to_offset(&editable_350),
53 editable_150_context_350: to_offset(&editable_150_context_350),
54 editable_180_context_350: to_offset(&editable_180_context_350),
55 editable_350_context_150: to_offset(&editable_350_context_150),
56 };
57
58 (full_context, full_context_offset_range, ranges)
59}
60
61pub fn editable_and_context_ranges_for_cursor_position(
62 position: Point,
63 snapshot: &BufferSnapshot,
64 editable_region_token_limit: usize,
65 context_token_limit: usize,
66) -> (Range<Point>, Range<Point>) {
67 let editable_range = compute_editable_range(snapshot, position, editable_region_token_limit);
68
69 let context_range = expand_context_syntactically_then_linewise(
70 snapshot,
71 editable_range.clone(),
72 context_token_limit,
73 );
74
75 (editable_range, context_range)
76}
77
78/// Computes the editable range using a three-phase approach:
79/// 1. Expand symmetrically from cursor (75% of budget)
80/// 2. Expand to syntax boundaries
81/// 3. Continue line-wise in the least-expanded direction
82fn compute_editable_range(
83 snapshot: &BufferSnapshot,
84 cursor: Point,
85 token_limit: usize,
86) -> Range<Point> {
87 // Phase 1: Expand symmetrically from cursor using 75% of budget.
88 let initial_budget = (token_limit * 3) / 4;
89 let (mut start_row, mut end_row, mut remaining_tokens) =
90 expand_symmetric_from_cursor(snapshot, cursor.row, initial_budget);
91
92 // Add remaining budget from phase 1.
93 remaining_tokens += token_limit.saturating_sub(initial_budget);
94
95 let original_start = start_row;
96 let original_end = end_row;
97
98 // Phase 2: Expand to syntax boundaries that fit within budget.
99 for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
100 {
101 let tokens_for_start = if boundary_start < start_row {
102 estimate_tokens_for_rows(snapshot, boundary_start, start_row)
103 } else {
104 0
105 };
106 let tokens_for_end = if boundary_end > end_row {
107 estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
108 } else {
109 0
110 };
111
112 let total_needed = tokens_for_start + tokens_for_end;
113
114 if total_needed <= remaining_tokens {
115 if boundary_start < start_row {
116 start_row = boundary_start;
117 }
118 if boundary_end > end_row {
119 end_row = boundary_end;
120 }
121 remaining_tokens = remaining_tokens.saturating_sub(total_needed);
122 } else {
123 break;
124 }
125 }
126
127 // Phase 3: Continue line-wise in the direction we expanded least during syntax phase.
128 let expanded_up = original_start.saturating_sub(start_row);
129 let expanded_down = end_row.saturating_sub(original_end);
130
131 (start_row, end_row, _) = expand_linewise_biased(
132 snapshot,
133 start_row,
134 end_row,
135 remaining_tokens,
136 expanded_up <= expanded_down, // prefer_up if we expanded less upward
137 );
138
139 let start = Point::new(start_row, 0);
140 let end = Point::new(end_row, snapshot.line_len(end_row));
141 start..end
142}
143
144/// Expands symmetrically from cursor, one line at a time, alternating down then up.
145/// Returns (start_row, end_row, remaining_tokens).
146fn expand_symmetric_from_cursor(
147 snapshot: &BufferSnapshot,
148 cursor_row: u32,
149 mut token_budget: usize,
150) -> (u32, u32, usize) {
151 let mut start_row = cursor_row;
152 let mut end_row = cursor_row;
153
154 // Account for the cursor's line.
155 let cursor_line_tokens = line_token_count(snapshot, cursor_row);
156 token_budget = token_budget.saturating_sub(cursor_line_tokens);
157
158 loop {
159 let can_expand_up = start_row > 0;
160 let can_expand_down = end_row < snapshot.max_point().row;
161
162 if token_budget == 0 || (!can_expand_up && !can_expand_down) {
163 break;
164 }
165
166 // Expand down first (slight forward bias for edit prediction).
167 if can_expand_down {
168 let next_row = end_row + 1;
169 let line_tokens = line_token_count(snapshot, next_row);
170 if line_tokens <= token_budget {
171 end_row = next_row;
172 token_budget = token_budget.saturating_sub(line_tokens);
173 } else {
174 break;
175 }
176 }
177
178 // Then expand up.
179 if can_expand_up && token_budget > 0 {
180 let next_row = start_row - 1;
181 let line_tokens = line_token_count(snapshot, next_row);
182 if line_tokens <= token_budget {
183 start_row = next_row;
184 token_budget = token_budget.saturating_sub(line_tokens);
185 } else {
186 break;
187 }
188 }
189 }
190
191 (start_row, end_row, token_budget)
192}
193
194/// Expands line-wise with a bias toward one direction.
195/// Returns (start_row, end_row, remaining_tokens).
196fn expand_linewise_biased(
197 snapshot: &BufferSnapshot,
198 mut start_row: u32,
199 mut end_row: u32,
200 mut remaining_tokens: usize,
201 prefer_up: bool,
202) -> (u32, u32, usize) {
203 loop {
204 let can_expand_up = start_row > 0;
205 let can_expand_down = end_row < snapshot.max_point().row;
206
207 if remaining_tokens == 0 || (!can_expand_up && !can_expand_down) {
208 break;
209 }
210
211 let mut expanded = false;
212
213 // Try preferred direction first.
214 if prefer_up {
215 if can_expand_up {
216 let next_row = start_row - 1;
217 let line_tokens = line_token_count(snapshot, next_row);
218 if line_tokens <= remaining_tokens {
219 start_row = next_row;
220 remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
221 expanded = true;
222 }
223 }
224 if can_expand_down && remaining_tokens > 0 {
225 let next_row = end_row + 1;
226 let line_tokens = line_token_count(snapshot, next_row);
227 if line_tokens <= remaining_tokens {
228 end_row = next_row;
229 remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
230 expanded = true;
231 }
232 }
233 } else {
234 if can_expand_down {
235 let next_row = end_row + 1;
236 let line_tokens = line_token_count(snapshot, next_row);
237 if line_tokens <= remaining_tokens {
238 end_row = next_row;
239 remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
240 expanded = true;
241 }
242 }
243 if can_expand_up && remaining_tokens > 0 {
244 let next_row = start_row - 1;
245 let line_tokens = line_token_count(snapshot, next_row);
246 if line_tokens <= remaining_tokens {
247 start_row = next_row;
248 remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
249 expanded = true;
250 }
251 }
252 }
253
254 if !expanded {
255 break;
256 }
257 }
258
259 (start_row, end_row, remaining_tokens)
260}
261
/// Assumed number of string bytes per token when limiting model input. The value is kept
/// intentionally low to err on the side of underestimating limits.
pub(crate) const BYTES_PER_TOKEN_GUESS: usize = 3;

/// Estimates how many tokens `bytes` bytes of text occupy, using [`BYTES_PER_TOKEN_GUESS`].
///
/// The division truncates, so inputs shorter than one token's worth of bytes yield 0.
pub fn guess_token_count(bytes: usize) -> usize {
    let estimated_tokens = bytes / BYTES_PER_TOKEN_GUESS;
    estimated_tokens
}
269
270fn line_token_count(snapshot: &BufferSnapshot, row: u32) -> usize {
271 guess_token_count(snapshot.line_len(row) as usize).max(1)
272}
273
274/// Estimates token count for rows in range [start_row, end_row).
275fn estimate_tokens_for_rows(snapshot: &BufferSnapshot, start_row: u32, end_row: u32) -> usize {
276 let mut tokens = 0;
277 for row in start_row..end_row {
278 tokens += line_token_count(snapshot, row);
279 }
280 tokens
281}
282
283/// Returns an iterator of (start_row, end_row) for successively larger syntax nodes
284/// containing the given row range. Smallest containing node first.
285fn containing_syntax_boundaries(
286 snapshot: &BufferSnapshot,
287 start_row: u32,
288 end_row: u32,
289) -> impl Iterator<Item = (u32, u32)> {
290 let range = Point::new(start_row, 0)..Point::new(end_row, snapshot.line_len(end_row));
291 let mut current = snapshot.syntax_ancestor(range);
292 let mut last_rows: Option<(u32, u32)> = None;
293
294 std::iter::from_fn(move || {
295 while let Some(node) = current.take() {
296 let node_start_row = node.start_position().row as u32;
297 let node_end_row = node.end_position().row as u32;
298 let rows = (node_start_row, node_end_row);
299
300 current = node.parent();
301
302 // Skip nodes that don't extend beyond our range.
303 if node_start_row >= start_row && node_end_row <= end_row {
304 continue;
305 }
306
307 // Skip if same as last returned (some nodes have same span).
308 if last_rows == Some(rows) {
309 continue;
310 }
311
312 last_rows = Some(rows);
313 return Some(rows);
314 }
315 None
316 })
317}
318
319/// Expands context by first trying to reach syntax boundaries,
320/// then expanding line-wise only if no syntax expansion occurred.
321fn expand_context_syntactically_then_linewise(
322 snapshot: &BufferSnapshot,
323 editable_range: Range<Point>,
324 context_token_limit: usize,
325) -> Range<Point> {
326 let mut start_row = editable_range.start.row;
327 let mut end_row = editable_range.end.row;
328 let mut remaining_tokens = context_token_limit;
329 let mut did_syntax_expand = false;
330
331 // Phase 1: Try to expand to containing syntax boundaries, picking the largest that fits.
332 for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
333 {
334 let tokens_for_start = if boundary_start < start_row {
335 estimate_tokens_for_rows(snapshot, boundary_start, start_row)
336 } else {
337 0
338 };
339 let tokens_for_end = if boundary_end > end_row {
340 estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
341 } else {
342 0
343 };
344
345 let total_needed = tokens_for_start + tokens_for_end;
346
347 if total_needed <= remaining_tokens {
348 if boundary_start < start_row {
349 start_row = boundary_start;
350 }
351 if boundary_end > end_row {
352 end_row = boundary_end;
353 }
354 remaining_tokens = remaining_tokens.saturating_sub(total_needed);
355 did_syntax_expand = true;
356 } else {
357 break;
358 }
359 }
360
361 // Phase 2: Only expand line-wise if no syntax expansion occurred.
362 if !did_syntax_expand {
363 (start_row, end_row, _) =
364 expand_linewise_biased(snapshot, start_row, end_row, remaining_tokens, true);
365 }
366
367 let start = Point::new(start_row, 0);
368 let end = Point::new(end_row, snapshot.line_len(end_row));
369 start..end
370}
371
372use language::ToOffset as _;
373
374#[cfg(test)]
375mod tests {
376 use super::*;
377 use gpui::{App, AppContext};
378 use indoc::indoc;
379 use language::{Buffer, rust_lang};
380 use util::test::{TextRangeMarker, marked_text_ranges_by};
381
    /// One table-driven scenario for `editable_and_context_ranges_for_cursor_position`.
382 struct TestCase {
        /// Human-readable label, printed when the case fails.
383 name: &'static str,
        /// Source text annotated with the cursor, editable-range, and context-range markers.
384 marked_text: &'static str,
        /// Budget passed as `editable_region_token_limit`.
385 editable_token_limit: usize,
        /// Budget passed as `context_token_limit`.
386 context_token_limit: usize,
387 }
388
    /// Runs every `TestCase` against `editable_and_context_ranges_for_cursor_position` and
    /// compares the returned ranges with the ones marked in the case's text. On a mismatch the
    /// expected and actual ranges are printed with their text, and the test panics.
389 #[gpui::test]
390 fn test_editable_and_context_ranges(cx: &mut App) {
391 // Markers:
392 // ˇ = cursor position
393 // « » = expected editable range
394 // [ ] = expected context range
395 let test_cases = vec![
396 TestCase {
397 name: "cursor near end of function - expands to syntax boundaries",
398 marked_text: indoc! {r#"
399 [fn first() {
400 let a = 1;
401 let b = 2;
402 }
403
404 fn foo() {
405 « let x = 1;
406 let y = 2;
407 println!("{}", x + y);ˇ
408 }»]
409 "#},
410 // 18 tokens - expands symmetrically then to syntax boundaries
411 editable_token_limit: 18,
412 context_token_limit: 35,
413 },
414 TestCase {
415 name: "cursor at function start - expands to syntax boundaries",
416 marked_text: indoc! {r#"
417 [fn before() {
418 « let a = 1;
419 }
420
421 fn foo() {ˇ
422 let x = 1;
423 let y = 2;
424 let z = 3;
425 }
426 »
427 fn after() {
428 let b = 2;
429 }]
430 "#},
431 // 25 tokens - expands symmetrically then to syntax boundaries
432 editable_token_limit: 25,
433 context_token_limit: 50,
434 },
435 TestCase {
436 name: "tiny budget - just lines around cursor",
437 marked_text: indoc! {r#"
438 fn outer() {
439 [ let line1 = 1;
440 let line2 = 2;
441 « let line3 = 3;
442 let line4 = 4;ˇ»
443 let line5 = 5;
444 let line6 = 6;]
445 let line7 = 7;
446 }
447 "#},
448 // 12 tokens (~36 bytes) = just the cursor line with tiny budget
449 editable_token_limit: 12,
450 context_token_limit: 24,
451 },
452 TestCase {
453 name: "small function fits entirely",
454 marked_text: indoc! {r#"
455 [«fn foo() {
456 let x = 1;ˇ
457 let y = 2;
458 }»]
459 "#},
460 // Plenty of budget for this small function
461 editable_token_limit: 30,
462 context_token_limit: 60,
463 },
464 TestCase {
465 name: "context extends beyond editable",
466 marked_text: indoc! {r#"
467 [fn first() { let a = 1; }
468 «fn second() { let b = 2; }
469 fn third() { let c = 3; }ˇ
470 fn fourth() { let d = 4; }»
471 fn fifth() { let e = 5; }]
472 "#},
473 // Small editable, larger context
474 editable_token_limit: 25,
475 context_token_limit: 45,
476 },
477 // Tests for syntax-aware editable and context expansion
478 TestCase {
479 name: "cursor in first if-statement - expands to syntax boundaries",
480 marked_text: indoc! {r#"
481 [«fn before() { }
482
483 fn process() {
484 if condition1 {
485 let a = 1;ˇ
486 let b = 2;
487 }
488 if condition2 {»
489 let c = 3;
490 let d = 4;
491 }
492 if condition3 {
493 let e = 5;
494 let f = 6;
495 }
496 }
497
498 fn after() { }]
499 "#},
500 // 35 tokens allows expansion to include function header and first two if blocks
501 editable_token_limit: 35,
502 // 60 tokens allows context to include the whole file
503 context_token_limit: 60,
504 },
505 TestCase {
506 name: "cursor in middle if-statement - expands to syntax boundaries",
507 marked_text: indoc! {r#"
508 [fn before() { }
509
510 fn process() {
511 if condition1 {
512 let a = 1;
513 « let b = 2;
514 }
515 if condition2 {
516 let c = 3;ˇ
517 let d = 4;
518 }
519 if condition3 {
520 let e = 5;»
521 let f = 6;
522 }
523 }
524
525 fn after() { }]
526 "#},
527 // 40 tokens allows expansion to surrounding if blocks
528 editable_token_limit: 40,
529 // 60 tokens allows context to include the whole file
530 context_token_limit: 60,
531 },
532 TestCase {
533 name: "cursor near bottom of long function - editable expands toward syntax, context reaches function",
534 marked_text: indoc! {r#"
535 [fn other() { }
536
537 fn long_function() {
538 let line1 = 1;
539 let line2 = 2;
540 let line3 = 3;
541 let line4 = 4;
542 let line5 = 5;
543 let line6 = 6;
544 « let line7 = 7;
545 let line8 = 8;
546 let line9 = 9;
547 let line10 = 10;ˇ
548 let line11 = 11;
549 }
550
551 fn another() { }»]
552 "#},
553 // 40 tokens for editable - allows several lines plus syntax expansion
554 editable_token_limit: 40,
555 // 55 tokens - enough for function but not whole file
556 context_token_limit: 55,
557 },
558 ];
559
        // Each case: strip the markers, build a Rust buffer from the plain text, then compare
        // the computed ranges against the marked ones.
560 for test_case in test_cases {
561 let cursor_marker: TextRangeMarker = 'ˇ'.into();
562 let editable_marker: TextRangeMarker = ('«', '»').into();
563 let context_marker: TextRangeMarker = ('[', ']').into();
564
565 let (text, mut ranges) = marked_text_ranges_by(
566 test_case.marked_text,
567 vec![
568 cursor_marker.clone(),
569 editable_marker.clone(),
570 context_marker.clone(),
571 ],
572 );
573
574 let cursor_ranges = ranges.remove(&cursor_marker).unwrap_or_default();
575 let expected_editable = ranges.remove(&editable_marker).unwrap_or_default();
576 let expected_context = ranges.remove(&context_marker).unwrap_or_default();
            // Every case must mark exactly one editable range and exactly one context range.
577 assert_eq!(expected_editable.len(), 1);
578 assert_eq!(expected_context.len(), 1);
579
580 cx.new(|cx| {
                // Only trailing newlines are removed, so the marker offsets computed from the
                // untrimmed text still point at the same positions in the buffer.
581 let text = text.trim_end_matches('\n');
582 let buffer = Buffer::local(text, cx).with_language(rust_lang(), cx);
583 let snapshot = buffer.snapshot();
584
585 let cursor_offset = cursor_ranges[0].start;
586 let cursor_point = snapshot.offset_to_point(cursor_offset);
587 let expected_editable_start = snapshot.offset_to_point(expected_editable[0].start);
588 let expected_editable_end = snapshot.offset_to_point(expected_editable[0].end);
589 let expected_context_start = snapshot.offset_to_point(expected_context[0].start);
590 let expected_context_end = snapshot.offset_to_point(expected_context[0].end);
591
592 let (actual_editable, actual_context) =
593 editable_and_context_ranges_for_cursor_position(
594 cursor_point,
595 &snapshot,
596 test_case.editable_token_limit,
597 test_case.context_token_limit,
598 );
599
                // Collects the buffer text covered by a point range, for the failure report.
600 let range_text = |start: Point, end: Point| -> String {
601 snapshot.text_for_range(start..end).collect()
602 };
603
604 let editable_match = actual_editable.start == expected_editable_start
605 && actual_editable.end == expected_editable_end;
606 let context_match = actual_context.start == expected_context_start
607 && actual_context.end == expected_context_end;
608
                // Print a readable expected-vs-actual report before panicking.
609 if !editable_match || !context_match {
610 println!("\n=== FAILED: {} ===", test_case.name);
611 if !editable_match {
612 println!(
613 "\nExpected editable ({:?}..{:?}):",
614 expected_editable_start, expected_editable_end
615 );
616 println!(
617 "---\n{}---",
618 range_text(expected_editable_start, expected_editable_end)
619 );
620 println!(
621 "\nActual editable ({:?}..{:?}):",
622 actual_editable.start, actual_editable.end
623 );
624 println!(
625 "---\n{}---",
626 range_text(actual_editable.start, actual_editable.end)
627 );
628 }
629 if !context_match {
630 println!(
631 "\nExpected context ({:?}..{:?}):",
632 expected_context_start, expected_context_end
633 );
634 println!(
635 "---\n{}---",
636 range_text(expected_context_start, expected_context_end)
637 );
638 println!(
639 "\nActual context ({:?}..{:?}):",
640 actual_context.start, actual_context.end
641 );
642 println!(
643 "---\n{}---",
644 range_text(actual_context.start, actual_context.end)
645 );
646 }
647 panic!("Test '{}' failed - see output above", test_case.name);
648 }
649
                // The closure passed to `cx.new` returns the buffer it built.
650 buffer
651 });
652 }
653 }
654}