1use cloud_llm_client::predict_edits_v3::Line;
2use language::{BufferSnapshot, LanguageId, Point, ToOffset as _, ToPoint as _};
3use std::ops::Range;
4use tree_sitter::{Node, TreeCursor};
5use util::RangeExt;
6
7// TODO:
8//
9// - Test parent signatures
10//
11// - Decide whether to count signatures against the excerpt size. Could instead defer this to prompt
12// planning.
13//
14// - Still return an excerpt even if the line around the cursor doesn't fit (e.g. for a markdown
15// paragraph).
16//
17// - Truncation of long lines.
18//
19// - Filter outer syntax layers that don't support edit prediction.
20
21#[derive(Debug, Clone, PartialEq)]
22pub struct EditPredictionExcerptOptions {
23 /// Limit for the number of bytes in the window around the cursor.
24 pub max_bytes: usize,
25 /// Minimum number of bytes in the window around the cursor. When syntax tree selection results
26 /// in an excerpt smaller than this, it will fall back on line-based selection.
27 pub min_bytes: usize,
28 /// Target ratio of bytes before the cursor divided by total bytes in the window.
29 pub target_before_cursor_over_total_bytes: f32,
30}
31
32#[derive(Debug, Clone)]
33pub struct EditPredictionExcerpt {
34 pub range: Range<usize>,
35 pub line_range: Range<Line>,
36 pub size: usize,
37}
38
39#[derive(Debug, Clone)]
40pub struct EditPredictionExcerptText {
41 pub body: String,
42 pub language_id: Option<LanguageId>,
43}
44
45impl EditPredictionExcerpt {
46 pub fn text(&self, buffer: &BufferSnapshot) -> EditPredictionExcerptText {
47 let body = buffer
48 .text_for_range(self.range.clone())
49 .collect::<String>();
50 let language_id = buffer.language().map(|l| l.id());
51 EditPredictionExcerptText { body, language_id }
52 }
53
54 /// Selects an excerpt around a buffer position, attempting to choose logical boundaries based
55 /// on TreeSitter structure and approximately targeting a goal ratio of bytesbefore vs after the
56 /// cursor.
57 ///
58 /// When `index` is provided, the excerpt will include the signatures of parent outline items.
59 ///
60 /// First tries to use AST node boundaries to select the excerpt, and falls back on line-based
61 /// expansion.
62 ///
63 /// Returns `None` if the line around the cursor doesn't fit.
64 pub fn select_from_buffer(
65 query_point: Point,
66 buffer: &BufferSnapshot,
67 options: &EditPredictionExcerptOptions,
68 ) -> Option<Self> {
69 if buffer.len() <= options.max_bytes {
70 log::debug!(
71 "using entire file for excerpt since source length ({}) <= window max bytes ({})",
72 buffer.len(),
73 options.max_bytes
74 );
75 let offset_range = 0..buffer.len();
76 let line_range = Line(0)..Line(buffer.max_point().row);
77 return Some(EditPredictionExcerpt::new(offset_range, line_range));
78 }
79
80 let query_offset = query_point.to_offset(buffer);
81 let query_line_range = query_point.row..query_point.row + 1;
82 let query_range = Point::new(query_line_range.start, 0).to_offset(buffer)
83 ..Point::new(query_line_range.end, 0).to_offset(buffer);
84 if query_range.len() >= options.max_bytes {
85 return None;
86 }
87
88 let excerpt_selector = ExcerptSelector {
89 query_offset,
90 query_range,
91 query_line_range: Line(query_line_range.start)..Line(query_line_range.end),
92 buffer,
93 options,
94 };
95
96 if let Some(excerpt) = excerpt_selector.select_tree_sitter_nodes() {
97 if excerpt.size >= options.min_bytes {
98 return Some(excerpt);
99 }
100 log::debug!(
101 "tree-sitter excerpt was {} bytes, smaller than min of {}, falling back on line-based selection",
102 excerpt.size,
103 options.min_bytes
104 );
105 } else {
106 log::debug!(
107 "couldn't find excerpt via tree-sitter, falling back on line-based selection"
108 );
109 }
110
111 excerpt_selector.select_lines()
112 }
113
114 fn new(range: Range<usize>, line_range: Range<Line>) -> Self {
115 Self {
116 size: range.len(),
117 range,
118 line_range,
119 }
120 }
121
122 fn with_expanded_range(&self, new_range: Range<usize>, new_line_range: Range<Line>) -> Self {
123 if !new_range.contains_inclusive(&self.range) {
124 // this is an issue because parent_signature_ranges may be incorrect
125 log::error!("bug: with_expanded_range called with disjoint range");
126 }
127 Self::new(new_range, new_line_range)
128 }
129
130 fn parent_signatures_size(&self) -> usize {
131 self.size - self.range.len()
132 }
133}
134
135struct ExcerptSelector<'a> {
136 query_offset: usize,
137 query_range: Range<usize>,
138 query_line_range: Range<Line>,
139 buffer: &'a BufferSnapshot,
140 options: &'a EditPredictionExcerptOptions,
141}
142
143impl<'a> ExcerptSelector<'a> {
144 /// Finds the largest node that is smaller than the window size and contains `query_range`.
145 fn select_tree_sitter_nodes(&self) -> Option<EditPredictionExcerpt> {
146 let selected_layer_root = self.select_syntax_layer()?;
147 let mut cursor = selected_layer_root.walk();
148
149 loop {
150 let line_start = node_line_start(cursor.node());
151 let line_end = node_line_end(cursor.node());
152 let line_range = Line(line_start.row)..Line(line_end.row);
153 let excerpt_range =
154 line_start.to_offset(&self.buffer)..line_end.to_offset(&self.buffer);
155 if excerpt_range.contains_inclusive(&self.query_range) {
156 let excerpt = self.make_excerpt(excerpt_range, line_range);
157 if excerpt.size <= self.options.max_bytes {
158 return Some(self.expand_to_siblings(&mut cursor, excerpt));
159 }
160 } else {
161 // TODO: Should still be able to handle this case via AST nodes. For example, this
162 // can happen if the cursor is between two methods in a large class file.
163 return None;
164 }
165
166 if cursor
167 .goto_first_child_for_byte(self.query_range.start)
168 .is_none()
169 {
170 return None;
171 }
172 }
173 }
174
175 /// Select the smallest syntax layer that exceeds max_len, or the largest if none exceed max_len.
176 fn select_syntax_layer(&self) -> Option<Node<'_>> {
177 let mut smallest_exceeding_max_len: Option<Node<'_>> = None;
178 let mut largest: Option<Node<'_>> = None;
179 for layer in self
180 .buffer
181 .syntax_layers_for_range(self.query_range.start..self.query_range.start, true)
182 {
183 let layer_range = layer.node().byte_range();
184 if !layer_range.contains_inclusive(&self.query_range) {
185 continue;
186 }
187
188 if layer_range.len() > self.options.max_bytes {
189 match &smallest_exceeding_max_len {
190 None => smallest_exceeding_max_len = Some(layer.node()),
191 Some(existing) => {
192 if layer_range.len() < existing.byte_range().len() {
193 smallest_exceeding_max_len = Some(layer.node());
194 }
195 }
196 }
197 } else {
198 match &largest {
199 None => largest = Some(layer.node()),
200 Some(existing) if layer_range.len() > existing.byte_range().len() => {
201 largest = Some(layer.node())
202 }
203 _ => {}
204 }
205 }
206 }
207
208 smallest_exceeding_max_len.or(largest)
209 }
210
211 // motivation for this and `goto_previous_named_sibling` is to avoid including things like
212 // trailing unnamed "}" in body nodes
213 fn goto_next_named_sibling(cursor: &mut TreeCursor) -> bool {
214 while cursor.goto_next_sibling() {
215 if cursor.node().is_named() {
216 return true;
217 }
218 }
219 false
220 }
221
222 fn goto_previous_named_sibling(cursor: &mut TreeCursor) -> bool {
223 while cursor.goto_previous_sibling() {
224 if cursor.node().is_named() {
225 return true;
226 }
227 }
228 false
229 }
230
231 fn expand_to_siblings(
232 &self,
233 cursor: &mut TreeCursor,
234 mut excerpt: EditPredictionExcerpt,
235 ) -> EditPredictionExcerpt {
236 let mut forward_cursor = cursor.clone();
237 let backward_cursor = cursor;
238 let mut forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
239 let mut backward_done = !Self::goto_previous_named_sibling(backward_cursor);
240 loop {
241 if backward_done && forward_done {
242 break;
243 }
244
245 let mut forward = None;
246 while !forward_done {
247 let new_end_point = node_line_end(forward_cursor.node());
248 let new_end = new_end_point.to_offset(&self.buffer);
249 if new_end > excerpt.range.end {
250 let new_excerpt = excerpt.with_expanded_range(
251 excerpt.range.start..new_end,
252 excerpt.line_range.start..Line(new_end_point.row),
253 );
254 if new_excerpt.size <= self.options.max_bytes {
255 forward = Some(new_excerpt);
256 break;
257 } else {
258 log::debug!("halting forward expansion, as it doesn't fit");
259 forward_done = true;
260 break;
261 }
262 }
263 forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
264 }
265
266 let mut backward = None;
267 while !backward_done {
268 let new_start_point = node_line_start(backward_cursor.node());
269 let new_start = new_start_point.to_offset(&self.buffer);
270 if new_start < excerpt.range.start {
271 let new_excerpt = excerpt.with_expanded_range(
272 new_start..excerpt.range.end,
273 Line(new_start_point.row)..excerpt.line_range.end,
274 );
275 if new_excerpt.size <= self.options.max_bytes {
276 backward = Some(new_excerpt);
277 break;
278 } else {
279 log::debug!("halting backward expansion, as it doesn't fit");
280 backward_done = true;
281 break;
282 }
283 }
284 backward_done = !Self::goto_previous_named_sibling(backward_cursor);
285 }
286
287 let go_forward = match (forward, backward) {
288 (Some(forward), Some(backward)) => {
289 let go_forward = self.is_better_excerpt(&forward, &backward);
290 if go_forward {
291 excerpt = forward;
292 } else {
293 excerpt = backward;
294 }
295 go_forward
296 }
297 (Some(forward), None) => {
298 log::debug!("expanding forward, since backward expansion has halted");
299 excerpt = forward;
300 true
301 }
302 (None, Some(backward)) => {
303 log::debug!("expanding backward, since forward expansion has halted");
304 excerpt = backward;
305 false
306 }
307 (None, None) => break,
308 };
309
310 if go_forward {
311 forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
312 } else {
313 backward_done = !Self::goto_previous_named_sibling(backward_cursor);
314 }
315 }
316
317 excerpt
318 }
319
320 fn select_lines(&self) -> Option<EditPredictionExcerpt> {
321 // early return if line containing query_offset is already too large
322 let excerpt = self.make_excerpt(self.query_range.clone(), self.query_line_range.clone());
323 if excerpt.size > self.options.max_bytes {
324 log::debug!(
325 "excerpt for cursor line is {} bytes, which exceeds the window",
326 excerpt.size
327 );
328 return None;
329 }
330 let signatures_size = excerpt.parent_signatures_size();
331 let bytes_remaining = self.options.max_bytes.saturating_sub(signatures_size);
332
333 let before_bytes =
334 (self.options.target_before_cursor_over_total_bytes * bytes_remaining as f32) as usize;
335
336 let start_line = {
337 let offset = self.query_offset.saturating_sub(before_bytes);
338 let point = offset.to_point(self.buffer);
339 Line(point.row + 1)
340 };
341 let start_offset = Point::new(start_line.0, 0).to_offset(&self.buffer);
342 let end_line = {
343 let offset = start_offset + bytes_remaining;
344 let point = offset.to_point(self.buffer);
345 Line(point.row)
346 };
347 let end_offset = Point::new(end_line.0, 0).to_offset(&self.buffer);
348
349 // this could be expanded further since recalculated `signature_size` may be smaller, but
350 // skipping that for now for simplicity
351 //
352 // TODO: could also consider checking if lines immediately before / after fit.
353 let excerpt = self.make_excerpt(start_offset..end_offset, start_line..end_line);
354 if excerpt.size > self.options.max_bytes {
355 log::error!(
356 "bug: line-based excerpt selection has size {}, \
357 which is {} bytes larger than the max size",
358 excerpt.size,
359 excerpt.size - self.options.max_bytes
360 );
361 }
362 return Some(excerpt);
363 }
364
365 fn make_excerpt(&self, range: Range<usize>, line_range: Range<Line>) -> EditPredictionExcerpt {
366 EditPredictionExcerpt::new(range, line_range)
367 }
368
369 /// Returns `true` if the `forward` excerpt is a better choice than the `backward` excerpt.
370 fn is_better_excerpt(
371 &self,
372 forward: &EditPredictionExcerpt,
373 backward: &EditPredictionExcerpt,
374 ) -> bool {
375 let forward_ratio = self.excerpt_range_ratio(forward);
376 let backward_ratio = self.excerpt_range_ratio(backward);
377 let forward_delta =
378 (forward_ratio - self.options.target_before_cursor_over_total_bytes).abs();
379 let backward_delta =
380 (backward_ratio - self.options.target_before_cursor_over_total_bytes).abs();
381 let forward_is_better = forward_delta <= backward_delta;
382 if forward_is_better {
383 log::debug!(
384 "expanding forward since {} is closer than {} to {}",
385 forward_ratio,
386 backward_ratio,
387 self.options.target_before_cursor_over_total_bytes
388 );
389 } else {
390 log::debug!(
391 "expanding backward since {} is closer than {} to {}",
392 backward_ratio,
393 forward_ratio,
394 self.options.target_before_cursor_over_total_bytes
395 );
396 }
397 forward_is_better
398 }
399
400 /// Returns the ratio of bytes before the cursor over bytes within the range.
401 fn excerpt_range_ratio(&self, excerpt: &EditPredictionExcerpt) -> f32 {
402 let Some(bytes_before_cursor) = self.query_offset.checked_sub(excerpt.range.start) else {
403 log::error!("bug: edit prediction cursor offset is not outside the excerpt");
404 return 0.0;
405 };
406 bytes_before_cursor as f32 / excerpt.range.len() as f32
407 }
408}
409
410fn node_line_start(node: Node) -> Point {
411 Point::new(node.start_position().row as u32, 0)
412}
413
414fn node_line_end(node: Node) -> Point {
415 Point::new(node.end_position().row as u32 + 1, 0)
416}
417
418#[cfg(test)]
419mod tests {
420 use super::*;
421 use gpui::{AppContext, TestAppContext};
422 use language::Buffer;
423 use util::test::{generate_marked_text, marked_text_offsets_by};
424
425 fn create_buffer(text: &str, cx: &mut TestAppContext) -> BufferSnapshot {
426 let buffer = cx.new(|cx| Buffer::local(text, cx).with_language(language::rust_lang(), cx));
427 buffer.read_with(cx, |buffer, _| buffer.snapshot())
428 }
429
430 fn cursor_and_excerpt_range(text: &str) -> (String, usize, Range<usize>) {
431 let (text, offsets) = marked_text_offsets_by(text, vec!['ˇ', '«', '»']);
432 (text, offsets[&'ˇ'][0], offsets[&'«'][0]..offsets[&'»'][0])
433 }
434
435 fn check_example(options: EditPredictionExcerptOptions, text: &str, cx: &mut TestAppContext) {
436 let (text, cursor, expected_excerpt) = cursor_and_excerpt_range(text);
437
438 let buffer = create_buffer(&text, cx);
439 let cursor_point = cursor.to_point(&buffer);
440
441 let excerpt = EditPredictionExcerpt::select_from_buffer(cursor_point, &buffer, &options)
442 .expect("Should select an excerpt");
443 pretty_assertions::assert_eq!(
444 generate_marked_text(&text, std::slice::from_ref(&excerpt.range), false),
445 generate_marked_text(&text, &[expected_excerpt], false)
446 );
447 assert!(excerpt.size <= options.max_bytes);
448 assert!(excerpt.range.contains(&cursor));
449 }
450
451 #[gpui::test]
452 fn test_ast_based_selection_current_node(cx: &mut TestAppContext) {
453 zlog::init_test();
454 let text = r#"
455fn main() {
456 let x = 1;
457« let ˇy = 2;
458» let z = 3;
459}"#;
460
461 let options = EditPredictionExcerptOptions {
462 max_bytes: 20,
463 min_bytes: 10,
464 target_before_cursor_over_total_bytes: 0.5,
465 };
466
467 check_example(options, text, cx);
468 }
469
470 #[gpui::test]
471 fn test_ast_based_selection_parent_node(cx: &mut TestAppContext) {
472 zlog::init_test();
473 let text = r#"
474fn foo() {}
475
476«fn main() {
477 let x = 1;
478 let ˇy = 2;
479 let z = 3;
480}
481»
482fn bar() {}"#;
483
484 let options = EditPredictionExcerptOptions {
485 max_bytes: 65,
486 min_bytes: 10,
487 target_before_cursor_over_total_bytes: 0.5,
488 };
489
490 check_example(options, text, cx);
491 }
492
493 #[gpui::test]
494 fn test_ast_based_selection_expands_to_siblings(cx: &mut TestAppContext) {
495 zlog::init_test();
496 let text = r#"
497fn main() {
498« let x = 1;
499 let ˇy = 2;
500 let z = 3;
501»}"#;
502
503 let options = EditPredictionExcerptOptions {
504 max_bytes: 50,
505 min_bytes: 10,
506 target_before_cursor_over_total_bytes: 0.5,
507 };
508
509 check_example(options, text, cx);
510 }
511
512 #[gpui::test]
513 fn test_line_based_selection(cx: &mut TestAppContext) {
514 zlog::init_test();
515 let text = r#"
516fn main() {
517 let x = 1;
518« if true {
519 let ˇy = 2;
520 }
521 let z = 3;
522»}"#;
523
524 let options = EditPredictionExcerptOptions {
525 max_bytes: 60,
526 min_bytes: 45,
527 target_before_cursor_over_total_bytes: 0.5,
528 };
529
530 check_example(options, text, cx);
531 }
532
533 #[gpui::test]
534 fn test_line_based_selection_with_before_cursor_ratio(cx: &mut TestAppContext) {
535 zlog::init_test();
536 let text = r#"
537 fn main() {
538« let a = 1;
539 let b = 2;
540 let c = 3;
541 let ˇd = 4;
542 let e = 5;
543 let f = 6;
544»
545 let g = 7;
546 }"#;
547
548 let options = EditPredictionExcerptOptions {
549 max_bytes: 120,
550 min_bytes: 10,
551 target_before_cursor_over_total_bytes: 0.6,
552 };
553
554 check_example(options, text, cx);
555 }
556}