1use language::BufferSnapshot;
2use std::ops::Range;
3use text::{OffsetRangeExt as _, Point, ToOffset as _, ToPoint as _};
4use tree_sitter::{Node, TreeCursor};
5use util::RangeExt;
6
7// TODO:
8//
9// - Test parent signatures
10//
11// - Decide whether to count signatures against the excerpt size. Could instead defer this to prompt
12// planning.
13//
14// - Still return an excerpt even if the line around the cursor doesn't fit (e.g. for a markdown
15// paragraph).
16//
17// - Truncation of long lines.
18//
19// - Filter outer syntax layers that don't support edit prediction.
20
21#[derive(Debug, Clone)]
22pub struct EditPredictionExcerptOptions {
23 /// Limit for the number of bytes in the window around the cursor.
24 pub max_bytes: usize,
25 /// Minimum number of bytes in the window around the cursor. When syntax tree selection results
26 /// in an excerpt smaller than this, it will fall back on line-based selection.
27 pub min_bytes: usize,
28 /// Target ratio of bytes before the cursor divided by total bytes in the window.
29 pub target_before_cursor_over_total_bytes: f32,
30 /// Whether to include parent signatures
31 pub include_parent_signatures: bool,
32}
33
34#[derive(Clone)]
35pub struct EditPredictionExcerpt {
36 pub range: Range<usize>,
37 pub parent_signature_ranges: Vec<Range<usize>>,
38 pub size: usize,
39}
40
41#[derive(Clone)]
42pub struct EditPredictionExcerptText {
43 pub body: String,
44 pub parent_signatures: Vec<String>,
45}
46
47impl EditPredictionExcerpt {
48 pub fn text(&self, buffer: &BufferSnapshot) -> EditPredictionExcerptText {
49 let body = buffer
50 .text_for_range(self.range.clone())
51 .collect::<String>();
52 let parent_signatures = self
53 .parent_signature_ranges
54 .iter()
55 .map(|range| buffer.text_for_range(range.clone()).collect::<String>())
56 .collect();
57 EditPredictionExcerptText {
58 body,
59 parent_signatures,
60 }
61 }
62
63 /// Selects an excerpt around a buffer position, attempting to choose logical boundaries based
64 /// on TreeSitter structure and approximately targeting a goal ratio of bytesbefore vs after the
65 /// cursor. When `include_parent_signatures` is true, the excerpt also includes the signatures
66 /// of parent outline items.
67 ///
68 /// First tries to use AST node boundaries to select the excerpt, and falls back on line-based
69 /// expansion.
70 ///
71 /// Returns `None` if the line around the cursor doesn't fit.
72 pub fn select_from_buffer(
73 query_point: Point,
74 buffer: &BufferSnapshot,
75 options: &EditPredictionExcerptOptions,
76 ) -> Option<Self> {
77 if buffer.len() <= options.max_bytes {
78 log::debug!(
79 "using entire file for excerpt since source length ({}) <= window max bytes ({})",
80 buffer.len(),
81 options.max_bytes
82 );
83 return Some(EditPredictionExcerpt::new(0..buffer.len(), Vec::new()));
84 }
85
86 let query_offset = query_point.to_offset(buffer);
87 let query_range = Point::new(query_point.row, 0).to_offset(buffer)
88 ..Point::new(query_point.row + 1, 0).to_offset(buffer);
89 if query_range.len() >= options.max_bytes {
90 return None;
91 }
92
93 // TODO: Don't compute text / annotation_range / skip converting to and from anchors.
94 let outline_items = if options.include_parent_signatures {
95 buffer
96 .outline_items_containing(query_range.clone(), false, None)
97 .into_iter()
98 .flat_map(|item| {
99 Some(ExcerptOutlineItem {
100 item_range: item.range.to_offset(&buffer),
101 signature_range: item.signature_range?.to_offset(&buffer),
102 })
103 })
104 .collect()
105 } else {
106 Vec::new()
107 };
108
109 let excerpt_selector = ExcerptSelector {
110 query_offset,
111 query_range,
112 outline_items: &outline_items,
113 buffer,
114 options,
115 };
116
117 if let Some(excerpt_ranges) = excerpt_selector.select_tree_sitter_nodes() {
118 if excerpt_ranges.size >= options.min_bytes {
119 return Some(excerpt_ranges);
120 }
121 log::debug!(
122 "tree-sitter excerpt was {} bytes, smaller than min of {}, falling back on line-based selection",
123 excerpt_ranges.size,
124 options.min_bytes
125 );
126 } else {
127 log::debug!(
128 "couldn't find excerpt via tree-sitter, falling back on line-based selection"
129 );
130 }
131
132 excerpt_selector.select_lines()
133 }
134
135 fn new(range: Range<usize>, parent_signature_ranges: Vec<Range<usize>>) -> Self {
136 let size = range.len()
137 + parent_signature_ranges
138 .iter()
139 .map(|r| r.len())
140 .sum::<usize>();
141 Self {
142 range,
143 parent_signature_ranges,
144 size,
145 }
146 }
147
148 fn with_expanded_range(&self, new_range: Range<usize>) -> Self {
149 if !new_range.contains_inclusive(&self.range) {
150 // this is an issue because parent_signature_ranges may be incorrect
151 log::error!("bug: with_expanded_range called with disjoint range");
152 }
153 let mut parent_signature_ranges = Vec::with_capacity(self.parent_signature_ranges.len());
154 let mut size = new_range.len();
155 for range in &self.parent_signature_ranges {
156 if range.contains_inclusive(&new_range) {
157 break;
158 }
159 parent_signature_ranges.push(range.clone());
160 size += range.len();
161 }
162 Self {
163 range: new_range,
164 parent_signature_ranges,
165 size,
166 }
167 }
168
169 fn parent_signatures_size(&self) -> usize {
170 self.size - self.range.len()
171 }
172}
173
174struct ExcerptSelector<'a> {
175 query_offset: usize,
176 query_range: Range<usize>,
177 outline_items: &'a [ExcerptOutlineItem],
178 buffer: &'a BufferSnapshot,
179 options: &'a EditPredictionExcerptOptions,
180}
181
182struct ExcerptOutlineItem {
183 item_range: Range<usize>,
184 signature_range: Range<usize>,
185}
186
187impl<'a> ExcerptSelector<'a> {
188 /// Finds the largest node that is smaller than the window size and contains `query_range`.
189 fn select_tree_sitter_nodes(&self) -> Option<EditPredictionExcerpt> {
190 let selected_layer_root = self.select_syntax_layer()?;
191 let mut cursor = selected_layer_root.walk();
192
193 loop {
194 let excerpt_range = node_line_start(cursor.node()).to_offset(&self.buffer)
195 ..node_line_end(cursor.node()).to_offset(&self.buffer);
196 if excerpt_range.contains_inclusive(&self.query_range) {
197 let excerpt = self.make_excerpt(excerpt_range);
198 if excerpt.size <= self.options.max_bytes {
199 return Some(self.expand_to_siblings(&mut cursor, excerpt));
200 }
201 } else {
202 // TODO: Should still be able to handle this case via AST nodes. For example, this
203 // can happen if the cursor is between two methods in a large class file.
204 return None;
205 }
206
207 if cursor
208 .goto_first_child_for_byte(self.query_range.start)
209 .is_none()
210 {
211 return None;
212 }
213 }
214 }
215
216 /// Select the smallest syntax layer that exceeds max_len, or the largest if none exceed max_len.
217 fn select_syntax_layer(&self) -> Option<Node<'_>> {
218 let mut smallest_exceeding_max_len: Option<Node<'_>> = None;
219 let mut largest: Option<Node<'_>> = None;
220 for layer in self
221 .buffer
222 .syntax_layers_for_range(self.query_range.start..self.query_range.start, true)
223 {
224 let layer_range = layer.node().byte_range();
225 if !layer_range.contains_inclusive(&self.query_range) {
226 continue;
227 }
228
229 if layer_range.len() > self.options.max_bytes {
230 match &smallest_exceeding_max_len {
231 None => smallest_exceeding_max_len = Some(layer.node()),
232 Some(existing) => {
233 if layer_range.len() < existing.byte_range().len() {
234 smallest_exceeding_max_len = Some(layer.node());
235 }
236 }
237 }
238 } else {
239 match &largest {
240 None => largest = Some(layer.node()),
241 Some(existing) if layer_range.len() > existing.byte_range().len() => {
242 largest = Some(layer.node())
243 }
244 _ => {}
245 }
246 }
247 }
248
249 smallest_exceeding_max_len.or(largest)
250 }
251
252 // motivation for this and `goto_previous_named_sibling` is to avoid including things like
253 // trailing unnamed "}" in body nodes
254 fn goto_next_named_sibling(cursor: &mut TreeCursor) -> bool {
255 while cursor.goto_next_sibling() {
256 if cursor.node().is_named() {
257 return true;
258 }
259 }
260 false
261 }
262
263 fn goto_previous_named_sibling(cursor: &mut TreeCursor) -> bool {
264 while cursor.goto_previous_sibling() {
265 if cursor.node().is_named() {
266 return true;
267 }
268 }
269 false
270 }
271
272 fn expand_to_siblings(
273 &self,
274 cursor: &mut TreeCursor,
275 mut excerpt: EditPredictionExcerpt,
276 ) -> EditPredictionExcerpt {
277 let mut forward_cursor = cursor.clone();
278 let backward_cursor = cursor;
279 let mut forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
280 let mut backward_done = !Self::goto_previous_named_sibling(backward_cursor);
281 loop {
282 if backward_done && forward_done {
283 break;
284 }
285
286 let mut forward = None;
287 while !forward_done {
288 let new_end = node_line_end(forward_cursor.node()).to_offset(&self.buffer);
289 if new_end > excerpt.range.end {
290 let new_excerpt = excerpt.with_expanded_range(excerpt.range.start..new_end);
291 if new_excerpt.size <= self.options.max_bytes {
292 forward = Some(new_excerpt);
293 break;
294 } else {
295 log::debug!("halting forward expansion, as it doesn't fit");
296 forward_done = true;
297 break;
298 }
299 }
300 forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
301 }
302
303 let mut backward = None;
304 while !backward_done {
305 let new_start = node_line_start(backward_cursor.node()).to_offset(&self.buffer);
306 if new_start < excerpt.range.start {
307 let new_excerpt = excerpt.with_expanded_range(new_start..excerpt.range.end);
308 if new_excerpt.size <= self.options.max_bytes {
309 backward = Some(new_excerpt);
310 break;
311 } else {
312 log::debug!("halting backward expansion, as it doesn't fit");
313 backward_done = true;
314 break;
315 }
316 }
317 backward_done = !Self::goto_previous_named_sibling(backward_cursor);
318 }
319
320 let go_forward = match (forward, backward) {
321 (Some(forward), Some(backward)) => {
322 let go_forward = self.is_better_excerpt(&forward, &backward);
323 if go_forward {
324 excerpt = forward;
325 } else {
326 excerpt = backward;
327 }
328 go_forward
329 }
330 (Some(forward), None) => {
331 log::debug!("expanding forward, since backward expansion has halted");
332 excerpt = forward;
333 true
334 }
335 (None, Some(backward)) => {
336 log::debug!("expanding backward, since forward expansion has halted");
337 excerpt = backward;
338 false
339 }
340 (None, None) => break,
341 };
342
343 if go_forward {
344 forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
345 } else {
346 backward_done = !Self::goto_previous_named_sibling(backward_cursor);
347 }
348 }
349
350 excerpt
351 }
352
353 fn select_lines(&self) -> Option<EditPredictionExcerpt> {
354 // early return if line containing query_offset is already too large
355 let excerpt = self.make_excerpt(self.query_range.clone());
356 if excerpt.size > self.options.max_bytes {
357 log::debug!(
358 "excerpt for cursor line is {} bytes, which exceeds the window",
359 excerpt.size
360 );
361 return None;
362 }
363 let signatures_size = excerpt.parent_signatures_size();
364 let bytes_remaining = self.options.max_bytes.saturating_sub(signatures_size);
365
366 let before_bytes =
367 (self.options.target_before_cursor_over_total_bytes * bytes_remaining as f32) as usize;
368
369 let start_point = {
370 let offset = self.query_offset.saturating_sub(before_bytes);
371 let point = offset.to_point(self.buffer);
372 Point::new(point.row + 1, 0)
373 };
374 let start_offset = start_point.to_offset(&self.buffer);
375 let end_point = {
376 let offset = start_offset + bytes_remaining;
377 let point = offset.to_point(self.buffer);
378 Point::new(point.row, 0)
379 };
380 let end_offset = end_point.to_offset(&self.buffer);
381
382 // this could be expanded further since recalculated `signature_size` may be smaller, but
383 // skipping that for now for simplicity
384 //
385 // TODO: could also consider checking if lines immediately before / after fit.
386 let excerpt = self.make_excerpt(start_offset..end_offset);
387 if excerpt.size > self.options.max_bytes {
388 log::error!(
389 "bug: line-based excerpt selection has size {}, \
390 which is {} bytes larger than the max size",
391 excerpt.size,
392 excerpt.size - self.options.max_bytes
393 );
394 }
395 return Some(excerpt);
396 }
397
398 fn make_excerpt(&self, range: Range<usize>) -> EditPredictionExcerpt {
399 let parent_signature_ranges = self
400 .outline_items
401 .iter()
402 .filter(|item| item.item_range.contains_inclusive(&range))
403 .map(|item| item.signature_range.clone())
404 .collect();
405 EditPredictionExcerpt::new(range, parent_signature_ranges)
406 }
407
408 /// Returns `true` if the `forward` excerpt is a better choice than the `backward` excerpt.
409 fn is_better_excerpt(
410 &self,
411 forward: &EditPredictionExcerpt,
412 backward: &EditPredictionExcerpt,
413 ) -> bool {
414 let forward_ratio = self.excerpt_range_ratio(forward);
415 let backward_ratio = self.excerpt_range_ratio(backward);
416 let forward_delta =
417 (forward_ratio - self.options.target_before_cursor_over_total_bytes).abs();
418 let backward_delta =
419 (backward_ratio - self.options.target_before_cursor_over_total_bytes).abs();
420 let forward_is_better = forward_delta <= backward_delta;
421 if forward_is_better {
422 log::debug!(
423 "expanding forward since {} is closer than {} to {}",
424 forward_ratio,
425 backward_ratio,
426 self.options.target_before_cursor_over_total_bytes
427 );
428 } else {
429 log::debug!(
430 "expanding backward since {} is closer than {} to {}",
431 backward_ratio,
432 forward_ratio,
433 self.options.target_before_cursor_over_total_bytes
434 );
435 }
436 forward_is_better
437 }
438
439 /// Returns the ratio of bytes before the cursor over bytes within the range.
440 fn excerpt_range_ratio(&self, excerpt: &EditPredictionExcerpt) -> f32 {
441 let Some(bytes_before_cursor) = self.query_offset.checked_sub(excerpt.range.start) else {
442 log::error!("bug: edit prediction cursor offset is not outside the excerpt");
443 return 0.0;
444 };
445 bytes_before_cursor as f32 / excerpt.range.len() as f32
446 }
447}
448
449fn node_line_start(node: Node) -> Point {
450 Point::new(node.start_position().row as u32, 0)
451}
452
453fn node_line_end(node: Node) -> Point {
454 Point::new(node.end_position().row as u32 + 1, 0)
455}
456
457#[cfg(test)]
458mod tests {
459 use super::*;
460 use gpui::{AppContext, TestAppContext};
461 use language::{Buffer, Language, LanguageConfig, LanguageMatcher, tree_sitter_rust};
462 use util::test::{generate_marked_text, marked_text_offsets_by};
463
464 fn create_buffer(text: &str, cx: &mut TestAppContext) -> BufferSnapshot {
465 let buffer = cx.new(|cx| Buffer::local(text, cx).with_language(rust_lang().into(), cx));
466 buffer.read_with(cx, |buffer, _| buffer.snapshot())
467 }
468
469 fn rust_lang() -> Language {
470 Language::new(
471 LanguageConfig {
472 name: "Rust".into(),
473 matcher: LanguageMatcher {
474 path_suffixes: vec!["rs".to_string()],
475 ..Default::default()
476 },
477 ..Default::default()
478 },
479 Some(tree_sitter_rust::LANGUAGE.into()),
480 )
481 .with_outline_query(include_str!("../../languages/src/rust/outline.scm"))
482 .unwrap()
483 }
484
485 fn cursor_and_excerpt_range(text: &str) -> (String, usize, Range<usize>) {
486 let (text, offsets) = marked_text_offsets_by(text, vec!['ˇ', '«', '»']);
487 (text, offsets[&'ˇ'][0], offsets[&'«'][0]..offsets[&'»'][0])
488 }
489
490 fn check_example(options: EditPredictionExcerptOptions, text: &str, cx: &mut TestAppContext) {
491 let (text, cursor, expected_excerpt) = cursor_and_excerpt_range(text);
492
493 let buffer = create_buffer(&text, cx);
494 let cursor_point = cursor.to_point(&buffer);
495
496 let excerpt = EditPredictionExcerpt::select_from_buffer(cursor_point, &buffer, &options)
497 .expect("Should select an excerpt");
498 pretty_assertions::assert_eq!(
499 generate_marked_text(&text, std::slice::from_ref(&excerpt.range), false),
500 generate_marked_text(&text, &[expected_excerpt], false)
501 );
502 assert!(excerpt.size <= options.max_bytes);
503 assert!(excerpt.range.contains(&cursor));
504 }
505
506 #[gpui::test]
507 fn test_ast_based_selection_current_node(cx: &mut TestAppContext) {
508 zlog::init_test();
509 let text = r#"
510fn main() {
511 let x = 1;
512« let ˇy = 2;
513» let z = 3;
514}"#;
515
516 let options = EditPredictionExcerptOptions {
517 max_bytes: 20,
518 min_bytes: 10,
519 target_before_cursor_over_total_bytes: 0.5,
520 include_parent_signatures: false,
521 };
522
523 check_example(options, text, cx);
524 }
525
526 #[gpui::test]
527 fn test_ast_based_selection_parent_node(cx: &mut TestAppContext) {
528 zlog::init_test();
529 let text = r#"
530fn foo() {}
531
532«fn main() {
533 let x = 1;
534 let ˇy = 2;
535 let z = 3;
536}
537»
538fn bar() {}"#;
539
540 let options = EditPredictionExcerptOptions {
541 max_bytes: 65,
542 min_bytes: 10,
543 target_before_cursor_over_total_bytes: 0.5,
544 include_parent_signatures: false,
545 };
546
547 check_example(options, text, cx);
548 }
549
550 #[gpui::test]
551 fn test_ast_based_selection_expands_to_siblings(cx: &mut TestAppContext) {
552 zlog::init_test();
553 let text = r#"
554fn main() {
555« let x = 1;
556 let ˇy = 2;
557 let z = 3;
558»}"#;
559
560 let options = EditPredictionExcerptOptions {
561 max_bytes: 50,
562 min_bytes: 10,
563 target_before_cursor_over_total_bytes: 0.5,
564 include_parent_signatures: false,
565 };
566
567 check_example(options, text, cx);
568 }
569
570 #[gpui::test]
571 fn test_line_based_selection(cx: &mut TestAppContext) {
572 zlog::init_test();
573 let text = r#"
574fn main() {
575 let x = 1;
576« if true {
577 let ˇy = 2;
578 }
579 let z = 3;
580»}"#;
581
582 let options = EditPredictionExcerptOptions {
583 max_bytes: 60,
584 min_bytes: 45,
585 target_before_cursor_over_total_bytes: 0.5,
586 include_parent_signatures: false,
587 };
588
589 check_example(options, text, cx);
590 }
591
592 #[gpui::test]
593 fn test_line_based_selection_with_before_cursor_ratio(cx: &mut TestAppContext) {
594 zlog::init_test();
595 let text = r#"
596 fn main() {
597« let a = 1;
598 let b = 2;
599 let c = 3;
600 let ˇd = 4;
601 let e = 5;
602 let f = 6;
603»
604 let g = 7;
605 }"#;
606
607 let options = EditPredictionExcerptOptions {
608 max_bytes: 120,
609 min_bytes: 10,
610 target_before_cursor_over_total_bytes: 0.6,
611 include_parent_signatures: false,
612 };
613
614 check_example(options, text, cx);
615 }
616}