excerpt.rs

  1use language::BufferSnapshot;
  2use std::ops::Range;
  3use text::{OffsetRangeExt as _, Point, ToOffset as _, ToPoint as _};
  4use tree_sitter::{Node, TreeCursor};
  5use util::RangeExt;
  6
  7// TODO:
  8//
  9// - Test parent signatures
 10//
 11// - Decide whether to count signatures against the excerpt size. Could instead defer this to prompt
 12// planning.
 13//
 14// - Still return an excerpt even if the line around the cursor doesn't fit (e.g. for a markdown
 15// paragraph).
 16//
 17// - Truncation of long lines.
 18//
 19// - Filter outer syntax layers that don't support edit prediction.
 20
 21#[derive(Debug, Clone)]
 22pub struct EditPredictionExcerptOptions {
 23    /// Limit for the number of bytes in the window around the cursor.
 24    pub max_bytes: usize,
 25    /// Minimum number of bytes in the window around the cursor. When syntax tree selection results
 26    /// in an excerpt smaller than this, it will fall back on line-based selection.
 27    pub min_bytes: usize,
 28    /// Target ratio of bytes before the cursor divided by total bytes in the window.
 29    pub target_before_cursor_over_total_bytes: f32,
 30    /// Whether to include parent signatures
 31    pub include_parent_signatures: bool,
 32}
 33
 34#[derive(Clone)]
 35pub struct EditPredictionExcerpt {
 36    pub range: Range<usize>,
 37    pub parent_signature_ranges: Vec<Range<usize>>,
 38    pub size: usize,
 39}
 40
 41#[derive(Clone)]
 42pub struct EditPredictionExcerptText {
 43    pub body: String,
 44    pub parent_signatures: Vec<String>,
 45}
 46
 47impl EditPredictionExcerpt {
 48    pub fn text(&self, buffer: &BufferSnapshot) -> EditPredictionExcerptText {
 49        let body = buffer
 50            .text_for_range(self.range.clone())
 51            .collect::<String>();
 52        let parent_signatures = self
 53            .parent_signature_ranges
 54            .iter()
 55            .map(|range| buffer.text_for_range(range.clone()).collect::<String>())
 56            .collect();
 57        EditPredictionExcerptText {
 58            body,
 59            parent_signatures,
 60        }
 61    }
 62
 63    /// Selects an excerpt around a buffer position, attempting to choose logical boundaries based
 64    /// on TreeSitter structure and approximately targeting a goal ratio of bytesbefore vs after the
 65    /// cursor. When `include_parent_signatures` is true, the excerpt also includes the signatures
 66    /// of parent outline items.
 67    ///
 68    /// First tries to use AST node boundaries to select the excerpt, and falls back on line-based
 69    /// expansion.
 70    ///
 71    /// Returns `None` if the line around the cursor doesn't fit.
 72    pub fn select_from_buffer(
 73        query_point: Point,
 74        buffer: &BufferSnapshot,
 75        options: &EditPredictionExcerptOptions,
 76    ) -> Option<Self> {
 77        if buffer.len() <= options.max_bytes {
 78            log::debug!(
 79                "using entire file for excerpt since source length ({}) <= window max bytes ({})",
 80                buffer.len(),
 81                options.max_bytes
 82            );
 83            return Some(EditPredictionExcerpt::new(0..buffer.len(), Vec::new()));
 84        }
 85
 86        let query_offset = query_point.to_offset(buffer);
 87        let query_range = Point::new(query_point.row, 0).to_offset(buffer)
 88            ..Point::new(query_point.row + 1, 0).to_offset(buffer);
 89        if query_range.len() >= options.max_bytes {
 90            return None;
 91        }
 92
 93        // TODO: Don't compute text / annotation_range / skip converting to and from anchors.
 94        let outline_items = if options.include_parent_signatures {
 95            buffer
 96                .outline_items_containing(query_range.clone(), false, None)
 97                .into_iter()
 98                .flat_map(|item| {
 99                    Some(ExcerptOutlineItem {
100                        item_range: item.range.to_offset(&buffer),
101                        signature_range: item.signature_range?.to_offset(&buffer),
102                    })
103                })
104                .collect()
105        } else {
106            Vec::new()
107        };
108
109        let excerpt_selector = ExcerptSelector {
110            query_offset,
111            query_range,
112            outline_items: &outline_items,
113            buffer,
114            options,
115        };
116
117        if let Some(excerpt_ranges) = excerpt_selector.select_tree_sitter_nodes() {
118            if excerpt_ranges.size >= options.min_bytes {
119                return Some(excerpt_ranges);
120            }
121            log::debug!(
122                "tree-sitter excerpt was {} bytes, smaller than min of {}, falling back on line-based selection",
123                excerpt_ranges.size,
124                options.min_bytes
125            );
126        } else {
127            log::debug!(
128                "couldn't find excerpt via tree-sitter, falling back on line-based selection"
129            );
130        }
131
132        excerpt_selector.select_lines()
133    }
134
135    fn new(range: Range<usize>, parent_signature_ranges: Vec<Range<usize>>) -> Self {
136        let size = range.len()
137            + parent_signature_ranges
138                .iter()
139                .map(|r| r.len())
140                .sum::<usize>();
141        Self {
142            range,
143            parent_signature_ranges,
144            size,
145        }
146    }
147
148    fn with_expanded_range(&self, new_range: Range<usize>) -> Self {
149        if !new_range.contains_inclusive(&self.range) {
150            // this is an issue because parent_signature_ranges may be incorrect
151            log::error!("bug: with_expanded_range called with disjoint range");
152        }
153        let mut parent_signature_ranges = Vec::with_capacity(self.parent_signature_ranges.len());
154        let mut size = new_range.len();
155        for range in &self.parent_signature_ranges {
156            if range.contains_inclusive(&new_range) {
157                break;
158            }
159            parent_signature_ranges.push(range.clone());
160            size += range.len();
161        }
162        Self {
163            range: new_range,
164            parent_signature_ranges,
165            size,
166        }
167    }
168
169    fn parent_signatures_size(&self) -> usize {
170        self.size - self.range.len()
171    }
172}
173
174struct ExcerptSelector<'a> {
175    query_offset: usize,
176    query_range: Range<usize>,
177    outline_items: &'a [ExcerptOutlineItem],
178    buffer: &'a BufferSnapshot,
179    options: &'a EditPredictionExcerptOptions,
180}
181
182struct ExcerptOutlineItem {
183    item_range: Range<usize>,
184    signature_range: Range<usize>,
185}
186
187impl<'a> ExcerptSelector<'a> {
188    /// Finds the largest node that is smaller than the window size and contains `query_range`.
189    fn select_tree_sitter_nodes(&self) -> Option<EditPredictionExcerpt> {
190        let selected_layer_root = self.select_syntax_layer()?;
191        let mut cursor = selected_layer_root.walk();
192
193        loop {
194            let excerpt_range = node_line_start(cursor.node()).to_offset(&self.buffer)
195                ..node_line_end(cursor.node()).to_offset(&self.buffer);
196            if excerpt_range.contains_inclusive(&self.query_range) {
197                let excerpt = self.make_excerpt(excerpt_range);
198                if excerpt.size <= self.options.max_bytes {
199                    return Some(self.expand_to_siblings(&mut cursor, excerpt));
200                }
201            } else {
202                // TODO: Should still be able to handle this case via AST nodes. For example, this
203                // can happen if the cursor is between two methods in a large class file.
204                return None;
205            }
206
207            if cursor
208                .goto_first_child_for_byte(self.query_range.start)
209                .is_none()
210            {
211                return None;
212            }
213        }
214    }
215
216    /// Select the smallest syntax layer that exceeds max_len, or the largest if none exceed max_len.
217    fn select_syntax_layer(&self) -> Option<Node<'_>> {
218        let mut smallest_exceeding_max_len: Option<Node<'_>> = None;
219        let mut largest: Option<Node<'_>> = None;
220        for layer in self
221            .buffer
222            .syntax_layers_for_range(self.query_range.start..self.query_range.start, true)
223        {
224            let layer_range = layer.node().byte_range();
225            if !layer_range.contains_inclusive(&self.query_range) {
226                continue;
227            }
228
229            if layer_range.len() > self.options.max_bytes {
230                match &smallest_exceeding_max_len {
231                    None => smallest_exceeding_max_len = Some(layer.node()),
232                    Some(existing) => {
233                        if layer_range.len() < existing.byte_range().len() {
234                            smallest_exceeding_max_len = Some(layer.node());
235                        }
236                    }
237                }
238            } else {
239                match &largest {
240                    None => largest = Some(layer.node()),
241                    Some(existing) if layer_range.len() > existing.byte_range().len() => {
242                        largest = Some(layer.node())
243                    }
244                    _ => {}
245                }
246            }
247        }
248
249        smallest_exceeding_max_len.or(largest)
250    }
251
252    // motivation for this and `goto_previous_named_sibling` is to avoid including things like
253    // trailing unnamed "}" in body nodes
254    fn goto_next_named_sibling(cursor: &mut TreeCursor) -> bool {
255        while cursor.goto_next_sibling() {
256            if cursor.node().is_named() {
257                return true;
258            }
259        }
260        false
261    }
262
263    fn goto_previous_named_sibling(cursor: &mut TreeCursor) -> bool {
264        while cursor.goto_previous_sibling() {
265            if cursor.node().is_named() {
266                return true;
267            }
268        }
269        false
270    }
271
272    fn expand_to_siblings(
273        &self,
274        cursor: &mut TreeCursor,
275        mut excerpt: EditPredictionExcerpt,
276    ) -> EditPredictionExcerpt {
277        let mut forward_cursor = cursor.clone();
278        let backward_cursor = cursor;
279        let mut forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
280        let mut backward_done = !Self::goto_previous_named_sibling(backward_cursor);
281        loop {
282            if backward_done && forward_done {
283                break;
284            }
285
286            let mut forward = None;
287            while !forward_done {
288                let new_end = node_line_end(forward_cursor.node()).to_offset(&self.buffer);
289                if new_end > excerpt.range.end {
290                    let new_excerpt = excerpt.with_expanded_range(excerpt.range.start..new_end);
291                    if new_excerpt.size <= self.options.max_bytes {
292                        forward = Some(new_excerpt);
293                        break;
294                    } else {
295                        log::debug!("halting forward expansion, as it doesn't fit");
296                        forward_done = true;
297                        break;
298                    }
299                }
300                forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
301            }
302
303            let mut backward = None;
304            while !backward_done {
305                let new_start = node_line_start(backward_cursor.node()).to_offset(&self.buffer);
306                if new_start < excerpt.range.start {
307                    let new_excerpt = excerpt.with_expanded_range(new_start..excerpt.range.end);
308                    if new_excerpt.size <= self.options.max_bytes {
309                        backward = Some(new_excerpt);
310                        break;
311                    } else {
312                        log::debug!("halting backward expansion, as it doesn't fit");
313                        backward_done = true;
314                        break;
315                    }
316                }
317                backward_done = !Self::goto_previous_named_sibling(backward_cursor);
318            }
319
320            let go_forward = match (forward, backward) {
321                (Some(forward), Some(backward)) => {
322                    let go_forward = self.is_better_excerpt(&forward, &backward);
323                    if go_forward {
324                        excerpt = forward;
325                    } else {
326                        excerpt = backward;
327                    }
328                    go_forward
329                }
330                (Some(forward), None) => {
331                    log::debug!("expanding forward, since backward expansion has halted");
332                    excerpt = forward;
333                    true
334                }
335                (None, Some(backward)) => {
336                    log::debug!("expanding backward, since forward expansion has halted");
337                    excerpt = backward;
338                    false
339                }
340                (None, None) => break,
341            };
342
343            if go_forward {
344                forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
345            } else {
346                backward_done = !Self::goto_previous_named_sibling(backward_cursor);
347            }
348        }
349
350        excerpt
351    }
352
353    fn select_lines(&self) -> Option<EditPredictionExcerpt> {
354        // early return if line containing query_offset is already too large
355        let excerpt = self.make_excerpt(self.query_range.clone());
356        if excerpt.size > self.options.max_bytes {
357            log::debug!(
358                "excerpt for cursor line is {} bytes, which exceeds the window",
359                excerpt.size
360            );
361            return None;
362        }
363        let signatures_size = excerpt.parent_signatures_size();
364        let bytes_remaining = self.options.max_bytes.saturating_sub(signatures_size);
365
366        let before_bytes =
367            (self.options.target_before_cursor_over_total_bytes * bytes_remaining as f32) as usize;
368
369        let start_point = {
370            let offset = self.query_offset.saturating_sub(before_bytes);
371            let point = offset.to_point(self.buffer);
372            Point::new(point.row + 1, 0)
373        };
374        let start_offset = start_point.to_offset(&self.buffer);
375        let end_point = {
376            let offset = start_offset + bytes_remaining;
377            let point = offset.to_point(self.buffer);
378            Point::new(point.row, 0)
379        };
380        let end_offset = end_point.to_offset(&self.buffer);
381
382        // this could be expanded further since recalculated `signature_size` may be smaller, but
383        // skipping that for now for simplicity
384        //
385        // TODO: could also consider checking if lines immediately before / after fit.
386        let excerpt = self.make_excerpt(start_offset..end_offset);
387        if excerpt.size > self.options.max_bytes {
388            log::error!(
389                "bug: line-based excerpt selection has size {}, \
390                which is {} bytes larger than the max size",
391                excerpt.size,
392                excerpt.size - self.options.max_bytes
393            );
394        }
395        return Some(excerpt);
396    }
397
398    fn make_excerpt(&self, range: Range<usize>) -> EditPredictionExcerpt {
399        let parent_signature_ranges = self
400            .outline_items
401            .iter()
402            .filter(|item| item.item_range.contains_inclusive(&range))
403            .map(|item| item.signature_range.clone())
404            .collect();
405        EditPredictionExcerpt::new(range, parent_signature_ranges)
406    }
407
408    /// Returns `true` if the `forward` excerpt is a better choice than the `backward` excerpt.
409    fn is_better_excerpt(
410        &self,
411        forward: &EditPredictionExcerpt,
412        backward: &EditPredictionExcerpt,
413    ) -> bool {
414        let forward_ratio = self.excerpt_range_ratio(forward);
415        let backward_ratio = self.excerpt_range_ratio(backward);
416        let forward_delta =
417            (forward_ratio - self.options.target_before_cursor_over_total_bytes).abs();
418        let backward_delta =
419            (backward_ratio - self.options.target_before_cursor_over_total_bytes).abs();
420        let forward_is_better = forward_delta <= backward_delta;
421        if forward_is_better {
422            log::debug!(
423                "expanding forward since {} is closer than {} to {}",
424                forward_ratio,
425                backward_ratio,
426                self.options.target_before_cursor_over_total_bytes
427            );
428        } else {
429            log::debug!(
430                "expanding backward since {} is closer than {} to {}",
431                backward_ratio,
432                forward_ratio,
433                self.options.target_before_cursor_over_total_bytes
434            );
435        }
436        forward_is_better
437    }
438
439    /// Returns the ratio of bytes before the cursor over bytes within the range.
440    fn excerpt_range_ratio(&self, excerpt: &EditPredictionExcerpt) -> f32 {
441        let Some(bytes_before_cursor) = self.query_offset.checked_sub(excerpt.range.start) else {
442            log::error!("bug: edit prediction cursor offset is not outside the excerpt");
443            return 0.0;
444        };
445        bytes_before_cursor as f32 / excerpt.range.len() as f32
446    }
447}
448
449fn node_line_start(node: Node) -> Point {
450    Point::new(node.start_position().row as u32, 0)
451}
452
453fn node_line_end(node: Node) -> Point {
454    Point::new(node.end_position().row as u32 + 1, 0)
455}
456
457#[cfg(test)]
458mod tests {
459    use super::*;
460    use gpui::{AppContext, TestAppContext};
461    use language::{Buffer, Language, LanguageConfig, LanguageMatcher, tree_sitter_rust};
462    use util::test::{generate_marked_text, marked_text_offsets_by};
463
464    fn create_buffer(text: &str, cx: &mut TestAppContext) -> BufferSnapshot {
465        let buffer = cx.new(|cx| Buffer::local(text, cx).with_language(rust_lang().into(), cx));
466        buffer.read_with(cx, |buffer, _| buffer.snapshot())
467    }
468
469    fn rust_lang() -> Language {
470        Language::new(
471            LanguageConfig {
472                name: "Rust".into(),
473                matcher: LanguageMatcher {
474                    path_suffixes: vec!["rs".to_string()],
475                    ..Default::default()
476                },
477                ..Default::default()
478            },
479            Some(tree_sitter_rust::LANGUAGE.into()),
480        )
481        .with_outline_query(include_str!("../../languages/src/rust/outline.scm"))
482        .unwrap()
483    }
484
485    fn cursor_and_excerpt_range(text: &str) -> (String, usize, Range<usize>) {
486        let (text, offsets) = marked_text_offsets_by(text, vec!['ˇ', '«', '»']);
487        (text, offsets[&'ˇ'][0], offsets[&'«'][0]..offsets[&'»'][0])
488    }
489
490    fn check_example(options: EditPredictionExcerptOptions, text: &str, cx: &mut TestAppContext) {
491        let (text, cursor, expected_excerpt) = cursor_and_excerpt_range(text);
492
493        let buffer = create_buffer(&text, cx);
494        let cursor_point = cursor.to_point(&buffer);
495
496        let excerpt = EditPredictionExcerpt::select_from_buffer(cursor_point, &buffer, &options)
497            .expect("Should select an excerpt");
498        pretty_assertions::assert_eq!(
499            generate_marked_text(&text, std::slice::from_ref(&excerpt.range), false),
500            generate_marked_text(&text, &[expected_excerpt], false)
501        );
502        assert!(excerpt.size <= options.max_bytes);
503        assert!(excerpt.range.contains(&cursor));
504    }
505
506    #[gpui::test]
507    fn test_ast_based_selection_current_node(cx: &mut TestAppContext) {
508        zlog::init_test();
509        let text = r#"
510fn main() {
511    let x = 1;
512«    let ˇy = 2;
513»    let z = 3;
514}"#;
515
516        let options = EditPredictionExcerptOptions {
517            max_bytes: 20,
518            min_bytes: 10,
519            target_before_cursor_over_total_bytes: 0.5,
520            include_parent_signatures: false,
521        };
522
523        check_example(options, text, cx);
524    }
525
526    #[gpui::test]
527    fn test_ast_based_selection_parent_node(cx: &mut TestAppContext) {
528        zlog::init_test();
529        let text = r#"
530fn foo() {}
531
532«fn main() {
533    let x = 1;
534    let ˇy = 2;
535    let z = 3;
536}
537»
538fn bar() {}"#;
539
540        let options = EditPredictionExcerptOptions {
541            max_bytes: 65,
542            min_bytes: 10,
543            target_before_cursor_over_total_bytes: 0.5,
544            include_parent_signatures: false,
545        };
546
547        check_example(options, text, cx);
548    }
549
550    #[gpui::test]
551    fn test_ast_based_selection_expands_to_siblings(cx: &mut TestAppContext) {
552        zlog::init_test();
553        let text = r#"
554fn main() {
555«    let x = 1;
556    let ˇy = 2;
557    let z = 3;
558»}"#;
559
560        let options = EditPredictionExcerptOptions {
561            max_bytes: 50,
562            min_bytes: 10,
563            target_before_cursor_over_total_bytes: 0.5,
564            include_parent_signatures: false,
565        };
566
567        check_example(options, text, cx);
568    }
569
570    #[gpui::test]
571    fn test_line_based_selection(cx: &mut TestAppContext) {
572        zlog::init_test();
573        let text = r#"
574fn main() {
575    let x = 1;
576«    if true {
577        let ˇy = 2;
578    }
579    let z = 3;
580»}"#;
581
582        let options = EditPredictionExcerptOptions {
583            max_bytes: 60,
584            min_bytes: 45,
585            target_before_cursor_over_total_bytes: 0.5,
586            include_parent_signatures: false,
587        };
588
589        check_example(options, text, cx);
590    }
591
592    #[gpui::test]
593    fn test_line_based_selection_with_before_cursor_ratio(cx: &mut TestAppContext) {
594        zlog::init_test();
595        let text = r#"
596    fn main() {
597«        let a = 1;
598        let b = 2;
599        let c = 3;
600        let ˇd = 4;
601        let e = 5;
602        let f = 6;
603»
604        let g = 7;
605    }"#;
606
607        let options = EditPredictionExcerptOptions {
608            max_bytes: 120,
609            min_bytes: 10,
610            target_before_cursor_over_total_bytes: 0.6,
611            include_parent_signatures: false,
612        };
613
614        check_example(options, text, cx);
615    }
616}