excerpt.rs

  1use cloud_llm_client::predict_edits_v3::Line;
  2use language::{BufferSnapshot, LanguageId, Point, ToOffset as _, ToPoint as _};
  3use std::ops::Range;
  4use tree_sitter::{Node, TreeCursor};
  5use util::RangeExt;
  6
  7// TODO:
  8//
  9// - Test parent signatures
 10//
 11// - Decide whether to count signatures against the excerpt size. Could instead defer this to prompt
 12// planning.
 13//
 14// - Still return an excerpt even if the line around the cursor doesn't fit (e.g. for a markdown
 15// paragraph).
 16//
 17// - Truncation of long lines.
 18//
 19// - Filter outer syntax layers that don't support edit prediction.
 20
 21#[derive(Debug, Clone, PartialEq)]
 22pub struct EditPredictionExcerptOptions {
 23    /// Limit for the number of bytes in the window around the cursor.
 24    pub max_bytes: usize,
 25    /// Minimum number of bytes in the window around the cursor. When syntax tree selection results
 26    /// in an excerpt smaller than this, it will fall back on line-based selection.
 27    pub min_bytes: usize,
 28    /// Target ratio of bytes before the cursor divided by total bytes in the window.
 29    pub target_before_cursor_over_total_bytes: f32,
 30}
 31
 32#[derive(Debug, Clone)]
 33pub struct EditPredictionExcerpt {
 34    pub range: Range<usize>,
 35    pub line_range: Range<Line>,
 36    pub size: usize,
 37}
 38
 39#[derive(Debug, Clone)]
 40pub struct EditPredictionExcerptText {
 41    pub body: String,
 42    pub language_id: Option<LanguageId>,
 43}
 44
 45impl EditPredictionExcerpt {
 46    pub fn text(&self, buffer: &BufferSnapshot) -> EditPredictionExcerptText {
 47        let body = buffer
 48            .text_for_range(self.range.clone())
 49            .collect::<String>();
 50        let language_id = buffer.language().map(|l| l.id());
 51        EditPredictionExcerptText { body, language_id }
 52    }
 53
 54    /// Selects an excerpt around a buffer position, attempting to choose logical boundaries based
 55    /// on TreeSitter structure and approximately targeting a goal ratio of bytesbefore vs after the
 56    /// cursor.
 57    ///
 58    /// When `index` is provided, the excerpt will include the signatures of parent outline items.
 59    ///
 60    /// First tries to use AST node boundaries to select the excerpt, and falls back on line-based
 61    /// expansion.
 62    ///
 63    /// Returns `None` if the line around the cursor doesn't fit.
 64    pub fn select_from_buffer(
 65        query_point: Point,
 66        buffer: &BufferSnapshot,
 67        options: &EditPredictionExcerptOptions,
 68    ) -> Option<Self> {
 69        if buffer.len() <= options.max_bytes {
 70            log::debug!(
 71                "using entire file for excerpt since source length ({}) <= window max bytes ({})",
 72                buffer.len(),
 73                options.max_bytes
 74            );
 75            let offset_range = 0..buffer.len();
 76            let line_range = Line(0)..Line(buffer.max_point().row);
 77            return Some(EditPredictionExcerpt::new(offset_range, line_range));
 78        }
 79
 80        let query_offset = query_point.to_offset(buffer);
 81        let query_line_range = query_point.row..query_point.row + 1;
 82        let query_range = Point::new(query_line_range.start, 0).to_offset(buffer)
 83            ..Point::new(query_line_range.end, 0).to_offset(buffer);
 84        if query_range.len() >= options.max_bytes {
 85            return None;
 86        }
 87
 88        let excerpt_selector = ExcerptSelector {
 89            query_offset,
 90            query_range,
 91            query_line_range: Line(query_line_range.start)..Line(query_line_range.end),
 92            buffer,
 93            options,
 94        };
 95
 96        if let Some(excerpt) = excerpt_selector.select_tree_sitter_nodes() {
 97            if excerpt.size >= options.min_bytes {
 98                return Some(excerpt);
 99            }
100            log::debug!(
101                "tree-sitter excerpt was {} bytes, smaller than min of {}, falling back on line-based selection",
102                excerpt.size,
103                options.min_bytes
104            );
105        } else {
106            log::debug!(
107                "couldn't find excerpt via tree-sitter, falling back on line-based selection"
108            );
109        }
110
111        excerpt_selector.select_lines()
112    }
113
114    fn new(range: Range<usize>, line_range: Range<Line>) -> Self {
115        Self {
116            size: range.len(),
117            range,
118            line_range,
119        }
120    }
121
122    fn with_expanded_range(&self, new_range: Range<usize>, new_line_range: Range<Line>) -> Self {
123        if !new_range.contains_inclusive(&self.range) {
124            // this is an issue because parent_signature_ranges may be incorrect
125            log::error!("bug: with_expanded_range called with disjoint range");
126        }
127        Self::new(new_range, new_line_range)
128    }
129
130    fn parent_signatures_size(&self) -> usize {
131        self.size - self.range.len()
132    }
133}
134
135struct ExcerptSelector<'a> {
136    query_offset: usize,
137    query_range: Range<usize>,
138    query_line_range: Range<Line>,
139    buffer: &'a BufferSnapshot,
140    options: &'a EditPredictionExcerptOptions,
141}
142
143impl<'a> ExcerptSelector<'a> {
144    /// Finds the largest node that is smaller than the window size and contains `query_range`.
145    fn select_tree_sitter_nodes(&self) -> Option<EditPredictionExcerpt> {
146        let selected_layer_root = self.select_syntax_layer()?;
147        let mut cursor = selected_layer_root.walk();
148
149        loop {
150            let line_start = node_line_start(cursor.node());
151            let line_end = node_line_end(cursor.node());
152            let line_range = Line(line_start.row)..Line(line_end.row);
153            let excerpt_range =
154                line_start.to_offset(&self.buffer)..line_end.to_offset(&self.buffer);
155            if excerpt_range.contains_inclusive(&self.query_range) {
156                let excerpt = self.make_excerpt(excerpt_range, line_range);
157                if excerpt.size <= self.options.max_bytes {
158                    return Some(self.expand_to_siblings(&mut cursor, excerpt));
159                }
160            } else {
161                // TODO: Should still be able to handle this case via AST nodes. For example, this
162                // can happen if the cursor is between two methods in a large class file.
163                return None;
164            }
165
166            if cursor
167                .goto_first_child_for_byte(self.query_range.start)
168                .is_none()
169            {
170                return None;
171            }
172        }
173    }
174
175    /// Select the smallest syntax layer that exceeds max_len, or the largest if none exceed max_len.
176    fn select_syntax_layer(&self) -> Option<Node<'_>> {
177        let mut smallest_exceeding_max_len: Option<Node<'_>> = None;
178        let mut largest: Option<Node<'_>> = None;
179        for layer in self
180            .buffer
181            .syntax_layers_for_range(self.query_range.start..self.query_range.start, true)
182        {
183            let layer_range = layer.node().byte_range();
184            if !layer_range.contains_inclusive(&self.query_range) {
185                continue;
186            }
187
188            if layer_range.len() > self.options.max_bytes {
189                match &smallest_exceeding_max_len {
190                    None => smallest_exceeding_max_len = Some(layer.node()),
191                    Some(existing) => {
192                        if layer_range.len() < existing.byte_range().len() {
193                            smallest_exceeding_max_len = Some(layer.node());
194                        }
195                    }
196                }
197            } else {
198                match &largest {
199                    None => largest = Some(layer.node()),
200                    Some(existing) if layer_range.len() > existing.byte_range().len() => {
201                        largest = Some(layer.node())
202                    }
203                    _ => {}
204                }
205            }
206        }
207
208        smallest_exceeding_max_len.or(largest)
209    }
210
211    // motivation for this and `goto_previous_named_sibling` is to avoid including things like
212    // trailing unnamed "}" in body nodes
213    fn goto_next_named_sibling(cursor: &mut TreeCursor) -> bool {
214        while cursor.goto_next_sibling() {
215            if cursor.node().is_named() {
216                return true;
217            }
218        }
219        false
220    }
221
222    fn goto_previous_named_sibling(cursor: &mut TreeCursor) -> bool {
223        while cursor.goto_previous_sibling() {
224            if cursor.node().is_named() {
225                return true;
226            }
227        }
228        false
229    }
230
231    fn expand_to_siblings(
232        &self,
233        cursor: &mut TreeCursor,
234        mut excerpt: EditPredictionExcerpt,
235    ) -> EditPredictionExcerpt {
236        let mut forward_cursor = cursor.clone();
237        let backward_cursor = cursor;
238        let mut forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
239        let mut backward_done = !Self::goto_previous_named_sibling(backward_cursor);
240        loop {
241            if backward_done && forward_done {
242                break;
243            }
244
245            let mut forward = None;
246            while !forward_done {
247                let new_end_point = node_line_end(forward_cursor.node());
248                let new_end = new_end_point.to_offset(&self.buffer);
249                if new_end > excerpt.range.end {
250                    let new_excerpt = excerpt.with_expanded_range(
251                        excerpt.range.start..new_end,
252                        excerpt.line_range.start..Line(new_end_point.row),
253                    );
254                    if new_excerpt.size <= self.options.max_bytes {
255                        forward = Some(new_excerpt);
256                        break;
257                    } else {
258                        log::debug!("halting forward expansion, as it doesn't fit");
259                        forward_done = true;
260                        break;
261                    }
262                }
263                forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
264            }
265
266            let mut backward = None;
267            while !backward_done {
268                let new_start_point = node_line_start(backward_cursor.node());
269                let new_start = new_start_point.to_offset(&self.buffer);
270                if new_start < excerpt.range.start {
271                    let new_excerpt = excerpt.with_expanded_range(
272                        new_start..excerpt.range.end,
273                        Line(new_start_point.row)..excerpt.line_range.end,
274                    );
275                    if new_excerpt.size <= self.options.max_bytes {
276                        backward = Some(new_excerpt);
277                        break;
278                    } else {
279                        log::debug!("halting backward expansion, as it doesn't fit");
280                        backward_done = true;
281                        break;
282                    }
283                }
284                backward_done = !Self::goto_previous_named_sibling(backward_cursor);
285            }
286
287            let go_forward = match (forward, backward) {
288                (Some(forward), Some(backward)) => {
289                    let go_forward = self.is_better_excerpt(&forward, &backward);
290                    if go_forward {
291                        excerpt = forward;
292                    } else {
293                        excerpt = backward;
294                    }
295                    go_forward
296                }
297                (Some(forward), None) => {
298                    log::debug!("expanding forward, since backward expansion has halted");
299                    excerpt = forward;
300                    true
301                }
302                (None, Some(backward)) => {
303                    log::debug!("expanding backward, since forward expansion has halted");
304                    excerpt = backward;
305                    false
306                }
307                (None, None) => break,
308            };
309
310            if go_forward {
311                forward_done = !Self::goto_next_named_sibling(&mut forward_cursor);
312            } else {
313                backward_done = !Self::goto_previous_named_sibling(backward_cursor);
314            }
315        }
316
317        excerpt
318    }
319
320    fn select_lines(&self) -> Option<EditPredictionExcerpt> {
321        // early return if line containing query_offset is already too large
322        let excerpt = self.make_excerpt(self.query_range.clone(), self.query_line_range.clone());
323        if excerpt.size > self.options.max_bytes {
324            log::debug!(
325                "excerpt for cursor line is {} bytes, which exceeds the window",
326                excerpt.size
327            );
328            return None;
329        }
330        let signatures_size = excerpt.parent_signatures_size();
331        let bytes_remaining = self.options.max_bytes.saturating_sub(signatures_size);
332
333        let before_bytes =
334            (self.options.target_before_cursor_over_total_bytes * bytes_remaining as f32) as usize;
335
336        let start_line = {
337            let offset = self.query_offset.saturating_sub(before_bytes);
338            let point = offset.to_point(self.buffer);
339            Line(point.row + 1)
340        };
341        let start_offset = Point::new(start_line.0, 0).to_offset(&self.buffer);
342        let end_line = {
343            let offset = start_offset + bytes_remaining;
344            let point = offset.to_point(self.buffer);
345            Line(point.row)
346        };
347        let end_offset = Point::new(end_line.0, 0).to_offset(&self.buffer);
348
349        // this could be expanded further since recalculated `signature_size` may be smaller, but
350        // skipping that for now for simplicity
351        //
352        // TODO: could also consider checking if lines immediately before / after fit.
353        let excerpt = self.make_excerpt(start_offset..end_offset, start_line..end_line);
354        if excerpt.size > self.options.max_bytes {
355            log::error!(
356                "bug: line-based excerpt selection has size {}, \
357                which is {} bytes larger than the max size",
358                excerpt.size,
359                excerpt.size - self.options.max_bytes
360            );
361        }
362        return Some(excerpt);
363    }
364
365    fn make_excerpt(&self, range: Range<usize>, line_range: Range<Line>) -> EditPredictionExcerpt {
366        EditPredictionExcerpt::new(range, line_range)
367    }
368
369    /// Returns `true` if the `forward` excerpt is a better choice than the `backward` excerpt.
370    fn is_better_excerpt(
371        &self,
372        forward: &EditPredictionExcerpt,
373        backward: &EditPredictionExcerpt,
374    ) -> bool {
375        let forward_ratio = self.excerpt_range_ratio(forward);
376        let backward_ratio = self.excerpt_range_ratio(backward);
377        let forward_delta =
378            (forward_ratio - self.options.target_before_cursor_over_total_bytes).abs();
379        let backward_delta =
380            (backward_ratio - self.options.target_before_cursor_over_total_bytes).abs();
381        let forward_is_better = forward_delta <= backward_delta;
382        if forward_is_better {
383            log::debug!(
384                "expanding forward since {} is closer than {} to {}",
385                forward_ratio,
386                backward_ratio,
387                self.options.target_before_cursor_over_total_bytes
388            );
389        } else {
390            log::debug!(
391                "expanding backward since {} is closer than {} to {}",
392                backward_ratio,
393                forward_ratio,
394                self.options.target_before_cursor_over_total_bytes
395            );
396        }
397        forward_is_better
398    }
399
400    /// Returns the ratio of bytes before the cursor over bytes within the range.
401    fn excerpt_range_ratio(&self, excerpt: &EditPredictionExcerpt) -> f32 {
402        let Some(bytes_before_cursor) = self.query_offset.checked_sub(excerpt.range.start) else {
403            log::error!("bug: edit prediction cursor offset is not outside the excerpt");
404            return 0.0;
405        };
406        bytes_before_cursor as f32 / excerpt.range.len() as f32
407    }
408}
409
410fn node_line_start(node: Node) -> Point {
411    Point::new(node.start_position().row as u32, 0)
412}
413
414fn node_line_end(node: Node) -> Point {
415    Point::new(node.end_position().row as u32 + 1, 0)
416}
417
418#[cfg(test)]
419mod tests {
420    use super::*;
421    use gpui::{AppContext, TestAppContext};
422    use language::Buffer;
423    use util::test::{generate_marked_text, marked_text_offsets_by};
424
425    fn create_buffer(text: &str, cx: &mut TestAppContext) -> BufferSnapshot {
426        let buffer = cx.new(|cx| Buffer::local(text, cx).with_language(language::rust_lang(), cx));
427        buffer.read_with(cx, |buffer, _| buffer.snapshot())
428    }
429
430    fn cursor_and_excerpt_range(text: &str) -> (String, usize, Range<usize>) {
431        let (text, offsets) = marked_text_offsets_by(text, vec!['ˇ', '«', '»']);
432        (text, offsets[&'ˇ'][0], offsets[&'«'][0]..offsets[&'»'][0])
433    }
434
435    fn check_example(options: EditPredictionExcerptOptions, text: &str, cx: &mut TestAppContext) {
436        let (text, cursor, expected_excerpt) = cursor_and_excerpt_range(text);
437
438        let buffer = create_buffer(&text, cx);
439        let cursor_point = cursor.to_point(&buffer);
440
441        let excerpt = EditPredictionExcerpt::select_from_buffer(cursor_point, &buffer, &options)
442            .expect("Should select an excerpt");
443        pretty_assertions::assert_eq!(
444            generate_marked_text(&text, std::slice::from_ref(&excerpt.range), false),
445            generate_marked_text(&text, &[expected_excerpt], false)
446        );
447        assert!(excerpt.size <= options.max_bytes);
448        assert!(excerpt.range.contains(&cursor));
449    }
450
451    #[gpui::test]
452    fn test_ast_based_selection_current_node(cx: &mut TestAppContext) {
453        zlog::init_test();
454        let text = r#"
455fn main() {
456    let x = 1;
457«    let ˇy = 2;
458»    let z = 3;
459}"#;
460
461        let options = EditPredictionExcerptOptions {
462            max_bytes: 20,
463            min_bytes: 10,
464            target_before_cursor_over_total_bytes: 0.5,
465        };
466
467        check_example(options, text, cx);
468    }
469
470    #[gpui::test]
471    fn test_ast_based_selection_parent_node(cx: &mut TestAppContext) {
472        zlog::init_test();
473        let text = r#"
474fn foo() {}
475
476«fn main() {
477    let x = 1;
478    let ˇy = 2;
479    let z = 3;
480}
481»
482fn bar() {}"#;
483
484        let options = EditPredictionExcerptOptions {
485            max_bytes: 65,
486            min_bytes: 10,
487            target_before_cursor_over_total_bytes: 0.5,
488        };
489
490        check_example(options, text, cx);
491    }
492
493    #[gpui::test]
494    fn test_ast_based_selection_expands_to_siblings(cx: &mut TestAppContext) {
495        zlog::init_test();
496        let text = r#"
497fn main() {
498«    let x = 1;
499    let ˇy = 2;
500    let z = 3;
501»}"#;
502
503        let options = EditPredictionExcerptOptions {
504            max_bytes: 50,
505            min_bytes: 10,
506            target_before_cursor_over_total_bytes: 0.5,
507        };
508
509        check_example(options, text, cx);
510    }
511
512    #[gpui::test]
513    fn test_line_based_selection(cx: &mut TestAppContext) {
514        zlog::init_test();
515        let text = r#"
516fn main() {
517    let x = 1;
518«    if true {
519        let ˇy = 2;
520    }
521    let z = 3;
522»}"#;
523
524        let options = EditPredictionExcerptOptions {
525            max_bytes: 60,
526            min_bytes: 45,
527            target_before_cursor_over_total_bytes: 0.5,
528        };
529
530        check_example(options, text, cx);
531    }
532
533    #[gpui::test]
534    fn test_line_based_selection_with_before_cursor_ratio(cx: &mut TestAppContext) {
535        zlog::init_test();
536        let text = r#"
537    fn main() {
538«        let a = 1;
539        let b = 2;
540        let c = 3;
541        let ˇd = 4;
542        let e = 5;
543        let f = 6;
544»
545        let g = 7;
546    }"#;
547
548        let options = EditPredictionExcerptOptions {
549            max_bytes: 120,
550            min_bytes: 10,
551            target_before_cursor_over_total_bytes: 0.6,
552        };
553
554        check_example(options, text, cx);
555    }
556}