// cursor_excerpt.rs

  1use language::{BufferSnapshot, Point};
  2use std::ops::Range;
  3
  4pub fn editable_and_context_ranges_for_cursor_position(
  5    position: Point,
  6    snapshot: &BufferSnapshot,
  7    editable_region_token_limit: usize,
  8    context_token_limit: usize,
  9) -> (Range<Point>, Range<Point>) {
 10    let editable_range = compute_editable_range(snapshot, position, editable_region_token_limit);
 11
 12    let context_range = expand_context_syntactically_then_linewise(
 13        snapshot,
 14        editable_range.clone(),
 15        context_token_limit,
 16    );
 17
 18    (editable_range, context_range)
 19}
 20
 21/// Computes the editable range using a three-phase approach:
 22/// 1. Expand symmetrically from cursor (75% of budget)
 23/// 2. Expand to syntax boundaries
 24/// 3. Continue line-wise in the least-expanded direction
 25fn compute_editable_range(
 26    snapshot: &BufferSnapshot,
 27    cursor: Point,
 28    token_limit: usize,
 29) -> Range<Point> {
 30    // Phase 1: Expand symmetrically from cursor using 75% of budget.
 31    let initial_budget = (token_limit * 3) / 4;
 32    let (mut start_row, mut end_row, mut remaining_tokens) =
 33        expand_symmetric_from_cursor(snapshot, cursor.row, initial_budget);
 34
 35    // Add remaining budget from phase 1.
 36    remaining_tokens += token_limit.saturating_sub(initial_budget);
 37
 38    let original_start = start_row;
 39    let original_end = end_row;
 40
 41    // Phase 2: Expand to syntax boundaries that fit within budget.
 42    for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
 43    {
 44        let tokens_for_start = if boundary_start < start_row {
 45            estimate_tokens_for_rows(snapshot, boundary_start, start_row)
 46        } else {
 47            0
 48        };
 49        let tokens_for_end = if boundary_end > end_row {
 50            estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
 51        } else {
 52            0
 53        };
 54
 55        let total_needed = tokens_for_start + tokens_for_end;
 56
 57        if total_needed <= remaining_tokens {
 58            if boundary_start < start_row {
 59                start_row = boundary_start;
 60            }
 61            if boundary_end > end_row {
 62                end_row = boundary_end;
 63            }
 64            remaining_tokens = remaining_tokens.saturating_sub(total_needed);
 65        } else {
 66            break;
 67        }
 68    }
 69
 70    // Phase 3: Continue line-wise in the direction we expanded least during syntax phase.
 71    let expanded_up = original_start.saturating_sub(start_row);
 72    let expanded_down = end_row.saturating_sub(original_end);
 73
 74    (start_row, end_row, _) = expand_linewise_biased(
 75        snapshot,
 76        start_row,
 77        end_row,
 78        remaining_tokens,
 79        expanded_up <= expanded_down, // prefer_up if we expanded less upward
 80    );
 81
 82    let start = Point::new(start_row, 0);
 83    let end = Point::new(end_row, snapshot.line_len(end_row));
 84    start..end
 85}
 86
 87/// Expands symmetrically from cursor, one line at a time, alternating down then up.
 88/// Returns (start_row, end_row, remaining_tokens).
 89fn expand_symmetric_from_cursor(
 90    snapshot: &BufferSnapshot,
 91    cursor_row: u32,
 92    mut token_budget: usize,
 93) -> (u32, u32, usize) {
 94    let mut start_row = cursor_row;
 95    let mut end_row = cursor_row;
 96
 97    // Account for the cursor's line.
 98    let cursor_line_tokens = line_token_count(snapshot, cursor_row);
 99    token_budget = token_budget.saturating_sub(cursor_line_tokens);
100
101    loop {
102        let can_expand_up = start_row > 0;
103        let can_expand_down = end_row < snapshot.max_point().row;
104
105        if token_budget == 0 || (!can_expand_up && !can_expand_down) {
106            break;
107        }
108
109        // Expand down first (slight forward bias for edit prediction).
110        if can_expand_down {
111            let next_row = end_row + 1;
112            let line_tokens = line_token_count(snapshot, next_row);
113            if line_tokens <= token_budget {
114                end_row = next_row;
115                token_budget = token_budget.saturating_sub(line_tokens);
116            } else {
117                break;
118            }
119        }
120
121        // Then expand up.
122        if can_expand_up && token_budget > 0 {
123            let next_row = start_row - 1;
124            let line_tokens = line_token_count(snapshot, next_row);
125            if line_tokens <= token_budget {
126                start_row = next_row;
127                token_budget = token_budget.saturating_sub(line_tokens);
128            } else {
129                break;
130            }
131        }
132    }
133
134    (start_row, end_row, token_budget)
135}
136
137/// Expands line-wise with a bias toward one direction.
138/// Returns (start_row, end_row, remaining_tokens).
139fn expand_linewise_biased(
140    snapshot: &BufferSnapshot,
141    mut start_row: u32,
142    mut end_row: u32,
143    mut remaining_tokens: usize,
144    prefer_up: bool,
145) -> (u32, u32, usize) {
146    loop {
147        let can_expand_up = start_row > 0;
148        let can_expand_down = end_row < snapshot.max_point().row;
149
150        if remaining_tokens == 0 || (!can_expand_up && !can_expand_down) {
151            break;
152        }
153
154        let mut expanded = false;
155
156        // Try preferred direction first.
157        if prefer_up {
158            if can_expand_up {
159                let next_row = start_row - 1;
160                let line_tokens = line_token_count(snapshot, next_row);
161                if line_tokens <= remaining_tokens {
162                    start_row = next_row;
163                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
164                    expanded = true;
165                }
166            }
167            if can_expand_down && remaining_tokens > 0 {
168                let next_row = end_row + 1;
169                let line_tokens = line_token_count(snapshot, next_row);
170                if line_tokens <= remaining_tokens {
171                    end_row = next_row;
172                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
173                    expanded = true;
174                }
175            }
176        } else {
177            if can_expand_down {
178                let next_row = end_row + 1;
179                let line_tokens = line_token_count(snapshot, next_row);
180                if line_tokens <= remaining_tokens {
181                    end_row = next_row;
182                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
183                    expanded = true;
184                }
185            }
186            if can_expand_up && remaining_tokens > 0 {
187                let next_row = start_row - 1;
188                let line_tokens = line_token_count(snapshot, next_row);
189                if line_tokens <= remaining_tokens {
190                    start_row = next_row;
191                    remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
192                    expanded = true;
193                }
194            }
195        }
196
197        if !expanded {
198            break;
199        }
200    }
201
202    (start_row, end_row, remaining_tokens)
203}
204
/// Typical number of string bytes per token when limiting model input. This is
/// intentionally low so that token counts are overestimated, erring on the safe
/// side of the limits.
pub(crate) const BYTES_PER_TOKEN_GUESS: usize = 3;

/// Rough token-count estimate for `bytes` bytes of text, based on
/// [`BYTES_PER_TOKEN_GUESS`]. Integer division truncates toward zero.
pub fn guess_token_count(bytes: usize) -> usize {
    bytes / BYTES_PER_TOKEN_GUESS
}
212
213fn line_token_count(snapshot: &BufferSnapshot, row: u32) -> usize {
214    guess_token_count(snapshot.line_len(row) as usize).max(1)
215}
216
217/// Estimates token count for rows in range [start_row, end_row).
218fn estimate_tokens_for_rows(snapshot: &BufferSnapshot, start_row: u32, end_row: u32) -> usize {
219    let mut tokens = 0;
220    for row in start_row..end_row {
221        tokens += line_token_count(snapshot, row);
222    }
223    tokens
224}
225
226/// Returns an iterator of (start_row, end_row) for successively larger syntax nodes
227/// containing the given row range. Smallest containing node first.
228fn containing_syntax_boundaries(
229    snapshot: &BufferSnapshot,
230    start_row: u32,
231    end_row: u32,
232) -> impl Iterator<Item = (u32, u32)> {
233    let range = Point::new(start_row, 0)..Point::new(end_row, snapshot.line_len(end_row));
234    let mut current = snapshot.syntax_ancestor(range);
235    let mut last_rows: Option<(u32, u32)> = None;
236
237    std::iter::from_fn(move || {
238        while let Some(node) = current.take() {
239            let node_start_row = node.start_position().row as u32;
240            let node_end_row = node.end_position().row as u32;
241            let rows = (node_start_row, node_end_row);
242
243            current = node.parent();
244
245            // Skip nodes that don't extend beyond our range.
246            if node_start_row >= start_row && node_end_row <= end_row {
247                continue;
248            }
249
250            // Skip if same as last returned (some nodes have same span).
251            if last_rows == Some(rows) {
252                continue;
253            }
254
255            last_rows = Some(rows);
256            return Some(rows);
257        }
258        None
259    })
260}
261
262/// Expands context by first trying to reach syntax boundaries,
263/// then expanding line-wise only if no syntax expansion occurred.
264fn expand_context_syntactically_then_linewise(
265    snapshot: &BufferSnapshot,
266    editable_range: Range<Point>,
267    context_token_limit: usize,
268) -> Range<Point> {
269    let mut start_row = editable_range.start.row;
270    let mut end_row = editable_range.end.row;
271    let mut remaining_tokens = context_token_limit;
272    let mut did_syntax_expand = false;
273
274    // Phase 1: Try to expand to containing syntax boundaries, picking the largest that fits.
275    for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
276    {
277        let tokens_for_start = if boundary_start < start_row {
278            estimate_tokens_for_rows(snapshot, boundary_start, start_row)
279        } else {
280            0
281        };
282        let tokens_for_end = if boundary_end > end_row {
283            estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
284        } else {
285            0
286        };
287
288        let total_needed = tokens_for_start + tokens_for_end;
289
290        if total_needed <= remaining_tokens {
291            if boundary_start < start_row {
292                start_row = boundary_start;
293            }
294            if boundary_end > end_row {
295                end_row = boundary_end;
296            }
297            remaining_tokens = remaining_tokens.saturating_sub(total_needed);
298            did_syntax_expand = true;
299        } else {
300            break;
301        }
302    }
303
304    // Phase 2: Only expand line-wise if no syntax expansion occurred.
305    if !did_syntax_expand {
306        (start_row, end_row, _) =
307            expand_linewise_biased(snapshot, start_row, end_row, remaining_tokens, true);
308    }
309
310    let start = Point::new(start_row, 0);
311    let end = Point::new(end_row, snapshot.line_len(end_row));
312    start..end
313}