1use language::{BufferSnapshot, Point};
2use std::ops::Range;
3
4pub fn editable_and_context_ranges_for_cursor_position(
5 position: Point,
6 snapshot: &BufferSnapshot,
7 editable_region_token_limit: usize,
8 context_token_limit: usize,
9) -> (Range<Point>, Range<Point>) {
10 let editable_range = compute_editable_range(snapshot, position, editable_region_token_limit);
11
12 let context_range = expand_context_syntactically_then_linewise(
13 snapshot,
14 editable_range.clone(),
15 context_token_limit,
16 );
17
18 (editable_range, context_range)
19}
20
/// Computes the editable range using a three-phase approach:
/// 1. Expand symmetrically from cursor (75% of budget)
/// 2. Expand to syntax boundaries
/// 3. Continue line-wise in the least-expanded direction
///
/// Returns a whole-line range: column 0 of the first row through the end of the
/// last row.
fn compute_editable_range(
    snapshot: &BufferSnapshot,
    cursor: Point,
    token_limit: usize,
) -> Range<Point> {
    // Phase 1: Expand symmetrically from cursor using 75% of budget.
    let initial_budget = (token_limit * 3) / 4;
    let (mut start_row, mut end_row, mut remaining_tokens) =
        expand_symmetric_from_cursor(snapshot, cursor.row, initial_budget);

    // Add remaining budget from phase 1: the 25% held back plus whatever phase 1
    // didn't spend is available to the later phases.
    remaining_tokens += token_limit.saturating_sub(initial_budget);

    // Remember the phase-1 bounds so phase 3 can tell how far each direction
    // moved during the syntax phase.
    let original_start = start_row;
    let original_end = end_row;

    // Phase 2: Expand to syntax boundaries that fit within budget.
    for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
    {
        // Cost of the new rows above the current range, if the node starts earlier.
        let tokens_for_start = if boundary_start < start_row {
            estimate_tokens_for_rows(snapshot, boundary_start, start_row)
        } else {
            0
        };
        // Cost of the new rows below the current range (rows end_row+1..=boundary_end).
        let tokens_for_end = if boundary_end > end_row {
            estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
        } else {
            0
        };

        let total_needed = tokens_for_start + tokens_for_end;

        if total_needed <= remaining_tokens {
            if boundary_start < start_row {
                start_row = boundary_start;
            }
            if boundary_end > end_row {
                end_row = boundary_end;
            }
            remaining_tokens = remaining_tokens.saturating_sub(total_needed);
        } else {
            // Ancestor nodes only grow, so no larger boundary can fit either.
            break;
        }
    }

    // Phase 3: Continue line-wise in the direction we expanded least during syntax phase.
    let expanded_up = original_start.saturating_sub(start_row);
    let expanded_down = end_row.saturating_sub(original_end);

    (start_row, end_row, _) = expand_linewise_biased(
        snapshot,
        start_row,
        end_row,
        remaining_tokens,
        expanded_up <= expanded_down, // prefer_up if we expanded less upward
    );

    let start = Point::new(start_row, 0);
    let end = Point::new(end_row, snapshot.line_len(end_row));
    start..end
}
86
/// Expands symmetrically from cursor, one line at a time, alternating down then up.
/// Returns (start_row, end_row, remaining_tokens).
///
/// Expansion stops entirely as soon as the next line in an attempted direction
/// would exceed the budget — the loop does not keep drifting in the other
/// direction past an oversized line, which keeps the window roughly centered
/// on the cursor.
fn expand_symmetric_from_cursor(
    snapshot: &BufferSnapshot,
    cursor_row: u32,
    mut token_budget: usize,
) -> (u32, u32, usize) {
    let mut start_row = cursor_row;
    let mut end_row = cursor_row;

    // Account for the cursor's line. If it alone exceeds the budget,
    // saturating_sub leaves a zero budget and the range stays at just that line.
    let cursor_line_tokens = line_token_count(snapshot, cursor_row);
    token_budget = token_budget.saturating_sub(cursor_line_tokens);

    loop {
        let can_expand_up = start_row > 0;
        let can_expand_down = end_row < snapshot.max_point().row;

        if token_budget == 0 || (!can_expand_up && !can_expand_down) {
            break;
        }

        // Expand down first (slight forward bias for edit prediction).
        if can_expand_down {
            let next_row = end_row + 1;
            let line_tokens = line_token_count(snapshot, next_row);
            if line_tokens <= token_budget {
                end_row = next_row;
                token_budget = token_budget.saturating_sub(line_tokens);
            } else {
                // Next line down doesn't fit: stop, rather than continuing upward only.
                break;
            }
        }

        // Then expand up.
        if can_expand_up && token_budget > 0 {
            let next_row = start_row - 1;
            let line_tokens = line_token_count(snapshot, next_row);
            if line_tokens <= token_budget {
                start_row = next_row;
                token_budget = token_budget.saturating_sub(line_tokens);
            } else {
                break;
            }
        }
    }

    (start_row, end_row, token_budget)
}
136
137/// Expands line-wise with a bias toward one direction.
138/// Returns (start_row, end_row, remaining_tokens).
139fn expand_linewise_biased(
140 snapshot: &BufferSnapshot,
141 mut start_row: u32,
142 mut end_row: u32,
143 mut remaining_tokens: usize,
144 prefer_up: bool,
145) -> (u32, u32, usize) {
146 loop {
147 let can_expand_up = start_row > 0;
148 let can_expand_down = end_row < snapshot.max_point().row;
149
150 if remaining_tokens == 0 || (!can_expand_up && !can_expand_down) {
151 break;
152 }
153
154 let mut expanded = false;
155
156 // Try preferred direction first.
157 if prefer_up {
158 if can_expand_up {
159 let next_row = start_row - 1;
160 let line_tokens = line_token_count(snapshot, next_row);
161 if line_tokens <= remaining_tokens {
162 start_row = next_row;
163 remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
164 expanded = true;
165 }
166 }
167 if can_expand_down && remaining_tokens > 0 {
168 let next_row = end_row + 1;
169 let line_tokens = line_token_count(snapshot, next_row);
170 if line_tokens <= remaining_tokens {
171 end_row = next_row;
172 remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
173 expanded = true;
174 }
175 }
176 } else {
177 if can_expand_down {
178 let next_row = end_row + 1;
179 let line_tokens = line_token_count(snapshot, next_row);
180 if line_tokens <= remaining_tokens {
181 end_row = next_row;
182 remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
183 expanded = true;
184 }
185 }
186 if can_expand_up && remaining_tokens > 0 {
187 let next_row = start_row - 1;
188 let line_tokens = line_token_count(snapshot, next_row);
189 if line_tokens <= remaining_tokens {
190 start_row = next_row;
191 remaining_tokens = remaining_tokens.saturating_sub(line_tokens);
192 expanded = true;
193 }
194 }
195 }
196
197 if !expanded {
198 break;
199 }
200 }
201
202 (start_row, end_row, remaining_tokens)
203}
204
/// Typical number of string bytes per token for the purposes of limiting model input. This is
/// intentionally low to err on the side of underestimating limits.
pub(crate) const BYTES_PER_TOKEN_GUESS: usize = 3;

/// Rough token estimate for a byte count, derived from [`BYTES_PER_TOKEN_GUESS`].
/// Integer division rounds down, so very short spans may estimate to zero tokens.
pub fn guess_token_count(bytes: usize) -> usize {
    bytes.div_euclid(BYTES_PER_TOKEN_GUESS)
}
212
213fn line_token_count(snapshot: &BufferSnapshot, row: u32) -> usize {
214 guess_token_count(snapshot.line_len(row) as usize).max(1)
215}
216
217/// Estimates token count for rows in range [start_row, end_row).
218fn estimate_tokens_for_rows(snapshot: &BufferSnapshot, start_row: u32, end_row: u32) -> usize {
219 let mut tokens = 0;
220 for row in start_row..end_row {
221 tokens += line_token_count(snapshot, row);
222 }
223 tokens
224}
225
/// Returns an iterator of (start_row, end_row) for successively larger syntax nodes
/// containing the given row range. Smallest containing node first.
///
/// Only nodes that extend beyond `[start_row, end_row]` are yielded; nodes fully
/// inside the range, and nodes whose row span equals the previously yielded one,
/// are skipped, so each item strictly grows the candidate region.
fn containing_syntax_boundaries(
    snapshot: &BufferSnapshot,
    start_row: u32,
    end_row: u32,
) -> impl Iterator<Item = (u32, u32)> {
    let range = Point::new(start_row, 0)..Point::new(end_row, snapshot.line_len(end_row));
    // Start from the smallest syntax node containing `range`; the walk proceeds
    // upward through `parent()` links.
    let mut current = snapshot.syntax_ancestor(range);
    let mut last_rows: Option<(u32, u32)> = None;

    std::iter::from_fn(move || {
        while let Some(node) = current.take() {
            let node_start_row = node.start_position().row as u32;
            let node_end_row = node.end_position().row as u32;
            let rows = (node_start_row, node_end_row);

            // Advance to the parent *before* filtering, so the walk continues
            // even when this node is skipped below.
            current = node.parent();

            // Skip nodes that don't extend beyond our range.
            if node_start_row >= start_row && node_end_row <= end_row {
                continue;
            }

            // Skip if same as last returned (some nodes have same span).
            if last_rows == Some(rows) {
                continue;
            }

            last_rows = Some(rows);
            return Some(rows);
        }
        None
    })
}
261
262/// Expands context by first trying to reach syntax boundaries,
263/// then expanding line-wise only if no syntax expansion occurred.
264fn expand_context_syntactically_then_linewise(
265 snapshot: &BufferSnapshot,
266 editable_range: Range<Point>,
267 context_token_limit: usize,
268) -> Range<Point> {
269 let mut start_row = editable_range.start.row;
270 let mut end_row = editable_range.end.row;
271 let mut remaining_tokens = context_token_limit;
272 let mut did_syntax_expand = false;
273
274 // Phase 1: Try to expand to containing syntax boundaries, picking the largest that fits.
275 for (boundary_start, boundary_end) in containing_syntax_boundaries(snapshot, start_row, end_row)
276 {
277 let tokens_for_start = if boundary_start < start_row {
278 estimate_tokens_for_rows(snapshot, boundary_start, start_row)
279 } else {
280 0
281 };
282 let tokens_for_end = if boundary_end > end_row {
283 estimate_tokens_for_rows(snapshot, end_row + 1, boundary_end + 1)
284 } else {
285 0
286 };
287
288 let total_needed = tokens_for_start + tokens_for_end;
289
290 if total_needed <= remaining_tokens {
291 if boundary_start < start_row {
292 start_row = boundary_start;
293 }
294 if boundary_end > end_row {
295 end_row = boundary_end;
296 }
297 remaining_tokens = remaining_tokens.saturating_sub(total_needed);
298 did_syntax_expand = true;
299 } else {
300 break;
301 }
302 }
303
304 // Phase 2: Only expand line-wise if no syntax expansion occurred.
305 if !did_syntax_expand {
306 (start_row, end_row, _) =
307 expand_linewise_biased(snapshot, start_row, end_row, remaining_tokens, true);
308 }
309
310 let start = Point::new(start_row, 0);
311 let end = Point::new(end_row, snapshot.line_len(end_row));
312 start..end
313}