1use collections::HashSet;
2use gpui::{App, Entity};
3use itertools::Itertools as _;
4use language::BufferSnapshot;
5use project::ProjectEntryId;
6use serde::Serialize;
7use std::{collections::HashMap, ops::Range};
8use strum::EnumIter;
9use text::{OffsetRangeExt, Point, ToPoint};
10
11use crate::{
12 Declaration, EditPredictionExcerpt, EditPredictionExcerptText, TreeSitterIndex,
13 outline::Identifier,
14 reference::{Reference, ReferenceRegion},
15 text_similarity::{IdentifierOccurrences, jaccard_similarity, weighted_overlap_coefficient},
16};
17
18#[derive(Clone, Debug)]
19pub struct ScoredSnippet {
20 #[allow(dead_code)]
21 pub identifier: Identifier,
22 pub declaration: Declaration,
23 pub score_components: ScoreInputs,
24 pub scores: Scores,
25}
26
27// TODO: Consider having "Concise" style corresponding to `concise_text`
28#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug)]
29pub enum SnippetStyle {
30 Signature,
31 Definition,
32}
33
34impl ScoredSnippet {
35 /// Returns the score for this snippet with the specified style.
36 pub fn score(&self, style: SnippetStyle) -> f32 {
37 match style {
38 SnippetStyle::Signature => self.scores.signature,
39 SnippetStyle::Definition => self.scores.definition,
40 }
41 }
42
43 pub fn size(&self, style: SnippetStyle) -> usize {
44 todo!()
45 }
46
47 pub fn score_density(&self, style: SnippetStyle) -> f32 {
48 self.score(style) / (self.size(style)) as f32
49 }
50}
51
52fn scored_snippets(
53 index: Entity<TreeSitterIndex>,
54 excerpt: &EditPredictionExcerpt,
55 excerpt_text: &EditPredictionExcerptText,
56 references: Vec<Reference>,
57 cursor_offset: usize,
58 current_buffer: &BufferSnapshot,
59 cx: &App,
60) -> Vec<ScoredSnippet> {
61 let containing_range_identifier_occurrences =
62 IdentifierOccurrences::within_string(&excerpt_text.body);
63 let cursor_point = cursor_offset.to_point(¤t_buffer);
64
65 // todo! ask michael why we needed this
66 // if let Some(cursor_within_excerpt) = cursor_offset.checked_sub(excerpt.range.start) {
67 // } else {
68 // };
69 let start_point = Point::new(cursor_point.row.saturating_sub(2), 0);
70 let end_point = Point::new(cursor_point.row + 1, 0);
71 let adjacent_identifier_occurrences = IdentifierOccurrences::within_string(
72 ¤t_buffer
73 .text_for_range(start_point..end_point)
74 .collect::<String>(),
75 );
76
77 let mut identifier_to_references: HashMap<Identifier, Vec<Reference>> = HashMap::new();
78 for reference in references {
79 identifier_to_references
80 .entry(reference.identifier.clone())
81 .or_insert_with(Vec::new)
82 .push(reference);
83 }
84
85 identifier_to_references
86 .into_iter()
87 .flat_map(|(identifier, references)| {
88 let definitions = index
89 .read(cx)
90 // todo! pick a limit
91 .declarations_for_identifier::<16>(&identifier, cx);
92 let definition_count = definitions.len();
93 let total_file_count = definitions
94 .iter()
95 .filter_map(|definition| definition.project_entry_id(cx))
96 .collect::<HashSet<ProjectEntryId>>()
97 .len();
98
99 definitions
100 .iter()
101 .filter_map(|definition| match definition {
102 Declaration::Buffer {
103 declaration,
104 buffer,
105 } => {
106 let is_same_file = buffer
107 .read_with(cx, |buffer, _| buffer.remote_id())
108 .is_ok_and(|buffer_id| buffer_id == current_buffer.remote_id());
109
110 if is_same_file {
111 range_intersection(
112 &declaration.item_range.to_offset(¤t_buffer),
113 &excerpt.range,
114 )
115 .is_none()
116 .then(|| {
117 let definition_line =
118 declaration.item_range.start.to_point(current_buffer).row;
119 (
120 true,
121 (cursor_point.row as i32 - definition_line as i32).abs() as u32,
122 definition,
123 )
124 })
125 } else {
126 Some((false, 0, definition))
127 }
128 }
129 Declaration::File { .. } => {
130 // We can assume that a file declaration is in a different file,
131 // because the current onemust be open
132 Some((false, 0, definition))
133 }
134 })
135 .sorted_by_key(|&(_, distance, _)| distance)
136 .enumerate()
137 .map(
138 |(
139 definition_line_distance_rank,
140 (is_same_file, definition_line_distance, definition),
141 )| {
142 let same_file_definition_count =
143 index.read(cx).file_declaration_count(definition);
144
145 score_snippet(
146 &identifier,
147 &references,
148 definition.clone(),
149 is_same_file,
150 definition_line_distance,
151 definition_line_distance_rank,
152 same_file_definition_count,
153 definition_count,
154 total_file_count,
155 &containing_range_identifier_occurrences,
156 &adjacent_identifier_occurrences,
157 cursor_point,
158 current_buffer,
159 cx,
160 )
161 },
162 )
163 .collect::<Vec<_>>()
164 })
165 .flatten()
166 .collect::<Vec<_>>()
167}
168
169// todo! replace with existing util?
170fn range_intersection<T: Ord + Clone>(a: &Range<T>, b: &Range<T>) -> Option<Range<T>> {
171 let start = a.start.clone().max(b.start.clone());
172 let end = a.end.clone().min(b.end.clone());
173 if start < end {
174 Some(Range { start, end })
175 } else {
176 None
177 }
178}
179
180fn score_snippet(
181 identifier: &Identifier,
182 references: &[Reference],
183 definition: Declaration,
184 is_same_file: bool,
185 definition_line_distance: u32,
186 definition_line_distance_rank: usize,
187 same_file_definition_count: usize,
188 definition_count: usize,
189 definition_file_count: usize,
190 containing_range_identifier_occurrences: &IdentifierOccurrences,
191 adjacent_identifier_occurrences: &IdentifierOccurrences,
192 cursor: Point,
193 current_buffer: &BufferSnapshot,
194 cx: &App,
195) -> Option<ScoredSnippet> {
196 let is_referenced_nearby = references
197 .iter()
198 .any(|r| r.region == ReferenceRegion::Nearby);
199 let is_referenced_in_breadcrumb = references
200 .iter()
201 .any(|r| r.region == ReferenceRegion::Breadcrumb);
202 let reference_count = references.len();
203 let reference_line_distance = references
204 .iter()
205 .map(|r| {
206 let reference_line = r.range.start.to_point(current_buffer).row as i32;
207 (cursor.row as i32 - reference_line).abs() as u32
208 })
209 .min()
210 .unwrap();
211
212 let item_source_occurrences = IdentifierOccurrences::within_string(&definition.item_text(cx));
213 let item_signature_occurrences =
214 IdentifierOccurrences::within_string(&definition.signature_text(cx));
215 let containing_range_vs_item_jaccard = jaccard_similarity(
216 containing_range_identifier_occurrences,
217 &item_source_occurrences,
218 );
219 let containing_range_vs_signature_jaccard = jaccard_similarity(
220 containing_range_identifier_occurrences,
221 &item_signature_occurrences,
222 );
223 let adjacent_vs_item_jaccard =
224 jaccard_similarity(adjacent_identifier_occurrences, &item_source_occurrences);
225 let adjacent_vs_signature_jaccard =
226 jaccard_similarity(adjacent_identifier_occurrences, &item_signature_occurrences);
227
228 let containing_range_vs_item_weighted_overlap = weighted_overlap_coefficient(
229 containing_range_identifier_occurrences,
230 &item_source_occurrences,
231 );
232 let containing_range_vs_signature_weighted_overlap = weighted_overlap_coefficient(
233 containing_range_identifier_occurrences,
234 &item_signature_occurrences,
235 );
236 let adjacent_vs_item_weighted_overlap =
237 weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_source_occurrences);
238 let adjacent_vs_signature_weighted_overlap =
239 weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_signature_occurrences);
240
241 let score_components = ScoreInputs {
242 is_same_file,
243 is_referenced_nearby,
244 is_referenced_in_breadcrumb,
245 reference_line_distance,
246 definition_line_distance,
247 definition_line_distance_rank,
248 reference_count,
249 same_file_definition_count,
250 definition_count,
251 definition_file_count,
252 containing_range_vs_item_jaccard,
253 containing_range_vs_signature_jaccard,
254 adjacent_vs_item_jaccard,
255 adjacent_vs_signature_jaccard,
256 containing_range_vs_item_weighted_overlap,
257 containing_range_vs_signature_weighted_overlap,
258 adjacent_vs_item_weighted_overlap,
259 adjacent_vs_signature_weighted_overlap,
260 };
261
262 Some(ScoredSnippet {
263 identifier: identifier.clone(),
264 declaration: definition,
265 scores: score_components.score(),
266 score_components,
267 })
268}
269
270#[derive(Clone, Debug, Serialize)]
271pub struct ScoreInputs {
272 pub is_same_file: bool,
273 pub is_referenced_nearby: bool,
274 pub is_referenced_in_breadcrumb: bool,
275 pub reference_count: usize,
276 pub same_file_definition_count: usize,
277 pub definition_count: usize,
278 // todo! do we need this?
279 pub definition_file_count: usize,
280 pub reference_line_distance: u32,
281 pub definition_line_distance: u32,
282 pub definition_line_distance_rank: usize,
283 pub containing_range_vs_item_jaccard: f32,
284 pub containing_range_vs_signature_jaccard: f32,
285 pub adjacent_vs_item_jaccard: f32,
286 pub adjacent_vs_signature_jaccard: f32,
287 pub containing_range_vs_item_weighted_overlap: f32,
288 pub containing_range_vs_signature_weighted_overlap: f32,
289 pub adjacent_vs_item_weighted_overlap: f32,
290 pub adjacent_vs_signature_weighted_overlap: f32,
291}
292
293#[derive(Clone, Debug, Serialize)]
294pub struct Scores {
295 pub signature: f32,
296 pub definition: f32,
297}
298
299impl ScoreInputs {
300 fn score(&self) -> Scores {
301 // Score related to how likely this is the correct definition, range 0 to 1
302 let accuracy_score = if self.is_same_file {
303 // TODO: use definition_line_distance_rank
304 (0.5 / self.same_file_definition_count as f32)
305 + (0.5 / self.definition_file_count as f32)
306 } else {
307 1.0 / self.definition_count as f32
308 };
309
310 // Score related to the distance between the reference and cursor, range 0 to 1
311 let distance_score = if self.is_referenced_nearby {
312 1.0 / (1.0 + self.reference_line_distance as f32 / 10.0).powf(2.0)
313 } else {
314 // same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
315 0.5
316 };
317
318 // For now instead of linear combination, the scores are just multiplied together.
319 let combined_score = 10.0 * accuracy_score * distance_score;
320
321 Scores {
322 signature: combined_score * self.containing_range_vs_signature_weighted_overlap,
323 // definition score gets boosted both by being multipled by 2 and by there being more
324 // weighted overlap.
325 definition: 2.0 * combined_score * self.containing_range_vs_item_weighted_overlap,
326 }
327 }
328}