1use itertools::Itertools as _;
2use language::BufferSnapshot;
3use serde::Serialize;
4use std::{collections::HashMap, ops::Range};
5use strum::EnumIter;
6use text::{OffsetRangeExt, Point, ToPoint};
7
8use crate::{
9 Declaration, EditPredictionExcerpt, EditPredictionExcerptText, Identifier,
10 reference::{Reference, ReferenceRegion},
11 syntax_index::SyntaxIndexState,
12 text_similarity::{IdentifierOccurrences, jaccard_similarity, weighted_overlap_coefficient},
13};
14
15// TODO:
16//
17// * Consider adding declaration_file_count (n)
18
/// A declaration that was looked up for a referenced identifier, bundled with the signals used
/// to score it and the resulting scores.
#[derive(Clone, Debug)]
pub struct ScoredSnippet {
    /// The identifier whose declaration lookup produced this snippet.
    #[allow(dead_code)]
    pub identifier: Identifier,
    /// The declaration that was scored.
    pub declaration: Declaration,
    /// The raw signals that `scores` was computed from.
    pub score_components: ScoreInputs,
    /// The scores computed from `score_components`, one per [`SnippetStyle`].
    pub scores: Scores,
}
27
28// TODO: Consider having "Concise" style corresponding to `concise_text`
/// Style of a snippet; selects which of a snippet's scores `ScoredSnippet::score` returns.
#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum SnippetStyle {
    /// Selects the `signature` score.
    Signature,
    /// Selects the `declaration` score.
    Declaration,
}
34
35impl ScoredSnippet {
36 /// Returns the score for this snippet with the specified style.
37 pub fn score(&self, style: SnippetStyle) -> f32 {
38 match style {
39 SnippetStyle::Signature => self.scores.signature,
40 SnippetStyle::Declaration => self.scores.declaration,
41 }
42 }
43
44 pub fn size(&self, style: SnippetStyle) -> usize {
45 todo!()
46 }
47
48 pub fn score_density(&self, style: SnippetStyle) -> f32 {
49 self.score(style) / (self.size(style)) as f32
50 }
51}
52
53pub fn scored_snippets(
54 index: &SyntaxIndexState,
55 excerpt: &EditPredictionExcerpt,
56 excerpt_text: &EditPredictionExcerptText,
57 identifier_to_references: HashMap<Identifier, Vec<Reference>>,
58 cursor_offset: usize,
59 current_buffer: &BufferSnapshot,
60) -> Vec<ScoredSnippet> {
61 let containing_range_identifier_occurrences =
62 IdentifierOccurrences::within_string(&excerpt_text.body);
63 let cursor_point = cursor_offset.to_point(¤t_buffer);
64
65 let start_point = Point::new(cursor_point.row.saturating_sub(2), 0);
66 let end_point = Point::new(cursor_point.row + 1, 0);
67 let adjacent_identifier_occurrences = IdentifierOccurrences::within_string(
68 ¤t_buffer
69 .text_for_range(start_point..end_point)
70 .collect::<String>(),
71 );
72
73 identifier_to_references
74 .into_iter()
75 .flat_map(|(identifier, references)| {
76 // todo! pick a limit
77 let declarations = index.declarations_for_identifier::<16>(&identifier);
78 let declaration_count = declarations.len();
79
80 declarations
81 .iter()
82 .filter_map(|declaration| match declaration {
83 Declaration::Buffer {
84 buffer_id,
85 declaration: buffer_declaration,
86 ..
87 } => {
88 let is_same_file = buffer_id == ¤t_buffer.remote_id();
89
90 if is_same_file {
91 range_intersection(
92 &buffer_declaration.item_range.to_offset(¤t_buffer),
93 &excerpt.range,
94 )
95 .is_none()
96 .then(|| {
97 let declaration_line = buffer_declaration
98 .item_range
99 .start
100 .to_point(current_buffer)
101 .row;
102 (
103 true,
104 (cursor_point.row as i32 - declaration_line as i32).abs()
105 as u32,
106 declaration,
107 )
108 })
109 } else {
110 Some((false, 0, declaration))
111 }
112 }
113 Declaration::File { .. } => {
114 // We can assume that a file declaration is in a different file,
115 // because the current one must be open
116 Some((false, 0, declaration))
117 }
118 })
119 .sorted_by_key(|&(_, distance, _)| distance)
120 .enumerate()
121 .map(
122 |(
123 declaration_line_distance_rank,
124 (is_same_file, declaration_line_distance, declaration),
125 )| {
126 let same_file_declaration_count = index.file_declaration_count(declaration);
127
128 score_snippet(
129 &identifier,
130 &references,
131 declaration.clone(),
132 is_same_file,
133 declaration_line_distance,
134 declaration_line_distance_rank,
135 same_file_declaration_count,
136 declaration_count,
137 &containing_range_identifier_occurrences,
138 &adjacent_identifier_occurrences,
139 cursor_point,
140 current_buffer,
141 )
142 },
143 )
144 .collect::<Vec<_>>()
145 })
146 .flatten()
147 .collect::<Vec<_>>()
148}
149
150// todo! replace with existing util?
/// Returns the overlap of two half-open ranges, or `None` when they share no elements.
///
/// Ranges that merely touch (one ends where the other starts) and empty ranges produce `None`.
fn range_intersection<T: Ord + Clone>(a: &Range<T>, b: &Range<T>) -> Option<Range<T>> {
    // The overlap starts at the later of the two starts and ends at the earlier of the two ends.
    let latest_start = Ord::max(a.start.clone(), b.start.clone());
    let earliest_end = Ord::min(a.end.clone(), b.end.clone());
    (latest_start < earliest_end).then_some(latest_start..earliest_end)
}
160
161fn score_snippet(
162 identifier: &Identifier,
163 references: &[Reference],
164 declaration: Declaration,
165 is_same_file: bool,
166 declaration_line_distance: u32,
167 declaration_line_distance_rank: usize,
168 same_file_declaration_count: usize,
169 declaration_count: usize,
170 containing_range_identifier_occurrences: &IdentifierOccurrences,
171 adjacent_identifier_occurrences: &IdentifierOccurrences,
172 cursor: Point,
173 current_buffer: &BufferSnapshot,
174) -> Option<ScoredSnippet> {
175 let is_referenced_nearby = references
176 .iter()
177 .any(|r| r.region == ReferenceRegion::Nearby);
178 let is_referenced_in_breadcrumb = references
179 .iter()
180 .any(|r| r.region == ReferenceRegion::Breadcrumb);
181 let reference_count = references.len();
182 let reference_line_distance = references
183 .iter()
184 .map(|r| {
185 let reference_line = r.range.start.to_point(current_buffer).row as i32;
186 (cursor.row as i32 - reference_line).abs() as u32
187 })
188 .min()
189 .unwrap();
190
191 let item_source_occurrences = IdentifierOccurrences::within_string(&declaration.item_text().0);
192 let item_signature_occurrences =
193 IdentifierOccurrences::within_string(&declaration.signature_text().0);
194 let containing_range_vs_item_jaccard = jaccard_similarity(
195 containing_range_identifier_occurrences,
196 &item_source_occurrences,
197 );
198 let containing_range_vs_signature_jaccard = jaccard_similarity(
199 containing_range_identifier_occurrences,
200 &item_signature_occurrences,
201 );
202 let adjacent_vs_item_jaccard =
203 jaccard_similarity(adjacent_identifier_occurrences, &item_source_occurrences);
204 let adjacent_vs_signature_jaccard =
205 jaccard_similarity(adjacent_identifier_occurrences, &item_signature_occurrences);
206
207 let containing_range_vs_item_weighted_overlap = weighted_overlap_coefficient(
208 containing_range_identifier_occurrences,
209 &item_source_occurrences,
210 );
211 let containing_range_vs_signature_weighted_overlap = weighted_overlap_coefficient(
212 containing_range_identifier_occurrences,
213 &item_signature_occurrences,
214 );
215 let adjacent_vs_item_weighted_overlap =
216 weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_source_occurrences);
217 let adjacent_vs_signature_weighted_overlap =
218 weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_signature_occurrences);
219
220 let score_components = ScoreInputs {
221 is_same_file,
222 is_referenced_nearby,
223 is_referenced_in_breadcrumb,
224 reference_line_distance,
225 declaration_line_distance,
226 declaration_line_distance_rank,
227 reference_count,
228 same_file_declaration_count,
229 declaration_count,
230 containing_range_vs_item_jaccard,
231 containing_range_vs_signature_jaccard,
232 adjacent_vs_item_jaccard,
233 adjacent_vs_signature_jaccard,
234 containing_range_vs_item_weighted_overlap,
235 containing_range_vs_signature_weighted_overlap,
236 adjacent_vs_item_weighted_overlap,
237 adjacent_vs_signature_weighted_overlap,
238 };
239
240 Some(ScoredSnippet {
241 identifier: identifier.clone(),
242 declaration: declaration,
243 scores: score_components.score(),
244 score_components,
245 })
246}
247
/// The raw signals collected for one (identifier, declaration) pair; `ScoreInputs::score`
/// turns them into `Scores`.
#[derive(Clone, Debug, Serialize)]
pub struct ScoreInputs {
    /// Whether the declaration lives in the buffer that contains the cursor.
    pub is_same_file: bool,
    /// Whether any reference to the identifier has region `ReferenceRegion::Nearby`.
    pub is_referenced_nearby: bool,
    /// Whether any reference to the identifier has region `ReferenceRegion::Breadcrumb`.
    pub is_referenced_in_breadcrumb: bool,
    /// Number of references to the identifier.
    pub reference_count: usize,
    /// Value of `file_declaration_count` from the syntax index for this declaration.
    // NOTE(review): presumably the number of declarations in the declaration's file — confirm.
    pub same_file_declaration_count: usize,
    /// Number of declarations the index returned for the identifier.
    pub declaration_count: usize,
    /// Smallest row distance between the cursor and the start of any reference.
    pub reference_line_distance: u32,
    /// Row distance between the cursor and the start of the declaration's item range;
    /// 0 for declarations outside the current buffer.
    pub declaration_line_distance: u32,
    /// Zero-based position of this declaration among the identifier's candidates once they are
    /// sorted by `declaration_line_distance`.
    pub declaration_line_distance_rank: usize,
    /// `jaccard_similarity` of the excerpt body's identifiers and the declaration's item text.
    pub containing_range_vs_item_jaccard: f32,
    /// `jaccard_similarity` of the excerpt body's identifiers and the declaration's signature text.
    pub containing_range_vs_signature_jaccard: f32,
    /// `jaccard_similarity` of the identifiers on the rows around the cursor and the
    /// declaration's item text.
    pub adjacent_vs_item_jaccard: f32,
    /// `jaccard_similarity` of the identifiers on the rows around the cursor and the
    /// declaration's signature text.
    pub adjacent_vs_signature_jaccard: f32,
    /// `weighted_overlap_coefficient` of the excerpt body's identifiers and the declaration's
    /// item text.
    pub containing_range_vs_item_weighted_overlap: f32,
    /// `weighted_overlap_coefficient` of the excerpt body's identifiers and the declaration's
    /// signature text.
    pub containing_range_vs_signature_weighted_overlap: f32,
    /// `weighted_overlap_coefficient` of the identifiers on the rows around the cursor and the
    /// declaration's item text.
    pub adjacent_vs_item_weighted_overlap: f32,
    /// `weighted_overlap_coefficient` of the identifiers on the rows around the cursor and the
    /// declaration's signature text.
    pub adjacent_vs_signature_weighted_overlap: f32,
}
268
/// Final scores for a snippet, one per `SnippetStyle`.
#[derive(Clone, Debug, Serialize)]
pub struct Scores {
    /// Score returned for `SnippetStyle::Signature`.
    pub signature: f32,
    /// Score returned for `SnippetStyle::Declaration`.
    pub declaration: f32,
}
274
275impl ScoreInputs {
276 fn score(&self) -> Scores {
277 // Score related to how likely this is the correct declaration, range 0 to 1
278 let accuracy_score = if self.is_same_file {
279 // TODO: use declaration_line_distance_rank
280 1.0 / self.same_file_declaration_count as f32
281 } else {
282 1.0 / self.declaration_count as f32
283 };
284
285 // Score related to the distance between the reference and cursor, range 0 to 1
286 let distance_score = if self.is_referenced_nearby {
287 1.0 / (1.0 + self.reference_line_distance as f32 / 10.0).powf(2.0)
288 } else {
289 // same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
290 0.5
291 };
292
293 // For now instead of linear combination, the scores are just multiplied together.
294 let combined_score = 10.0 * accuracy_score * distance_score;
295
296 Scores {
297 signature: combined_score * self.containing_range_vs_signature_weighted_overlap,
298 // declaration score gets boosted both by being multipled by 2 and by there being more
299 // weighted overlap.
300 declaration: 2.0 * combined_score * self.containing_range_vs_item_weighted_overlap,
301 }
302 }
303}