1use itertools::Itertools as _;
2use serde::Serialize;
3use std::collections::HashMap;
4use std::path::Path;
5use std::sync::Arc;
6use strum::EnumIter;
7use tree_sitter::StreamingIterator;
8
9use crate::{
10 Declaration, EditPredictionExcerpt, EditPredictionExcerptText, outline::Identifier,
11 reference::Reference, text_similarity::IdentifierOccurrences,
12};
13
14#[derive(Clone, Debug)]
15pub struct ScoredSnippet {
16 #[allow(dead_code)]
17 pub identifier: Identifier,
18 pub declaration: Declaration,
19 pub score_components: ScoreInputs,
20 pub scores: Scores,
21}
22
23// TODO: Consider having "Concise" style corresponding to `concise_text`
24#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug)]
25pub enum SnippetStyle {
26 Signature,
27 Definition,
28}
29
30impl ScoredSnippet {
31 /// Returns the score for this snippet with the specified style.
32 pub fn score(&self, style: SnippetStyle) -> f32 {
33 match style {
34 SnippetStyle::Signature => self.scores.signature,
35 SnippetStyle::Definition => self.scores.definition,
36 }
37 }
38
39 pub fn size(&self, style: SnippetStyle) -> usize {
40 todo!()
41 }
42
43 pub fn score_density(&self, style: SnippetStyle) -> f32 {
44 self.score(style) / (self.size(style)) as f32
45 }
46}
47
48fn scored_snippets(
49 excerpt: &EditPredictionExcerpt,
50 excerpt_text: &EditPredictionExcerptText,
51 references: Vec<Reference>,
52 cursor_offset: usize,
53) -> Vec<ScoredSnippet> {
54 let excerpt_occurrences = IdentifierOccurrences::within_string(&excerpt_text.body);
55
56 /* todo!
57 if let Some(cursor_within_excerpt) = cursor_offset.checked_sub(excerpt.range.start) {
58 } else {
59 };
60 let start_point = Point::new(cursor.row.saturating_sub(2), 0);
61 let end_point = Point::new(cursor.row + 1, 0);
62 let adjacent_identifier_occurrences = IdentifierOccurrences::within_string(
63 &source[offset_from_point(source, start_point)..offset_from_point(source, end_point)],
64 );
65 */
66
67 let mut identifier_to_references: HashMap<Identifier, Vec<Reference>> = HashMap::new();
68 for reference in references {
69 identifier_to_references
70 .entry(reference.identifier.clone())
71 .or_insert_with(Vec::new)
72 .push(reference);
73 }
74
75 identifier_to_references
76 .into_iter()
77 .flat_map(|(identifier, references)| {
78 let Some(definitions) = index
79 .identifier_to_definitions
80 .get(&(identifier.clone(), language.name.clone()))
81 else {
82 return Vec::new();
83 };
84 let definition_count = definitions.len();
85 let definition_file_count = definitions.keys().len();
86
87 definitions
88 .iter_all()
89 .flat_map(|(definition_file, file_definitions)| {
90 let same_file_definition_count = file_definitions.len();
91 let is_same_file = reference_file == definition_file.as_ref();
92 file_definitions
93 .iter()
94 .filter(|definition| {
95 !is_same_file
96 || !range_intersection(&definition.item_range, &excerpt_range)
97 .is_some()
98 })
99 .filter_map(|definition| {
100 let definition_line_distance = if is_same_file {
101 let definition_line =
102 point_from_offset(source, definition.item_range.start).row;
103 (cursor.row as i32 - definition_line as i32).abs() as u32
104 } else {
105 0
106 };
107 Some((definition_line_distance, definition))
108 })
109 .sorted_by_key(|&(distance, _)| distance)
110 .enumerate()
111 .map(
112 |(
113 definition_line_distance_rank,
114 (definition_line_distance, definition),
115 )| {
116 score_snippet(
117 &identifier,
118 &references,
119 definition_file.clone(),
120 definition.clone(),
121 is_same_file,
122 definition_line_distance,
123 definition_line_distance_rank,
124 same_file_definition_count,
125 definition_count,
126 definition_file_count,
127 &containing_range_identifier_occurrences,
128 &adjacent_identifier_occurrences,
129 cursor,
130 )
131 },
132 )
133 .collect::<Vec<_>>()
134 })
135 .collect::<Vec<_>>()
136 })
137 .flatten()
138 .collect::<Vec<_>>()
139}
140
141fn score_snippet(
142 identifier: &Identifier,
143 references: &[Reference],
144 definition_file: Arc<Path>,
145 definition: OutlineItem,
146 is_same_file: bool,
147 definition_line_distance: u32,
148 definition_line_distance_rank: usize,
149 same_file_definition_count: usize,
150 definition_count: usize,
151 definition_file_count: usize,
152 containing_range_identifier_occurrences: &IdentifierOccurrences,
153 adjacent_identifier_occurrences: &IdentifierOccurrences,
154 cursor: Point,
155) -> Option<ScoredSnippet> {
156 let is_referenced_nearby = references
157 .iter()
158 .any(|r| r.reference_region == ReferenceRegion::Nearby);
159 let is_referenced_in_breadcrumb = references
160 .iter()
161 .any(|r| r.reference_region == ReferenceRegion::Breadcrumb);
162 let reference_count = references.len();
163 let reference_line_distance = references
164 .iter()
165 .map(|r| {
166 let reference_line = point_from_offset(reference_source, r.range.start).row as i32;
167 (cursor.row as i32 - reference_line).abs() as u32
168 })
169 .min()
170 .unwrap();
171
172 let definition_source = index.path_to_source.get(&definition_file).unwrap();
173 let item_source_occurrences =
174 IdentifierOccurrences::within_string(definition.item(&definition_source));
175 let item_signature_occurrences =
176 IdentifierOccurrences::within_string(definition.signature(&definition_source));
177 let containing_range_vs_item_jaccard = jaccard_similarity(
178 containing_range_identifier_occurrences,
179 &item_source_occurrences,
180 );
181 let containing_range_vs_signature_jaccard = jaccard_similarity(
182 containing_range_identifier_occurrences,
183 &item_signature_occurrences,
184 );
185 let adjacent_vs_item_jaccard =
186 jaccard_similarity(adjacent_identifier_occurrences, &item_source_occurrences);
187 let adjacent_vs_signature_jaccard =
188 jaccard_similarity(adjacent_identifier_occurrences, &item_signature_occurrences);
189
190 let containing_range_vs_item_weighted_overlap = weighted_overlap_coefficient(
191 containing_range_identifier_occurrences,
192 &item_source_occurrences,
193 );
194 let containing_range_vs_signature_weighted_overlap = weighted_overlap_coefficient(
195 containing_range_identifier_occurrences,
196 &item_signature_occurrences,
197 );
198 let adjacent_vs_item_weighted_overlap =
199 weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_source_occurrences);
200 let adjacent_vs_signature_weighted_overlap =
201 weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_signature_occurrences);
202
203 let score_components = ScoreInputs {
204 is_same_file,
205 is_referenced_nearby,
206 is_referenced_in_breadcrumb,
207 reference_line_distance,
208 definition_line_distance,
209 definition_line_distance_rank,
210 reference_count,
211 same_file_definition_count,
212 definition_count,
213 definition_file_count,
214 containing_range_vs_item_jaccard,
215 containing_range_vs_signature_jaccard,
216 adjacent_vs_item_jaccard,
217 adjacent_vs_signature_jaccard,
218 containing_range_vs_item_weighted_overlap,
219 containing_range_vs_signature_weighted_overlap,
220 adjacent_vs_item_weighted_overlap,
221 adjacent_vs_signature_weighted_overlap,
222 };
223
224 Some(ScoredSnippet {
225 identifier: identifier.clone(),
226 declaration_file: definition_file,
227 declaration: definition,
228 scores: score_components.score(),
229 score_components,
230 })
231}
232
233#[derive(Clone, Debug, Serialize)]
234pub struct ScoreInputs {
235 pub is_same_file: bool,
236 pub is_referenced_nearby: bool,
237 pub is_referenced_in_breadcrumb: bool,
238 pub reference_count: usize,
239 pub same_file_definition_count: usize,
240 pub definition_count: usize,
241 pub definition_file_count: usize,
242 pub reference_line_distance: u32,
243 pub definition_line_distance: u32,
244 pub definition_line_distance_rank: usize,
245 pub containing_range_vs_item_jaccard: f32,
246 pub containing_range_vs_signature_jaccard: f32,
247 pub adjacent_vs_item_jaccard: f32,
248 pub adjacent_vs_signature_jaccard: f32,
249 pub containing_range_vs_item_weighted_overlap: f32,
250 pub containing_range_vs_signature_weighted_overlap: f32,
251 pub adjacent_vs_item_weighted_overlap: f32,
252 pub adjacent_vs_signature_weighted_overlap: f32,
253}
254
255#[derive(Clone, Debug, Serialize)]
256pub struct Scores {
257 pub signature: f32,
258 pub definition: f32,
259}
260
261impl ScoreInputs {
262 fn score(&self) -> Scores {
263 // Score related to how likely this is the correct definition, range 0 to 1
264 let accuracy_score = if self.is_same_file {
265 // TODO: use definition_line_distance_rank
266 (0.5 / self.same_file_definition_count as f32)
267 + (0.5 / self.definition_file_count as f32)
268 } else {
269 1.0 / self.definition_count as f32
270 };
271
272 // Score related to the distance between the reference and cursor, range 0 to 1
273 let distance_score = if self.is_referenced_nearby {
274 1.0 / (1.0 + self.reference_line_distance as f32 / 10.0).powf(2.0)
275 } else {
276 // same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
277 0.5
278 };
279
280 // For now instead of linear combination, the scores are just multiplied together.
281 let combined_score = 10.0 * accuracy_score * distance_score;
282
283 Scores {
284 signature: combined_score * self.containing_range_vs_signature_weighted_overlap,
285 // definition score gets boosted both by being multipled by 2 and by there being more
286 // weighted overlap.
287 definition: 2.0 * combined_score * self.containing_range_vs_item_weighted_overlap,
288 }
289 }
290}