1use itertools::Itertools as _;
2use serde::Serialize;
3use std::collections::HashMap;
4use std::ops::Range;
5use std::path::Path;
6use std::sync::Arc;
7use strum::EnumIter;
8use tree_sitter::{QueryCursor, StreamingIterator, Tree};
9
10use crate::{Declaration, outline::Identifier};
11
12#[derive(Clone, Debug)]
13pub struct ScoredSnippet {
14 #[allow(dead_code)]
15 pub identifier: Identifier,
16 pub definition_file: Arc<Path>,
17 pub definition: OutlineItem,
18 pub score_components: ScoreInputs,
19 pub scores: Scores,
20}
21
22// TODO: Consider having "Concise" style corresponding to `concise_text`
23#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug)]
24pub enum SnippetStyle {
25 Signature,
26 Definition,
27}
28
29impl ScoredSnippet {
30 /// Returns the score for this snippet with the specified style.
31 pub fn score(&self, style: SnippetStyle) -> f32 {
32 match style {
33 SnippetStyle::Signature => self.scores.signature,
34 SnippetStyle::Definition => self.scores.definition,
35 }
36 }
37
38 /// Returns the byte range for the snippet with the specified style. For `Signature` this is the
39 /// signature_range expanded to line boundaries. For `Definition` this is the item_range expanded to
40 /// line boundaries (similar to slice_at_line_boundaries).
41 pub fn line_range(
42 &self,
43 identifier_index: &IdentifierIndex,
44 style: SnippetStyle,
45 ) -> Range<usize> {
46 let source = identifier_index
47 .path_to_source
48 .get(&self.definition_file)
49 .unwrap();
50
51 let base_range = match style {
52 SnippetStyle::Signature => self.definition.signature_range.clone(),
53 SnippetStyle::Definition => self.definition.item_range.clone(),
54 };
55
56 expand_range_to_line_boundaries(source, base_range)
57 }
58
59 pub fn score_density(&self, identifier_index: &IdentifierIndex, style: SnippetStyle) -> f32 {
60 self.score(style) / range_size(self.line_range(identifier_index, style)) as f32
61 }
62}
63
64fn scored_snippets(
65 language: &Language,
66 index: &IdentifierIndex,
67 source: &str,
68 reference_file: &Path,
69 references: Vec<Reference>,
70 cursor_offset: usize,
71 excerpt_range: Range<usize>,
72) -> Vec<ScoredSnippet> {
73 let cursor = point_from_offset(source, cursor_offset);
74
75 let containing_range_identifier_occurrences =
76 IdentifierOccurrences::within_string(&source[excerpt_range.clone()]);
77
78 let start_point = Point::new(cursor.row.saturating_sub(2), 0);
79 let end_point = Point::new(cursor.row + 1, 0);
80 let adjacent_identifier_occurrences = IdentifierOccurrences::within_string(
81 &source[offset_from_point(source, start_point)..offset_from_point(source, end_point)],
82 );
83
84 let mut identifier_to_references: HashMap<Identifier, Vec<Reference>> = HashMap::new();
85 for reference in references {
86 identifier_to_references
87 .entry(reference.identifier.clone())
88 .or_insert_with(Vec::new)
89 .push(reference);
90 }
91
92 identifier_to_references
93 .into_iter()
94 .flat_map(|(identifier, references)| {
95 let Some(definitions) = index
96 .identifier_to_definitions
97 .get(&(identifier.clone(), language.name.clone()))
98 else {
99 return Vec::new();
100 };
101 let definition_count = definitions.len();
102 let definition_file_count = definitions.keys().len();
103
104 definitions
105 .iter_all()
106 .flat_map(|(definition_file, file_definitions)| {
107 let same_file_definition_count = file_definitions.len();
108 let is_same_file = reference_file == definition_file.as_ref();
109 file_definitions
110 .iter()
111 .filter(|definition| {
112 !is_same_file
113 || !range_intersection(&definition.item_range, &excerpt_range)
114 .is_some()
115 })
116 .filter_map(|definition| {
117 let definition_line_distance = if is_same_file {
118 let definition_line =
119 point_from_offset(source, definition.item_range.start).row;
120 (cursor.row as i32 - definition_line as i32).abs() as u32
121 } else {
122 0
123 };
124 Some((definition_line_distance, definition))
125 })
126 .sorted_by_key(|&(distance, _)| distance)
127 .enumerate()
128 .map(
129 |(
130 definition_line_distance_rank,
131 (definition_line_distance, definition),
132 )| {
133 score_snippet(
134 index,
135 source,
136 &identifier,
137 &references,
138 definition_file.clone(),
139 definition.clone(),
140 is_same_file,
141 definition_line_distance,
142 definition_line_distance_rank,
143 same_file_definition_count,
144 definition_count,
145 definition_file_count,
146 &containing_range_identifier_occurrences,
147 &adjacent_identifier_occurrences,
148 cursor,
149 )
150 },
151 )
152 .collect::<Vec<_>>()
153 })
154 .collect::<Vec<_>>()
155 })
156 .flatten()
157 .collect::<Vec<_>>()
158}
159
160fn score_snippet(
161 index: &IdentifierIndex,
162 reference_source: &str,
163 identifier: &Identifier,
164 references: &Vec<Reference>,
165 definition_file: Arc<Path>,
166 definition: OutlineItem,
167 is_same_file: bool,
168 definition_line_distance: u32,
169 definition_line_distance_rank: usize,
170 same_file_definition_count: usize,
171 definition_count: usize,
172 definition_file_count: usize,
173 containing_range_identifier_occurrences: &IdentifierOccurrences,
174 adjacent_identifier_occurrences: &IdentifierOccurrences,
175 cursor: Point,
176) -> Option<ScoredSnippet> {
177 let is_referenced_nearby = references
178 .iter()
179 .any(|r| r.reference_region == ReferenceRegion::Nearby);
180 let is_referenced_in_breadcrumb = references
181 .iter()
182 .any(|r| r.reference_region == ReferenceRegion::Breadcrumb);
183 let reference_count = references.len();
184 let reference_line_distance = references
185 .iter()
186 .map(|r| {
187 let reference_line = point_from_offset(reference_source, r.range.start).row as i32;
188 (cursor.row as i32 - reference_line).abs() as u32
189 })
190 .min()
191 .unwrap();
192
193 let definition_source = index.path_to_source.get(&definition_file).unwrap();
194 let item_source_occurrences =
195 IdentifierOccurrences::within_string(definition.item(&definition_source));
196 let item_signature_occurrences =
197 IdentifierOccurrences::within_string(definition.signature(&definition_source));
198 let containing_range_vs_item_jaccard = jaccard_similarity(
199 containing_range_identifier_occurrences,
200 &item_source_occurrences,
201 );
202 let containing_range_vs_signature_jaccard = jaccard_similarity(
203 containing_range_identifier_occurrences,
204 &item_signature_occurrences,
205 );
206 let adjacent_vs_item_jaccard =
207 jaccard_similarity(adjacent_identifier_occurrences, &item_source_occurrences);
208 let adjacent_vs_signature_jaccard =
209 jaccard_similarity(adjacent_identifier_occurrences, &item_signature_occurrences);
210
211 let containing_range_vs_item_weighted_overlap = weighted_overlap_coefficient(
212 containing_range_identifier_occurrences,
213 &item_source_occurrences,
214 );
215 let containing_range_vs_signature_weighted_overlap = weighted_overlap_coefficient(
216 containing_range_identifier_occurrences,
217 &item_signature_occurrences,
218 );
219 let adjacent_vs_item_weighted_overlap =
220 weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_source_occurrences);
221 let adjacent_vs_signature_weighted_overlap =
222 weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_signature_occurrences);
223
224 let score_components = ScoreInputs {
225 is_same_file,
226 is_referenced_nearby,
227 is_referenced_in_breadcrumb,
228 reference_line_distance,
229 definition_line_distance,
230 definition_line_distance_rank,
231 reference_count,
232 same_file_definition_count,
233 definition_count,
234 definition_file_count,
235 containing_range_vs_item_jaccard,
236 containing_range_vs_signature_jaccard,
237 adjacent_vs_item_jaccard,
238 adjacent_vs_signature_jaccard,
239 containing_range_vs_item_weighted_overlap,
240 containing_range_vs_signature_weighted_overlap,
241 adjacent_vs_item_weighted_overlap,
242 adjacent_vs_signature_weighted_overlap,
243 };
244
245 Some(ScoredSnippet {
246 identifier: identifier.clone(),
247 definition_file,
248 definition,
249 scores: score_components.score(),
250 score_components,
251 })
252}
253
254#[derive(Clone, Debug, Serialize)]
255pub struct ScoreInputs {
256 pub is_same_file: bool,
257 pub is_referenced_nearby: bool,
258 pub is_referenced_in_breadcrumb: bool,
259 pub reference_count: usize,
260 pub same_file_definition_count: usize,
261 pub definition_count: usize,
262 pub definition_file_count: usize,
263 pub reference_line_distance: u32,
264 pub definition_line_distance: u32,
265 pub definition_line_distance_rank: usize,
266 pub containing_range_vs_item_jaccard: f32,
267 pub containing_range_vs_signature_jaccard: f32,
268 pub adjacent_vs_item_jaccard: f32,
269 pub adjacent_vs_signature_jaccard: f32,
270 pub containing_range_vs_item_weighted_overlap: f32,
271 pub containing_range_vs_signature_weighted_overlap: f32,
272 pub adjacent_vs_item_weighted_overlap: f32,
273 pub adjacent_vs_signature_weighted_overlap: f32,
274}
275
276#[derive(Clone, Debug, Serialize)]
277pub struct Scores {
278 pub signature: f32,
279 pub definition: f32,
280}
281
282impl ScoreInputs {
283 fn score(&self) -> Scores {
284 // Score related to how likely this is the correct definition, range 0 to 1
285 let accuracy_score = if self.is_same_file {
286 // TODO: use definition_line_distance_rank
287 (0.5 / self.same_file_definition_count as f32)
288 + (0.5 / self.definition_file_count as f32)
289 } else {
290 1.0 / self.definition_count as f32
291 };
292
293 // Score related to the distance between the reference and cursor, range 0 to 1
294 let distance_score = if self.is_referenced_nearby {
295 1.0 / (1.0 + self.reference_line_distance as f32 / 10.0).powf(2.0)
296 } else {
297 // same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
298 0.5
299 };
300
301 // For now instead of linear combination, the scores are just multiplied together.
302 let combined_score = 10.0 * accuracy_score * distance_score;
303
304 Scores {
305 signature: combined_score * self.containing_range_vs_signature_weighted_overlap,
306 // definition score gets boosted both by being multipled by 2 and by there being more
307 // weighted overlap.
308 definition: 2.0 * combined_score * self.containing_range_vs_item_weighted_overlap,
309 }
310 }
311}