scored_declaration.rs

  1use itertools::Itertools as _;
  2use serde::Serialize;
  3use std::collections::HashMap;
  4use std::path::Path;
  5use std::sync::Arc;
  6use strum::EnumIter;
  7use tree_sitter::StreamingIterator;
  8
  9use crate::{
 10    Declaration, EditPredictionExcerpt, EditPredictionExcerptText, outline::Identifier,
 11    reference::Reference, text_similarity::IdentifierOccurrences,
 12};
 13
 14#[derive(Clone, Debug)]
 15pub struct ScoredSnippet {
 16    #[allow(dead_code)]
 17    pub identifier: Identifier,
 18    pub declaration: Declaration,
 19    pub score_components: ScoreInputs,
 20    pub scores: Scores,
 21}
 22
 23// TODO: Consider having "Concise" style corresponding to `concise_text`
 24#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug)]
 25pub enum SnippetStyle {
 26    Signature,
 27    Definition,
 28}
 29
 30impl ScoredSnippet {
 31    /// Returns the score for this snippet with the specified style.
 32    pub fn score(&self, style: SnippetStyle) -> f32 {
 33        match style {
 34            SnippetStyle::Signature => self.scores.signature,
 35            SnippetStyle::Definition => self.scores.definition,
 36        }
 37    }
 38
 39    pub fn size(&self, style: SnippetStyle) -> usize {
 40        todo!()
 41    }
 42
 43    pub fn score_density(&self, style: SnippetStyle) -> f32 {
 44        self.score(style) / (self.size(style)) as f32
 45    }
 46}
 47
 48fn scored_snippets(
 49    excerpt: &EditPredictionExcerpt,
 50    excerpt_text: &EditPredictionExcerptText,
 51    references: Vec<Reference>,
 52    cursor_offset: usize,
 53) -> Vec<ScoredSnippet> {
 54    let excerpt_occurrences = IdentifierOccurrences::within_string(&excerpt_text.body);
 55
 56    /* todo!
 57    if let Some(cursor_within_excerpt) = cursor_offset.checked_sub(excerpt.range.start) {
 58    } else {
 59    };
 60    let start_point = Point::new(cursor.row.saturating_sub(2), 0);
 61    let end_point = Point::new(cursor.row + 1, 0);
 62    let adjacent_identifier_occurrences = IdentifierOccurrences::within_string(
 63        &source[offset_from_point(source, start_point)..offset_from_point(source, end_point)],
 64    );
 65    */
 66
 67    let mut identifier_to_references: HashMap<Identifier, Vec<Reference>> = HashMap::new();
 68    for reference in references {
 69        identifier_to_references
 70            .entry(reference.identifier.clone())
 71            .or_insert_with(Vec::new)
 72            .push(reference);
 73    }
 74
 75    identifier_to_references
 76        .into_iter()
 77        .flat_map(|(identifier, references)| {
 78            let Some(definitions) = index
 79                .identifier_to_definitions
 80                .get(&(identifier.clone(), language.name.clone()))
 81            else {
 82                return Vec::new();
 83            };
 84            let definition_count = definitions.len();
 85            let definition_file_count = definitions.keys().len();
 86
 87            definitions
 88                .iter_all()
 89                .flat_map(|(definition_file, file_definitions)| {
 90                    let same_file_definition_count = file_definitions.len();
 91                    let is_same_file = reference_file == definition_file.as_ref();
 92                    file_definitions
 93                        .iter()
 94                        .filter(|definition| {
 95                            !is_same_file
 96                                || !range_intersection(&definition.item_range, &excerpt_range)
 97                                    .is_some()
 98                        })
 99                        .filter_map(|definition| {
100                            let definition_line_distance = if is_same_file {
101                                let definition_line =
102                                    point_from_offset(source, definition.item_range.start).row;
103                                (cursor.row as i32 - definition_line as i32).abs() as u32
104                            } else {
105                                0
106                            };
107                            Some((definition_line_distance, definition))
108                        })
109                        .sorted_by_key(|&(distance, _)| distance)
110                        .enumerate()
111                        .map(
112                            |(
113                                definition_line_distance_rank,
114                                (definition_line_distance, definition),
115                            )| {
116                                score_snippet(
117                                    &identifier,
118                                    &references,
119                                    definition_file.clone(),
120                                    definition.clone(),
121                                    is_same_file,
122                                    definition_line_distance,
123                                    definition_line_distance_rank,
124                                    same_file_definition_count,
125                                    definition_count,
126                                    definition_file_count,
127                                    &containing_range_identifier_occurrences,
128                                    &adjacent_identifier_occurrences,
129                                    cursor,
130                                )
131                            },
132                        )
133                        .collect::<Vec<_>>()
134                })
135                .collect::<Vec<_>>()
136        })
137        .flatten()
138        .collect::<Vec<_>>()
139}
140
141fn score_snippet(
142    identifier: &Identifier,
143    references: &[Reference],
144    definition_file: Arc<Path>,
145    definition: OutlineItem,
146    is_same_file: bool,
147    definition_line_distance: u32,
148    definition_line_distance_rank: usize,
149    same_file_definition_count: usize,
150    definition_count: usize,
151    definition_file_count: usize,
152    containing_range_identifier_occurrences: &IdentifierOccurrences,
153    adjacent_identifier_occurrences: &IdentifierOccurrences,
154    cursor: Point,
155) -> Option<ScoredSnippet> {
156    let is_referenced_nearby = references
157        .iter()
158        .any(|r| r.reference_region == ReferenceRegion::Nearby);
159    let is_referenced_in_breadcrumb = references
160        .iter()
161        .any(|r| r.reference_region == ReferenceRegion::Breadcrumb);
162    let reference_count = references.len();
163    let reference_line_distance = references
164        .iter()
165        .map(|r| {
166            let reference_line = point_from_offset(reference_source, r.range.start).row as i32;
167            (cursor.row as i32 - reference_line).abs() as u32
168        })
169        .min()
170        .unwrap();
171
172    let definition_source = index.path_to_source.get(&definition_file).unwrap();
173    let item_source_occurrences =
174        IdentifierOccurrences::within_string(definition.item(&definition_source));
175    let item_signature_occurrences =
176        IdentifierOccurrences::within_string(definition.signature(&definition_source));
177    let containing_range_vs_item_jaccard = jaccard_similarity(
178        containing_range_identifier_occurrences,
179        &item_source_occurrences,
180    );
181    let containing_range_vs_signature_jaccard = jaccard_similarity(
182        containing_range_identifier_occurrences,
183        &item_signature_occurrences,
184    );
185    let adjacent_vs_item_jaccard =
186        jaccard_similarity(adjacent_identifier_occurrences, &item_source_occurrences);
187    let adjacent_vs_signature_jaccard =
188        jaccard_similarity(adjacent_identifier_occurrences, &item_signature_occurrences);
189
190    let containing_range_vs_item_weighted_overlap = weighted_overlap_coefficient(
191        containing_range_identifier_occurrences,
192        &item_source_occurrences,
193    );
194    let containing_range_vs_signature_weighted_overlap = weighted_overlap_coefficient(
195        containing_range_identifier_occurrences,
196        &item_signature_occurrences,
197    );
198    let adjacent_vs_item_weighted_overlap =
199        weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_source_occurrences);
200    let adjacent_vs_signature_weighted_overlap =
201        weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_signature_occurrences);
202
203    let score_components = ScoreInputs {
204        is_same_file,
205        is_referenced_nearby,
206        is_referenced_in_breadcrumb,
207        reference_line_distance,
208        definition_line_distance,
209        definition_line_distance_rank,
210        reference_count,
211        same_file_definition_count,
212        definition_count,
213        definition_file_count,
214        containing_range_vs_item_jaccard,
215        containing_range_vs_signature_jaccard,
216        adjacent_vs_item_jaccard,
217        adjacent_vs_signature_jaccard,
218        containing_range_vs_item_weighted_overlap,
219        containing_range_vs_signature_weighted_overlap,
220        adjacent_vs_item_weighted_overlap,
221        adjacent_vs_signature_weighted_overlap,
222    };
223
224    Some(ScoredSnippet {
225        identifier: identifier.clone(),
226        declaration_file: definition_file,
227        declaration: definition,
228        scores: score_components.score(),
229        score_components,
230    })
231}
232
233#[derive(Clone, Debug, Serialize)]
234pub struct ScoreInputs {
235    pub is_same_file: bool,
236    pub is_referenced_nearby: bool,
237    pub is_referenced_in_breadcrumb: bool,
238    pub reference_count: usize,
239    pub same_file_definition_count: usize,
240    pub definition_count: usize,
241    pub definition_file_count: usize,
242    pub reference_line_distance: u32,
243    pub definition_line_distance: u32,
244    pub definition_line_distance_rank: usize,
245    pub containing_range_vs_item_jaccard: f32,
246    pub containing_range_vs_signature_jaccard: f32,
247    pub adjacent_vs_item_jaccard: f32,
248    pub adjacent_vs_signature_jaccard: f32,
249    pub containing_range_vs_item_weighted_overlap: f32,
250    pub containing_range_vs_signature_weighted_overlap: f32,
251    pub adjacent_vs_item_weighted_overlap: f32,
252    pub adjacent_vs_signature_weighted_overlap: f32,
253}
254
255#[derive(Clone, Debug, Serialize)]
256pub struct Scores {
257    pub signature: f32,
258    pub definition: f32,
259}
260
261impl ScoreInputs {
262    fn score(&self) -> Scores {
263        // Score related to how likely this is the correct definition, range 0 to 1
264        let accuracy_score = if self.is_same_file {
265            // TODO: use definition_line_distance_rank
266            (0.5 / self.same_file_definition_count as f32)
267                + (0.5 / self.definition_file_count as f32)
268        } else {
269            1.0 / self.definition_count as f32
270        };
271
272        // Score related to the distance between the reference and cursor, range 0 to 1
273        let distance_score = if self.is_referenced_nearby {
274            1.0 / (1.0 + self.reference_line_distance as f32 / 10.0).powf(2.0)
275        } else {
276            // same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
277            0.5
278        };
279
280        // For now instead of linear combination, the scores are just multiplied together.
281        let combined_score = 10.0 * accuracy_score * distance_score;
282
283        Scores {
284            signature: combined_score * self.containing_range_vs_signature_weighted_overlap,
285            // definition score gets boosted both by being multipled by 2 and by there being more
286            // weighted overlap.
287            definition: 2.0 * combined_score * self.containing_range_vs_item_weighted_overlap,
288        }
289    }
290}