declaration_scoring.rs

  1use itertools::Itertools as _;
  2use language::BufferSnapshot;
  3use serde::Serialize;
  4use std::{collections::HashMap, ops::Range};
  5use strum::EnumIter;
  6use text::{OffsetRangeExt, Point, ToPoint};
  7
  8use crate::{
  9    Declaration, EditPredictionExcerpt, EditPredictionExcerptText, Identifier,
 10    reference::{Reference, ReferenceRegion},
 11    syntax_index::SyntaxIndexState,
 12    text_similarity::{IdentifierOccurrences, jaccard_similarity, weighted_overlap_coefficient},
 13};
 14
 15// TODO:
 16//
 17// * Consider adding declaration_file_count (n)
 18
 19#[derive(Clone, Debug)]
 20pub struct ScoredSnippet {
 21    #[allow(dead_code)]
 22    pub identifier: Identifier,
 23    pub declaration: Declaration,
 24    pub score_components: ScoreInputs,
 25    pub scores: Scores,
 26}
 27
 28// TODO: Consider having "Concise" style corresponding to `concise_text`
 29#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug)]
 30pub enum SnippetStyle {
 31    Signature,
 32    Declaration,
 33}
 34
 35impl ScoredSnippet {
 36    /// Returns the score for this snippet with the specified style.
 37    pub fn score(&self, style: SnippetStyle) -> f32 {
 38        match style {
 39            SnippetStyle::Signature => self.scores.signature,
 40            SnippetStyle::Declaration => self.scores.declaration,
 41        }
 42    }
 43
 44    pub fn size(&self, style: SnippetStyle) -> usize {
 45        todo!()
 46    }
 47
 48    pub fn score_density(&self, style: SnippetStyle) -> f32 {
 49        self.score(style) / (self.size(style)) as f32
 50    }
 51}
 52
 53pub fn scored_snippets(
 54    index: &SyntaxIndexState,
 55    excerpt: &EditPredictionExcerpt,
 56    excerpt_text: &EditPredictionExcerptText,
 57    identifier_to_references: HashMap<Identifier, Vec<Reference>>,
 58    cursor_offset: usize,
 59    current_buffer: &BufferSnapshot,
 60) -> Vec<ScoredSnippet> {
 61    let containing_range_identifier_occurrences =
 62        IdentifierOccurrences::within_string(&excerpt_text.body);
 63    let cursor_point = cursor_offset.to_point(&current_buffer);
 64
 65    let start_point = Point::new(cursor_point.row.saturating_sub(2), 0);
 66    let end_point = Point::new(cursor_point.row + 1, 0);
 67    let adjacent_identifier_occurrences = IdentifierOccurrences::within_string(
 68        &current_buffer
 69            .text_for_range(start_point..end_point)
 70            .collect::<String>(),
 71    );
 72
 73    identifier_to_references
 74        .into_iter()
 75        .flat_map(|(identifier, references)| {
 76            // todo! pick a limit
 77            let declarations = index.declarations_for_identifier::<16>(&identifier);
 78            let declaration_count = declarations.len();
 79
 80            declarations
 81                .iter()
 82                .filter_map(|declaration| match declaration {
 83                    Declaration::Buffer {
 84                        buffer_id,
 85                        declaration: buffer_declaration,
 86                        ..
 87                    } => {
 88                        let is_same_file = buffer_id == &current_buffer.remote_id();
 89
 90                        if is_same_file {
 91                            range_intersection(
 92                                &buffer_declaration.item_range.to_offset(&current_buffer),
 93                                &excerpt.range,
 94                            )
 95                            .is_none()
 96                            .then(|| {
 97                                let declaration_line = buffer_declaration
 98                                    .item_range
 99                                    .start
100                                    .to_point(current_buffer)
101                                    .row;
102                                (
103                                    true,
104                                    (cursor_point.row as i32 - declaration_line as i32).abs()
105                                        as u32,
106                                    declaration,
107                                )
108                            })
109                        } else {
110                            Some((false, 0, declaration))
111                        }
112                    }
113                    Declaration::File { .. } => {
114                        // We can assume that a file declaration is in a different file,
115                        // because the current one must be open
116                        Some((false, 0, declaration))
117                    }
118                })
119                .sorted_by_key(|&(_, distance, _)| distance)
120                .enumerate()
121                .map(
122                    |(
123                        declaration_line_distance_rank,
124                        (is_same_file, declaration_line_distance, declaration),
125                    )| {
126                        let same_file_declaration_count = index.file_declaration_count(declaration);
127
128                        score_snippet(
129                            &identifier,
130                            &references,
131                            declaration.clone(),
132                            is_same_file,
133                            declaration_line_distance,
134                            declaration_line_distance_rank,
135                            same_file_declaration_count,
136                            declaration_count,
137                            &containing_range_identifier_occurrences,
138                            &adjacent_identifier_occurrences,
139                            cursor_point,
140                            current_buffer,
141                        )
142                    },
143                )
144                .collect::<Vec<_>>()
145        })
146        .flatten()
147        .collect::<Vec<_>>()
148}
149
// todo! replace with existing util?
/// Returns the overlap of two half-open ranges, or `None` when the overlap is empty.
///
/// Ranges that merely touch (one's `end` equals the other's `start`) do not intersect, and an
/// empty input range never intersects anything. Endpoints are compared by reference and only
/// cloned when an intersection is actually returned.
fn range_intersection<T: Ord + Clone>(a: &Range<T>, b: &Range<T>) -> Option<Range<T>> {
    let start = std::cmp::max(&a.start, &b.start);
    let end = std::cmp::min(&a.end, &b.end);
    (start < end).then(|| start.clone()..end.clone())
}
160
161fn score_snippet(
162    identifier: &Identifier,
163    references: &[Reference],
164    declaration: Declaration,
165    is_same_file: bool,
166    declaration_line_distance: u32,
167    declaration_line_distance_rank: usize,
168    same_file_declaration_count: usize,
169    declaration_count: usize,
170    containing_range_identifier_occurrences: &IdentifierOccurrences,
171    adjacent_identifier_occurrences: &IdentifierOccurrences,
172    cursor: Point,
173    current_buffer: &BufferSnapshot,
174) -> Option<ScoredSnippet> {
175    let is_referenced_nearby = references
176        .iter()
177        .any(|r| r.region == ReferenceRegion::Nearby);
178    let is_referenced_in_breadcrumb = references
179        .iter()
180        .any(|r| r.region == ReferenceRegion::Breadcrumb);
181    let reference_count = references.len();
182    let reference_line_distance = references
183        .iter()
184        .map(|r| {
185            let reference_line = r.range.start.to_point(current_buffer).row as i32;
186            (cursor.row as i32 - reference_line).abs() as u32
187        })
188        .min()
189        .unwrap();
190
191    let item_source_occurrences = IdentifierOccurrences::within_string(&declaration.item_text().0);
192    let item_signature_occurrences =
193        IdentifierOccurrences::within_string(&declaration.signature_text().0);
194    let containing_range_vs_item_jaccard = jaccard_similarity(
195        containing_range_identifier_occurrences,
196        &item_source_occurrences,
197    );
198    let containing_range_vs_signature_jaccard = jaccard_similarity(
199        containing_range_identifier_occurrences,
200        &item_signature_occurrences,
201    );
202    let adjacent_vs_item_jaccard =
203        jaccard_similarity(adjacent_identifier_occurrences, &item_source_occurrences);
204    let adjacent_vs_signature_jaccard =
205        jaccard_similarity(adjacent_identifier_occurrences, &item_signature_occurrences);
206
207    let containing_range_vs_item_weighted_overlap = weighted_overlap_coefficient(
208        containing_range_identifier_occurrences,
209        &item_source_occurrences,
210    );
211    let containing_range_vs_signature_weighted_overlap = weighted_overlap_coefficient(
212        containing_range_identifier_occurrences,
213        &item_signature_occurrences,
214    );
215    let adjacent_vs_item_weighted_overlap =
216        weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_source_occurrences);
217    let adjacent_vs_signature_weighted_overlap =
218        weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_signature_occurrences);
219
220    let score_components = ScoreInputs {
221        is_same_file,
222        is_referenced_nearby,
223        is_referenced_in_breadcrumb,
224        reference_line_distance,
225        declaration_line_distance,
226        declaration_line_distance_rank,
227        reference_count,
228        same_file_declaration_count,
229        declaration_count,
230        containing_range_vs_item_jaccard,
231        containing_range_vs_signature_jaccard,
232        adjacent_vs_item_jaccard,
233        adjacent_vs_signature_jaccard,
234        containing_range_vs_item_weighted_overlap,
235        containing_range_vs_signature_weighted_overlap,
236        adjacent_vs_item_weighted_overlap,
237        adjacent_vs_signature_weighted_overlap,
238    };
239
240    Some(ScoredSnippet {
241        identifier: identifier.clone(),
242        declaration: declaration,
243        scores: score_components.score(),
244        score_components,
245    })
246}
247
248#[derive(Clone, Debug, Serialize)]
249pub struct ScoreInputs {
250    pub is_same_file: bool,
251    pub is_referenced_nearby: bool,
252    pub is_referenced_in_breadcrumb: bool,
253    pub reference_count: usize,
254    pub same_file_declaration_count: usize,
255    pub declaration_count: usize,
256    pub reference_line_distance: u32,
257    pub declaration_line_distance: u32,
258    pub declaration_line_distance_rank: usize,
259    pub containing_range_vs_item_jaccard: f32,
260    pub containing_range_vs_signature_jaccard: f32,
261    pub adjacent_vs_item_jaccard: f32,
262    pub adjacent_vs_signature_jaccard: f32,
263    pub containing_range_vs_item_weighted_overlap: f32,
264    pub containing_range_vs_signature_weighted_overlap: f32,
265    pub adjacent_vs_item_weighted_overlap: f32,
266    pub adjacent_vs_signature_weighted_overlap: f32,
267}
268
269#[derive(Clone, Debug, Serialize)]
270pub struct Scores {
271    pub signature: f32,
272    pub declaration: f32,
273}
274
275impl ScoreInputs {
276    fn score(&self) -> Scores {
277        // Score related to how likely this is the correct declaration, range 0 to 1
278        let accuracy_score = if self.is_same_file {
279            // TODO: use declaration_line_distance_rank
280            1.0 / self.same_file_declaration_count as f32
281        } else {
282            1.0 / self.declaration_count as f32
283        };
284
285        // Score related to the distance between the reference and cursor, range 0 to 1
286        let distance_score = if self.is_referenced_nearby {
287            1.0 / (1.0 + self.reference_line_distance as f32 / 10.0).powf(2.0)
288        } else {
289            // same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
290            0.5
291        };
292
293        // For now instead of linear combination, the scores are just multiplied together.
294        let combined_score = 10.0 * accuracy_score * distance_score;
295
296        Scores {
297            signature: combined_score * self.containing_range_vs_signature_weighted_overlap,
298            // declaration score gets boosted both by being multipled by 2 and by there being more
299            // weighted overlap.
300            declaration: 2.0 * combined_score * self.containing_range_vs_item_weighted_overlap,
301        }
302    }
303}