scored_declaration.rs

  1use collections::HashSet;
  2use gpui::{App, Entity};
  3use itertools::Itertools as _;
  4use language::BufferSnapshot;
  5use project::ProjectEntryId;
  6use serde::Serialize;
  7use std::{collections::HashMap, ops::Range};
  8use strum::EnumIter;
  9use text::{OffsetRangeExt, Point, ToPoint};
 10
 11use crate::{
 12    Declaration, EditPredictionExcerpt, EditPredictionExcerptText, TreeSitterIndex,
 13    outline::Identifier,
 14    reference::{Reference, ReferenceRegion},
 15    text_similarity::{IdentifierOccurrences, jaccard_similarity, weighted_overlap_coefficient},
 16};
 17
 18#[derive(Clone, Debug)]
 19pub struct ScoredSnippet {
 20    #[allow(dead_code)]
 21    pub identifier: Identifier,
 22    pub declaration: Declaration,
 23    pub score_components: ScoreInputs,
 24    pub scores: Scores,
 25}
 26
 27// TODO: Consider having "Concise" style corresponding to `concise_text`
 28#[derive(EnumIter, Clone, Copy, PartialEq, Eq, Hash, Debug)]
 29pub enum SnippetStyle {
 30    Signature,
 31    Definition,
 32}
 33
 34impl ScoredSnippet {
 35    /// Returns the score for this snippet with the specified style.
 36    pub fn score(&self, style: SnippetStyle) -> f32 {
 37        match style {
 38            SnippetStyle::Signature => self.scores.signature,
 39            SnippetStyle::Definition => self.scores.definition,
 40        }
 41    }
 42
 43    pub fn size(&self, style: SnippetStyle) -> usize {
 44        todo!()
 45    }
 46
 47    pub fn score_density(&self, style: SnippetStyle) -> f32 {
 48        self.score(style) / (self.size(style)) as f32
 49    }
 50}
 51
 52fn scored_snippets(
 53    index: Entity<TreeSitterIndex>,
 54    excerpt: &EditPredictionExcerpt,
 55    excerpt_text: &EditPredictionExcerptText,
 56    references: Vec<Reference>,
 57    cursor_offset: usize,
 58    current_buffer: &BufferSnapshot,
 59    cx: &App,
 60) -> Vec<ScoredSnippet> {
 61    let containing_range_identifier_occurrences =
 62        IdentifierOccurrences::within_string(&excerpt_text.body);
 63    let cursor_point = cursor_offset.to_point(&current_buffer);
 64
 65    // todo! ask michael why we needed this
 66    // if let Some(cursor_within_excerpt) = cursor_offset.checked_sub(excerpt.range.start) {
 67    // } else {
 68    // };
 69    let start_point = Point::new(cursor_point.row.saturating_sub(2), 0);
 70    let end_point = Point::new(cursor_point.row + 1, 0);
 71    let adjacent_identifier_occurrences = IdentifierOccurrences::within_string(
 72        &current_buffer
 73            .text_for_range(start_point..end_point)
 74            .collect::<String>(),
 75    );
 76
 77    let mut identifier_to_references: HashMap<Identifier, Vec<Reference>> = HashMap::new();
 78    for reference in references {
 79        identifier_to_references
 80            .entry(reference.identifier.clone())
 81            .or_insert_with(Vec::new)
 82            .push(reference);
 83    }
 84
 85    identifier_to_references
 86        .into_iter()
 87        .flat_map(|(identifier, references)| {
 88            let definitions = index
 89                .read(cx)
 90                // todo! pick a limit
 91                .declarations_for_identifier::<16>(&identifier, cx);
 92            let definition_count = definitions.len();
 93            let total_file_count = definitions
 94                .iter()
 95                .filter_map(|definition| definition.project_entry_id(cx))
 96                .collect::<HashSet<ProjectEntryId>>()
 97                .len();
 98
 99            definitions
100                .iter()
101                .filter_map(|definition| match definition {
102                    Declaration::Buffer {
103                        declaration,
104                        buffer,
105                    } => {
106                        let is_same_file = buffer
107                            .read_with(cx, |buffer, _| buffer.remote_id())
108                            .is_ok_and(|buffer_id| buffer_id == current_buffer.remote_id());
109
110                        if is_same_file {
111                            range_intersection(
112                                &declaration.item_range.to_offset(&current_buffer),
113                                &excerpt.range,
114                            )
115                            .is_none()
116                            .then(|| {
117                                let definition_line =
118                                    declaration.item_range.start.to_point(current_buffer).row;
119                                (
120                                    true,
121                                    (cursor_point.row as i32 - definition_line as i32).abs() as u32,
122                                    definition,
123                                )
124                            })
125                        } else {
126                            Some((false, 0, definition))
127                        }
128                    }
129                    Declaration::File { .. } => {
130                        // We can assume that a file declaration is in a different file,
131                        // because the current onemust be open
132                        Some((false, 0, definition))
133                    }
134                })
135                .sorted_by_key(|&(_, distance, _)| distance)
136                .enumerate()
137                .map(
138                    |(
139                        definition_line_distance_rank,
140                        (is_same_file, definition_line_distance, definition),
141                    )| {
142                        let same_file_definition_count =
143                            index.read(cx).file_declaration_count(definition);
144
145                        score_snippet(
146                            &identifier,
147                            &references,
148                            definition.clone(),
149                            is_same_file,
150                            definition_line_distance,
151                            definition_line_distance_rank,
152                            same_file_definition_count,
153                            definition_count,
154                            total_file_count,
155                            &containing_range_identifier_occurrences,
156                            &adjacent_identifier_occurrences,
157                            cursor_point,
158                            current_buffer,
159                            cx,
160                        )
161                    },
162                )
163                .collect::<Vec<_>>()
164        })
165        .flatten()
166        .collect::<Vec<_>>()
167}
168
169// todo! replace with existing util?
170fn range_intersection<T: Ord + Clone>(a: &Range<T>, b: &Range<T>) -> Option<Range<T>> {
171    let start = a.start.clone().max(b.start.clone());
172    let end = a.end.clone().min(b.end.clone());
173    if start < end {
174        Some(Range { start, end })
175    } else {
176        None
177    }
178}
179
180fn score_snippet(
181    identifier: &Identifier,
182    references: &[Reference],
183    definition: Declaration,
184    is_same_file: bool,
185    definition_line_distance: u32,
186    definition_line_distance_rank: usize,
187    same_file_definition_count: usize,
188    definition_count: usize,
189    definition_file_count: usize,
190    containing_range_identifier_occurrences: &IdentifierOccurrences,
191    adjacent_identifier_occurrences: &IdentifierOccurrences,
192    cursor: Point,
193    current_buffer: &BufferSnapshot,
194    cx: &App,
195) -> Option<ScoredSnippet> {
196    let is_referenced_nearby = references
197        .iter()
198        .any(|r| r.region == ReferenceRegion::Nearby);
199    let is_referenced_in_breadcrumb = references
200        .iter()
201        .any(|r| r.region == ReferenceRegion::Breadcrumb);
202    let reference_count = references.len();
203    let reference_line_distance = references
204        .iter()
205        .map(|r| {
206            let reference_line = r.range.start.to_point(current_buffer).row as i32;
207            (cursor.row as i32 - reference_line).abs() as u32
208        })
209        .min()
210        .unwrap();
211
212    let item_source_occurrences = IdentifierOccurrences::within_string(&definition.item_text(cx));
213    let item_signature_occurrences =
214        IdentifierOccurrences::within_string(&definition.signature_text(cx));
215    let containing_range_vs_item_jaccard = jaccard_similarity(
216        containing_range_identifier_occurrences,
217        &item_source_occurrences,
218    );
219    let containing_range_vs_signature_jaccard = jaccard_similarity(
220        containing_range_identifier_occurrences,
221        &item_signature_occurrences,
222    );
223    let adjacent_vs_item_jaccard =
224        jaccard_similarity(adjacent_identifier_occurrences, &item_source_occurrences);
225    let adjacent_vs_signature_jaccard =
226        jaccard_similarity(adjacent_identifier_occurrences, &item_signature_occurrences);
227
228    let containing_range_vs_item_weighted_overlap = weighted_overlap_coefficient(
229        containing_range_identifier_occurrences,
230        &item_source_occurrences,
231    );
232    let containing_range_vs_signature_weighted_overlap = weighted_overlap_coefficient(
233        containing_range_identifier_occurrences,
234        &item_signature_occurrences,
235    );
236    let adjacent_vs_item_weighted_overlap =
237        weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_source_occurrences);
238    let adjacent_vs_signature_weighted_overlap =
239        weighted_overlap_coefficient(adjacent_identifier_occurrences, &item_signature_occurrences);
240
241    let score_components = ScoreInputs {
242        is_same_file,
243        is_referenced_nearby,
244        is_referenced_in_breadcrumb,
245        reference_line_distance,
246        definition_line_distance,
247        definition_line_distance_rank,
248        reference_count,
249        same_file_definition_count,
250        definition_count,
251        definition_file_count,
252        containing_range_vs_item_jaccard,
253        containing_range_vs_signature_jaccard,
254        adjacent_vs_item_jaccard,
255        adjacent_vs_signature_jaccard,
256        containing_range_vs_item_weighted_overlap,
257        containing_range_vs_signature_weighted_overlap,
258        adjacent_vs_item_weighted_overlap,
259        adjacent_vs_signature_weighted_overlap,
260    };
261
262    Some(ScoredSnippet {
263        identifier: identifier.clone(),
264        declaration: definition,
265        scores: score_components.score(),
266        score_components,
267    })
268}
269
270#[derive(Clone, Debug, Serialize)]
271pub struct ScoreInputs {
272    pub is_same_file: bool,
273    pub is_referenced_nearby: bool,
274    pub is_referenced_in_breadcrumb: bool,
275    pub reference_count: usize,
276    pub same_file_definition_count: usize,
277    pub definition_count: usize,
278    // todo! do we need this?
279    pub definition_file_count: usize,
280    pub reference_line_distance: u32,
281    pub definition_line_distance: u32,
282    pub definition_line_distance_rank: usize,
283    pub containing_range_vs_item_jaccard: f32,
284    pub containing_range_vs_signature_jaccard: f32,
285    pub adjacent_vs_item_jaccard: f32,
286    pub adjacent_vs_signature_jaccard: f32,
287    pub containing_range_vs_item_weighted_overlap: f32,
288    pub containing_range_vs_signature_weighted_overlap: f32,
289    pub adjacent_vs_item_weighted_overlap: f32,
290    pub adjacent_vs_signature_weighted_overlap: f32,
291}
292
293#[derive(Clone, Debug, Serialize)]
294pub struct Scores {
295    pub signature: f32,
296    pub definition: f32,
297}
298
299impl ScoreInputs {
300    fn score(&self) -> Scores {
301        // Score related to how likely this is the correct definition, range 0 to 1
302        let accuracy_score = if self.is_same_file {
303            // TODO: use definition_line_distance_rank
304            (0.5 / self.same_file_definition_count as f32)
305                + (0.5 / self.definition_file_count as f32)
306        } else {
307            1.0 / self.definition_count as f32
308        };
309
310        // Score related to the distance between the reference and cursor, range 0 to 1
311        let distance_score = if self.is_referenced_nearby {
312            1.0 / (1.0 + self.reference_line_distance as f32 / 10.0).powf(2.0)
313        } else {
314            // same score as ~14 lines away, rationale is to not overly penalize references from parent signatures
315            0.5
316        };
317
318        // For now instead of linear combination, the scores are just multiplied together.
319        let combined_score = 10.0 * accuracy_score * distance_score;
320
321        Scores {
322            signature: combined_score * self.containing_range_vs_signature_weighted_overlap,
323            // definition score gets boosted both by being multipled by 2 and by there being more
324            // weighted overlap.
325            definition: 2.0 * combined_score * self.containing_range_vs_item_weighted_overlap,
326        }
327    }
328}