language: Further optimize `language_for_file` (#28694)

Finn Evers created

Follow-up to #28671

This primarily follows two ideas:
1. We currently take the element with the highest score that appears
last in the iterator (see
[`max_by_key`](https://doc.rust-lang.org/std/iter/trait.Iterator.html#method.max_by_key),
which returns the last element when several are equally maximal),
so we can also just reverse the iterator and take the first highest
match instead.
2. Once we have a match with a given precedence, we are not interested
in any matches with a lower or even the same precedence, given what was
established in 1. Thus, we only have to check whether any language
checked afterwards has a higher-precedence match.

Furthermore, once we have a match with the highest possible precedence,
there is no need to look for any more possible matches. Thus, this PR
also adds short-circuiting for that scenario.

Lastly, I also cleaned up the custom suffix match (an empty glob-set
will never match, so there is no need to iterate there) and reordered
the zip-call in the content matches, as we never need the content if
there is no first line pattern present for the checked languages.

Release Notes:

- N/A

Change summary

crates/language/src/language_registry.rs | 130 +++++++++++++++++--------
1 file changed, 86 insertions(+), 44 deletions(-)

Detailed changes

crates/language/src/language_registry.rs 🔗

@@ -16,6 +16,8 @@ use futures::{
 };
 use globset::GlobSet;
 use gpui::{App, BackgroundExecutor, SharedString};
+use itertools::FoldWhile::{Continue, Done};
+use itertools::Itertools;
 use lsp::LanguageServerId;
 use parking_lot::{Mutex, RwLock};
 use postage::watch;
@@ -165,6 +167,20 @@ impl AvailableLanguage {
     }
 }
 
+#[derive(Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
+enum LanguageMatchPrecedence {
+    #[default]
+    Undetermined,
+    PathOrContent,
+    UserConfigured,
+}
+
+impl LanguageMatchPrecedence {
+    fn best_possible_match(&self) -> bool {
+        *self == LanguageMatchPrecedence::UserConfigured
+    }
+}
+
 enum AvailableGrammar {
     Native(tree_sitter::Language),
     Loaded(#[allow(unused)] PathBuf, tree_sitter::Language),
@@ -602,12 +618,10 @@ impl LanguageRegistry {
         name: &str,
     ) -> impl Future<Output = Result<Arc<Language>>> + use<> {
         let name = UniCase::new(name);
-        let rx = self.get_or_load_language(|language_name, _| {
-            if UniCase::new(&language_name.0) == name {
-                1
-            } else {
-                0
-            }
+        let rx = self.get_or_load_language(|language_name, _, current_best_match| {
+            (current_best_match < LanguageMatchPrecedence::PathOrContent
+                && UniCase::new(&language_name.0) == name)
+                .then_some(LanguageMatchPrecedence::PathOrContent)
         });
         async move { rx.await? }
     }
@@ -617,17 +631,14 @@ impl LanguageRegistry {
         string: &str,
     ) -> impl Future<Output = Result<Arc<Language>>> {
         let string = UniCase::new(string);
-        let rx = self.get_or_load_language(|name, config| {
-            if UniCase::new(&name.0) == string
-                || config
-                    .path_suffixes
-                    .iter()
-                    .any(|suffix| UniCase::new(suffix) == string)
-            {
-                1
-            } else {
-                0
-            }
+        let rx = self.get_or_load_language(|name, config, current_best_match| {
+            (current_best_match < LanguageMatchPrecedence::PathOrContent
+                && (UniCase::new(&name.0) == string
+                    || config
+                        .path_suffixes
+                        .iter()
+                        .any(|suffix| UniCase::new(suffix) == string)))
+            .then_some(LanguageMatchPrecedence::PathOrContent)
         });
         async move { rx.await? }
     }
@@ -688,7 +699,6 @@ impl LanguageRegistry {
             .iter()
             .filter_map(|suffix| suffix.map(globset::Candidate::new))
             .collect::<SmallVec<[_; 3]>>();
-        let empty = GlobSet::empty();
         let content = LazyCell::new(|| {
             content.map(|content| {
                 let end = content.clip_point(Point::new(0, 256), Bias::Left);
@@ -696,7 +706,7 @@ impl LanguageRegistry {
                 content.chunks_in_range(0..end).collect::<String>()
             })
         });
-        self.find_matching_language(move |language_name, config| {
+        self.find_matching_language(move |language_name, config, current_best_match| {
             let path_matches_default_suffix = || {
                 config
                     .path_suffixes
@@ -704,47 +714,75 @@ impl LanguageRegistry {
                     .any(|suffix| path_suffixes.contains(&Some(suffix.as_str())))
             };
             let path_matches_custom_suffix = || {
-                let custom_suffixes = user_file_types
+                user_file_types
                     .and_then(|types| types.get(language_name.as_ref()))
-                    .unwrap_or(&empty);
-                path_suffixes_candidates
-                    .iter()
-                    .any(|suffix| custom_suffixes.is_match_candidate(suffix))
+                    .map_or(false, |custom_suffixes| {
+                        path_suffixes_candidates
+                            .iter()
+                            .any(|suffix| custom_suffixes.is_match_candidate(suffix))
+                    })
             };
             let content_matches = || {
-                content
-                    .as_ref()
-                    .zip(config.first_line_pattern.as_ref())
-                    .map_or(false, |(text, pattern)| pattern.is_match(&text))
+                config.first_line_pattern.as_ref().map_or(false, |pattern| {
+                    content
+                        .as_ref()
+                        .is_some_and(|content| pattern.is_match(content))
+                })
             };
-            if path_matches_custom_suffix() {
-                2
-            } else if path_matches_default_suffix() || content_matches() {
-                1
-            } else {
-                0
+
+            // Only return a match for the given file if we have a better match than
+            // the current one.
+            match current_best_match {
+                LanguageMatchPrecedence::PathOrContent | LanguageMatchPrecedence::Undetermined
+                    if path_matches_custom_suffix() =>
+                {
+                    Some(LanguageMatchPrecedence::UserConfigured)
+                }
+                LanguageMatchPrecedence::Undetermined
+                    if path_matches_default_suffix() || content_matches() =>
+                {
+                    Some(LanguageMatchPrecedence::PathOrContent)
+                }
+                _ => None,
             }
         })
     }
 
     fn find_matching_language(
         self: &Arc<Self>,
-        callback: impl Fn(&LanguageName, &LanguageMatcher) -> usize,
+        callback: impl Fn(
+            &LanguageName,
+            &LanguageMatcher,
+            LanguageMatchPrecedence,
+        ) -> Option<LanguageMatchPrecedence>,
     ) -> Option<AvailableLanguage> {
         let state = self.state.read();
         let available_language = state
             .available_languages
             .iter()
-            .filter_map(|language| {
-                let score = callback(&language.name, &language.matcher);
-                if score > 0 {
-                    Some((language.clone(), score))
-                } else {
-                    None
+            .rev()
+            .fold_while(None, |best_language_match, language| {
+                let current_match_type = best_language_match
+                    .as_ref()
+                    .map_or(LanguageMatchPrecedence::default(), |(_, score)| *score);
+                let language_score =
+                    callback(&language.name, &language.matcher, current_match_type);
+                debug_assert!(
+                    language_score.is_none_or(|new_score| new_score > current_match_type),
+                    "Matching callback should only return a better match than the current one"
+                );
+
+                match language_score {
+                    Some(new_score) if new_score.best_possible_match() => {
+                        Done(Some((language.clone(), new_score)))
+                    }
+                    Some(new_score) if current_match_type < new_score => {
+                        Continue(Some((language.clone(), new_score)))
+                    }
+                    _ => Continue(best_language_match),
                 }
             })
-            .max_by_key(|e| e.1)
-            .clone()
+            .into_inner()
             .map(|(available_language, _)| available_language);
         drop(state);
         available_language
@@ -839,7 +877,11 @@ impl LanguageRegistry {
 
     fn get_or_load_language(
         self: &Arc<Self>,
-        callback: impl Fn(&LanguageName, &LanguageMatcher) -> usize,
+        callback: impl Fn(
+            &LanguageName,
+            &LanguageMatcher,
+            LanguageMatchPrecedence,
+        ) -> Option<LanguageMatchPrecedence>,
     ) -> oneshot::Receiver<Result<Arc<Language>>> {
         let Some(language) = self.find_matching_language(callback) else {
             let (tx, rx) = oneshot::channel();