Merge pull request #2379 from zed-industries/shebang

Max Brunsfeld created

Select a file's language based on the first line of its content (e.g. a shebang line), in addition to its path

Change summary

crates/language/src/buffer_tests.rs             | 12 +-
crates/language/src/language.rs                 | 58 +++++++++++++++++-
crates/project/src/project.rs                   | 42 +++++++------
crates/zed/src/languages/javascript/config.toml |  1 
crates/zed/src/languages/python/config.toml     |  1 
crates/zed/src/languages/ruby/config.toml       |  1 
6 files changed, 85 insertions(+), 30 deletions(-)

Detailed changes

crates/language/src/buffer_tests.rs 🔗

@@ -81,14 +81,14 @@ fn test_select_language() {
     // matching file extension
     assert_eq!(
         registry
-            .language_for_path("zed/lib.rs")
+            .language_for_file("zed/lib.rs", None)
             .now_or_never()
             .and_then(|l| Some(l.ok()?.name())),
         Some("Rust".into())
     );
     assert_eq!(
         registry
-            .language_for_path("zed/lib.mk")
+            .language_for_file("zed/lib.mk", None)
             .now_or_never()
             .and_then(|l| Some(l.ok()?.name())),
         Some("Make".into())
@@ -97,7 +97,7 @@ fn test_select_language() {
     // matching filename
     assert_eq!(
         registry
-            .language_for_path("zed/Makefile")
+            .language_for_file("zed/Makefile", None)
             .now_or_never()
             .and_then(|l| Some(l.ok()?.name())),
         Some("Make".into())
@@ -106,21 +106,21 @@ fn test_select_language() {
     // matching suffix that is not the full file extension or filename
     assert_eq!(
         registry
-            .language_for_path("zed/cars")
+            .language_for_file("zed/cars", None)
             .now_or_never()
             .and_then(|l| Some(l.ok()?.name())),
         None
     );
     assert_eq!(
         registry
-            .language_for_path("zed/a.cars")
+            .language_for_file("zed/a.cars", None)
             .now_or_never()
             .and_then(|l| Some(l.ok()?.name())),
         None
     );
     assert_eq!(
         registry
-            .language_for_path("zed/sumk")
+            .language_for_file("zed/sumk", None)
             .now_or_never()
             .and_then(|l| Some(l.ok()?.name())),
         None

crates/language/src/language.rs 🔗

@@ -262,6 +262,8 @@ pub struct LanguageConfig {
     pub name: Arc<str>,
     pub path_suffixes: Vec<String>,
     pub brackets: BracketPairConfig,
+    #[serde(default, deserialize_with = "deserialize_regex")]
+    pub first_line_pattern: Option<Regex>,
     #[serde(default = "auto_indent_using_last_non_empty_line_default")]
     pub auto_indent_using_last_non_empty_line: bool,
     #[serde(default, deserialize_with = "deserialize_regex")]
@@ -334,6 +336,7 @@ impl Default for LanguageConfig {
             path_suffixes: Default::default(),
             brackets: Default::default(),
             auto_indent_using_last_non_empty_line: auto_indent_using_last_non_empty_line_default(),
+            first_line_pattern: Default::default(),
             increase_indent_pattern: Default::default(),
             decrease_indent_pattern: Default::default(),
             autoclose_before: Default::default(),
@@ -660,19 +663,30 @@ impl LanguageRegistry {
         })
     }
 
-    pub fn language_for_path(
+    pub fn language_for_file(
         self: &Arc<Self>,
         path: impl AsRef<Path>,
+        content: Option<&Rope>,
     ) -> UnwrapFuture<oneshot::Receiver<Result<Arc<Language>>>> {
         let path = path.as_ref();
         let filename = path.file_name().and_then(|name| name.to_str());
         let extension = path.extension().and_then(|name| name.to_str());
         let path_suffixes = [extension, filename];
         self.get_or_load_language(|config| {
-            config
+            let path_matches = config
                 .path_suffixes
                 .iter()
-                .any(|suffix| path_suffixes.contains(&Some(suffix.as_str())))
+                .any(|suffix| path_suffixes.contains(&Some(suffix.as_str())));
+            let content_matches = content.zip(config.first_line_pattern.as_ref()).map_or(
+                false,
+                |(content, pattern)| {
+                    let end = content.clip_point(Point::new(0, 256), Bias::Left);
+                    let end = content.point_to_offset(end);
+                    let text = content.chunks_in_range(0..end).collect::<String>();
+                    pattern.is_match(&text)
+                },
+            );
+            path_matches || content_matches
         })
     }
 
@@ -1528,9 +1542,45 @@ pub fn range_from_lsp(range: lsp::Range) -> Range<Unclipped<PointUtf16>> {
 
 #[cfg(test)]
 mod tests {
+    use super::*;
     use gpui::TestAppContext;
 
-    use super::*;
+    #[gpui::test(iterations = 10)]
+    async fn test_first_line_pattern(cx: &mut TestAppContext) {
+        let mut languages = LanguageRegistry::test();
+        languages.set_executor(cx.background());
+        let languages = Arc::new(languages);
+        languages.register(
+            "/javascript",
+            LanguageConfig {
+                name: "JavaScript".into(),
+                path_suffixes: vec!["js".into()],
+                first_line_pattern: Some(Regex::new(r"\bnode\b").unwrap()),
+                ..Default::default()
+            },
+            tree_sitter_javascript::language(),
+            None,
+            |_| Default::default(),
+        );
+
+        languages
+            .language_for_file("the/script", None)
+            .await
+            .unwrap_err();
+        languages
+            .language_for_file("the/script", Some(&"nothing".into()))
+            .await
+            .unwrap_err();
+        assert_eq!(
+            languages
+                .language_for_file("the/script", Some(&"#!/bin/env node".into()))
+                .await
+                .unwrap()
+                .name()
+                .as_ref(),
+            "JavaScript"
+        );
+    }
 
     #[gpui::test(iterations = 10)]
     async fn test_language_loading(cx: &mut TestAppContext) {

crates/project/src/project.rs 🔗

@@ -2013,17 +2013,19 @@ impl Project {
 
     fn detect_language_for_buffer(
         &mut self,
-        buffer: &ModelHandle<Buffer>,
+        buffer_handle: &ModelHandle<Buffer>,
         cx: &mut ModelContext<Self>,
     ) -> Option<()> {
         // If the buffer has a language, set it and start the language server if we haven't already.
-        let full_path = buffer.read(cx).file()?.full_path(cx);
+        let buffer = buffer_handle.read(cx);
+        let full_path = buffer.file()?.full_path(cx);
+        let content = buffer.as_rope();
         let new_language = self
             .languages
-            .language_for_path(&full_path)
+            .language_for_file(&full_path, Some(content))
             .now_or_never()?
             .ok()?;
-        self.set_language_for_buffer(buffer, new_language, cx);
+        self.set_language_for_buffer(buffer_handle, new_language, cx);
         None
     }
 
@@ -2434,26 +2436,23 @@ impl Project {
         buffers: impl IntoIterator<Item = ModelHandle<Buffer>>,
         cx: &mut ModelContext<Self>,
     ) -> Option<()> {
-        let language_server_lookup_info: HashSet<(WorktreeId, Arc<Path>, PathBuf)> = buffers
+        let language_server_lookup_info: HashSet<(WorktreeId, Arc<Path>, Arc<Language>)> = buffers
             .into_iter()
             .filter_map(|buffer| {
-                let file = File::from_dyn(buffer.read(cx).file())?;
+                let buffer = buffer.read(cx);
+                let file = File::from_dyn(buffer.file())?;
                 let worktree = file.worktree.read(cx).as_local()?;
-                let worktree_id = worktree.id();
-                let worktree_abs_path = worktree.abs_path().clone();
                 let full_path = file.full_path(cx);
-                Some((worktree_id, worktree_abs_path, full_path))
+                let language = self
+                    .languages
+                    .language_for_file(&full_path, Some(buffer.as_rope()))
+                    .now_or_never()?
+                    .ok()?;
+                Some((worktree.id(), worktree.abs_path().clone(), language))
             })
             .collect();
-        for (worktree_id, worktree_abs_path, full_path) in language_server_lookup_info {
-            if let Some(language) = self
-                .languages
-                .language_for_path(&full_path)
-                .now_or_never()
-                .and_then(|language| language.ok())
-            {
-                self.restart_language_server(worktree_id, worktree_abs_path, language, cx);
-            }
+        for (worktree_id, worktree_abs_path, language) in language_server_lookup_info {
+            self.restart_language_server(worktree_id, worktree_abs_path, language, cx);
         }
 
         None
@@ -3487,7 +3486,7 @@ impl Project {
                             let adapter_language = adapter_language.clone();
                             let language = this
                                 .languages
-                                .language_for_path(&project_path.path)
+                                .language_for_file(&project_path.path, None)
                                 .unwrap_or_else(move |_| adapter_language);
                             let language_server_name = adapter.name.clone();
                             Some(async move {
@@ -5916,7 +5915,10 @@ impl Project {
                 worktree_id,
                 path: PathBuf::from(serialized_symbol.path).into(),
             };
-            let language = languages.language_for_path(&path.path).await.log_err();
+            let language = languages
+                .language_for_file(&path.path, None)
+                .await
+                .log_err();
             Ok(Symbol {
                 language_server_name: LanguageServerName(
                     serialized_symbol.language_server_name.into(),