Add logging of missing or unexpected capture names in Tree-sitter queries (#37830)

Michael Sloan and Max created

Now logs warnings for unrecognized capture names and logs errors for
missing required captures.

Release Notes:

- N/A

---------

Co-authored-by: Max <max@zed.dev>

Change summary

crates/language/src/language.rs                | 356 ++++++++++++-------
crates/languages/src/typescript/injections.scm |   4 
crates/languages/src/yaml/textobjects.scm      |   2 
3 files changed, 221 insertions(+), 141 deletions(-)

Detailed changes

crates/language/src/language.rs 🔗

@@ -1368,6 +1368,7 @@ impl Language {
         self.manifest_name = name;
         self
     }
+
     pub fn with_queries(mut self, queries: LanguageQueries) -> Result<Self> {
         if let Some(query) = queries.highlights {
             self = self
@@ -1428,13 +1429,13 @@ impl Language {
     }
 
     pub fn with_highlights_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
+        let grammar = self.grammar_mut()?;
         grammar.highlights_query = Some(Query::new(&grammar.ts_language, source)?);
         Ok(self)
     }
 
     pub fn with_runnable_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
+        let grammar = self.grammar_mut()?;
 
         let query = Query::new(&grammar.ts_language, source)?;
         let extra_captures: Vec<_> = query
@@ -1455,29 +1456,30 @@ impl Language {
     }
 
     pub fn with_outline_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
-        let query = Query::new(&grammar.ts_language, source)?;
-        let mut item_capture_ix = None;
-        let mut name_capture_ix = None;
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
+        let mut item_capture_ix = 0;
+        let mut name_capture_ix = 0;
         let mut context_capture_ix = None;
         let mut extra_context_capture_ix = None;
         let mut open_capture_ix = None;
         let mut close_capture_ix = None;
         let mut annotation_capture_ix = None;
-        get_capture_indices(
+        if populate_capture_indices(
             &query,
+            &self.config.name,
+            "outline",
+            &[],
             &mut [
-                ("item", &mut item_capture_ix),
-                ("name", &mut name_capture_ix),
-                ("context", &mut context_capture_ix),
-                ("context.extra", &mut extra_context_capture_ix),
-                ("open", &mut open_capture_ix),
-                ("close", &mut close_capture_ix),
-                ("annotation", &mut annotation_capture_ix),
+                Capture::Required("item", &mut item_capture_ix),
+                Capture::Required("name", &mut name_capture_ix),
+                Capture::Optional("context", &mut context_capture_ix),
+                Capture::Optional("context.extra", &mut extra_context_capture_ix),
+                Capture::Optional("open", &mut open_capture_ix),
+                Capture::Optional("close", &mut close_capture_ix),
+                Capture::Optional("annotation", &mut annotation_capture_ix),
             ],
-        );
-        if let Some((item_capture_ix, name_capture_ix)) = item_capture_ix.zip(name_capture_ix) {
-            grammar.outline_config = Some(OutlineConfig {
+        ) {
+            self.grammar_mut()?.outline_config = Some(OutlineConfig {
                 query,
                 item_capture_ix,
                 name_capture_ix,
@@ -1492,17 +1494,22 @@ impl Language {
     }
 
     pub fn with_text_object_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
-        let query = Query::new(&grammar.ts_language, source)?;
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
 
         let mut text_objects_by_capture_ix = Vec::new();
         for (ix, name) in query.capture_names().iter().enumerate() {
             if let Some(text_object) = TextObject::from_capture_name(name) {
                 text_objects_by_capture_ix.push((ix as u32, text_object));
+            } else {
+                log::warn!(
+                    "unrecognized capture name '{}' in {} textobjects TreeSitter query",
+                    name,
+                    self.config.name,
+                );
             }
         }
 
-        grammar.text_object_config = Some(TextObjectConfig {
+        self.grammar_mut()?.text_object_config = Some(TextObjectConfig {
             query,
             text_objects_by_capture_ix,
         });
@@ -1510,25 +1517,26 @@ impl Language {
     }
 
     pub fn with_embedding_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
-        let query = Query::new(&grammar.ts_language, source)?;
-        let mut item_capture_ix = None;
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
+        let mut item_capture_ix = 0;
         let mut name_capture_ix = None;
         let mut context_capture_ix = None;
         let mut collapse_capture_ix = None;
         let mut keep_capture_ix = None;
-        get_capture_indices(
+        if populate_capture_indices(
             &query,
+            &self.config.name,
+            "embedding",
+            &[],
             &mut [
-                ("item", &mut item_capture_ix),
-                ("name", &mut name_capture_ix),
-                ("context", &mut context_capture_ix),
-                ("keep", &mut keep_capture_ix),
-                ("collapse", &mut collapse_capture_ix),
+                Capture::Required("item", &mut item_capture_ix),
+                Capture::Optional("name", &mut name_capture_ix),
+                Capture::Optional("context", &mut context_capture_ix),
+                Capture::Optional("keep", &mut keep_capture_ix),
+                Capture::Optional("collapse", &mut collapse_capture_ix),
             ],
-        );
-        if let Some(item_capture_ix) = item_capture_ix {
-            grammar.embedding_config = Some(EmbeddingConfig {
+        ) {
+            self.grammar_mut()?.embedding_config = Some(EmbeddingConfig {
                 query,
                 item_capture_ix,
                 name_capture_ix,
@@ -1541,17 +1549,22 @@ impl Language {
     }
 
     pub fn with_debug_variables_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
-        let query = Query::new(&grammar.ts_language, source)?;
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
 
         let mut objects_by_capture_ix = Vec::new();
         for (ix, name) in query.capture_names().iter().enumerate() {
             if let Some(text_object) = DebuggerTextObject::from_capture_name(name) {
                 objects_by_capture_ix.push((ix as u32, text_object));
+            } else {
+                log::warn!(
+                    "unrecognized capture name '{}' in {} debugger TreeSitter query",
+                    name,
+                    self.config.name,
+                );
             }
         }
 
-        grammar.debug_variables_config = Some(DebugVariablesConfig {
+        self.grammar_mut()?.debug_variables_config = Some(DebugVariablesConfig {
             query,
             objects_by_capture_ix,
         });
@@ -1559,30 +1572,31 @@ impl Language {
     }
 
     pub fn with_brackets_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
-        let query = Query::new(&grammar.ts_language, source)?;
-        let mut open_capture_ix = None;
-        let mut close_capture_ix = None;
-        get_capture_indices(
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
+        let mut open_capture_ix = 0;
+        let mut close_capture_ix = 0;
+        if populate_capture_indices(
             &query,
+            &self.config.name,
+            "brackets",
+            &[],
             &mut [
-                ("open", &mut open_capture_ix),
-                ("close", &mut close_capture_ix),
+                Capture::Required("open", &mut open_capture_ix),
+                Capture::Required("close", &mut close_capture_ix),
             ],
-        );
-        let patterns = (0..query.pattern_count())
-            .map(|ix| {
-                let mut config = BracketsPatternConfig::default();
-                for setting in query.property_settings(ix) {
-                    if setting.key.as_ref() == "newline.only" {
-                        config.newline_only = true
+        ) {
+            let patterns = (0..query.pattern_count())
+                .map(|ix| {
+                    let mut config = BracketsPatternConfig::default();
+                    for setting in query.property_settings(ix) {
+                        if setting.key.as_ref() == "newline.only" {
+                            config.newline_only = true
+                        }
                     }
-                }
-                config
-            })
-            .collect();
-        if let Some((open_capture_ix, close_capture_ix)) = open_capture_ix.zip(close_capture_ix) {
-            grammar.brackets_config = Some(BracketsConfig {
+                    config
+                })
+                .collect();
+            self.grammar_mut()?.brackets_config = Some(BracketsConfig {
                 query,
                 open_capture_ix,
                 close_capture_ix,
@@ -1593,31 +1607,31 @@ impl Language {
     }
 
     pub fn with_indents_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
-        let query = Query::new(&grammar.ts_language, source)?;
-        let mut indent_capture_ix = None;
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
+        let mut indent_capture_ix = 0;
         let mut start_capture_ix = None;
         let mut end_capture_ix = None;
         let mut outdent_capture_ix = None;
-        get_capture_indices(
+        if populate_capture_indices(
             &query,
+            &self.config.name,
+            "indents",
+            &["start."],
             &mut [
-                ("indent", &mut indent_capture_ix),
-                ("start", &mut start_capture_ix),
-                ("end", &mut end_capture_ix),
-                ("outdent", &mut outdent_capture_ix),
+                Capture::Required("indent", &mut indent_capture_ix),
+                Capture::Optional("start", &mut start_capture_ix),
+                Capture::Optional("end", &mut end_capture_ix),
+                Capture::Optional("outdent", &mut outdent_capture_ix),
             ],
-        );
-
-        let mut suffixed_start_captures = HashMap::default();
-        for (ix, name) in query.capture_names().iter().enumerate() {
-            if let Some(suffix) = name.strip_prefix("start.") {
-                suffixed_start_captures.insert(ix as u32, suffix.to_owned().into());
+        ) {
+            let mut suffixed_start_captures = HashMap::default();
+            for (ix, name) in query.capture_names().iter().enumerate() {
+                if let Some(suffix) = name.strip_prefix("start.") {
+                    suffixed_start_captures.insert(ix as u32, suffix.to_owned().into());
+                }
             }
-        }
 
-        if let Some(indent_capture_ix) = indent_capture_ix {
-            grammar.indents_config = Some(IndentConfig {
+            self.grammar_mut()?.indents_config = Some(IndentConfig {
                 query,
                 indent_capture_ix,
                 start_capture_ix,
@@ -1630,68 +1644,74 @@ impl Language {
     }
 
     pub fn with_injection_query(mut self, source: &str) -> Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
-        let query = Query::new(&grammar.ts_language, source)?;
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
         let mut language_capture_ix = None;
         let mut injection_language_capture_ix = None;
         let mut content_capture_ix = None;
         let mut injection_content_capture_ix = None;
-        get_capture_indices(
+        if populate_capture_indices(
             &query,
+            &self.config.name,
+            "injections",
+            &[],
             &mut [
-                ("language", &mut language_capture_ix),
-                ("injection.language", &mut injection_language_capture_ix),
-                ("content", &mut content_capture_ix),
-                ("injection.content", &mut injection_content_capture_ix),
+                Capture::Optional("language", &mut language_capture_ix),
+                Capture::Optional("injection.language", &mut injection_language_capture_ix),
+                Capture::Optional("content", &mut content_capture_ix),
+                Capture::Optional("injection.content", &mut injection_content_capture_ix),
             ],
-        );
-        language_capture_ix = match (language_capture_ix, injection_language_capture_ix) {
-            (None, Some(ix)) => Some(ix),
-            (Some(_), Some(_)) => {
-                anyhow::bail!("both language and injection.language captures are present");
-            }
-            _ => language_capture_ix,
-        };
-        content_capture_ix = match (content_capture_ix, injection_content_capture_ix) {
-            (None, Some(ix)) => Some(ix),
-            (Some(_), Some(_)) => {
-                anyhow::bail!("both content and injection.content captures are present")
-            }
-            _ => content_capture_ix,
-        };
-        let patterns = (0..query.pattern_count())
-            .map(|ix| {
-                let mut config = InjectionPatternConfig::default();
-                for setting in query.property_settings(ix) {
-                    match setting.key.as_ref() {
-                        "language" | "injection.language" => {
-                            config.language.clone_from(&setting.value);
-                        }
-                        "combined" | "injection.combined" => {
-                            config.combined = true;
+        ) {
+            language_capture_ix = match (language_capture_ix, injection_language_capture_ix) {
+                (None, Some(ix)) => Some(ix),
+                (Some(_), Some(_)) => {
+                    anyhow::bail!("both language and injection.language captures are present");
+                }
+                _ => language_capture_ix,
+            };
+            content_capture_ix = match (content_capture_ix, injection_content_capture_ix) {
+                (None, Some(ix)) => Some(ix),
+                (Some(_), Some(_)) => {
+                    anyhow::bail!("both content and injection.content captures are present")
+                }
+                _ => content_capture_ix,
+            };
+            let patterns = (0..query.pattern_count())
+                .map(|ix| {
+                    let mut config = InjectionPatternConfig::default();
+                    for setting in query.property_settings(ix) {
+                        match setting.key.as_ref() {
+                            "language" | "injection.language" => {
+                                config.language.clone_from(&setting.value);
+                            }
+                            "combined" | "injection.combined" => {
+                                config.combined = true;
+                            }
+                            _ => {}
                         }
-                        _ => {}
                     }
-                }
-                config
-            })
-            .collect();
-        if let Some(content_capture_ix) = content_capture_ix {
-            grammar.injection_config = Some(InjectionConfig {
-                query,
-                language_capture_ix,
-                content_capture_ix,
-                patterns,
-            });
+                    config
+                })
+                .collect();
+            if let Some(content_capture_ix) = content_capture_ix {
+                self.grammar_mut()?.injection_config = Some(InjectionConfig {
+                    query,
+                    language_capture_ix,
+                    content_capture_ix,
+                    patterns,
+                });
+            } else {
+                log::error!(
+                    "missing required capture in injections {} TreeSitter query: \
+                    content or injection.content",
+                    &self.config.name,
+                );
+            }
         }
         Ok(self)
     }
 
     pub fn with_override_query(mut self, source: &str) -> anyhow::Result<Self> {
-        let query = {
-            let grammar = self.grammar.as_ref().context("no grammar for language")?;
-            Query::new(&grammar.ts_language, source)?
-        };
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
 
         let mut override_configs_by_id = HashMap::default();
         for (ix, mut name) in query.capture_names().iter().copied().enumerate() {
@@ -1766,7 +1786,7 @@ impl Language {
 
         self.config.brackets.disabled_scopes_by_bracket_ix.clear();
 
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
+        let grammar = self.grammar_mut()?;
         grammar.override_config = Some(OverrideConfig {
             query,
             values: override_configs_by_id,
@@ -1775,24 +1795,33 @@ impl Language {
     }
 
     pub fn with_redaction_query(mut self, source: &str) -> anyhow::Result<Self> {
-        let grammar = self.grammar_mut().context("cannot mutate grammar")?;
-
-        let query = Query::new(&grammar.ts_language, source)?;
-        let mut redaction_capture_ix = None;
-        get_capture_indices(&query, &mut [("redact", &mut redaction_capture_ix)]);
-
-        if let Some(redaction_capture_ix) = redaction_capture_ix {
-            grammar.redactions_config = Some(RedactionConfig {
+        let query = Query::new(&self.expect_grammar()?.ts_language, source)?;
+        let mut redaction_capture_ix = 0;
+        if populate_capture_indices(
+            &query,
+            &self.config.name,
+            "redactions",
+            &[],
+            &mut [Capture::Required("redact", &mut redaction_capture_ix)],
+        ) {
+            self.grammar_mut()?.redactions_config = Some(RedactionConfig {
                 query,
                 redaction_capture_ix,
             });
         }
-
         Ok(self)
     }
 
-    fn grammar_mut(&mut self) -> Option<&mut Grammar> {
-        Arc::get_mut(self.grammar.as_mut()?)
+    fn expect_grammar(&self) -> Result<&Grammar> {
+        self.grammar
+            .as_ref()
+            .map(|grammar| grammar.as_ref())
+            .context("no grammar for language")
+    }
+
+    fn grammar_mut(&mut self) -> Result<&mut Grammar> {
+        Arc::get_mut(self.grammar.as_mut().context("no grammar for language")?)
+            .context("cannot mutate grammar")
     }
 
     pub fn name(&self) -> LanguageName {
@@ -2312,15 +2341,66 @@ impl LspAdapter for FakeLspAdapter {
     }
 }
 
-fn get_capture_indices(query: &Query, captures: &mut [(&str, &mut Option<u32>)]) {
-    for (ix, name) in query.capture_names().iter().enumerate() {
-        for (capture_name, index) in captures.iter_mut() {
-            if capture_name == name {
-                **index = Some(ix as u32);
-                break;
+enum Capture<'a> {
+    Required(&'static str, &'a mut u32),
+    Optional(&'static str, &'a mut Option<u32>),
+}
+
+fn populate_capture_indices(
+    query: &Query,
+    language_name: &LanguageName,
+    query_type: &str,
+    expected_prefixes: &[&str],
+    captures: &mut [Capture<'_>],
+) -> bool {
+    let mut found_required_indices = Vec::new();
+    'outer: for (ix, name) in query.capture_names().iter().enumerate() {
+        for (required_ix, capture) in captures.iter_mut().enumerate() {
+            match capture {
+                Capture::Required(capture_name, index) if capture_name == name => {
+                    **index = ix as u32;
+                    found_required_indices.push(required_ix);
+                    continue 'outer;
+                }
+                Capture::Optional(capture_name, index) if capture_name == name => {
+                    **index = Some(ix as u32);
+                    continue 'outer;
+                }
+                _ => {}
             }
         }
+        if !name.starts_with("_")
+            && !expected_prefixes
+                .iter()
+                .any(|&prefix| name.starts_with(prefix))
+        {
+            log::warn!(
+                "unrecognized capture name '{}' in {} {} TreeSitter query \
+                (suppress this warning by prefixing with '_')",
+                name,
+                language_name,
+                query_type
+            );
+        }
+    }
+    let mut missing_required_captures = Vec::new();
+    for (capture_ix, capture) in captures.iter().enumerate() {
+        if let Capture::Required(capture_name, _) = capture
+            && !found_required_indices.contains(&capture_ix)
+        {
+            missing_required_captures.push(*capture_name);
+        }
+    }
+    let success = missing_required_captures.is_empty();
+    if !success {
+        log::error!(
+            "missing required capture(s) in {} {} TreeSitter query: {}",
+            language_name,
+            query_type,
+            missing_required_captures.join(", ")
+        );
     }
+    success
 }
 
 pub fn point_to_lsp(point: PointUtf16) -> lsp::Position {

crates/languages/src/typescript/injections.scm 🔗

@@ -2,8 +2,8 @@
   (#match? @_jsdoc_comment "(?s)^/[*][*][^*].*[*]/$")) @injection.content
   (#set! injection.language "jsdoc"))
 
-(((comment) @reference
-  (#match? @reference "^///\\s+<reference\\s+types=\"\\S+\"\\s*/>\\s*$")) @injection.content
+(((comment) @_reference
+  (#match? @_reference "^///\\s+<reference\\s+types=\"\\S+\"\\s*/>\\s*$")) @injection.content
   (#set! injection.language "html"))
 
 ((regex) @injection.content