language_core: Introduce fallback highlights (#52575)

Finn Evers and Kirill Bulatov created

Self-Review Checklist:

- [X] I've reviewed my own diff for quality, security, and reliability
- [ ] Unsafe blocks (if any) have justifying comments
- [ ] The content is consistent with the [UI/UX
checklist](https://github.com/zed-industries/zed/blob/main/CONTRIBUTING.md#uiux-checklist)
- [ ] Tests cover the new/changed behavior
- [ ] Performance impact has been considered and is acceptable

Release Notes:

- Added the ability for language highlight queries to specify fallback
captures. If a pattern has the captures `@second.capture
@first.capture`, Zed first tries to resolve a theme highlight for the
code fragment using the rightmost capture (`@first.capture`), and falls
back to `@second.capture` only if the theme has no style for the first.

---------

Co-authored-by: Kirill Bulatov <kirill@zed.dev>

Change summary

crates/agent_ui/src/completion_provider.rs             |   2 
crates/editor/src/editor_tests.rs                      | 110 ++++++++---
crates/language/src/buffer.rs                          |  18 +
crates/language/src/language.rs                        |  29 +-
crates/language_core/src/grammar.rs                    |   7 
crates/language_core/src/highlight_map.rs              |  50 ++---
crates/language_extension/src/extension_lsp_adapter.rs |   6 
crates/language_tools/src/highlights_tree_view.rs      |   5 
crates/languages/src/rust.rs                           |  38 ++--
crates/project/tests/integration/lsp_store.rs          |   4 
docs/src/extensions/languages.md                       |  15 +
11 files changed, 171 insertions(+), 113 deletions(-)

Detailed changes

crates/agent_ui/src/completion_provider.rs 🔗

@@ -2144,7 +2144,7 @@ fn build_code_label_for_path(
         .theme()
         .syntax()
         .highlight_id("variable")
-        .map(HighlightId);
+        .map(HighlightId::new);
     let mut label = CodeLabelBuilder::default();
 
     label.push_str(file, None);

crates/editor/src/editor_tests.rs 🔗

@@ -52,7 +52,7 @@ use settings::{
     ProjectSettingsContent, ScrollBeyondLastLine, SearchSettingsContent, SettingsContent,
     SettingsStore,
 };
-use std::borrow::Cow;
+use std::{borrow::Cow, sync::Arc};
 use std::{cell::RefCell, future::Future, rc::Rc, sync::atomic::AtomicBool, time::Instant};
 use std::{
     iter,
@@ -19112,7 +19112,7 @@ async fn test_copy_highlight_json(cx: &mut TestAppContext) {
             let x = 1;ˇ
         }
     "});
-    setup_rust_syntax_highlighting(&mut cx);
+    setup_syntax_highlighting(rust_lang(), &mut cx);
 
     cx.update_editor(|editor, window, cx| {
         editor.copy_highlight_json(&CopyHighlightJson, window, cx);
@@ -19160,7 +19160,7 @@ async fn test_copy_highlight_json_selected_range(cx: &mut TestAppContext) {
             let yˇ» = 2;
         }
     "});
-    setup_rust_syntax_highlighting(&mut cx);
+    setup_syntax_highlighting(rust_lang(), &mut cx);
 
     cx.update_editor(|editor, window, cx| {
         editor.copy_highlight_json(&CopyHighlightJson, window, cx);
@@ -19203,7 +19203,7 @@ async fn test_copy_highlight_json_selected_line_range(cx: &mut TestAppContext) {
             let yˇ» = 2;
         }
     "});
-    setup_rust_syntax_highlighting(&mut cx);
+    setup_syntax_highlighting(rust_lang(), &mut cx);
 
     cx.update_editor(|editor, window, cx| {
         editor.selections.set_line_mode(true);
@@ -19253,7 +19253,7 @@ async fn test_copy_highlight_json_single_line(cx: &mut TestAppContext) {
             let y = 2;
         }
     "});
-    setup_rust_syntax_highlighting(&mut cx);
+    setup_syntax_highlighting(rust_lang(), &mut cx);
 
     cx.update_editor(|editor, window, cx| {
         editor.selections.set_line_mode(true);
@@ -19280,34 +19280,6 @@ async fn test_copy_highlight_json_single_line(cx: &mut TestAppContext) {
     );
 }
 
-fn setup_rust_syntax_highlighting(cx: &mut EditorTestContext) {
-    let syntax = SyntaxTheme::new_test(vec![
-        ("keyword", Hsla::red()),
-        ("function", Hsla::blue()),
-        ("variable", Hsla::green()),
-        ("number", Hsla::default()),
-        ("operator", Hsla::default()),
-        ("punctuation.bracket", Hsla::default()),
-        ("punctuation.delimiter", Hsla::default()),
-    ]);
-
-    let language = rust_lang();
-    language.set_theme(&syntax);
-
-    cx.update_buffer(|buffer, cx| buffer.set_language(Some(language), cx));
-    cx.executor().run_until_parked();
-    cx.update_editor(|editor, window, cx| {
-        editor.set_style(
-            EditorStyle {
-                syntax: Arc::new(syntax),
-                ..Default::default()
-            },
-            window,
-            cx,
-        );
-    });
-}
-
 #[gpui::test]
 async fn test_following(cx: &mut TestAppContext) {
     init_test(cx, |_| {});
@@ -35729,3 +35701,75 @@ async fn test_align_selections_multicolumn(cx: &mut TestAppContext) {
     cx.update_editor(|e, window, cx| e.align_selections(&AlignSelections, window, cx));
     cx.assert_editor_state(after);
 }
+
+#[gpui::test]
+async fn test_custom_fallback_highlights(cx: &mut TestAppContext) {
+    init_test(cx, |_| {});
+
+    let mut cx = EditorTestContext::new(cx).await;
+    cx.set_state(indoc! {"fn main(self, variable: TType) {ˇ}"});
+
+    let variable_color = Hsla::green();
+    let function_color = Hsla::blue();
+
+    let test_cases = [
+        ("@variable", Some(variable_color)),
+        ("@type", None),
+        ("@type @variable", Some(variable_color)),
+        ("@variable @type", Some(variable_color)),
+        ("@variable @function", Some(function_color)),
+        ("@function @variable", Some(variable_color)),
+    ];
+
+    for (test_case, expected) in test_cases {
+        let custom_rust_lang = Arc::into_inner(rust_lang())
+            .unwrap()
+            .with_highlights_query(format! {r#"(type_identifier) {test_case}"#}.as_str())
+            .unwrap();
+        let theme = setup_syntax_highlighting(Arc::new(custom_rust_lang), &mut cx);
+        let expected = expected.map_or_else(Vec::new, |expected_color| {
+            vec![(24..29, HighlightStyle::color(expected_color))]
+        });
+
+        cx.update_editor(|editor, window, cx| {
+            let snapshot = editor.snapshot(window, cx);
+            assert_eq!(
+                expected,
+                snapshot.combined_highlights(MultiBufferOffset(0)..snapshot.buffer().len(), &theme),
+                "Test case with '{test_case}' highlights query did not pass",
+            );
+        });
+    }
+}
+
+fn setup_syntax_highlighting(
+    language: Arc<Language>,
+    cx: &mut EditorTestContext,
+) -> Arc<SyntaxTheme> {
+    let syntax = Arc::new(SyntaxTheme::new_test(vec![
+        ("keyword", Hsla::red()),
+        ("function", Hsla::blue()),
+        ("variable", Hsla::green()),
+        ("number", Hsla::default()),
+        ("operator", Hsla::default()),
+        ("punctuation.bracket", Hsla::default()),
+        ("punctuation.delimiter", Hsla::default()),
+    ]));
+
+    language.set_theme(&syntax);
+
+    cx.update_buffer(|buffer, cx| buffer.set_language(Some(language), cx));
+    cx.executor().run_until_parked();
+    cx.update_editor(|editor, window, cx| {
+        editor.set_style(
+            EditorStyle {
+                syntax: syntax.clone(),
+                ..EditorStyle::default()
+            },
+            window,
+            cx,
+        );
+    });
+
+    syntax
+}

crates/language/src/buffer.rs 🔗

@@ -5549,11 +5549,11 @@ impl<'a> BufferChunks<'a> {
                     && range.start >= capture.node.start_byte()
                 {
                     let next_capture_end = capture.node.end_byte();
-                    if range.start < next_capture_end {
-                        highlights.stack.push((
-                            next_capture_end,
-                            highlights.highlight_maps[capture.grammar_index].get(capture.index),
-                        ));
+                    if range.start < next_capture_end
+                        && let Some(capture_id) =
+                            highlights.highlight_maps[capture.grammar_index].get(capture.index)
+                    {
+                        highlights.stack.push((next_capture_end, capture_id));
                     }
                     highlights.next_capture.take();
                 }
@@ -5688,9 +5688,11 @@ impl<'a> Iterator for BufferChunks<'a> {
                 } else {
                     let highlight_id =
                         highlights.highlight_maps[capture.grammar_index].get(capture.index);
-                    highlights
-                        .stack
-                        .push((capture.node.end_byte(), highlight_id));
+                    if let Some(highlight_id) = highlight_id {
+                        highlights
+                            .stack
+                            .push((capture.node.end_byte(), highlight_id));
+                    }
                     highlights.next_capture = highlights.captures.next();
                 }
             }

crates/language/src/language.rs 🔗

@@ -1023,9 +1023,7 @@ impl Language {
                 BufferChunks::new(text, range, Some((captures, highlight_maps)), false, None)
             {
                 let end_offset = offset + chunk.text.len();
-                if let Some(highlight_id) = chunk.syntax_highlight_id
-                    && !highlight_id.is_default()
-                {
+                if let Some(highlight_id) = chunk.syntax_highlight_id {
                     result.push((offset..end_offset, highlight_id));
                 }
                 offset = end_offset;
@@ -1077,11 +1075,11 @@ impl Language {
 
 #[inline]
 pub fn build_highlight_map(capture_names: &[&str], theme: &SyntaxTheme) -> HighlightMap {
-    HighlightMap::from_ids(capture_names.iter().map(|capture_name| {
-        theme
-            .highlight_id(capture_name)
-            .map_or(HighlightId::default(), HighlightId)
-    }))
+    HighlightMap::from_ids(
+        capture_names
+            .iter()
+            .map(|capture_name| theme.highlight_id(capture_name).map(HighlightId::new)),
+    )
 }
 
 impl LanguageScope {
@@ -1645,9 +1643,18 @@ mod tests {
         ];
 
         let map = build_highlight_map(capture_names, &theme);
-        assert_eq!(theme.get_capture_name(map.get(0)), Some("function"));
-        assert_eq!(theme.get_capture_name(map.get(1)), Some("function.async"));
-        assert_eq!(theme.get_capture_name(map.get(2)), Some("variable.builtin"));
+        assert_eq!(
+            theme.get_capture_name(map.get(0).unwrap()),
+            Some("function")
+        );
+        assert_eq!(
+            theme.get_capture_name(map.get(1).unwrap()),
+            Some("function.async")
+        );
+        assert_eq!(
+            theme.get_capture_name(map.get(2).unwrap()),
+            Some("variable.builtin")
+        );
     }
 
     #[gpui::test(iterations = 10)]

crates/language_core/src/grammar.rs 🔗

@@ -275,12 +275,11 @@ impl Grammar {
     }
 
     pub fn highlight_id_for_name(&self, name: &str) -> Option<HighlightId> {
-        let capture_id = self
-            .highlights_config
+        self.highlights_config
             .as_ref()?
             .query
-            .capture_index_for_name(name)?;
-        Some(self.highlight_map.lock().get(capture_id))
+            .capture_index_for_name(name)
+            .and_then(|capture_id| self.highlight_map.lock().get(capture_id))
     }
 
     pub fn debug_variables_config(&self) -> Option<&DebugVariablesConfig> {

crates/language_core/src/highlight_map.rs 🔗

@@ -1,35 +1,35 @@
-use std::sync::Arc;
+use std::{num::NonZeroU32, sync::Arc};
 
 #[derive(Clone, Debug)]
-pub struct HighlightMap(Arc<[HighlightId]>);
+pub struct HighlightMap(Arc<[Option<HighlightId>]>);
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub struct HighlightId(pub u32);
+pub struct HighlightId(NonZeroU32);
 
-const DEFAULT_SYNTAX_HIGHLIGHT_ID: HighlightId = HighlightId(u32::MAX);
+impl HighlightId {
+    pub const TABSTOP_INSERT_ID: HighlightId = HighlightId(NonZeroU32::new(u32::MAX - 1).unwrap());
+    pub const TABSTOP_REPLACE_ID: HighlightId = HighlightId(NonZeroU32::new(u32::MAX - 2).unwrap());
 
-impl HighlightMap {
-    #[inline]
-    pub fn from_ids(highlight_ids: impl IntoIterator<Item = HighlightId>) -> Self {
-        Self(highlight_ids.into_iter().collect())
+    pub fn new(capture_id: u32) -> Self {
+        Self(NonZeroU32::new(capture_id + 1).unwrap_or(NonZeroU32::MAX))
     }
+}
 
-    #[inline]
-    pub fn get(&self, capture_id: u32) -> HighlightId {
-        self.0
-            .get(capture_id as usize)
-            .copied()
-            .unwrap_or(DEFAULT_SYNTAX_HIGHLIGHT_ID)
+impl From<HighlightId> for usize {
+    fn from(value: HighlightId) -> Self {
+        value.0.get() as usize - 1
     }
 }
 
-impl HighlightId {
-    pub const TABSTOP_INSERT_ID: HighlightId = HighlightId(u32::MAX - 1);
-    pub const TABSTOP_REPLACE_ID: HighlightId = HighlightId(u32::MAX - 2);
+impl HighlightMap {
+    #[inline]
+    pub fn from_ids(highlight_ids: impl IntoIterator<Item = Option<HighlightId>>) -> Self {
+        Self(highlight_ids.into_iter().collect())
+    }
 
     #[inline]
-    pub fn is_default(&self) -> bool {
-        *self == DEFAULT_SYNTAX_HIGHLIGHT_ID
+    pub fn get(&self, capture_id: u32) -> Option<HighlightId> {
+        self.0.get(capture_id as usize).copied().flatten()
     }
 }
 
@@ -38,15 +38,3 @@ impl Default for HighlightMap {
         Self(Arc::new([]))
     }
 }
-
-impl Default for HighlightId {
-    fn default() -> Self {
-        DEFAULT_SYNTAX_HIGHLIGHT_ID
-    }
-}
-
-impl From<HighlightId> for usize {
-    fn from(value: HighlightId) -> Self {
-        value.0 as usize
-    }
-}

crates/language_extension/src/extension_lsp_adapter.rs 🔗

@@ -684,7 +684,7 @@ fn test_build_code_label() {
     );
     let code_runs = code_ranges
         .into_iter()
-        .map(|range| (range, HighlightId(0)))
+        .map(|range| (range, HighlightId::new(0)))
         .collect::<Vec<_>>();
 
     let label = build_code_label(
@@ -707,7 +707,7 @@ fn test_build_code_label() {
         marked_text_ranges("pqrs.tuv: «fn»(«Bcd»(«Efgh»)) -> «Ijklm»", false);
     let label_runs = label_ranges
         .into_iter()
-        .map(|range| (range, HighlightId(0)))
+        .map(|range| (range, HighlightId::new(0)))
         .collect::<Vec<_>>();
 
     assert_eq!(
@@ -723,7 +723,7 @@ fn test_build_code_label_with_invalid_ranges() {
     let (code, code_ranges) = marked_text_ranges("const «a»: «B» = '🏀'", false);
     let code_runs = code_ranges
         .into_iter()
-        .map(|range| (range, HighlightId(0)))
+        .map(|range| (range, HighlightId::new(0)))
         .collect::<Vec<_>>();
 
     // A span uses a code range that is invalid because it starts inside of

crates/language_tools/src/highlights_tree_view.rs 🔗

@@ -420,7 +420,10 @@ impl HighlightsTreeView {
             let highlight_maps: Vec<_> = grammars.iter().map(|g| g.highlight_map()).collect();
 
             for capture in captures {
-                let highlight_id = highlight_maps[capture.grammar_index].get(capture.index);
+                let Some(highlight_id) = highlight_maps[capture.grammar_index].get(capture.index)
+                else {
+                    continue;
+                };
                 let Some(style) = syntax_theme.get(highlight_id).cloned() else {
                     continue;
                 };

crates/languages/src/rust.rs 🔗

@@ -1542,10 +1542,10 @@ mod tests {
                 "await.as_deref_mut(&mut self) -> IterMut<'_, T>".to_string(),
                 6..18,
                 vec![
-                    (6..18, HighlightId(2)),
-                    (20..23, HighlightId(1)),
-                    (33..40, HighlightId(0)),
-                    (45..46, HighlightId(0))
+                    (6..18, HighlightId::new(2)),
+                    (20..23, HighlightId::new(1)),
+                    (33..40, HighlightId::new(0)),
+                    (45..46, HighlightId::new(0))
                 ],
             ))
         );
@@ -1572,12 +1572,12 @@ mod tests {
                 "pub fn as_deref_mut(&mut self) -> IterMut<'_, T>".to_string(),
                 7..19,
                 vec![
-                    (0..3, HighlightId(1)),
-                    (4..6, HighlightId(1)),
-                    (7..19, HighlightId(2)),
-                    (21..24, HighlightId(1)),
-                    (34..41, HighlightId(0)),
-                    (46..47, HighlightId(0))
+                    (0..3, HighlightId::new(1)),
+                    (4..6, HighlightId::new(1)),
+                    (7..19, HighlightId::new(2)),
+                    (21..24, HighlightId::new(1)),
+                    (34..41, HighlightId::new(0)),
+                    (46..47, HighlightId::new(0))
                 ],
             ))
         );
@@ -1598,7 +1598,7 @@ mod tests {
             Some(CodeLabel::new(
                 "inner_value: String".to_string(),
                 6..11,
-                vec![(0..11, HighlightId(3)), (13..19, HighlightId(0))],
+                vec![(0..11, HighlightId::new(3)), (13..19, HighlightId::new(0))],
             ))
         );
 
@@ -1625,8 +1625,8 @@ mod tests {
                 vec![
                     (10..13, HighlightId::TABSTOP_INSERT_ID),
                     (16..19, HighlightId::TABSTOP_INSERT_ID),
-                    (0..7, HighlightId(2)),
-                    (7..8, HighlightId(2)),
+                    (0..7, HighlightId::new(2)),
+                    (7..8, HighlightId::new(2)),
                 ],
             ))
         );
@@ -1653,8 +1653,8 @@ mod tests {
                 0..4,
                 vec![
                     (5..9, HighlightId::TABSTOP_REPLACE_ID),
-                    (0..3, HighlightId(2)),
-                    (3..4, HighlightId(2)),
+                    (0..3, HighlightId::new(2)),
+                    (3..4, HighlightId::new(2)),
                 ],
             ))
         );
@@ -1682,8 +1682,8 @@ mod tests {
                 vec![
                     (7..10, HighlightId::TABSTOP_REPLACE_ID),
                     (13..16, HighlightId::TABSTOP_INSERT_ID),
-                    (0..2, HighlightId(1)),
-                    (3..6, HighlightId(1)),
+                    (0..2, HighlightId::new(1)),
+                    (3..6, HighlightId::new(1)),
                 ],
             ))
         );
@@ -1711,8 +1711,8 @@ mod tests {
                 vec![
                     (4..8, HighlightId::TABSTOP_REPLACE_ID),
                     (12..16, HighlightId::TABSTOP_REPLACE_ID),
-                    (0..3, HighlightId(1)),
-                    (9..11, HighlightId(1)),
+                    (0..3, HighlightId::new(1)),
+                    (9..11, HighlightId::new(1)),
                 ],
             ))
         );

crates/project/tests/integration/lsp_store.rs 🔗

@@ -43,7 +43,7 @@ fn test_multi_len_chars_normalization() {
     let mut label = CodeLabel::new(
         "myElˇ (parameter) myElˇ: {\n    foo: string;\n}".to_string(),
         0..6,
-        vec![(0..6, HighlightId(1))],
+        vec![(0..6, HighlightId::new(1))],
     );
     ensure_uniform_list_compatible_label(&mut label);
     assert_eq!(
@@ -51,7 +51,7 @@ fn test_multi_len_chars_normalization() {
         CodeLabel::new(
             "myElˇ (parameter) myElˇ: { foo: string; }".to_string(),
             0..6,
-            vec![(0..6, HighlightId(1))],
+            vec![(0..6, HighlightId::new(1))],
         )
     );
 }

docs/src/extensions/languages.md 🔗

@@ -143,6 +143,21 @@ This query marks strings, object keys, and numbers for highlighting. The followi
 | @variable.parameter      | Captures function/method parameters    |
 | @variant                 | Captures variants                      |
 
+#### Fallback captures
+
+A single Tree-sitter pattern can specify multiple captures on the same node to define fallback highlights.
+Zed resolves them right-to-left: it first tries the rightmost capture, and if the current theme has no style for it, falls back to the next capture to the left, and so on.
+
+For example:
+
+```scheme
+(type_identifier) @type @variable
+```
+
+Here Zed will first try to resolve `@variable` from the theme. If the theme defines a style for `@variable`, that style is used. Otherwise, Zed falls back to `@type`.
+
+This is useful when a language wants to provide a preferred highlight that some themes may not support, while still falling back to a more common capture that most themes define.
+
 ### Bracket matching
 
 The `brackets.scm` file defines matching brackets.