Cargo.lock 🔗
@@ -8704,6 +8704,8 @@ dependencies = [
"sha2 0.10.7",
"smol",
"tempfile",
+ "tree-sitter",
+ "unindent",
"util",
"worktree",
]
Created by Max Brunsfeld.
This chunking strategy uses the existing `outline` query to chunk files.
We try to find chunk boundaries that are:
* at starts or ends of lines
* nested within as few outline items as possible
Release Notes:
- N/A
Cargo.lock | 2
crates/language/src/language.rs | 13
crates/language/src/syntax_map.rs | 4
crates/semantic_index/Cargo.toml | 2
crates/semantic_index/src/chunking.rs | 634 +++++++++++++---------------
5 files changed, 312 insertions(+), 343 deletions(-)
@@ -8704,6 +8704,8 @@ dependencies = [
"sha2 0.10.7",
"smol",
"tempfile",
+ "tree-sitter",
+ "unindent",
"util",
"worktree",
]
@@ -55,10 +55,10 @@ use std::{
Arc,
},
};
-use syntax_map::SyntaxSnapshot;
+use syntax_map::{QueryCursorHandle, SyntaxSnapshot};
pub use task_context::{BasicContextProvider, ContextProvider, ContextProviderWithTasks};
use theme::SyntaxTheme;
-use tree_sitter::{self, wasmtime, Query, WasmStore};
+use tree_sitter::{self, wasmtime, Query, QueryCursor, WasmStore};
use util::http::HttpClient;
pub use buffer::Operation;
@@ -101,6 +101,15 @@ where
})
}
+pub fn with_query_cursor<F, R>(func: F) -> R
+where
+ F: FnOnce(&mut QueryCursor) -> R,
+{
+ use std::ops::DerefMut;
+ let mut cursor = QueryCursorHandle::new();
+ func(cursor.deref_mut())
+}
+
lazy_static! {
static ref NEXT_LANGUAGE_ID: AtomicUsize = Default::default();
static ref NEXT_GRAMMAR_ID: AtomicUsize = Default::default();
@@ -211,7 +211,7 @@ struct TextProvider<'a>(&'a Rope);
struct ByteChunks<'a>(text::Chunks<'a>);
-struct QueryCursorHandle(Option<QueryCursor>);
+pub(crate) struct QueryCursorHandle(Option<QueryCursor>);
impl SyntaxMap {
pub fn new() -> Self {
@@ -1739,7 +1739,7 @@ impl<'a> Iterator for ByteChunks<'a> {
}
impl QueryCursorHandle {
- pub(crate) fn new() -> Self {
+ pub fn new() -> Self {
let mut cursor = QUERY_CURSORS.lock().pop().unwrap_or_else(QueryCursor::new);
cursor.set_match_limit(64);
QueryCursorHandle(Some(cursor))
@@ -37,7 +37,9 @@ serde.workspace = true
serde_json.workspace = true
sha2.workspace = true
smol.workspace = true
+tree-sitter.workspace = true
util.workspace = true
+unindent.workspace = true
worktree.workspace = true
[dev-dependencies]
@@ -1,9 +1,24 @@
-use language::{with_parser, Grammar, Tree};
+use language::{with_parser, with_query_cursor, Grammar};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
-use std::{cmp, ops::Range, sync::Arc};
+use std::{
+ cmp::{self, Reverse},
+ ops::Range,
+ sync::Arc,
+};
+use tree_sitter::QueryCapture;
+use util::ResultExt as _;
+
+#[derive(Copy, Clone)]
+struct ChunkSizeRange {
+ min: usize,
+ max: usize,
+}
-const CHUNK_THRESHOLD: usize = 1500;
+const CHUNK_SIZE_RANGE: ChunkSizeRange = ChunkSizeRange {
+ min: 1024,
+ max: 8192,
+};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Chunk {
@@ -12,396 +27,337 @@ pub struct Chunk {
}
pub fn chunk_text(text: &str, grammar: Option<&Arc<Grammar>>) -> Vec<Chunk> {
- if let Some(grammar) = grammar {
- let tree = with_parser(|parser| {
- parser
- .set_language(&grammar.ts_language)
- .expect("incompatible grammar");
- parser.parse(&text, None).expect("invalid language")
- });
-
- chunk_parse_tree(tree, &text, CHUNK_THRESHOLD)
- } else {
- chunk_lines(&text)
- }
+ chunk_text_with_size_range(text, grammar, CHUNK_SIZE_RANGE)
}
-fn chunk_parse_tree(tree: Tree, text: &str, chunk_threshold: usize) -> Vec<Chunk> {
- let mut chunk_ranges = Vec::new();
- let mut cursor = tree.walk();
-
- let mut range = 0..0;
- loop {
- let node = cursor.node();
-
- // If adding the node to the current chunk exceeds the threshold
- if node.end_byte() - range.start > chunk_threshold {
- // Try to descend into its first child. If we can't, flush the current
- // range and try again.
- if cursor.goto_first_child() {
- continue;
- } else if !range.is_empty() {
- chunk_ranges.push(range.clone());
- range.start = range.end;
- continue;
- }
-
- // If we get here, the node itself has no children but is larger than the threshold.
- // Break its text into arbitrary chunks.
- split_text(text, range.clone(), node.end_byte(), &mut chunk_ranges);
- }
- range.end = node.end_byte();
-
- // If we get here, we consumed the node. Advance to the next child, ascending if there isn't one.
- while !cursor.goto_next_sibling() {
- if !cursor.goto_parent() {
- if !range.is_empty() {
- chunk_ranges.push(range);
- }
+fn chunk_text_with_size_range(
+ text: &str,
+ grammar: Option<&Arc<Grammar>>,
+ size_config: ChunkSizeRange,
+) -> Vec<Chunk> {
+ let mut syntactic_ranges = Vec::new();
- return chunk_ranges
- .into_iter()
- .map(|range| {
- let digest = Sha256::digest(&text[range.clone()]).into();
- Chunk { range, digest }
- })
- .collect();
+ if let Some(grammar) = grammar {
+ if let Some(outline) = grammar.outline_config.as_ref() {
+ let tree = with_parser(|parser| {
+ parser.set_language(&grammar.ts_language).log_err()?;
+ parser.parse(&text, None)
+ });
+
+ if let Some(tree) = tree {
+ with_query_cursor(|cursor| {
+ // Retrieve a list of ranges of outline items (types, functions, etc) in the document.
+ // Omit single-line outline items (e.g. struct fields, constant declarations), because
+ // we'll already be attempting to split on lines.
+ syntactic_ranges = cursor
+ .matches(&outline.query, tree.root_node(), text.as_bytes())
+ .filter_map(|mat| {
+ mat.captures
+ .iter()
+ .find_map(|QueryCapture { node, index }| {
+ if *index == outline.item_capture_ix {
+ if node.end_position().row > node.start_position().row {
+ return Some(node.byte_range());
+ }
+ }
+ None
+ })
+ })
+ .collect::<Vec<_>>();
+ syntactic_ranges
+ .sort_unstable_by_key(|range| (range.start, Reverse(range.end)));
+ });
}
}
}
+
+ chunk_text_with_syntactic_ranges(text, &syntactic_ranges, size_config)
}
-fn chunk_lines(text: &str) -> Vec<Chunk> {
- let mut chunk_ranges = Vec::new();
+fn chunk_text_with_syntactic_ranges(
+ text: &str,
+ mut syntactic_ranges: &[Range<usize>],
+ size_config: ChunkSizeRange,
+) -> Vec<Chunk> {
+ let mut chunks = Vec::new();
let mut range = 0..0;
-
- let mut newlines = text.match_indices('\n').peekable();
- while let Some((newline_ix, _)) = newlines.peek() {
- let newline_ix = newline_ix + 1;
- if newline_ix - range.start <= CHUNK_THRESHOLD {
- range.end = newline_ix;
- newlines.next();
+ let mut range_end_nesting_depth = 0;
+
+ // Try to split the text at line boundaries.
+ let mut line_ixs = text
+ .match_indices('\n')
+ .map(|(ix, _)| ix + 1)
+ .chain(if text.ends_with('\n') {
+ None
} else {
+ Some(text.len())
+ })
+ .peekable();
+
+ while let Some(&line_ix) = line_ixs.peek() {
+ // If the current position is beyond the maximum chunk size, then
+ // start a new chunk.
+ if line_ix - range.start > size_config.max {
if range.is_empty() {
- split_text(text, range, newline_ix, &mut chunk_ranges);
- range = newline_ix..newline_ix;
+ range.end = cmp::min(range.start + size_config.max, line_ix);
+ while !text.is_char_boundary(range.end) {
+ range.end -= 1;
+ }
+ }
+
+ chunks.push(Chunk {
+ range: range.clone(),
+ digest: Sha256::digest(&text[range.clone()]).into(),
+ });
+ range_end_nesting_depth = 0;
+ range.start = range.end;
+ continue;
+ }
+
+ // Discard any syntactic ranges that end before the current position.
+ while let Some(first_item) = syntactic_ranges.first() {
+ if first_item.end < line_ix {
+ syntactic_ranges = &syntactic_ranges[1..];
+ continue;
} else {
- chunk_ranges.push(range.clone());
- range.start = range.end;
+ break;
}
}
- }
- if !range.is_empty() {
- chunk_ranges.push(range);
+ // Count how many syntactic ranges contain the current position.
+ let mut nesting_depth = 0;
+ for range in syntactic_ranges {
+ if range.start > line_ix {
+ break;
+ }
+ if range.start < line_ix && range.end > line_ix {
+ nesting_depth += 1;
+ }
+ }
+
+ // Extend the current range to this position, unless an earlier candidate
+ // end position was less nested syntactically.
+ if range.len() < size_config.min || nesting_depth <= range_end_nesting_depth {
+ range.end = line_ix;
+ range_end_nesting_depth = nesting_depth;
+ }
+
+ line_ixs.next();
}
- chunk_ranges
- .into_iter()
- .map(|range| Chunk {
+ if !range.is_empty() {
+ chunks.push(Chunk {
+ range: range.clone(),
digest: Sha256::digest(&text[range.clone()]).into(),
- range,
- })
- .collect()
-}
-
-fn split_text(
- text: &str,
- mut range: Range<usize>,
- max_end: usize,
- chunk_ranges: &mut Vec<Range<usize>>,
-) {
- while range.start < max_end {
- range.end = cmp::min(range.start + CHUNK_THRESHOLD, max_end);
- while !text.is_char_boundary(range.end) {
- range.end -= 1;
- }
- chunk_ranges.push(range.clone());
- range.start = range.end;
+ });
}
+
+ chunks
}
#[cfg(test)]
mod tests {
use super::*;
use language::{tree_sitter_rust, Language, LanguageConfig, LanguageMatcher};
+ use unindent::Unindent as _;
- // This example comes from crates/gpui/examples/window_positioning.rs which
- // has the property of being CHUNK_THRESHOLD < TEXT.len() < 2*CHUNK_THRESHOLD
- static TEXT: &str = r#"
- use gpui::*;
+ #[test]
+ fn test_chunk_text_with_syntax() {
+ let language = rust_language();
+
+ let text = "
+ struct Person {
+ first_name: String,
+ last_name: String,
+ age: u32,
+ }
- struct WindowContent {
- text: SharedString,
- }
+ impl Person {
+ fn new(first_name: String, last_name: String, age: u32) -> Self {
+ Self { first_name, last_name, age }
+ }
- impl Render for WindowContent {
- fn render(&mut self, _cx: &mut ViewContext<Self>) -> impl IntoElement {
- div()
- .flex()
- .bg(rgb(0x1e2025))
- .size_full()
- .justify_center()
- .items_center()
- .text_xl()
- .text_color(rgb(0xffffff))
- .child(self.text.clone())
- }
- }
+ fn first_name(&self) -> &str {
+ &self.first_name
+ }
- fn main() {
- App::new().run(|cx: &mut AppContext| {
- // Create several new windows, positioned in the top right corner of each screen
-
- for screen in cx.displays() {
- let options = {
- let popup_margin_width = DevicePixels::from(16);
- let popup_margin_height = DevicePixels::from(-0) - DevicePixels::from(48);
-
- let window_size = Size {
- width: px(400.),
- height: px(72.),
- };
-
- let screen_bounds = screen.bounds();
- let size: Size<DevicePixels> = window_size.into();
-
- let bounds = gpui::Bounds::<DevicePixels> {
- origin: screen_bounds.upper_right()
- - point(size.width + popup_margin_width, popup_margin_height),
- size: window_size.into(),
- };
-
- WindowOptions {
- // Set the bounds of the window in screen coordinates
- bounds: Some(bounds),
- // Specify the display_id to ensure the window is created on the correct screen
- display_id: Some(screen.id()),
-
- titlebar: None,
- window_background: WindowBackgroundAppearance::default(),
- focus: false,
- show: true,
- kind: WindowKind::PopUp,
- is_movable: false,
- fullscreen: false,
- app_id: None,
- }
- };
-
- cx.open_window(options, |cx| {
- cx.new_view(|_| WindowContent {
- text: format!("{:?}", screen.id()).into(),
- })
- });
- }
- });
- }"#;
+ fn last_name(&self) -> &str {
+ &self.last_name
+ }
- fn setup_rust_language() -> Language {
- Language::new(
- LanguageConfig {
- name: "Rust".into(),
- matcher: LanguageMatcher {
- path_suffixes: vec!["rs".to_string()],
- ..Default::default()
- },
- ..Default::default()
+ fn age(&self) -> usize {
+ self.ages
+ }
+ }
+ "
+ .unindent();
+
+ let chunks = chunk_text_with_size_range(
+ &text,
+ language.grammar(),
+ ChunkSizeRange {
+ min: text.find('}').unwrap(),
+ max: text.find("Self {").unwrap(),
},
- Some(tree_sitter_rust::language()),
- )
- }
-
- #[test]
- fn test_chunk_text() {
- let text = "a\n".repeat(1000);
- let chunks = chunk_text(&text, None);
- assert_eq!(
- chunks.len(),
- ((2000_f64) / (CHUNK_THRESHOLD as f64)).ceil() as usize
);
- }
-
- #[test]
- fn test_chunk_text_grammar() {
- // Let's set up a big text with some known segments
- // We'll then chunk it and verify that the chunks are correct
-
- let language = setup_rust_language();
-
- let chunks = chunk_text(TEXT, language.grammar());
- assert_eq!(chunks.len(), 2);
-
- assert_eq!(chunks[0].range.start, 0);
- assert_eq!(chunks[0].range.end, 1498);
- // The break between chunks is right before the "Specify the display_id" comment
-
- assert_eq!(chunks[1].range.start, 1498);
- assert_eq!(chunks[1].range.end, 2434);
- }
-
- #[test]
- fn test_chunk_parse_tree() {
- let language = setup_rust_language();
- let grammar = language.grammar().unwrap();
-
- let tree = with_parser(|parser| {
- parser
- .set_language(&grammar.ts_language)
- .expect("incompatible grammar");
- parser.parse(TEXT, None).expect("invalid language")
- });
-
- let chunks = chunk_parse_tree(tree, TEXT, 250);
- assert_eq!(chunks.len(), 11);
- }
- #[test]
- fn test_chunk_unparsable() {
- // Even if a chunk is unparsable, we should still be able to chunk it
- let language = setup_rust_language();
- let grammar = language.grammar().unwrap();
-
- let text = r#"fn main() {"#;
- let tree = with_parser(|parser| {
- parser
- .set_language(&grammar.ts_language)
- .expect("incompatible grammar");
- parser.parse(text, None).expect("invalid language")
- });
+ // The entire impl cannot fit in a chunk, so it is split.
+ // Within the impl, two methods can fit in a chunk.
+ assert_chunks(
+ &text,
+ &chunks,
+ &[
+ "struct Person {", // ...
+ "impl Person {",
+ " fn first_name",
+ " fn age",
+ ],
+ );
- let chunks = chunk_parse_tree(tree, text, 250);
- assert_eq!(chunks.len(), 1);
+ let text = "
+ struct T {}
+ struct U {}
+ struct V {}
+ struct W {
+ a: T,
+ b: U,
+ }
+ "
+ .unindent();
+
+ let chunks = chunk_text_with_size_range(
+ &text,
+ language.grammar(),
+ ChunkSizeRange {
+ min: text.find('{').unwrap(),
+ max: text.find('V').unwrap(),
+ },
+ );
- assert_eq!(chunks[0].range.start, 0);
- assert_eq!(chunks[0].range.end, 11);
+ // Two single-line structs can fit in a chunk.
+ // The last struct cannot fit in a chunk together
+ // with the previous single-line struct.
+ assert_chunks(
+ &text,
+ &chunks,
+ &[
+ "struct T", // ...
+ "struct V", // ...
+ "struct W", // ...
+ "}",
+ ],
+ );
}
#[test]
- fn test_empty_text() {
- let language = setup_rust_language();
- let grammar = language.grammar().unwrap();
-
- let tree = with_parser(|parser| {
- parser
- .set_language(&grammar.ts_language)
- .expect("incompatible grammar");
- parser.parse("", None).expect("invalid language")
- });
+ fn test_chunk_with_long_lines() {
+ let language = rust_language();
+
+ let text = "
+ struct S { a: u32 }
+ struct T { a: u64 }
+ struct U { a: u64, b: u64, c: u64, d: u64, e: u64, f: u64, g: u64, h: u64, i: u64, j: u64 }
+ struct W { a: u64, b: u64, c: u64, d: u64, e: u64, f: u64, g: u64, h: u64, i: u64, j: u64 }
+ "
+ .unindent();
+
+ let chunks = chunk_text_with_size_range(
+ &text,
+ language.grammar(),
+ ChunkSizeRange { min: 32, max: 64 },
+ );
- let chunks = chunk_parse_tree(tree, "", CHUNK_THRESHOLD);
- assert!(chunks.is_empty(), "Chunks should be empty for empty text");
+ // The line is too long to fit in one chunk
+ assert_chunks(
+ &text,
+ &chunks,
+ &[
+ "struct S {", // ...
+ "struct U",
+ "4, h: u64, i: u64", // ...
+ "struct W",
+ "4, h: u64, i: u64", // ...
+ ],
+ );
}
- #[test]
- fn test_single_large_node() {
- let large_text = "static ".to_owned() + "a".repeat(CHUNK_THRESHOLD - 1).as_str() + " = 2";
-
- let language = setup_rust_language();
- let grammar = language.grammar().unwrap();
-
- let tree = with_parser(|parser| {
- parser
- .set_language(&grammar.ts_language)
- .expect("incompatible grammar");
- parser.parse(&large_text, None).expect("invalid language")
- });
-
- let chunks = chunk_parse_tree(tree, &large_text, CHUNK_THRESHOLD);
+ #[track_caller]
+ fn assert_chunks(text: &str, chunks: &[Chunk], expected_chunk_text_prefixes: &[&str]) {
+ check_chunk_invariants(text, chunks);
assert_eq!(
chunks.len(),
- 3,
- "Large chunks are broken up according to grammar as best as possible"
+ expected_chunk_text_prefixes.len(),
+ "unexpected number of chunks: {chunks:?}",
);
- // Expect chunks to be static, aaaaaa..., and = 2
- assert_eq!(chunks[0].range.start, 0);
- assert_eq!(chunks[0].range.end, "static".len());
-
- assert_eq!(chunks[1].range.start, "static".len());
- assert_eq!(chunks[1].range.end, "static".len() + CHUNK_THRESHOLD);
-
- assert_eq!(chunks[2].range.start, "static".len() + CHUNK_THRESHOLD);
- assert_eq!(chunks[2].range.end, large_text.len());
+ let mut prev_chunk_end = 0;
+ for (ix, chunk) in chunks.iter().enumerate() {
+ let expected_prefix = expected_chunk_text_prefixes[ix];
+ let chunk_text = &text[chunk.range.clone()];
+ if !chunk_text.starts_with(expected_prefix) {
+ let chunk_prefix_offset = text[prev_chunk_end..].find(expected_prefix);
+ if let Some(chunk_prefix_offset) = chunk_prefix_offset {
+ panic!(
+ "chunk {ix} starts at unexpected offset {}. expected {}",
+ chunk.range.start,
+ chunk_prefix_offset + prev_chunk_end
+ );
+ } else {
+ panic!("invalid expected chunk prefix {ix}: {expected_prefix:?}");
+ }
+ }
+ prev_chunk_end = chunk.range.end;
+ }
}
- #[test]
- fn test_multiple_small_nodes() {
- let small_text = "a b c d e f g h i j k l m n o p q r s t u v w x y z";
- let language = setup_rust_language();
- let grammar = language.grammar().unwrap();
-
- let tree = with_parser(|parser| {
- parser
- .set_language(&grammar.ts_language)
- .expect("incompatible grammar");
- parser.parse(small_text, None).expect("invalid language")
- });
+ #[track_caller]
+ fn check_chunk_invariants(text: &str, chunks: &[Chunk]) {
+ for (ix, chunk) in chunks.iter().enumerate() {
+ if ix > 0 && chunk.range.start != chunks[ix - 1].range.end {
+ panic!("chunk ranges are not contiguous: {:?}", chunks);
+ }
+ }
- let chunks = chunk_parse_tree(tree, small_text, 5);
- assert!(
- chunks.len() > 1,
- "Should have multiple chunks for multiple small nodes"
- );
+ if text.is_empty() {
+ assert!(chunks.is_empty())
+ } else if chunks.first().unwrap().range.start != 0
+ || chunks.last().unwrap().range.end != text.len()
+ {
+ panic!("chunks don't cover entire text {:?}", chunks);
+ }
}
#[test]
- fn test_node_with_children() {
- let nested_text = "fn main() { let a = 1; let b = 2; }";
- let language = setup_rust_language();
- let grammar = language.grammar().unwrap();
-
- let tree = with_parser(|parser| {
- parser
- .set_language(&grammar.ts_language)
- .expect("incompatible grammar");
- parser.parse(nested_text, None).expect("invalid language")
- });
-
- let chunks = chunk_parse_tree(tree, nested_text, 10);
- assert!(
- chunks.len() > 1,
- "Should have multiple chunks for a node with children"
+ fn test_chunk_text() {
+ let text = "a\n".repeat(1000);
+ let chunks = chunk_text(&text, None);
+ assert_eq!(
+ chunks.len(),
+ ((2000_f64) / (CHUNK_SIZE_RANGE.max as f64)).ceil() as usize
);
}
- #[test]
- fn test_text_with_unparsable_sections() {
- // This test uses purposefully hit-or-miss sizing of 11 characters per likely chunk
- let mixed_text = "fn main() { let a = 1; let b = 2; } unparsable bits here";
- let language = setup_rust_language();
- let grammar = language.grammar().unwrap();
-
- let tree = with_parser(|parser| {
- parser
- .set_language(&grammar.ts_language)
- .expect("incompatible grammar");
- parser.parse(mixed_text, None).expect("invalid language")
- });
-
- let chunks = chunk_parse_tree(tree, mixed_text, 11);
- assert!(
- chunks.len() > 1,
- "Should handle both parsable and unparsable sections correctly"
- );
-
- let expected_chunks = [
- "fn main() {",
- " let a = 1;",
- " let b = 2;",
- " }",
- " unparsable",
- " bits here",
- ];
-
- for (i, chunk) in chunks.iter().enumerate() {
- assert_eq!(
- &mixed_text[chunk.range.clone()],
- expected_chunks[i],
- "Chunk {} should match",
- i
- );
- }
+ fn rust_language() -> Language {
+ Language::new(
+ LanguageConfig {
+ name: "Rust".into(),
+ matcher: LanguageMatcher {
+ path_suffixes: vec!["rs".to_string()],
+ ..Default::default()
+ },
+ ..Default::default()
+ },
+ Some(tree_sitter_rust::language()),
+ )
+ .with_outline_query(
+ "
+ (function_item name: (_) @name) @item
+ (impl_item type: (_) @name) @item
+ (struct_item name: (_) @name) @item
+ (field_declaration name: (_) @name) @item
+ ",
+ )
+ .unwrap()
}
}