parsing.rs

 1use std::{ops::Range, path::PathBuf, sync::Arc, time::SystemTime};
 2
 3use anyhow::{anyhow, Ok, Result};
 4use project::Fs;
 5use tree_sitter::{Parser, QueryCursor};
 6
 7use crate::PendingFile;
 8
 9#[derive(Debug, PartialEq, Clone)]
10pub struct Document {
11    pub offset: usize,
12    pub name: String,
13    pub embedding: Vec<f32>,
14}
15
16#[derive(Debug, PartialEq, Clone)]
17pub struct ParsedFile {
18    pub path: PathBuf,
19    pub mtime: SystemTime,
20    pub documents: Vec<Document>,
21}
22
23pub struct CodeContextRetriever {
24    pub parser: Parser,
25    pub cursor: QueryCursor,
26    pub fs: Arc<dyn Fs>,
27}
28
29impl CodeContextRetriever {
30    pub async fn parse_file(
31        &mut self,
32        pending_file: PendingFile,
33    ) -> Result<(ParsedFile, Vec<String>)> {
34        let grammar = pending_file
35            .language
36            .grammar()
37            .ok_or_else(|| anyhow!("no grammar for language"))?;
38        let embedding_config = grammar
39            .embedding_config
40            .as_ref()
41            .ok_or_else(|| anyhow!("no embedding queries"))?;
42
43        let content = self.fs.load(&pending_file.absolute_path).await?;
44
45        self.parser.set_language(grammar.ts_language).unwrap();
46
47        let tree = self
48            .parser
49            .parse(&content, None)
50            .ok_or_else(|| anyhow!("parsing failed"))?;
51
52        let mut documents = Vec::new();
53        let mut context_spans = Vec::new();
54
55        // Iterate through query matches
56        for mat in self.cursor.matches(
57            &embedding_config.query,
58            tree.root_node(),
59            content.as_bytes(),
60        ) {
61            let mut item_range: Option<Range<usize>> = None;
62            let mut name_range: Option<Range<usize>> = None;
63            for capture in mat.captures {
64                if capture.index == embedding_config.item_capture_ix {
65                    item_range = Some(capture.node.byte_range());
66                } else if capture.index == embedding_config.name_capture_ix {
67                    name_range = Some(capture.node.byte_range());
68                }
69            }
70
71            if let Some((item_range, name_range)) = item_range.zip(name_range) {
72                if let Some((item, name)) =
73                    content.get(item_range.clone()).zip(content.get(name_range))
74                {
75                    context_spans.push(item.to_string());
76                    documents.push(Document {
77                        name: name.to_string(),
78                        offset: item_range.start,
79                        embedding: Vec::new(),
80                    });
81                }
82            }
83        }
84
85        return Ok((
86            ParsedFile {
87                path: pending_file.relative_path,
88                mtime: pending_file.modified_time,
89                documents,
90            },
91            context_spans,
92        ));
93    }
94}