1use std::{ops::Range, path::PathBuf, sync::Arc, time::SystemTime};
2
3use anyhow::{anyhow, Ok, Result};
4use project::Fs;
5use tree_sitter::{Parser, QueryCursor};
6
7use crate::PendingFile;
8
/// A single embeddable item extracted from a parsed source file.
#[derive(Clone, Debug, PartialEq)]
pub struct Document {
    /// Byte offset of the start of the matched item within the file content.
    pub offset: usize,
    /// Text of the node captured as the item's name.
    pub name: String,
    /// Embedding vector for the item; `parse_file` leaves it empty
    /// (presumably it is filled in by a later embedding step — confirm with callers).
    pub embedding: Vec<f32>,
}
15
16#[derive(Debug, PartialEq, Clone)]
17pub struct ParsedFile {
18 pub path: PathBuf,
19 pub mtime: SystemTime,
20 pub documents: Vec<Document>,
21}
22
23pub struct CodeContextRetriever {
24 pub parser: Parser,
25 pub cursor: QueryCursor,
26 pub fs: Arc<dyn Fs>,
27}
28
29impl CodeContextRetriever {
30 pub async fn parse_file(
31 &mut self,
32 pending_file: PendingFile,
33 ) -> Result<(ParsedFile, Vec<String>)> {
34 let grammar = pending_file
35 .language
36 .grammar()
37 .ok_or_else(|| anyhow!("no grammar for language"))?;
38 let embedding_config = grammar
39 .embedding_config
40 .as_ref()
41 .ok_or_else(|| anyhow!("no embedding queries"))?;
42
43 let content = self.fs.load(&pending_file.absolute_path).await?;
44
45 self.parser.set_language(grammar.ts_language).unwrap();
46
47 let tree = self
48 .parser
49 .parse(&content, None)
50 .ok_or_else(|| anyhow!("parsing failed"))?;
51
52 let mut documents = Vec::new();
53 let mut context_spans = Vec::new();
54
55 // Iterate through query matches
56 for mat in self.cursor.matches(
57 &embedding_config.query,
58 tree.root_node(),
59 content.as_bytes(),
60 ) {
61 let mut item_range: Option<Range<usize>> = None;
62 let mut name_range: Option<Range<usize>> = None;
63 for capture in mat.captures {
64 if capture.index == embedding_config.item_capture_ix {
65 item_range = Some(capture.node.byte_range());
66 } else if capture.index == embedding_config.name_capture_ix {
67 name_range = Some(capture.node.byte_range());
68 }
69 }
70
71 if let Some((item_range, name_range)) = item_range.zip(name_range) {
72 if let Some((item, name)) =
73 content.get(item_range.clone()).zip(content.get(name_range))
74 {
75 context_spans.push(item.to_string());
76 documents.push(Document {
77 name: name.to_string(),
78 offset: item_range.start,
79 embedding: Vec::new(),
80 });
81 }
82 }
83 }
84
85 return Ok((
86 ParsedFile {
87 path: pending_file.relative_path,
88 mtime: pending_file.modified_time,
89 documents,
90 },
91 context_spans,
92 ));
93 }
94}