1use anyhow::Result;
2use async_compat::{Compat, CompatExt};
3use conv::ValueFrom;
4use sqlx::{migrate::MigrateDatabase, Pool, Sqlite, SqlitePool};
5use std::time::{Duration, Instant};
6
7use crate::IndexedFile;
8
// Location of the SQLite database that stores the file and document embeddings.
// The original note says this should live in the user's dev Zed path, near
// where the workspace DB sits.
// TODO: the final location is still undecided; for now this is a bare relative name.
const VECTOR_DB_URL: &str = "embeddings_db";
13
/// Entry point for the embeddings store backed by the SQLite database at
/// `VECTOR_DB_URL`.
///
/// The type has no fields: its associated functions each open their own
/// connection pool to the database when called.
pub struct VectorDatabase {}
15
16impl VectorDatabase {
17 pub async fn initialize_database() -> Result<()> {
18 // If database doesnt exist create database
19 if !Sqlite::database_exists(VECTOR_DB_URL)
20 .compat()
21 .await
22 .unwrap_or(false)
23 {
24 Sqlite::create_database(VECTOR_DB_URL).compat().await?;
25 }
26
27 let db = SqlitePool::connect(VECTOR_DB_URL).compat().await?;
28
29 // Initialize Vector Databasing Tables
30 // We may be able to skip this assuming the database is never created
31 // without creating the tables at the same time.
32 sqlx::query(
33 "CREATE TABLE IF NOT EXISTS files (
34 id INTEGER PRIMARY KEY AUTOINCREMENT,
35 path NVARCHAR(100) NOT NULL,
36 sha1 NVARCHAR(40) NOT NULL
37 )",
38 )
39 .execute(&db)
40 .compat()
41 .await?;
42
43 sqlx::query(
44 "CREATE TABLE IF NOT EXISTS documents (
45 id INTEGER PRIMARY KEY AUTOINCREMENT,
46 file_id INTEGER NOT NULL,
47 offset INTEGER NOT NULL,
48 name NVARCHAR(100) NOT NULL,
49 embedding BLOB NOT NULL,
50 FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE
51 )",
52 )
53 .execute(&db)
54 .compat()
55 .await?;
56
57 Ok(())
58 }
59
60 pub async fn insert_file(indexed_file: IndexedFile) -> Result<()> {
61 // Write to files table, and return generated id.
62 let db = SqlitePool::connect(VECTOR_DB_URL).compat().await?;
63
64 let files_insert = sqlx::query("INSERT INTO files (path, sha1) VALUES ($1, $2)")
65 .bind(indexed_file.path.to_str())
66 .bind(indexed_file.sha1)
67 .execute(&db)
68 .compat()
69 .await?;
70
71 let inserted_id = files_insert.last_insert_rowid();
72
73 // I stole this from https://stackoverflow.com/questions/71829931/how-do-i-convert-a-negative-f32-value-to-binary-string-and-back-again
74 // I imagine there is a better way to serialize to/from blob
75 fn get_binary_from_values(values: Vec<f32>) -> String {
76 let bits: Vec<_> = values.iter().map(|v| v.to_bits().to_string()).collect();
77 bits.join(";")
78 }
79
80 fn get_values_from_binary(bin: &str) -> Vec<f32> {
81 (0..bin.len() / 32)
82 .map(|i| {
83 let start = i * 32;
84 let end = start + 32;
85 f32::from_bits(u32::from_str_radix(&bin[start..end], 2).unwrap())
86 })
87 .collect()
88 }
89
90 // Currently inserting at approximately 3400 documents a second
91 // I imagine we can speed this up with a bulk insert of some kind.
92 for document in indexed_file.documents {
93 sqlx::query(
94 "INSERT INTO documents (file_id, offset, name, embedding) VALUES ($1, $2, $3, $4)",
95 )
96 .bind(inserted_id)
97 .bind(document.offset.to_string())
98 .bind(document.name)
99 .bind(get_binary_from_values(document.embedding))
100 .execute(&db)
101 .compat()
102 .await?;
103 }
104
105 Ok(())
106 }
107}