db.rs

  1use anyhow::Result;
  2use async_compat::{Compat, CompatExt};
  3use conv::ValueFrom;
  4use sqlx::{migrate::MigrateDatabase, Pool, Sqlite, SqlitePool};
  5use std::time::{Duration, Instant};
  6
  7use crate::IndexedFile;
  8
  9// Connection string for the local SQLite store holding embeddings (per the original note,
 10// it lands in the user's dev Zed path). Open question: where should this file live?
 11// Presumably near the workspace DB -- TODO(review): confirm.
 12const VECTOR_DB_URL: &str = "embeddings_db";
 13
 14pub struct VectorDatabase {}
 15
 16impl VectorDatabase {
 17    pub async fn initialize_database() -> Result<()> {
 18        // If database doesnt exist create database
 19        if !Sqlite::database_exists(VECTOR_DB_URL)
 20            .compat()
 21            .await
 22            .unwrap_or(false)
 23        {
 24            Sqlite::create_database(VECTOR_DB_URL).compat().await?;
 25        }
 26
 27        let db = SqlitePool::connect(VECTOR_DB_URL).compat().await?;
 28
 29        // Initialize Vector Databasing Tables
 30        // We may be able to skip this assuming the database is never created
 31        // without creating the tables at the same time.
 32        sqlx::query(
 33            "CREATE TABLE IF NOT EXISTS files (
 34            id INTEGER PRIMARY KEY AUTOINCREMENT,
 35            path NVARCHAR(100) NOT NULL,
 36            sha1 NVARCHAR(40) NOT NULL
 37            )",
 38        )
 39        .execute(&db)
 40        .compat()
 41        .await?;
 42
 43        sqlx::query(
 44            "CREATE TABLE IF NOT EXISTS documents (
 45            id INTEGER PRIMARY KEY AUTOINCREMENT,
 46            file_id INTEGER NOT NULL,
 47            offset INTEGER NOT NULL,
 48            name NVARCHAR(100) NOT NULL,
 49            embedding BLOB NOT NULL,
 50            FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE
 51            )",
 52        )
 53        .execute(&db)
 54        .compat()
 55        .await?;
 56
 57        Ok(())
 58    }
 59
 60    pub async fn insert_file(indexed_file: IndexedFile) -> Result<()> {
 61        // Write to files table, and return generated id.
 62        let db = SqlitePool::connect(VECTOR_DB_URL).compat().await?;
 63
 64        let files_insert = sqlx::query("INSERT INTO files (path, sha1) VALUES ($1, $2)")
 65            .bind(indexed_file.path.to_str())
 66            .bind(indexed_file.sha1)
 67            .execute(&db)
 68            .compat()
 69            .await?;
 70
 71        let inserted_id = files_insert.last_insert_rowid();
 72
 73        // I stole this from https://stackoverflow.com/questions/71829931/how-do-i-convert-a-negative-f32-value-to-binary-string-and-back-again
 74        // I imagine there is a better way to serialize to/from blob
 75        fn get_binary_from_values(values: Vec<f32>) -> String {
 76            let bits: Vec<_> = values.iter().map(|v| v.to_bits().to_string()).collect();
 77            bits.join(";")
 78        }
 79
 80        fn get_values_from_binary(bin: &str) -> Vec<f32> {
 81            (0..bin.len() / 32)
 82                .map(|i| {
 83                    let start = i * 32;
 84                    let end = start + 32;
 85                    f32::from_bits(u32::from_str_radix(&bin[start..end], 2).unwrap())
 86                })
 87                .collect()
 88        }
 89
 90        // Currently inserting at approximately 3400 documents a second
 91        // I imagine we can speed this up with a bulk insert of some kind.
 92        for document in indexed_file.documents {
 93            sqlx::query(
 94                "INSERT INTO documents (file_id, offset, name, embedding) VALUES ($1, $2, $3, $4)",
 95            )
 96            .bind(inserted_id)
 97            .bind(document.offset.to_string())
 98            .bind(document.name)
 99            .bind(get_binary_from_values(document.embedding))
100            .execute(&db)
101            .compat()
102            .await?;
103        }
104
105        Ok(())
106    }
107}