@@ -162,6 +162,11 @@ impl VectorDatabase {
[],
)?;
+ db.execute(
+ "CREATE UNIQUE INDEX files_worktree_id_and_relative_path ON files (worktree_id, relative_path)",
+ [],
+ )?;
+
db.execute(
"CREATE TABLE documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -206,43 +211,37 @@ impl VectorDatabase {
// Return the existing ID, if both the file and mtime match
let mtime = Timestamp::from(mtime);
- let mut existing_id_query = db.prepare("SELECT id FROM files WHERE worktree_id = ?1 AND relative_path = ?2 AND mtime_seconds = ?3 AND mtime_nanos = ?4")?;
- let existing_id = existing_id_query
- .query_row(
- params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos],
- |row| Ok(row.get::<_, i64>(0)?),
- );
+ db.execute(
+ "
+ REPLACE INTO files
+ (worktree_id, relative_path, mtime_seconds, mtime_nanos)
+ VALUES (?1, ?2, ?3, ?4)
+ ",
+ params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos],
+ )?;
+
+ let file_id = db.last_insert_rowid();
+
+ let mut query = db.prepare(
+ "
+ INSERT INTO documents
+ (file_id, start_byte, end_byte, name, embedding, digest)
+ VALUES (?1, ?2, ?3, ?4, ?5, ?6)
+ ",
+ )?;
- let file_id = if existing_id.is_ok() {
- // If already exists, just return the existing id
- existing_id?
- } else {
- // Delete Existing Row
- db.execute(
- "DELETE FROM files WHERE worktree_id = ?1 AND relative_path = ?2;",
- params![worktree_id, path.to_str()],
- )?;
- db.execute("INSERT INTO files (worktree_id, relative_path, mtime_seconds, mtime_nanos) VALUES (?1, ?2, ?3, ?4);", params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos])?;
- db.last_insert_rowid()
- };
-
- // Currently inserting at approximately 3400 documents a second
- // I imagine we can speed this up with a bulk insert of some kind.
for document in documents {
- db.execute(
- "INSERT INTO documents (file_id, start_byte, end_byte, name, embedding, digest) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
- params![
- file_id,
- document.range.start.to_string(),
- document.range.end.to_string(),
- document.name,
- document.embedding,
- document.digest
- ],
- )?;
- }
-
- Ok(())
+ query.execute(params![
+ file_id,
+ document.range.start.to_string(),
+ document.range.end.to_string(),
+ document.name,
+ document.embedding,
+ document.digest
+ ])?;
+ }
+
+ Ok(())
})
}