diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs index abb47cddf0..6cfd01456d 100644 --- a/crates/semantic_index/src/db.rs +++ b/crates/semantic_index/src/db.rs @@ -162,6 +162,11 @@ impl VectorDatabase { [], )?; + db.execute( + "CREATE UNIQUE INDEX files_worktree_id_and_relative_path ON files (worktree_id, relative_path)", + [], + )?; + db.execute( "CREATE TABLE documents ( id INTEGER PRIMARY KEY AUTOINCREMENT, @@ -206,43 +211,37 @@ impl VectorDatabase { // Return the existing ID, if both the file and mtime match let mtime = Timestamp::from(mtime); - let mut existing_id_query = db.prepare("SELECT id FROM files WHERE worktree_id = ?1 AND relative_path = ?2 AND mtime_seconds = ?3 AND mtime_nanos = ?4")?; - let existing_id = existing_id_query - .query_row( - params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos], - |row| Ok(row.get::<_, i64>(0)?), - ); + db.execute( + " + REPLACE INTO files + (worktree_id, relative_path, mtime_seconds, mtime_nanos) + VALUES (?1, ?2, ?3, ?4) + ", + params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos], + )?; - let file_id = if existing_id.is_ok() { - // If already exists, just return the existing id - existing_id? - } else { - // Delete Existing Row - db.execute( - "DELETE FROM files WHERE worktree_id = ?1 AND relative_path = ?2;", - params![worktree_id, path.to_str()], - )?; - db.execute("INSERT INTO files (worktree_id, relative_path, mtime_seconds, mtime_nanos) VALUES (?1, ?2, ?3, ?4);", params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos])?; - db.last_insert_rowid() - }; + let file_id = db.last_insert_rowid(); + + let mut query = db.prepare( + " + INSERT INTO documents + (file_id, start_byte, end_byte, name, embedding, digest) + VALUES (?1, ?2, ?3, ?4, ?5, ?6) + ", + )?; - // Currently inserting at approximately 3400 documents a second - // I imagine we can speed this up with a bulk insert of some kind. for document in documents { - db.execute( - "INSERT INTO documents (file_id, start_byte, end_byte, name, embedding, digest) VALUES (?1, ?2, ?3, ?4, ?5, ?6)", - params![ - file_id, - document.range.start.to_string(), - document.range.end.to_string(), - document.name, - document.embedding, - document.digest - ], - )?; - } + query.execute(params![ + file_id, + document.range.start.to_string(), + document.range.end.to_string(), + document.name, + document.embedding, + document.digest + ])?; + } - Ok(()) + Ok(()) }) } diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs index a8518ce695..e155fe3c74 100644 --- a/crates/semantic_index/src/semantic_index.rs +++ b/crates/semantic_index/src/semantic_index.rs @@ -38,7 +38,7 @@ use util::{ }; use workspace::WorkspaceCreated; -const SEMANTIC_INDEX_VERSION: usize = 8; +const SEMANTIC_INDEX_VERSION: usize = 9; const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(600); const EMBEDDING_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_millis(250);