Mirror of https://github.com/zed-industries/zed.git, synced 2025-01-30 14:17:02 +00:00

optimize insert file in vector database

Co-authored-by: Max <max@zed.dev>

parent 524533cfb2
commit e86964eb5d

2 changed files with 33 additions and 34 deletions
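
In outline: the files table gains a UNIQUE index on (worktree_id, relative_path); the insert path swaps a SELECT-then-branch (reuse the existing id, or DELETE and re-INSERT) for a single REPLACE INTO; the per-document INSERT moves into a prepared statement that is reused across the loop; and SEMANTIC_INDEX_VERSION is bumped from 8 to 9, presumably to invalidate indexes built against the old schema. File names are missing from this capture; the hunk contexts place the first two hunks inside impl VectorDatabase and the third among the semantic index constants.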

@@ -162,6 +162,11 @@ impl VectorDatabase {
             [],
         )?;
 
+        db.execute(
+            "CREATE UNIQUE INDEX files_worktree_id_and_relative_path ON files (worktree_id, relative_path)",
+            [],
+        )?;
+
         db.execute(
             "CREATE TABLE documents (
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
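
The unique index added above is what lets the next hunk use REPLACE INTO as an upsert: when an insert collides on (worktree_id, relative_path), SQLite deletes the old row and inserts the new one. A minimal rusqlite sketch of that behavior, assuming only the columns visible in this diff rather than Zed's full schema:

    // Sketch: why the UNIQUE index matters for REPLACE INTO (rusqlite, in-memory DB).
    use rusqlite::{params, Connection, Result};

    fn main() -> Result<()> {
        let db = Connection::open_in_memory()?;
        db.execute(
            "CREATE TABLE files (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                worktree_id INTEGER,
                relative_path VARCHAR,
                mtime_seconds INTEGER,
                mtime_nanos INTEGER
            )",
            [],
        )?;
        db.execute(
            "CREATE UNIQUE INDEX files_worktree_id_and_relative_path ON files (worktree_id, relative_path)",
            [],
        )?;

        // First REPLACE inserts; the second collides on the unique index, so SQLite
        // deletes the old row and inserts the new one instead of adding a duplicate.
        for (secs, nanos) in [(1, 0), (2, 0)] {
            db.execute(
                "REPLACE INTO files (worktree_id, relative_path, mtime_seconds, mtime_nanos)
                 VALUES (?1, ?2, ?3, ?4)",
                params![1, "src/main.rs", secs, nanos],
            )?;
        }

        let count: i64 = db.query_row("SELECT COUNT(*) FROM files", [], |row| row.get(0))?;
        assert_eq!(count, 1); // still one row per (worktree_id, relative_path)
        Ok(())
    }

Without the unique index, REPLACE INTO behaves like a plain INSERT and the loop above would leave two rows. Note also that the replacement row gets a fresh rowid, which fits the new code reading last_insert_rowid() immediately after the REPLACE.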

@@ -206,43 +211,37 @@ impl VectorDatabase {
         // Return the existing ID, if both the file and mtime match
         let mtime = Timestamp::from(mtime);
 
-        let mut existing_id_query = db.prepare("SELECT id FROM files WHERE worktree_id = ?1 AND relative_path = ?2 AND mtime_seconds = ?3 AND mtime_nanos = ?4")?;
-        let existing_id = existing_id_query
-            .query_row(
-                params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos],
-                |row| Ok(row.get::<_, i64>(0)?),
-            );
-
-        let file_id = if existing_id.is_ok() {
-            // If already exists, just return the existing id
-            existing_id?
-        } else {
-            // Delete Existing Row
-            db.execute(
-                "DELETE FROM files WHERE worktree_id = ?1 AND relative_path = ?2;",
-                params![worktree_id, path.to_str()],
-            )?;
-            db.execute("INSERT INTO files (worktree_id, relative_path, mtime_seconds, mtime_nanos) VALUES (?1, ?2, ?3, ?4);", params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos])?;
-            db.last_insert_rowid()
-        };
-
-        // Currently inserting at approximately 3400 documents a second
-        // I imagine we can speed this up with a bulk insert of some kind.
+        db.execute(
+            "
+            REPLACE INTO files
+            (worktree_id, relative_path, mtime_seconds, mtime_nanos)
+            VALUES (?1, ?2, ?3, ?4)
+            ",
+            params![worktree_id, path.to_str(), mtime.seconds, mtime.nanos],
+        )?;
+
+        let file_id = db.last_insert_rowid();
+
+        let mut query = db.prepare(
+            "
+            INSERT INTO documents
+            (file_id, start_byte, end_byte, name, embedding, digest)
+            VALUES (?1, ?2, ?3, ?4, ?5, ?6)
+            ",
+        )?;
+
         for document in documents {
-            db.execute(
-                "INSERT INTO documents (file_id, start_byte, end_byte, name, embedding, digest) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
-                params![
-                    file_id,
-                    document.range.start.to_string(),
-                    document.range.end.to_string(),
-                    document.name,
-                    document.embedding,
-                    document.digest
-                ],
-            )?;
+            query.execute(params![
+                file_id,
+                document.range.start.to_string(),
+                document.range.end.to_string(),
+                document.name,
+                document.embedding,
+                document.digest
+            ])?;
         }
 
         Ok(())
     })
 }
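
Two mechanisms carry the speedup in this hunk. First, REPLACE INTO collapses the old query/branch/delete/insert sequence into one statement, and because SQLite's REPLACE is a flavor of INSERT, db.last_insert_rowid() still yields the file row's id. Second, the per-document INSERT is compiled once via prepare and re-executed with new bindings on each iteration instead of reparsing the SQL string per document (the deleted comment measured roughly 3400 documents per second). A sketch of the prepare-once pattern, with an illustrative Doc type and a trimmed column list that are not Zed's actual definitions:

    // Prepare-once / execute-many sketch (rusqlite). `Doc` is illustrative.
    use rusqlite::{params, Connection, Result};

    struct Doc {
        start: usize,
        end: usize,
        name: String,
    }

    // Compile the INSERT a single time, then rebind parameters per row.
    fn insert_docs(db: &Connection, file_id: i64, docs: &[Doc]) -> Result<()> {
        let mut stmt = db.prepare(
            "INSERT INTO documents (file_id, start_byte, end_byte, name)
             VALUES (?1, ?2, ?3, ?4)",
        )?;
        for doc in docs {
            stmt.execute(params![
                file_id,
                doc.start.to_string(),
                doc.end.to_string(),
                doc.name
            ])?;
        }
        Ok(())
    }

    fn main() -> Result<()> {
        let db = Connection::open_in_memory()?;
        db.execute(
            "CREATE TABLE documents (file_id INTEGER, start_byte TEXT, end_byte TEXT, name TEXT)",
            [],
        )?;
        let docs = vec![Doc { start: 0, end: 120, name: "fn main".into() }];
        insert_docs(&db, 1, &docs)
    }

Wrapping such a loop in a single transaction usually buys another large factor in SQLite, since each standalone execute otherwise commits individually; whether the surrounding code already runs inside one isn't visible in this hunk.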

@@ -38,7 +38,7 @@ use util::{
 };
 use workspace::WorkspaceCreated;
 
-const SEMANTIC_INDEX_VERSION: usize = 8;
+const SEMANTIC_INDEX_VERSION: usize = 9;
 const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(600);
 const EMBEDDING_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_millis(250);
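
The version bump is the migration lever for the schema change: an on-disk index recorded as version 8 no longer matches the constant and can be detected and rebuilt with the new unique index. A hypothetical sketch of such a gate; the semantic_index_config table and its version column are assumptions for illustration, not Zed's actual storage:

    // Hypothetical version gate; table/column names are assumed, not Zed's.
    use rusqlite::{Connection, Result};

    const SEMANTIC_INDEX_VERSION: usize = 9;

    fn needs_rebuild(db: &Connection) -> Result<bool> {
        let stored: i64 =
            db.query_row("SELECT version FROM semantic_index_config", [], |row| {
                row.get(0)
            })?;
        Ok(stored as usize != SEMANTIC_INDEX_VERSION)
    }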