From 24ca64ce46a03742e1f1d3cb8ab5ac6096b6ca4a Mon Sep 17 00:00:00 2001 From: timothycarambat Date: Wed, 26 Jul 2023 18:06:53 -0700 Subject: [PATCH] improve bulk write speeds --- server/models/documents.js | 50 +++++++++++++++++++++++++-------- server/models/vectors.js | 9 +++++- server/models/workspaceUsers.js | 20 ++++++++++--- 3 files changed, 62 insertions(+), 17 deletions(-) diff --git a/server/models/documents.js b/server/models/documents.js index 0de83bcd5..e51071a1a 100644 --- a/server/models/documents.js +++ b/server/models/documents.js @@ -69,11 +69,8 @@ const Document = { addDocuments: async function (workspace, additions = []) { const VectorDb = getVectorDbClass(); if (additions.length === 0) return; + const insertParams = []; - const db = await this.db(); - const stmt = await db.prepare( - `INSERT INTO ${this.tablename} (docId, filename, docpath, workspaceId, metadata) VALUES (?,?,?,?,?)` - ); for (const path of additions) { const data = await fileData(path); if (!data) continue; @@ -96,7 +93,8 @@ const Document = { console.error("Failed to vectorize", path); continue; } - stmt.run([ + + insertParams.push([ docId, newDoc.filename, newDoc.docpath, @@ -104,19 +102,31 @@ const Document = { newDoc.metadata, ]); } + + const db = await this.db(); + const stmt = await db.prepare( + `INSERT INTO ${this.tablename} (docId, filename, docpath, workspaceId, metadata) VALUES (?,?,?,?,?)` + ); + + await db.exec("BEGIN TRANSACTION"); + try { + for (const params of insertParams) { + await stmt.run(params); + } + await db.exec("COMMIT"); + } catch { + await db.exec("ROLLBACK"); + } + stmt.finalize(); db.close(); - return; }, removeDocuments: async function (workspace, removals = []) { const VectorDb = getVectorDbClass(); - + const deleteParams = []; if (removals.length === 0) return; - const db = await this.db(); - const stmt = await db.prepare( - `DELETE FROM ${this.tablename} WHERE docpath = ? 
AND workspaceId = ?` - ); + for (const path of removals) { const document = await this.firstWhere( `docPath = '${path}' AND workspaceId = ${workspace.id}` @@ -126,8 +136,24 @@ const Document = { workspace.slug, document.docId ); - stmt.run([path, workspace.id]); + deleteParams.push([path, workspace.id]); } + + const db = await this.db(); + const stmt = await db.prepare( + `DELETE FROM ${this.tablename} WHERE docpath = ? AND workspaceId = ?` + ); + + await db.exec("BEGIN TRANSACTION"); + try { + for (const params of deleteParams) { + await stmt.run(params); + } + await db.exec("COMMIT"); + } catch { + await db.exec("ROLLBACK"); + } + stmt.finalize(); db.close(); return true; diff --git a/server/models/vectors.js b/server/models/vectors.js index e568097be..d91512a15 100644 --- a/server/models/vectors.js +++ b/server/models/vectors.js @@ -58,7 +58,14 @@ const DocumentVectors = { [] ); - stmt.run(values); + await db.exec("BEGIN TRANSACTION"); + try { + await stmt.run(values); + await db.exec("COMMIT"); + } catch { + await db.exec("ROLLBACK"); + } + stmt.finalize(); db.close(); diff --git a/server/models/workspaceUsers.js b/server/models/workspaceUsers.js index 8dacbac11..74b4fab26 100644 --- a/server/models/workspaceUsers.js +++ b/server/models/workspaceUsers.js @@ -46,8 +46,14 @@ const WorkspaceUser = { `INSERT INTO ${this.tablename} (user_id, workspace_id) VALUES (?,?)` ); - for (const workspaceId of workspaceIds) { - stmt.run([userId, workspaceId]); + await db.exec("BEGIN TRANSACTION"); + try { + for (const workspaceId of workspaceIds) { + await stmt.run([userId, workspaceId]); + } + await db.exec("COMMIT"); + } catch { + await db.exec("ROLLBACK"); } stmt.finalize(); @@ -61,8 +67,14 @@ const WorkspaceUser = { `INSERT INTO ${this.tablename} (user_id, workspace_id) VALUES (?,?)` ); - for (const userId of userIds) { - stmt.run([userId, workspaceId]); + await db.exec("BEGIN TRANSACTION"); + try { + for (const userId of userIds) { + await stmt.run([userId, workspaceId]); 
+ } + await db.exec("COMMIT"); + } catch { + await db.exec("ROLLBACK"); } stmt.finalize();