anything-llm/server/models/vectors.js
2023-07-26 18:06:53 -07:00

105 lines
3.0 KiB
JavaScript

const { checkForMigrations } = require("../utils/database");
const { Document } = require("./documents");
// Upper bound on bound parameters used in a single SQL statement. Older SQLite
// builds default SQLITE_MAX_VARIABLE_NUMBER to 999, so large inputs are split
// into batches that stay below that.
const MAX_SQL_VARIABLES = 900;

/**
 * Split an array into consecutive slices of at most `size` items.
 *
 * @param {Array} items - Values to split; not mutated.
 * @param {number} size - Maximum length of each slice (must be >= 1).
 * @returns {Array<Array>} The slices, in original order.
 */
function chunkArray(items, size) {
  const chunks = [];
  for (let start = 0; start < items.length; start += size) {
    chunks.push(items.slice(start, start + size));
  }
  return chunks;
}

// TODO: Do we want to store entire vectorized chunks in here
// so that we can easily spin up temp-namespace clones for threading

/**
 * Model for the `document_vectors` table, which stores (docId, vectorId) pairs.
 * Every query method opens its own SQLite connection via `db()` and closes it
 * when done.
 */
const DocumentVectors = {
  tablename: "document_vectors",
  // Column definitions used by the CREATE TABLE IF NOT EXISTS issued in db().
  colsInit: `
id INTEGER PRIMARY KEY AUTOINCREMENT,
docId TEXT NOT NULL,
vectorId TEXT NOT NULL,
createdAt TEXT DEFAULT CURRENT_TIMESTAMP,
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
`,

  /**
   * Open a non-tracing connection and hand it to `checkForMigrations`.
   *
   * NOTE(review): the connection opened here is not closed in this function;
   * confirm whether `checkForMigrations` closes it before adding a close here.
   */
  migrateTable: async function () {
    console.log(
      `\x1b[34m[MIGRATING]\x1b[0m Checking for DocumentVector migrations`
    );
    const db = await this.db(false);
    await checkForMigrations(this, db);
  },

  /**
   * Migrations for this table; currently there are none.
   *
   * @returns {Array} An empty list.
   */
  migrations: function () {
    return [];
  },

  /**
   * Open the SQLite database and make sure this model's table exists.
   *
   * @param {boolean} [tracing=true] - When true, every traced SQL statement is
   *   written to the console.
   * @returns {Promise<object>} The opened database handle; the caller is
   *   responsible for closing it.
   */
  db: async function (tracing = true) {
    const sqlite3 = require("sqlite3").verbose();
    const { open } = require("sqlite");
    const storagePrefix = process.env.STORAGE_DIR
      ? `${process.env.STORAGE_DIR}/`
      : "storage/";
    const db = await open({
      filename: `${storagePrefix}anythingllm.db`,
      driver: sqlite3.Database,
    });
    try {
      await db.exec(
        `PRAGMA foreign_keys = ON;CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`
      );
    } catch (error) {
      // Do not leak the freshly opened handle when bootstrapping fails.
      await db.close();
      throw error;
    }
    if (tracing) db.on("trace", (sql) => console.log(sql));
    return db;
  },

  /**
   * Insert many (docId, vectorId) rows inside a single transaction.
   *
   * Rows are written in batches so no statement exceeds MAX_SQL_VARIABLES
   * bound parameters. If any batch fails the whole transaction is rolled
   * back, the error is logged, and zero inserted rows are reported.
   *
   * @param {Array<{docId: string, vectorId: string}>} [vectorRecords=[]]
   * @returns {Promise<{documentsInserted: number}|undefined>} `undefined` for
   *   empty input, otherwise the number of rows written (0 on failure).
   */
  bulkInsert: async function (vectorRecords = []) {
    if (vectorRecords.length === 0) return;
    const db = await this.db();
    // Each row binds two values, so halve the variable budget per batch.
    const rowsPerBatch = Math.floor(MAX_SQL_VARIABLES / 2);
    try {
      await db.exec("BEGIN TRANSACTION");
      try {
        for (const batch of chunkArray(vectorRecords, rowsPerBatch)) {
          const placeholders = batch.map(() => "(?, ?)").join(", ");
          // Flatten the batch to match the order of the placeholders.
          const values = batch.flatMap((record) => [
            record.docId,
            record.vectorId,
          ]);
          const stmt = await db.prepare(
            `INSERT INTO ${this.tablename} (docId, vectorId) VALUES ${placeholders}`
          );
          try {
            await stmt.run(values);
          } finally {
            await stmt.finalize();
          }
        }
        await db.exec("COMMIT");
      } catch (error) {
        await db.exec("ROLLBACK");
        throw error;
      }
      return { documentsInserted: vectorRecords.length };
    } catch (error) {
      // Best-effort insert: surface the failure instead of claiming success.
      console.error("DocumentVectors.bulkInsert failed:", error);
      return { documentsInserted: 0 };
    } finally {
      await db.close();
    }
  },

  /**
   * Delete every vector row whose docId belongs to a document returned by
   * `Document.forWorkspace(workspaceId)`.
   *
   * @param {*} workspaceId - Passed through to `Document.forWorkspace`.
   * @returns {Promise<boolean>} Always `true` when no error is thrown.
   */
  deleteForWorkspace: async function (workspaceId) {
    const documents = await Document.forWorkspace(workspaceId);
    const docIds = [...new Set(documents.map((doc) => String(doc.docId)))];
    const ids = [];
    for (const batch of chunkArray(docIds, MAX_SQL_VARIABLES)) {
      // docIds are bound as parameters so quotes in an id cannot alter the SQL.
      const placeholders = batch.map(() => "?").join(", ");
      const rows = await this.where(`docId IN (${placeholders})`, null, batch);
      for (const row of rows) ids.push(row.id);
    }
    await this.deleteIds(ids);
    return true;
  },

  /**
   * Select rows from this table.
   *
   * @param {string} [clause=""] - SQL placed after WHERE; may contain `?`
   *   placeholders. It is inserted verbatim, so never build it from untrusted
   *   input; pass such values through `params` instead.
   * @param {number|null} [limit=null] - Maximum rows to return. Anything that
   *   is not a positive integer means "no limit".
   * @param {Array} [params=[]] - Values bound to the placeholders in `clause`.
   * @returns {Promise<Array<object>>} The matching rows.
   */
  where: async function (clause = "", limit = null, params = []) {
    const rowCap = Number(limit);
    // Only a validated integer is ever interpolated into the statement.
    const limitSql =
      Number.isInteger(rowCap) && rowCap > 0 ? `LIMIT ${rowCap}` : "";
    const db = await this.db();
    try {
      return await db.all(
        `SELECT * FROM ${this.tablename} ${
          clause ? `WHERE ${clause}` : ""
        } ${limitSql}`,
        params
      );
    } finally {
      await db.close();
    }
  },

  /**
   * Delete rows by primary key.
   *
   * @param {Array<number>} [ids=[]] - Primary keys to delete. An empty list is
   *   a no-op that does not open a database connection.
   * @returns {Promise<boolean>} Always `true` when no error is thrown.
   */
  deleteIds: async function (ids = []) {
    if (ids.length === 0) return true;
    const db = await this.db();
    try {
      for (const batch of chunkArray(ids, MAX_SQL_VARIABLES)) {
        const placeholders = batch.map(() => "?").join(", ");
        await db.run(
          `DELETE FROM ${this.tablename} WHERE id IN (${placeholders})`,
          batch
        );
      }
    } finally {
      await db.close();
    }
    return true;
  },
};
module.exports = { DocumentVectors };