// anything-llm/server/models/vectors.js
const { checkForMigrations } = require("../utils/database");
const { Document } = require("./documents");

// TODO: Do we want to store entire vectorized chunks in here
// so that we can easily spin up temp-namespace clones for threading

// Maps documents (docId) to the ids of their embedded vectors (vectorId)
// in the vector database, backed by a local SQLite table.
const DocumentVectors = {
  tablename: "document_vectors",
  colsInit: `
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  docId TEXT NOT NULL,
  vectorId TEXT NOT NULL,
  createdAt TEXT DEFAULT CURRENT_TIMESTAMP,
  lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
  `,
  // Run any pending schema migrations for this table.
  migrateTable: async function () {
    console.log(
      `\x1b[34m[MIGRATING]\x1b[0m Checking for DocumentVector migrations`
    );
    const db = await this.db(false);
    await checkForMigrations(this, db);
  },
  // No migrations defined for this table yet.
  migrations: function () {
    return [];
  },
  // Open (creating on first use) the SQLite database and ensure this
  // model's table exists. When `tracing` is true every executed SQL
  // statement is logged. Callers are responsible for closing the handle.
  db: async function (tracing = true) {
    const sqlite3 = require("sqlite3").verbose();
    const { open } = require("sqlite");

    const db = await open({
      filename: `${
        !!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
      }anythingllm.db`,
      driver: sqlite3.Database,
    });

    await db.exec(
      `PRAGMA foreign_keys = ON;CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`
    );
    if (tracing) db.on("trace", (sql) => console.log(sql));
    return db;
  },
  // Insert many { docId, vectorId } records in a single statement.
  // Returns { documentsInserted } or undefined when given no records.
  bulkInsert: async function (vectorRecords = []) {
    if (vectorRecords.length === 0) return;
    const db = await this.db();
    try {
      // Build a single query string with multiple placeholders for the INSERT operation
      const placeholders = vectorRecords.map(() => "(?, ?)").join(", ");
      const stmt = await db.prepare(
        `INSERT INTO ${this.tablename} (docId, vectorId) VALUES ${placeholders}`
      );

      // Flatten the vectorRecords array to match the order of placeholders.
      const values = vectorRecords.flatMap((record) => [
        record.docId,
        record.vectorId,
      ]);
      // The sqlite wrapper returns Promises — these must be awaited or the
      // insert can race the db.close() below and silently drop rows.
      await stmt.run(values);
      await stmt.finalize();
    } finally {
      await db.close();
    }
    return { documentsInserted: vectorRecords.length };
  },
  // Remove all vector records belonging to every document in a workspace.
  deleteForWorkspace: async function (workspaceId) {
    const documents = await Document.forWorkspace(workspaceId);
    const docIds = [...new Set(documents.map((doc) => doc.docId))];
    if (docIds.length === 0) return true; // nothing to delete; `IN ()` is invalid SQL.

    // Bind docIds as parameters instead of interpolating them into the
    // clause — docIds are TEXT and interpolation is injection-prone.
    const matches = await this.where(
      `docId IN (${docIds.map(() => "?").join(",")})`,
      null,
      docIds
    );
    await this.deleteIds(matches.map((doc) => doc.id));
    return true;
  },
  // Select rows matching an optional WHERE clause (sans the WHERE keyword),
  // with an optional LIMIT. `params` binds any `?` placeholders in `clause`.
  where: async function (clause = "", limit = null, params = []) {
    const db = await this.db();
    try {
      return await db.all(
        `SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ""} ${
          !!limit ? `LIMIT ${limit}` : ""
        }`,
        params
      );
    } finally {
      await db.close();
    }
  },
  // Delete rows by primary-key id. Always resolves true.
  deleteIds: async function (ids = []) {
    if (ids.length === 0) return true; // guard: `IN ()` is a SQL syntax error.
    const db = await this.db();
    try {
      // db.run (not db.get) is the correct wrapper call for a DELETE.
      await db.run(
        `DELETE FROM ${this.tablename} WHERE id IN (${ids
          .map(() => "?")
          .join(", ")})`,
        ids
      );
    } finally {
      await db.close();
    }
    return true;
  },
};

module.exports = { DocumentVectors };