2023-06-08 06:31:35 +02:00
|
|
|
const { fileData } = require("../utils/files");
|
|
|
|
const { v4: uuidv4 } = require("uuid");
|
|
|
|
const { getVectorDbClass } = require("../utils/helpers");
|
2023-06-15 08:12:59 +02:00
|
|
|
const { checkForMigrations } = require("../utils/database");
|
2023-06-04 04:28:07 +02:00
|
|
|
|
|
|
|
/**
 * Data-access object for the `workspace_documents` SQLite table.
 *
 * Every query method opens its own connection through `Document.db()` and
 * closes it again before returning, including when the query throws.
 */
const Document = {
  tablename: "workspace_documents",
  // Column definitions interpolated into CREATE TABLE IF NOT EXISTS by db().
  colsInit: `
  id INTEGER PRIMARY KEY AUTOINCREMENT,
  docId TEXT NOT NULL UNIQUE,
  filename TEXT NOT NULL,
  docpath TEXT NOT NULL,
  workspaceId INTEGER NOT NULL,
  metadata TEXT NULL,
  createdAt TEXT DEFAULT CURRENT_TIMESTAMP,
  lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
  `,

  /**
   * Opens an untraced connection and hands it, with this model, to
   * `checkForMigrations`.
   *
   * NOTE(review): the connection is not closed here; confirm whether
   * `checkForMigrations` takes ownership of the handle before adding a close.
   * @returns {Promise<void>}
   */
  migrateTable: async function () {
    console.log(`\x1b[34m[MIGRATING]\x1b[0m Checking for Document migrations`);
    const db = await this.db(false);
    await checkForMigrations(this, db);
  },

  /**
   * Migration descriptors for this table; currently there are none.
   * @returns {Array}
   */
  migrations: function () {
    return [];
  },

  /**
   * Opens the SQLite database file and makes sure this model's table exists.
   *
   * The file is `anythingllm.db` under `process.env.STORAGE_DIR` when that is
   * set, otherwise under the relative `storage/` directory.
   * @param {boolean} [tracing=true] When true, every executed SQL statement is
   *   logged to the console through the driver's "trace" event.
   * @returns {Promise<object>} The open connection; the caller must close it.
   */
  db: async function (tracing = true) {
    const sqlite3 = require("sqlite3").verbose();
    const { open } = require("sqlite");

    const storageDir = process.env.STORAGE_DIR
      ? `${process.env.STORAGE_DIR}/`
      : "storage/";
    const db = await open({
      filename: `${storageDir}anythingllm.db`,
      driver: sqlite3.Database,
    });

    await db.exec(
      `CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`
    );

    if (tracing) db.on("trace", (sql) => console.log(sql));
    return db;
  },

  /**
   * Lists every document row attached to a workspace.
   * @param {number|string|null} [workspaceId=null]
   * @returns {Promise<object[]>} Matching rows; an empty array when the id is
   *   missing or is not an integer.
   */
  forWorkspace: async function (workspaceId = null) {
    if (!workspaceId) return [];

    // The id is spliced into raw SQL by where(), so only a real integer may
    // pass through; anything else cannot match the INTEGER column anyway.
    const id = Number(workspaceId);
    if (!Number.isInteger(id)) return [];

    return await this.where(`workspaceId = ${id}`);
  },

  /**
   * Deletes every row matching a raw SQL condition.
   * @param {string} clause SQL placed verbatim after WHERE. It is NOT escaped;
   *   callers must never build it from untrusted input.
   * @returns {Promise<boolean>} Always true once the statement has run.
   * @throws {Error} When `clause` is empty, or when SQLite rejects the statement.
   */
  delete: async function (clause = "") {
    // An empty condition would produce malformed SQL; fail before opening a
    // connection instead of leaking one on the resulting syntax error.
    if (!clause) {
      throw new Error("Document.delete requires a non-empty WHERE clause");
    }

    const db = await this.db();
    try {
      await db.run(`DELETE FROM ${this.tablename} WHERE ${clause}`);
    } finally {
      await db.close();
    }
    return true;
  },

  /**
   * Selects rows, optionally filtered and capped.
   * @param {string} [clause=""] SQL placed verbatim after WHERE. It is NOT
   *   escaped; callers must never build it from untrusted input.
   * @param {number|string|null} [limit=null] Maximum number of rows. Values
   *   that are not a positive integer mean "no limit".
   * @returns {Promise<object[]>}
   */
  where: async function (clause = "", limit = null) {
    const maxRows = Number(limit);
    const sql = [
      `SELECT * FROM ${this.tablename}`,
      clause ? `WHERE ${clause}` : "",
      Number.isInteger(maxRows) && maxRows > 0 ? `LIMIT ${maxRows}` : "",
    ]
      .filter(Boolean)
      .join(" ");

    const db = await this.db();
    try {
      return await db.all(sql);
    } finally {
      await db.close();
    }
  },

  /**
   * Returns the first row matching a raw SQL condition.
   * @param {string} [clause=""] Same contract as `where`.
   * @returns {Promise<object|null>} The row, or null when nothing matches.
   */
  firstWhere: async function (clause = "") {
    // Only one row is needed, so let SQLite stop after the first match.
    const results = await this.where(clause, 1);
    return results.length > 0 ? results[0] : null;
  },

  /**
   * Embeds each file into the workspace's vector namespace and records a row
   * for every file that was embedded successfully.
   *
   * Files whose data cannot be loaded, or that fail to vectorize, are skipped.
   * @param {{id: number|string, slug: string}} workspace
   * @param {string[]} [additions=[]] Document paths; the stored filename is the
   *   second "/"-separated segment of each path.
   * @returns {Promise<void>}
   */
  addDocuments: async function (workspace, additions = []) {
    if (additions.length === 0) return;
    const VectorDb = getVectorDbClass();

    const db = await this.db();
    try {
      const stmt = await db.prepare(
        `INSERT INTO ${this.tablename} (docId, filename, docpath, workspaceId, metadata) VALUES (?,?,?,?,?)`
      );
      try {
        for (const path of additions) {
          const data = await fileData(path);
          if (!data) continue;

          const docId = uuidv4();
          // pageContent is stripped so only the remaining fields are stored
          // in the metadata column.
          const { pageContent, ...metadata } = data;

          const vectorized = await VectorDb.addDocumentToNamespace(
            workspace.slug,
            { ...data, docId },
            path
          );
          if (!vectorized) {
            console.error("Failed to vectorize", path);
            continue;
          }

          // Awaited so a failed insert surfaces here rather than being lost
          // when the statement is finalized.
          await stmt.run([
            docId,
            path.split("/")[1],
            path,
            Number(workspace.id),
            JSON.stringify(metadata),
          ]);
        }
      } finally {
        await stmt.finalize();
      }
    } finally {
      await db.close();
    }
    return;
  },

  /**
   * Removes documents from the workspace's vector namespace and deletes their
   * rows. Paths with no matching row in this workspace are skipped.
   * @param {{id: number|string, slug: string}} workspace
   * @param {string[]} [removals=[]] Document paths to detach.
   * @returns {Promise<boolean|undefined>} true after processing; undefined when
   *   `removals` is empty.
   */
  removeDocuments: async function (workspace, removals = []) {
    if (removals.length === 0) return;
    const VectorDb = getVectorDbClass();

    const db = await this.db();
    try {
      const stmt = await db.prepare(
        `DELETE FROM ${this.tablename} WHERE docpath = ? AND workspaceId = ?`
      );
      try {
        for (const path of removals) {
          // Bound parameters keep paths containing quotes from breaking (or
          // injecting into) the lookup, and reuse the open connection.
          const document = await db.get(
            `SELECT * FROM ${this.tablename} WHERE docpath = ? AND workspaceId = ? LIMIT 1`,
            [path, workspace.id]
          );
          if (!document) continue;

          await VectorDb.deleteDocumentFromNamespace(
            workspace.slug,
            document.docId
          );
          await stmt.run([path, workspace.id]);
        }
      } finally {
        await stmt.finalize();
      }
    } finally {
      await db.close();
    }
    return true;
  },
};
|
2023-06-04 04:28:07 +02:00
|
|
|
|
2023-06-08 06:31:35 +02:00
|
|
|
// Export the Document model as a named property so callers can destructure it
// from the result of require().
module.exports = { Document };
|