anything-llm/server/models/documents.js

138 lines
3.9 KiB
JavaScript
Raw Normal View History

2023-06-08 06:31:35 +02:00
const { fileData } = require("../utils/files");
const { v4: uuidv4 } = require("uuid");
const { getVectorDbClass } = require("../utils/helpers");
const { checkForMigrations } = require("../utils/database");
2023-06-04 04:28:07 +02:00
/**
 * Data-access helpers for the `workspace_documents` SQLite table.
 *
 * Every query method opens its own connection through `db()` and closes it
 * before returning, including when the query fails.
 */
const Document = {
  tablename: "workspace_documents",
  // Column definitions used by `db()` in its CREATE TABLE IF NOT EXISTS statement.
  colsInit: `
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    docId TEXT NOT NULL UNIQUE,
    filename TEXT NOT NULL,
    docpath TEXT NOT NULL,
    workspaceId INTEGER NOT NULL,
    metadata TEXT NULL,
    createdAt TEXT DEFAULT CURRENT_TIMESTAMP,
    lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
  `,

  /**
   * Logs a migration notice, opens an untraced connection and hands it to
   * `checkForMigrations` together with this model.
   *
   * NOTE(review): the connection is not closed here — presumably
   * `checkForMigrations` owns it; confirm against ../utils/database.
   *
   * @returns {Promise<void>}
   */
  migrateTable: async function () {
    console.log(`\x1b[34m[MIGRATING]\x1b[0m Checking for Document migrations`);
    const db = await this.db(false);
    await checkForMigrations(this, db);
  },

  /**
   * Migrations for this table; currently there are none.
   *
   * @returns {Array} An empty list.
   */
  migrations: function () {
    return [];
  },

  /**
   * Opens `anythingllm.db` (under `$STORAGE_DIR/` when that variable is set,
   * otherwise under `storage/`) and makes sure the table exists.
   *
   * @param {boolean} [tracing=true] - When true, every executed SQL statement
   *   is echoed to the console via the driver's "trace" event.
   * @returns {Promise<object>} The open database handle; the caller must close it.
   */
  db: async function (tracing = true) {
    const sqlite3 = require("sqlite3").verbose();
    const { open } = require("sqlite");

    const storageDir = process.env.STORAGE_DIR
      ? `${process.env.STORAGE_DIR}/`
      : "storage/";
    const db = await open({
      filename: `${storageDir}anythingllm.db`,
      driver: sqlite3.Database,
    });

    try {
      await db.exec(
        `CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`
      );
    } catch (error) {
      // Do not leak the handle when table creation fails; a secondary close
      // failure is ignored so the original error is the one that surfaces.
      await db.close().catch(() => {});
      throw error;
    }

    if (tracing) db.on("trace", (sql) => console.log(sql));
    return db;
  },

  /**
   * Lists every document row that belongs to a workspace.
   *
   * @param {number|string|null} [workspaceId=null] - Workspace primary key.
   * @returns {Promise<object[]>} Matching rows; an empty list when the id is
   *   missing or is not an integer.
   */
  forWorkspace: async function (workspaceId = null) {
    if (!workspaceId) return [];
    // The id ends up inside a SQL string, so only a real integer may pass.
    const id = Number(workspaceId);
    if (!Number.isSafeInteger(id)) return [];
    return await this.where(`workspaceId = ${id}`);
  },

  /**
   * Deletes the rows matching a raw SQL condition.
   *
   * @param {string} clause - Trusted SQL placed after WHERE. It is inserted
   *   verbatim, so it must never contain unescaped user input.
   * @returns {Promise<boolean>} `true` once the statement has run.
   * @throws {Error} When `clause` is empty (the statement would be malformed).
   */
  delete: async function (clause = "") {
    if (!clause) {
      throw new Error("Document.delete requires a non-empty WHERE clause");
    }
    const db = await this.db();
    try {
      await db.run(`DELETE FROM ${this.tablename} WHERE ${clause}`);
    } finally {
      await db.close();
    }
    return true;
  },

  /**
   * Selects rows matching a raw SQL condition.
   *
   * @param {string} [clause=""] - Trusted SQL placed after WHERE; when empty
   *   no WHERE is emitted. Inserted verbatim — never pass unescaped user input.
   * @param {number|null} [limit=null] - Maximum number of rows; no LIMIT is
   *   emitted when falsy.
   * @returns {Promise<object[]>} The selected rows.
   */
  where: async function (clause = "", limit = null) {
    const whereSql = clause ? `WHERE ${clause}` : "";
    // Coerce to a number so the limit cannot carry arbitrary SQL.
    const limitSql = limit ? `LIMIT ${Number(limit)}` : "";
    const db = await this.db();
    try {
      return await db.all(
        `SELECT * FROM ${this.tablename} ${whereSql} ${limitSql}`
      );
    } finally {
      await db.close();
    }
  },

  /**
   * Returns the first row matching a raw SQL condition.
   *
   * @param {string} [clause=""] - Trusted SQL condition, see `where`.
   * @returns {Promise<object|null>} The first row, or `null` when none match.
   */
  firstWhere: async function (clause = "") {
    // Only one row is needed, so let SQLite stop after the first match.
    const results = await this.where(clause, 1);
    return results.length > 0 ? results[0] : null;
  },

  /**
   * Vectorizes each listed document into the workspace's namespace and records
   * a row for every document that was vectorized successfully.
   *
   * Paths whose file data cannot be loaded, or whose vectorization reports
   * failure, are skipped.
   *
   * @param {object} workspace - Workspace record; `id` and `slug` are read.
   * @param {string[]} [additions=[]] - Document paths to add.
   * @returns {Promise<void>}
   */
  addDocuments: async function (workspace, additions = []) {
    if (!additions || additions.length === 0) return;

    const VectorDb = getVectorDbClass();
    const db = await this.db();
    try {
      const stmt = await db.prepare(
        `INSERT INTO ${this.tablename} (docId, filename, docpath, workspaceId, metadata) VALUES (?,?,?,?,?)`
      );
      try {
        for (const path of additions) {
          const data = await fileData(path);
          if (!data) continue;

          const docId = uuidv4();
          // The page text itself is not stored in the row, only its metadata.
          const { pageContent, ...metadata } = data;
          const newDoc = {
            docId,
            // Second "/"-separated segment of the path.
            filename: path.split("/")[1],
            docpath: path,
            workspaceId: Number(workspace.id),
            metadata: JSON.stringify(metadata),
          };

          const vectorized = await VectorDb.addDocumentToNamespace(
            workspace.slug,
            { ...data, docId },
            path
          );
          if (!vectorized) {
            console.error("Failed to vectorize", path);
            continue;
          }

          // Awaited so a failed insert rejects this call instead of becoming
          // an unhandled rejection after the connection is already closed.
          await stmt.run([
            newDoc.docId,
            newDoc.filename,
            newDoc.docpath,
            newDoc.workspaceId,
            newDoc.metadata,
          ]);
        }
      } finally {
        await stmt.finalize();
      }
    } finally {
      await db.close();
    }
  },

  /**
   * Removes each listed document from the workspace's vector namespace and
   * deletes its row. Paths with no matching row are skipped.
   *
   * @param {object} workspace - Workspace record; `id` and `slug` are read.
   * @param {string[]} [removals=[]] - Document paths to remove.
   * @returns {Promise<boolean|undefined>} `true` after processing the list;
   *   `undefined` when the list is empty.
   */
  removeDocuments: async function (workspace, removals = []) {
    if (!removals || removals.length === 0) return;

    const VectorDb = getVectorDbClass();
    const workspaceId = Number(workspace.id);
    const db = await this.db();
    try {
      const stmt = await db.prepare(
        `DELETE FROM ${this.tablename} WHERE docpath = ? AND workspaceId = ?`
      );
      try {
        for (const path of removals) {
          // Bound parameters keep quotes in a path from breaking (or
          // injecting into) the lookup, and reuse the open connection.
          const document = await db.get(
            `SELECT * FROM ${this.tablename} WHERE docpath = ? AND workspaceId = ?`,
            [path, workspaceId]
          );
          if (!document) continue;

          await VectorDb.deleteDocumentFromNamespace(
            workspace.slug,
            document.docId
          );
          await stmt.run([path, workspaceId]);
        }
      } finally {
        await stmt.finalize();
      }
    } finally {
      await db.close();
    }
    return true;
  },
};
2023-06-04 04:28:07 +02:00
2023-06-08 06:31:35 +02:00
// Export the Document model object as a named property of this module.
module.exports = { Document };