anything-llm/server/models/documents.js
timothycarambat 27c58541bd inital commit
2023-06-03 19:28:07 -07:00

99 lines
3.2 KiB
JavaScript

const { fileData } = require('../utils/files');
const { v4: uuidv4 } = require('uuid');
const Document = {
tablename: 'workspace_documents',
colsInit: `
id INTEGER PRIMARY KEY AUTOINCREMENT,
docId TEXT NOT NULL UNIQUE,
filename TEXT NOT NULL,
docpath TEXT NOT NULL,
workspaceId INTEGER NOT NULL,
metadata TEXT NULL,
createdAt TEXT DEFAULT CURRENT_TIMESTAMP,
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
`,
db: async function () {
const sqlite3 = require('sqlite3').verbose();
const { open } = require('sqlite');
const db = await open({
filename: 'anythingllm.db',
driver: sqlite3.Database
})
await db.exec(`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`);
db.on('trace', (sql) => console.log(sql))
return db
},
forWorkspace: async function (workspaceId = null) {
if (!workspaceId) return [];
return await this.where(`workspaceId = ${workspaceId}`);
},
delete: async function (clause = '') {
const db = await this.db()
await db.get(`DELETE FROM ${this.tablename} WHERE ${clause}`)
db.close()
return true
},
where: async function (clause = '', limit = null) {
const db = await this.db()
const results = await db.all(`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ''} ${!!limit ? `LIMIT ${limit}` : ''}`)
db.close()
return results
},
firstWhere: async function (clause = '') {
const results = await this.where(clause);
return results.length > 0 ? results[0] : null
},
addDocuments: async function (workspace, additions = []) {
const { Pinecone } = require('../utils/pinecone');
if (additions.length === 0) return;
const db = await this.db()
const stmt = await db.prepare(`INSERT INTO ${this.tablename} (docId, filename, docpath, workspaceId, metadata) VALUES (?,?,?,?,?)`)
for (const path of additions) {
const data = await fileData(path);
if (!data) continue;
const docId = uuidv4();
const { pageContent, ...metadata } = data
const newDoc = {
docId,
filename: path.split('/')[1],
docpath: path,
workspaceId: Number(workspace.id),
metadata: JSON.stringify(metadata)
}
const vectorized = await Pinecone.addDocumentToNamespace(workspace.slug, { ...data, docId }, path);
if (!vectorized) {
console.error('Failed to vectorize', path)
continue;
}
stmt.run([docId, newDoc.filename, newDoc.docpath, newDoc.workspaceId, newDoc.metadata])
}
stmt.finalize();
db.close();
return;
},
removeDocuments: async function (workspace, removals = []) {
const { Pinecone } = require('../utils/pinecone');
if (removals.length === 0) return;
const db = await this.db()
const stmt = await db.prepare(`DELETE FROM ${this.tablename} WHERE docpath = ? AND workspaceId = ?`);
for (const path of removals) {
const document = await this.firstWhere(`docPath = '${path}' AND workspaceId = ${workspace.id}`)
if (!document) continue;
await Pinecone.deleteDocumentFromNamespace(workspace.slug, document.docId);
stmt.run([path, workspace.id])
}
stmt.finalize();
db.close();
return true;
}
}
module.exports = { Document }