mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-19 12:40:09 +01:00
PR#2355 Continued + expanded scope (#2365)
* #2317 Fetch pinned documents once per folder to reduce the number of queries. * Reorder the lines to keep const declarations together. * Add some comments to functions; move the pinned document fetch for a folder into its own function; move the per-folder watched documents fetch into a function in the same way; remove the unused function in the documents model. --------- Co-authored-by: Błażej Owczarczyk <blazeyy@gmail.com>
This commit is contained in:
parent
ac91d0df9a
commit
b44889a843
@ -38,6 +38,13 @@ const DocumentSyncQueue = {
|
|||||||
return new Date(Number(new Date()) + queueRecord.staleAfterMs);
|
return new Date(Number(new Date()) + queueRecord.staleAfterMs);
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check if the document can be watched based on the metadata fields
|
||||||
|
* @param {object} metadata - metadata to check
|
||||||
|
* @param {string} metadata.title - title of the document
|
||||||
|
* @param {string} metadata.chunkSource - chunk source of the document
|
||||||
|
* @returns {boolean} - true if the document can be watched, false otherwise
|
||||||
|
*/
|
||||||
canWatch: function ({ title, chunkSource = null } = {}) {
|
canWatch: function ({ title, chunkSource = null } = {}) {
|
||||||
if (chunkSource.startsWith("link://") && title.endsWith(".html"))
|
if (chunkSource.startsWith("link://") && title.endsWith(".html"))
|
||||||
return true; // If is web-link material (prior to feature most chunkSources were links://)
|
return true; // If is web-link material (prior to feature most chunkSources were links://)
|
||||||
|
@ -57,26 +57,12 @@ const Document = {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
getOnlyWorkspaceIds: async function (clause = {}) {
|
|
||||||
try {
|
|
||||||
const workspaceIds = await prisma.workspace_documents.findMany({
|
|
||||||
where: clause,
|
|
||||||
select: {
|
|
||||||
workspaceId: true,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
return workspaceIds.map((record) => record.workspaceId) || [];
|
|
||||||
} catch (error) {
|
|
||||||
console.error(error.message);
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
where: async function (
|
where: async function (
|
||||||
clause = {},
|
clause = {},
|
||||||
limit = null,
|
limit = null,
|
||||||
orderBy = null,
|
orderBy = null,
|
||||||
include = null
|
include = null,
|
||||||
|
select = null
|
||||||
) {
|
) {
|
||||||
try {
|
try {
|
||||||
const results = await prisma.workspace_documents.findMany({
|
const results = await prisma.workspace_documents.findMany({
|
||||||
@ -84,6 +70,7 @@ const Document = {
|
|||||||
...(limit !== null ? { take: limit } : {}),
|
...(limit !== null ? { take: limit } : {}),
|
||||||
...(orderBy !== null ? { orderBy } : {}),
|
...(orderBy !== null ? { orderBy } : {}),
|
||||||
...(include !== null ? { include } : {}),
|
...(include !== null ? { include } : {}),
|
||||||
|
...(select !== null ? { select: { ...select } } : {}),
|
||||||
});
|
});
|
||||||
return results;
|
return results;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@ -44,6 +44,7 @@ async function viewLocalFiles() {
|
|||||||
items: [],
|
items: [],
|
||||||
};
|
};
|
||||||
const subfiles = fs.readdirSync(folderPath);
|
const subfiles = fs.readdirSync(folderPath);
|
||||||
|
const filenames = {};
|
||||||
|
|
||||||
for (const subfile of subfiles) {
|
for (const subfile of subfiles) {
|
||||||
if (path.extname(subfile) !== ".json") continue;
|
if (path.extname(subfile) !== ".json") continue;
|
||||||
@ -51,30 +52,32 @@ async function viewLocalFiles() {
|
|||||||
const rawData = fs.readFileSync(filePath, "utf8");
|
const rawData = fs.readFileSync(filePath, "utf8");
|
||||||
const cachefilename = `${file}/${subfile}`;
|
const cachefilename = `${file}/${subfile}`;
|
||||||
const { pageContent, ...metadata } = JSON.parse(rawData);
|
const { pageContent, ...metadata } = JSON.parse(rawData);
|
||||||
const pinnedInWorkspaces = await Document.getOnlyWorkspaceIds({
|
|
||||||
docpath: cachefilename,
|
|
||||||
pinned: true,
|
|
||||||
});
|
|
||||||
const watchedInWorkspaces = liveSyncAvailable
|
|
||||||
? await Document.getOnlyWorkspaceIds({
|
|
||||||
docpath: cachefilename,
|
|
||||||
watched: true,
|
|
||||||
})
|
|
||||||
: [];
|
|
||||||
|
|
||||||
subdocs.items.push({
|
subdocs.items.push({
|
||||||
name: subfile,
|
name: subfile,
|
||||||
type: "file",
|
type: "file",
|
||||||
...metadata,
|
...metadata,
|
||||||
cached: await cachedVectorInformation(cachefilename, true),
|
cached: await cachedVectorInformation(cachefilename, true),
|
||||||
pinnedWorkspaces: pinnedInWorkspaces,
|
|
||||||
canWatch: liveSyncAvailable
|
canWatch: liveSyncAvailable
|
||||||
? DocumentSyncQueue.canWatch(metadata)
|
? DocumentSyncQueue.canWatch(metadata)
|
||||||
: false,
|
: false,
|
||||||
// Is file watched in any workspace since sync updates all workspaces where file is referenced
|
// pinnedWorkspaces: [], // This is the list of workspaceIds that have pinned this document
|
||||||
watched: watchedInWorkspaces.length !== 0,
|
// watched: false, // boolean to indicate if this document is watched in ANY workspace
|
||||||
});
|
});
|
||||||
|
filenames[cachefilename] = subfile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Grab the pinned workspaces and watched documents for this folder's documents
|
||||||
|
// at the time of the query so we don't have to re-query the database for each file
|
||||||
|
const pinnedWorkspacesByDocument =
|
||||||
|
await getPinnedWorkspacesByDocument(filenames);
|
||||||
|
const watchedDocumentsFilenames =
|
||||||
|
await getWatchedDocumentFilenames(filenames);
|
||||||
|
for (const item of subdocs.items) {
|
||||||
|
item.pinnedWorkspaces = pinnedWorkspacesByDocument[item.name] || [];
|
||||||
|
item.watched =
|
||||||
|
watchedDocumentsFilenames.hasOwnProperty(item.name) || false;
|
||||||
|
}
|
||||||
|
|
||||||
directory.items.push(subdocs);
|
directory.items.push(subdocs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -88,8 +91,13 @@ async function viewLocalFiles() {
|
|||||||
return directory;
|
return directory;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Searches the vector-cache folder for existing information so we dont have to re-embed a
|
/**
|
||||||
// document and can instead push directly to vector db.
|
* Searches the vector-cache folder for existing information so we don't have to re-embed a
|
||||||
|
* document and can instead push directly to vector db.
|
||||||
|
* @param {string} filename - the filename to check for cached vector information
|
||||||
|
* @param {boolean} checkOnly - if true, only check if the file exists, do not return the cached data
|
||||||
|
* @returns {Promise<boolean|{exists: boolean, chunks: any[]}>} - a promise that resolves to an object containing the existence of the file and its cached chunks; when no filename is given and `checkOnly` is true it resolves to a bare `false` instead
|
||||||
|
*/
|
||||||
async function cachedVectorInformation(filename = null, checkOnly = false) {
|
async function cachedVectorInformation(filename = null, checkOnly = false) {
|
||||||
if (!filename) return checkOnly ? false : { exists: false, chunks: [] };
|
if (!filename) return checkOnly ? false : { exists: false, chunks: [] };
|
||||||
|
|
||||||
@ -218,6 +226,61 @@ function hasVectorCachedFiles() {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Look up which workspaces have pinned each document of a folder, using a single
 * query for the whole folder instead of one query per file.
 * @param {Record<string, string>} filenames - map of document docpath -> filename; the keys are the docpaths that get queried
 * @returns {Promise<Record<string, Array<string|number>>>} - a record of filename -> de-duplicated workspaceIds (passed through as returned by Document.where) that have pinned that document
 */
async function getPinnedWorkspacesByDocument(filenames = {}) {
  const docpaths = Object.keys(filenames);
  // Nothing to look up (e.g. a folder without documents) - skip the database round trip.
  if (docpaths.length === 0) return {};

  const pinnedRecords = await Document.where(
    {
      docpath: { in: docpaths },
      pinned: true,
    },
    null,
    null,
    null,
    // Only these two columns are needed to build the lookup.
    { workspaceId: true, docpath: true }
  );

  return pinnedRecords.reduce((result, { workspaceId, docpath }) => {
    // Results are keyed by filename (not docpath) because callers look items up by name.
    const filename = filenames[docpath];
    if (!result[filename]) result[filename] = [];
    if (!result[filename].includes(workspaceId))
      result[filename].push(workspaceId);
    return result;
  }, {});
}
|
||||||
|
|
||||||
|
/**
 * Find which of a folder's documents are watched in at least one workspace, using a
 * single query for the whole folder. The presence of a filename key in the result is what
 * marks a document as watched (used to determine if a document should be displayed in the
 * watched documents sidebar).
 * @param {Record<string, string>} filenames - map of document docpath -> filename; the keys are the docpaths that get queried
 * @returns {Promise<Record<string, string|number>>} - a record of filename -> a workspaceId watching it (when several workspaces watch the same document the last returned row wins)
 */
async function getWatchedDocumentFilenames(filenames = {}) {
  const docpaths = Object.keys(filenames);
  // Nothing to look up (e.g. a folder without documents) - skip the database round trip.
  if (docpaths.length === 0) return {};

  const watchedRecords = await Document.where(
    {
      docpath: { in: docpaths },
      watched: true,
    },
    null,
    null,
    null,
    { workspaceId: true, docpath: true }
  );

  // A plain object is kept as the accumulator so callers can keep using `hasOwnProperty` on it.
  return watchedRecords.reduce((result, { workspaceId, docpath }) => {
    const filename = filenames[docpath];
    result[filename] = workspaceId;
    return result;
  }, {});
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
findDocumentInDocuments,
|
findDocumentInDocuments,
|
||||||
cachedVectorInformation,
|
cachedVectorInformation,
|
||||||
|
Loading…
Reference in New Issue
Block a user