diff --git a/.vscode/settings.json b/.vscode/settings.json index 8d924b71..b9bde685 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,7 @@ "cooldowns", "Deduplicator", "Dockerized", + "docpath", "elevenlabs", "Embeddable", "epub", diff --git a/collector/extensions/index.js b/collector/extensions/index.js index 07726464..a88b38ee 100644 --- a/collector/extensions/index.js +++ b/collector/extensions/index.js @@ -1,18 +1,41 @@ +const { setDataSigner } = require("../middleware/setDataSigner"); const { verifyPayloadIntegrity } = require("../middleware/verifyIntegrity"); const { reqBody } = require("../utils/http"); const { validURL } = require("../utils/url"); +const RESYNC_METHODS = require("./resync"); function extensions(app) { if (!app) return; app.post( - "/ext/github-repo", - [verifyPayloadIntegrity], + "/ext/resync-source-document", + [verifyPayloadIntegrity, setDataSigner], async function (request, response) { try { - const loadGithubRepo = require("../utils/extensions/GithubRepo"); + const { type, options } = reqBody(request); + if (!RESYNC_METHODS.hasOwnProperty(type)) throw new Error(`Type "${type}" is not a valid type to sync.`); + return await RESYNC_METHODS[type](options, response); + } catch (e) { + console.error(e); + response.status(200).json({ + success: false, + content: null, + reason: e.message || "A processing error occurred.", + }); + } + return; + } + ) + + app.post( + "/ext/github-repo", + [verifyPayloadIntegrity, setDataSigner], + async function (request, response) { + try { + const { loadGithubRepo } = require("../utils/extensions/GithubRepo"); const { success, reason, data } = await loadGithubRepo( - reqBody(request) + reqBody(request), + response, ); response.status(200).json({ success, @@ -67,7 +90,7 @@ function extensions(app) { [verifyPayloadIntegrity], async function (request, response) { try { - const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript"); + const { loadYouTubeTranscript } = require("../utils/extensions/YoutubeTranscript"); const { success, reason, data } = await loadYouTubeTranscript( reqBody(request) ); @@ -108,12 +131,13 @@ function extensions(app) { app.post( "/ext/confluence", - [verifyPayloadIntegrity], + [verifyPayloadIntegrity, setDataSigner], async function (request, response) { try { - const loadConfluence = require("../utils/extensions/Confluence"); + const { loadConfluence } = require("../utils/extensions/Confluence"); const { success, reason, data } = await loadConfluence( - reqBody(request) + reqBody(request), + response ); response.status(200).json({ success, reason, data }); } catch (e) { diff --git a/collector/extensions/resync/index.js b/collector/extensions/resync/index.js new file mode 100644 index 00000000..ba967962 --- /dev/null +++ b/collector/extensions/resync/index.js @@ -0,0 +1,113 @@ +const { getLinkText } = require("../../processLink"); + +/** + * Fetches the content of a raw link. Returns the content as a text string of the link in question. + * @param {object} data - metadata from document (eg: link) + * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response + */ +async function resyncLink({ link }, response) { + if (!link) throw new Error('Invalid link provided'); + try { + const { success, content = null } = await getLinkText(link); + if (!success) throw new Error(`Failed to sync link content. 
${reason}`); + response.status(200).json({ success, content }); + } catch (e) { + console.error(e); + response.status(200).json({ + success: false, + content: null, + }); + } +} + +/** + * Fetches the content of a YouTube link. Returns the content as a text string of the video in question. + * We offer this as there may be some videos where a transcription could be manually edited after initial scraping + * but in general - transcriptions often never change. + * @param {object} data - metadata from document (eg: link) + * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response + */ +async function resyncYouTube({ link }, response) { + if (!link) throw new Error('Invalid link provided'); + try { + const { fetchVideoTranscriptContent } = require("../../utils/extensions/YoutubeTranscript"); + const { success, reason, content } = await fetchVideoTranscriptContent({ url: link }); + if (!success) throw new Error(`Failed to sync YouTube video transcript. ${reason}`); + response.status(200).json({ success, content }); + } catch (e) { + console.error(e); + response.status(200).json({ + success: false, + content: null, + }); + } +} + +/** + * Fetches the content of a specific confluence page via its chunkSource. + * Returns the content as a text string of the page in question and only that page. + * @param {object} data - metadata from document (eg: chunkSource) + * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response + */ +async function resyncConfluence({ chunkSource }, response) { + if (!chunkSource) throw new Error('Invalid source property provided'); + try { + // Confluence data is `payload` encrypted. So we need to expand its + // encrypted payload back into query params so we can reFetch the page with same access token/params. + const source = response.locals.encryptionWorker.expandPayload(chunkSource); + const { fetchConfluencePage } = require("../../utils/extensions/Confluence"); + const { success, reason, content } = await fetchConfluencePage({ + pageUrl: `https:${source.pathname}`, // need to add back the real protocol + baseUrl: source.searchParams.get('baseUrl'), + accessToken: source.searchParams.get('token'), + username: source.searchParams.get('username'), + }); + + if (!success) throw new Error(`Failed to sync Confluence page content. ${reason}`); + response.status(200).json({ success, content }); + } catch (e) { + console.error(e); + response.status(200).json({ + success: false, + content: null, + }); + } +} + +/** + * Fetches the content of a specific confluence page via its chunkSource. + * Returns the content as a text string of the page in question and only that page. + * @param {object} data - metadata from document (eg: chunkSource) + * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response + */ +async function resyncGithub({ chunkSource }, response) { + if (!chunkSource) throw new Error('Invalid source property provided'); + try { + // Github file data is `payload` encrypted (might contain PAT). So we need to expand its + // encrypted payload back into query params so we can reFetch the page with same access token/params. 
+ const source = response.locals.encryptionWorker.expandPayload(chunkSource); + const { fetchGithubFile } = require("../../utils/extensions/GithubRepo"); + const { success, reason, content } = await fetchGithubFile({ + repoUrl: `https:${source.pathname}`, // need to add back the real protocol + branch: source.searchParams.get('branch'), + accessToken: source.searchParams.get('pat'), + sourceFilePath: source.searchParams.get('path'), + }); + + if (!success) throw new Error(`Failed to sync Github file content. ${reason}`); + response.status(200).json({ success, content }); + } catch (e) { + console.error(e); + response.status(200).json({ + success: false, + content: null, + }); + } +} + +module.exports = { + link: resyncLink, + youtube: resyncYouTube, + confluence: resyncConfluence, + github: resyncGithub, +} \ No newline at end of file diff --git a/collector/middleware/setDataSigner.js b/collector/middleware/setDataSigner.js new file mode 100644 index 00000000..3ea3b2f8 --- /dev/null +++ b/collector/middleware/setDataSigner.js @@ -0,0 +1,41 @@ +const { EncryptionWorker } = require("../utils/EncryptionWorker"); +const { CommunicationKey } = require("../utils/comKey"); + +/** + * Express Response Object interface with defined encryptionWorker attached to locals property. + * @typedef {import("express").Response & import("express").Response['locals'] & {encryptionWorker: EncryptionWorker} } ResponseWithSigner +*/ + +// You can use this middleware to assign the EncryptionWorker to the response locals +// property so that if can be used to encrypt/decrypt arbitrary data via response object. +// eg: Encrypting API keys in chunk sources. + +// The way this functions is that the rolling RSA Communication Key is used server-side to private-key encrypt the raw +// key of the persistent EncryptionManager credentials. Since EncryptionManager credentials do _not_ roll, we should not send them +// even between server<>collector in plaintext because if the user configured the server/collector to be public they could technically +// be exposing the key in transit via the X-Payload-Signer header. Even if this risk is minimal we should not do this. + +// This middleware uses the CommunicationKey public key to first decrypt the base64 representation of the EncryptionManager credentials +// and then loads that in to the EncryptionWorker as a buffer so we can use the same credentials across the system. Should we ever break the +// collector out into its own service this would still work without SSL/TLS. + +/** + * + * @param {import("express").Request} request + * @param {import("express").Response} response + * @param {import("express").NextFunction} next + */ +function setDataSigner(request, response, next) { + const comKey = new CommunicationKey(); + const encryptedPayloadSigner = request.header("X-Payload-Signer"); + if (!encryptedPayloadSigner) console.log('Failed to find signed-payload to set encryption worker! 
Encryption calls will fail.'); + + const decryptedPayloadSignerKey = comKey.decrypt(encryptedPayloadSigner); + const encryptionWorker = new EncryptionWorker(decryptedPayloadSignerKey); + response.locals.encryptionWorker = encryptionWorker; + next(); +} + +module.exports = { + setDataSigner +} \ No newline at end of file diff --git a/collector/utils/EncryptionWorker/index.js b/collector/utils/EncryptionWorker/index.js new file mode 100644 index 00000000..ddc27733 --- /dev/null +++ b/collector/utils/EncryptionWorker/index.js @@ -0,0 +1,77 @@ +const crypto = require("crypto"); + +// Differs from EncryptionManager in that is does not set or define the keys that will be used +// to encrypt or read data and it must be told the key (as base64 string) explicitly that will be used and is provided to +// the class on creation. This key should be the same `key` that is used by the EncryptionManager class. +class EncryptionWorker { + constructor(presetKeyBase64 = "") { + this.key = Buffer.from(presetKeyBase64, "base64"); + this.algorithm = "aes-256-cbc"; + this.separator = ":"; + } + + log(text, ...args) { + console.log(`\x1b[36m[EncryptionManager]\x1b[0m ${text}`, ...args); + } + + /** + * Give a chunk source, parse its payload query param and expand that object back into the URL + * as additional query params + * @param {string} chunkSource + * @returns {URL} Javascript URL object with query params decrypted from payload query param. + */ + expandPayload(chunkSource = "") { + try { + const url = new URL(chunkSource); + if (!url.searchParams.has("payload")) return url; + + const decryptedPayload = this.decrypt(url.searchParams.get("payload")); + const encodedParams = JSON.parse(decryptedPayload); + url.searchParams.delete("payload"); // remove payload prop + + // Add all query params needed to replay as query params + Object.entries(encodedParams).forEach(([key, value]) => + url.searchParams.append(key, value) + ); + return url; + } catch (e) { + console.error(e); + } + return new URL(chunkSource); + } + + encrypt(plainTextString = null) { + try { + if (!plainTextString) + throw new Error("Empty string is not valid for this method."); + const iv = crypto.randomBytes(16); + const cipher = crypto.createCipheriv(this.algorithm, this.key, iv); + const encrypted = cipher.update(plainTextString, "utf8", "hex"); + return [ + encrypted + cipher.final("hex"), + Buffer.from(iv).toString("hex"), + ].join(this.separator); + } catch (e) { + this.log(e); + return null; + } + } + + decrypt(encryptedString) { + try { + const [encrypted, iv] = encryptedString.split(this.separator); + if (!iv) throw new Error("IV not found"); + const decipher = crypto.createDecipheriv( + this.algorithm, + this.key, + Buffer.from(iv, "hex") + ); + return decipher.update(encrypted, "hex", "utf8") + decipher.final("utf8"); + } catch (e) { + this.log(e); + return null; + } + } +} + +module.exports = { EncryptionWorker }; diff --git a/collector/utils/comKey/index.js b/collector/utils/comKey/index.js index 77ec1c61..a2e2f52a 100644 --- a/collector/utils/comKey/index.js +++ b/collector/utils/comKey/index.js @@ -40,6 +40,15 @@ class CommunicationKey { } catch {} return false; } + + // Use the rolling public-key to decrypt arbitrary data that was encrypted via the private key on the server side CommunicationKey class + // that we know was done with the same key-pair and the given input is in base64 format already. + // Returns plaintext string of the data that was encrypted. 
+ decrypt(base64String = "") { + return crypto + .publicDecrypt(this.#readPublicKey(), Buffer.from(base64String, "base64")) + .toString(); + } } module.exports = { CommunicationKey }; diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index 0bee1561..6df06310 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -9,7 +9,13 @@ const { ConfluencePagesLoader, } = require("langchain/document_loaders/web/confluence"); -async function loadConfluence({ pageUrl, username, accessToken }) { +/** + * Load Confluence documents from a spaceID and Confluence credentials + * @param {object} args - forwarded request body params + * @param {import("../../../middleware/setDataSigner").ResponseWithSigner} response - Express response object with encryptionWorker + * @returns + */ +async function loadConfluence({ pageUrl, username, accessToken }, response) { if (!pageUrl || !username || !accessToken) { return { success: false, @@ -79,7 +85,10 @@ async function loadConfluence({ pageUrl, username, accessToken }) { docAuthor: subdomain, description: doc.metadata.title, docSource: `${subdomain} Confluence`, - chunkSource: `confluence://${doc.metadata.url}`, + chunkSource: generateChunkSource( + { doc, baseUrl, accessToken, username }, + response.locals.encryptionWorker + ), published: new Date().toLocaleString(), wordCount: doc.pageContent.split(" ").length, pageContent: doc.pageContent, @@ -106,6 +115,82 @@ async function loadConfluence({ pageUrl, username, accessToken }) { }; } +/** + * Gets the page content from a specific Confluence page, not all pages in a workspace. + * @returns + */ +async function fetchConfluencePage({ + pageUrl, + baseUrl, + username, + accessToken, +}) { + if (!pageUrl || !baseUrl || !username || !accessToken) { + return { + success: false, + content: null, + reason: + "You need either a username and access token, or a personal access token (PAT), to use the Confluence connector.", + }; + } + + const { valid, result } = validSpaceUrl(pageUrl); + if (!valid) { + return { + success: false, + content: null, + reason: + "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*", + }; + } + + console.log(`-- Working Confluence Page ${pageUrl} --`); + const { spaceKey } = result; + const loader = new ConfluencePagesLoader({ + baseUrl, + spaceKey, + username, + accessToken, + }); + + const { docs, error } = await loader + .load() + .then((docs) => { + return { docs, error: null }; + }) + .catch((e) => { + return { + docs: [], + error: e.message?.split("Error:")?.[1] || e.message, + }; + }); + + if (!docs.length || !!error) { + return { + success: false, + reason: error ?? 
"No pages found for that Confluence space.", + content: null, + }; + } + + const targetDocument = docs.find( + (doc) => doc.pageContent && doc.metadata.url === pageUrl + ); + if (!targetDocument) { + return { + success: false, + reason: "Target page could not be found in Confluence space.", + content: null, + }; + } + + return { + success: true, + reason: null, + content: targetDocument.pageContent, + }; +} + /** * A match result for a url-pattern of a Confluence URL * @typedef {Object} ConfluenceMatchResult @@ -195,4 +280,29 @@ function validSpaceUrl(spaceUrl = "") { return { valid: false, result: null }; } -module.exports = loadConfluence; +/** + * Generate the full chunkSource for a specific Confluence page so that we can resync it later. + * This data is encrypted into a single `payload` query param so we can replay credentials later + * since this was encrypted with the systems persistent password and salt. + * @param {object} chunkSourceInformation + * @param {import("../../EncryptionWorker").EncryptionWorker} encryptionWorker + * @returns {string} + */ +function generateChunkSource( + { doc, baseUrl, accessToken, username }, + encryptionWorker +) { + const payload = { + baseUrl, + token: accessToken, + username, + }; + return `confluence://${doc.metadata.url}?payload=${encryptionWorker.encrypt( + JSON.stringify(payload) + )}`; +} + +module.exports = { + loadConfluence, + fetchConfluencePage, +}; diff --git a/collector/utils/extensions/GithubRepo/RepoLoader/index.js b/collector/utils/extensions/GithubRepo/RepoLoader/index.js index c842f621..af8a1dfc 100644 --- a/collector/utils/extensions/GithubRepo/RepoLoader/index.js +++ b/collector/utils/extensions/GithubRepo/RepoLoader/index.js @@ -150,6 +150,36 @@ class RepoLoader { this.branches = [...new Set(branches.flat())]; return this.#branchPrefSort(this.branches); } + + async fetchSingleFile(sourceFilePath) { + try { + return fetch( + `https://api.github.com/repos/${this.author}/${this.project}/contents/${sourceFilePath}?ref=${this.branch}`, + { + method: "GET", + headers: { + Accept: "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + ...(!!this.accessToken + ? 
{ Authorization: `Bearer ${this.accessToken}` } + : {}), + }, + } + ) + .then((res) => { + if (res.ok) return res.json(); + throw new Error(`Failed to fetch from Github API: ${res.statusText}`); + }) + .then((json) => { + if (json.hasOwnProperty("status") || !json.hasOwnProperty("content")) + throw new Error(json?.message || "missing content"); + return atob(json.content); + }); + } catch (e) { + console.error(`RepoLoader.fetchSingleFile`, e); + return null; + } + } } module.exports = RepoLoader; diff --git a/collector/utils/extensions/GithubRepo/index.js b/collector/utils/extensions/GithubRepo/index.js index a694a8cd..f40215cb 100644 --- a/collector/utils/extensions/GithubRepo/index.js +++ b/collector/utils/extensions/GithubRepo/index.js @@ -6,7 +6,13 @@ const { v4 } = require("uuid"); const { writeToServerDocuments } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); -async function loadGithubRepo(args) { +/** + * Load in a Github Repo recursively or just the top level if no PAT is provided + * @param {object} args - forwarded request body params + * @param {import("../../../middleware/setDataSigner").ResponseWithSigner} response - Express response object with encryptionWorker + * @returns + */ +async function loadGithubRepo(args, response) { const repo = new RepoLoader(args); await repo.init(); @@ -52,7 +58,11 @@ async function loadGithubRepo(args) { docAuthor: repo.author, description: "No description found.", docSource: doc.metadata.source, - chunkSource: `link://${doc.metadata.repository}/blob/${doc.metadata.branch}/${doc.metadata.source}`, + chunkSource: generateChunkSource( + repo, + doc, + response.locals.encryptionWorker + ), published: new Date().toLocaleString(), wordCount: doc.pageContent.split(" ").length, pageContent: doc.pageContent, @@ -81,4 +91,69 @@ async function loadGithubRepo(args) { }; } -module.exports = loadGithubRepo; +/** + * Gets the page content from a specific source file in a give Github Repo, not all items in a repo. + * @returns + */ +async function fetchGithubFile({ + repoUrl, + branch, + accessToken = null, + sourceFilePath, +}) { + const repo = new RepoLoader({ + repo: repoUrl, + branch, + accessToken, + }); + await repo.init(); + + if (!repo.ready) + return { + success: false, + content: null, + reason: "Could not prepare Github repo for loading! Check URL or PAT.", + }; + + console.log( + `-- Working Github ${repo.author}/${repo.project}:${repo.branch} file:${sourceFilePath} --` + ); + const fileContent = await repo.fetchSingleFile(sourceFilePath); + if (!fileContent) { + return { + success: false, + reason: "Target file returned a null content response.", + content: null, + }; + } + + return { + success: true, + reason: null, + content: fileContent, + }; +} + +/** + * Generate the full chunkSource for a specific file so that we can resync it later. + * This data is encrypted into a single `payload` query param so we can replay credentials later + * since this was encrypted with the systems persistent password and salt. + * @param {RepoLoader} repo + * @param {import("@langchain/core/documents").Document} doc + * @param {import("../../EncryptionWorker").EncryptionWorker} encryptionWorker + * @returns {string} + */ +function generateChunkSource(repo, doc, encryptionWorker) { + const payload = { + owner: repo.author, + project: repo.project, + branch: repo.branch, + path: doc.metadata.source, + pat: !!repo.accessToken ? 
repo.accessToken : null, + }; + return `github://${repo.repo}?payload=${encryptionWorker.encrypt( + JSON.stringify(payload) + )}`; +} + +module.exports = { loadGithubRepo, fetchGithubFile }; diff --git a/collector/utils/extensions/YoutubeTranscript/index.js b/collector/utils/extensions/YoutubeTranscript/index.js index e5fa336b..c7cf7c1f 100644 --- a/collector/utils/extensions/YoutubeTranscript/index.js +++ b/collector/utils/extensions/YoutubeTranscript/index.js @@ -26,11 +26,13 @@ function validYoutubeVideoUrl(link) { return false; } -async function loadYouTubeTranscript({ url }) { +async function fetchVideoTranscriptContent({ url }) { if (!validYoutubeVideoUrl(url)) { return { success: false, reason: "Invalid URL. Should be youtu.be or youtube.com/watch.", + content: null, + metadata: {}, }; } @@ -52,6 +54,8 @@ async function loadYouTubeTranscript({ url }) { return { success: false, reason: error ?? "No transcript found for that YouTube video.", + content: null, + metadata: {}, }; } @@ -61,9 +65,30 @@ async function loadYouTubeTranscript({ url }) { return { success: false, reason: "No transcript could be parsed for that YouTube video.", + content: null, + metadata: {}, }; } + return { + success: true, + reason: null, + content, + metadata, + }; +} + +async function loadYouTubeTranscript({ url }) { + const transcriptResults = await fetchVideoTranscriptContent({ url }); + if (!transcriptResults.success) { + return { + success: false, + reason: + transcriptResults.reason || + "An unknown error occurred during transcription retrieval", + }; + } + const { content, metadata } = transcriptResults; const outFolder = slugify( `${metadata.author} YouTube transcripts` ).toLowerCase(); @@ -86,7 +111,7 @@ async function loadYouTubeTranscript({ url }) { docAuthor: metadata.author, description: metadata.description, docSource: url, - chunkSource: `link://${url}`, + chunkSource: `youtube://${url}`, published: new Date().toLocaleString(), wordCount: content.split(" ").length, pageContent: content, @@ -111,4 +136,7 @@ async function loadYouTubeTranscript({ url }) { }; } -module.exports = loadYouTubeTranscript; +module.exports = { + loadYouTubeTranscript, + fetchVideoTranscriptContent, +}; diff --git a/docker/.env.example b/docker/.env.example index 71572cc8..f682f8bf 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -2,6 +2,8 @@ SERVER_PORT=3001 STORAGE_DIR="/app/server/storage" UID='1000' GID='1000' +# SIG_KEY='passphrase' # Please generate random string at least 32 chars long. +# SIG_SALT='salt' # Please generate random string at least 32 chars long. # JWT_SECRET="my-random-string-for-seeding" # Only needed if AUTH_TOKEN is set. Please generate random string at least 12 chars long. 
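Note on the new SIG_KEY and SIG_SALT values above: each should be a random string of at least 32 characters. A minimal way to generate suitable values (assuming Node.js is available on the host; this one-liner is illustrative and not part of the diff):

node -e "console.log(require('crypto').randomBytes(32).toString('hex'))"

Run it once per variable and paste the two outputs into SIG_KEY and SIG_SALT respectively.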
########################################### diff --git a/frontend/package.json b/frontend/package.json index e584d9a3..e3e27f2b 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -24,6 +24,7 @@ "js-levenshtein": "^1.1.6", "lodash.debounce": "^4.0.8", "markdown-it": "^13.0.1", + "moment": "^2.30.1", "pluralize": "^8.0.0", "react": "^18.2.0", "react-device-detect": "^2.2.2", diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index c9ad8104..dcf3c5f9 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -57,6 +57,12 @@ const EmbedChats = lazy(() => import("@/pages/GeneralSettings/EmbedChats")); const PrivacyAndData = lazy( () => import("@/pages/GeneralSettings/PrivacyAndData") ); +const ExperimentalFeatures = lazy( + () => import("@/pages/Admin/ExperimentalFeatures") +); +const LiveDocumentSyncManage = lazy( + () => import("@/pages/Admin/ExperimentalFeatures/Features/LiveSync/manage") +); export default function App() { return ( @@ -142,6 +148,10 @@ export default function App() { path="/settings/appearance" element={} /> + } + /> } @@ -169,9 +179,16 @@ export default function App() { {/* Onboarding Flow */} } /> } /> + + {/* Experimental feature pages */} + {/* Live Document Sync feature */} + } + /> + - diff --git a/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/utils.js b/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/utils.js index 1bea2615..31454085 100644 --- a/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/utils.js +++ b/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/utils.js @@ -8,7 +8,7 @@ const uuidPattern = const jsonPattern = /\.json$/; // Function to strip UUID v4 and JSON from file names as that will impact search results. -const stripUuidAndJsonFromString = (input = "") => { +export const stripUuidAndJsonFromString = (input = "") => { return input ?.replace(uuidPattern, "") // remove v4 uuid ?.replace(jsonPattern, "") // remove trailing .json diff --git a/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx b/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx index 727331ad..929487a8 100644 --- a/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx +++ b/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx @@ -4,9 +4,10 @@ import { getFileExtension, middleTruncate, } from "@/utils/directories"; -import { ArrowUUpLeft, File, PushPin } from "@phosphor-icons/react"; +import { ArrowUUpLeft, Eye, File, PushPin } from "@phosphor-icons/react"; import Workspace from "@/models/workspace"; import showToast from "@/utils/toast"; +import System from "@/models/system"; import { Tooltip } from "react-tooltip"; export default function WorkspaceFileRow({ @@ -61,6 +62,11 @@ export default function WorkspaceFileRow({
) : (
+ { } }; - if (!item) return
; + if (!item) return
; return (
{ ); }); +const WatchForChanges = memo(({ workspace, docPath, item }) => { + const [watched, setWatched] = useState(item?.watched || false); + const [hover, setHover] = useState(false); + const watchEvent = new CustomEvent("watch_document_for_changes"); + + const updateWatchStatus = async () => { + try { + if (!watched) window.dispatchEvent(watchEvent); + const success = + await System.experimentalFeatures.liveSync.setWatchStatusForDocument( + workspace.slug, + docPath, + !watched + ); + + if (!success) { + showToast( + `Failed to ${!watched ? "watch" : "unwatch"} document.`, + "error", + { + clear: true, + } + ); + return; + } + + showToast( + `Document ${ + !watched + ? "will be watched for changes" + : "will no longer be watched for changes" + }.`, + "success", + { clear: true } + ); + setWatched(!watched); + } catch (error) { + showToast(`Failed to watch document. ${error.message}`, "error", { + clear: true, + }); + return; + } + }; + + if (!item || !item.canWatch) return
; + + return ( +
setHover(true)} + onMouseLeave={() => setHover(false)} + className="flex gap-x-2 items-center hover:bg-main-gradient p-[2px] rounded ml-2" + > + + +
+ ); +}); + const RemoveItemFromWorkspace = ({ item, onClick }) => { return (
diff --git a/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/index.jsx b/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/index.jsx index fa32bf9f..4ec3a86e 100644 --- a/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/index.jsx +++ b/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/index.jsx @@ -3,8 +3,10 @@ import { dollarFormat } from "@/utils/numbers"; import WorkspaceFileRow from "./WorkspaceFileRow"; import { memo, useEffect, useState } from "react"; import ModalWrapper from "@/components/ModalWrapper"; -import { PushPin } from "@phosphor-icons/react"; -import { SEEN_DOC_PIN_ALERT } from "@/utils/constants"; +import { Eye, PushPin } from "@phosphor-icons/react"; +import { SEEN_DOC_PIN_ALERT, SEEN_WATCH_ALERT } from "@/utils/constants"; +import paths from "@/utils/paths"; +import { Link } from "react-router-dom"; function WorkspaceDirectory({ workspace, @@ -118,6 +120,7 @@ function WorkspaceDirectory({ )}
+ ); } @@ -184,4 +187,75 @@ const PinAlert = memo(() => { ); }); +const DocumentWatchAlert = memo(() => { + const [showAlert, setShowAlert] = useState(false); + function dismissAlert() { + setShowAlert(false); + window.localStorage.setItem(SEEN_WATCH_ALERT, "1"); + window.removeEventListener(handlePinEvent); + } + + function handlePinEvent() { + if (!!window?.localStorage?.getItem(SEEN_WATCH_ALERT)) return; + setShowAlert(true); + } + + useEffect(() => { + if (!window || !!window?.localStorage?.getItem(SEEN_WATCH_ALERT)) return; + window?.addEventListener("watch_document_for_changes", handlePinEvent); + }, []); + + return ( + +
+
+
+
+ +

+ What does watching a document do? +

+
+
+
+

+ When you watch a document in AnythingLLM we will{" "} + automatically sync your document content from its original + source on regular intervals. This will automatically update the + content in every workspace where this file is managed.

+

+ This feature currently supports online-based content and will not + be available for manually uploaded documents. +

+

+ You can manage what documents are watched from the{" "} + + File manager + {" "} + admin view. +

+
+ +
+ +
+
+
+
+ ); +}); + export default memo(WorkspaceDirectory); diff --git a/frontend/src/components/SettingsSidebar/index.jsx b/frontend/src/components/SettingsSidebar/index.jsx index 9dada953..112be312 100644 --- a/frontend/src/components/SettingsSidebar/index.jsx +++ b/frontend/src/components/SettingsSidebar/index.jsx @@ -23,6 +23,7 @@ import { SplitVertical, Microphone, Robot, + Flask, } from "@phosphor-icons/react"; import useUser from "@/hooks/useUser"; import { USER_BACKGROUND_COLOR } from "@/utils/constants"; @@ -30,6 +31,7 @@ import { isMobile } from "react-device-detect"; import Footer from "../Footer"; import { Link } from "react-router-dom"; import { useTranslation } from "react-i18next"; +import showToast from "@/utils/toast"; export default function SettingsSidebar() { const { t } = useTranslation(); @@ -385,5 +387,63 @@ const SidebarOptions = ({ user = null, t }) => ( flex={true} allowedRole={["admin"]} /> + + ); + +function HoldToReveal({ children, holdForMs = 3_000 }) { + let timeout; + const [showing, setShowing] = useState( + window.localStorage.getItem( + "anythingllm_experimental_feature_preview_unlocked" + ) + ); + + useEffect(() => { + const onPress = (e) => { + if (!["Control", "Meta"].includes(e.key)) return; + timeout = setTimeout(() => { + setShowing(true); + showToast("Experimental feature previews unlocked!"); + window.localStorage.setItem( + "anythingllm_experimental_feature_preview_unlocked", + "enabled" + ); + window.removeEventListener("keypress", onPress); + window.removeEventListener("keyup", onRelease); + clearTimeout(timeout); + }, holdForMs); + }; + const onRelease = (e) => { + if (!["Control", "Meta"].includes(e.key)) return; + if (showing) { + window.removeEventListener("keypress", onPress); + window.removeEventListener("keyup", onRelease); + clearTimeout(timeout); + return; + } + clearTimeout(timeout); + }; + + if (!showing) { + window.addEventListener("keydown", onPress); + window.addEventListener("keyup", onRelease); + } + return () => { + window.removeEventListener("keydown", onPress); + window.removeEventListener("keyup", onRelease); + }; + }, []); + + if (!showing) return null; + return children; +} diff --git a/frontend/src/models/experimental/liveSync.js b/frontend/src/models/experimental/liveSync.js new file mode 100644 index 00000000..54091418 --- /dev/null +++ b/frontend/src/models/experimental/liveSync.js @@ -0,0 +1,59 @@ +import { API_BASE } from "@/utils/constants"; +import { baseHeaders } from "@/utils/request"; + +const LiveDocumentSync = { + featureFlag: "experimental_live_file_sync", + toggleFeature: async function (updatedStatus = false) { + return await fetch(`${API_BASE}/experimental/toggle-live-sync`, { + method: "POST", + headers: baseHeaders(), + body: JSON.stringify({ updatedStatus }), + }) + .then((res) => { + if (!res.ok) throw new Error("Could not update status."); + return true; + }) + .then((res) => res) + .catch((e) => { + console.error(e); + return false; + }); + }, + queues: async function () { + return await fetch(`${API_BASE}/experimental/live-sync/queues`, { + headers: baseHeaders(), + }) + .then((res) => { + if (!res.ok) throw new Error("Could not update status."); + return res.json(); + }) + .then((res) => res?.queues || []) + .catch((e) => { + console.error(e); + return []; + }); + }, + + // Should be in Workspaces but is here for now while in preview + setWatchStatusForDocument: async function (slug, docPath, watchStatus) { + return fetch(`${API_BASE}/workspace/${slug}/update-watch-status`, { + method: "POST", + headers: 
baseHeaders(), + body: JSON.stringify({ docPath, watchStatus }), + }) + .then((res) => { + if (!res.ok) { + throw new Error( + res.statusText || "Error setting watch status for document." + ); + } + return true; + }) + .catch((e) => { + console.error(e); + return false; + }); + }, +}; + +export default LiveDocumentSync; diff --git a/frontend/src/models/system.js b/frontend/src/models/system.js index d2252be1..b922457b 100644 --- a/frontend/src/models/system.js +++ b/frontend/src/models/system.js @@ -1,6 +1,7 @@ import { API_BASE, AUTH_TIMESTAMP, fullApiUrl } from "@/utils/constants"; import { baseHeaders, safeJsonParse } from "@/utils/request"; import DataConnector from "./dataConnector"; +import LiveDocumentSync from "./experimental/liveSync"; const System = { cacheKeys: { @@ -675,6 +676,9 @@ const System = { return false; }); }, + experimentalFeatures: { + liveSync: LiveDocumentSync, + }, }; export default System; diff --git a/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/manage/DocumentSyncQueueRow/index.jsx b/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/manage/DocumentSyncQueueRow/index.jsx new file mode 100644 index 00000000..30795628 --- /dev/null +++ b/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/manage/DocumentSyncQueueRow/index.jsx @@ -0,0 +1,44 @@ +import { useRef } from "react"; +import { Trash } from "@phosphor-icons/react"; +import { stripUuidAndJsonFromString } from "@/components/Modals/ManageWorkspace/Documents/Directory/utils"; +import moment from "moment"; +import System from "@/models/system"; + +export default function DocumentSyncQueueRow({ queue }) { + const rowRef = useRef(null); + const handleDelete = async () => { + rowRef?.current?.remove(); + await System.experimentalFeatures.liveSync.setWatchStatusForDocument( + queue.workspaceDoc.workspace.slug, + queue.workspaceDoc.docpath, + false + ); + }; + + return ( + <> + + + {stripUuidAndJsonFromString(queue.workspaceDoc.filename)} + + {moment(queue.lastSyncedAt).fromNow()} + + {moment(queue.nextSyncAt).format("lll")} + ({moment(queue.nextSyncAt).fromNow()}) + + {moment(queue.createdAt).format("lll")} + + + + + + ); +} diff --git a/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/manage/index.jsx b/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/manage/index.jsx new file mode 100644 index 00000000..027ebb7e --- /dev/null +++ b/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/manage/index.jsx @@ -0,0 +1,92 @@ +import { useEffect, useState } from "react"; +import Sidebar from "@/components/Sidebar"; +import { isMobile } from "react-device-detect"; +import * as Skeleton from "react-loading-skeleton"; +import "react-loading-skeleton/dist/skeleton.css"; +import System from "@/models/system"; +import DocumentSyncQueueRow from "./DocumentSyncQueueRow"; + +export default function LiveDocumentSyncManager() { + return ( +
+ +
+
+
+
+

+ Watched documents +

+
+

+ These are all the documents that are currently being watched in + your instance. The content of these documents will be periodically + synced. +

+
+ +
+
+
+ ); +} + +function WatchedDocumentsContainer() { + const [loading, setLoading] = useState(true); + const [queues, setQueues] = useState([]); + + useEffect(() => { + async function fetchData() { + const _queues = await System.experimentalFeatures.liveSync.queues(); + setQueues(_queues); + setLoading(false); + } + fetchData(); + }, []); + + if (loading) { + return ( + + ); + } + + return ( + + + + + + + + + + + + {queues.map((queue) => ( + + ))} + +
+ Document Name + + Last Synced + + Time until next refresh + + Created On + + {" "} +
+ ); +} diff --git a/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/toggle.jsx b/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/toggle.jsx new file mode 100644 index 00000000..69a758d5 --- /dev/null +++ b/frontend/src/pages/Admin/ExperimentalFeatures/Features/LiveSync/toggle.jsx @@ -0,0 +1,90 @@ +import System from "@/models/system"; +import paths from "@/utils/paths"; +import showToast from "@/utils/toast"; +import { ArrowSquareOut } from "@phosphor-icons/react"; +import { useState } from "react"; +import { Link } from "react-router-dom"; + +export default function LiveSyncToggle({ enabled = false, onToggle }) { + const [status, setStatus] = useState(enabled); + + async function toggleFeatureFlag() { + const updated = await System.experimentalFeatures.liveSync.toggleFeature( + !status + ); + if (!updated) { + showToast("Failed to update status of feature.", "error", { + clear: true, + }); + return false; + } + + setStatus(!status); + showToast( + `Live document content sync has been ${ + !status ? "enabled" : "disabled" + }.`, + "success", + { clear: true } + ); + onToggle(); + } + + return ( +
+
+
+

+ Automatic Document Content Sync +

+ +
+
+

+ Enable the ability to specify a document to be "watched". Watched + documents' content will be regularly fetched and updated in + AnythingLLM.

+

+ Watched documents will automatically update in all workspaces they + are referenced in at the same time.

+

+ This feature only applies to web-based content, such as websites, + Confluence, YouTube, and GitHub files. +

+
+
+
+ +
+
+ ); +} diff --git a/frontend/src/pages/Admin/ExperimentalFeatures/features.js b/frontend/src/pages/Admin/ExperimentalFeatures/features.js new file mode 100644 index 00000000..7dc8251e --- /dev/null +++ b/frontend/src/pages/Admin/ExperimentalFeatures/features.js @@ -0,0 +1,9 @@ +import LiveSyncToggle from "./Features/LiveSync/toggle"; + +export const configurableFeatures = { + experimental_live_file_sync: { + title: "Live Document Sync", + component: LiveSyncToggle, + key: "experimental_live_file_sync", + }, +}; diff --git a/frontend/src/pages/Admin/ExperimentalFeatures/index.jsx b/frontend/src/pages/Admin/ExperimentalFeatures/index.jsx new file mode 100644 index 00000000..0652f26d --- /dev/null +++ b/frontend/src/pages/Admin/ExperimentalFeatures/index.jsx @@ -0,0 +1,280 @@ +import { useEffect, useState } from "react"; +import Sidebar from "@/components/SettingsSidebar"; +import { isMobile } from "react-device-detect"; +import Admin from "@/models/admin"; +import { FullScreenLoader } from "@/components/Preloader"; +import { CaretRight, Flask } from "@phosphor-icons/react"; +import { configurableFeatures } from "./features"; +import ModalWrapper from "@/components/ModalWrapper"; +import paths from "@/utils/paths"; +import showToast from "@/utils/toast"; + +export default function ExperimentalFeatures() { + const [featureFlags, setFeatureFlags] = useState({}); + const [loading, setLoading] = useState(true); + const [selectedFeature, setSelectedFeature] = useState( + "experimental_live_file_sync" + ); + + useEffect(() => { + async function fetchSettings() { + setLoading(true); + const { settings } = await Admin.systemPreferences(); + setFeatureFlags(settings?.feature_flags ?? {}); + setLoading(false); + } + fetchSettings(); + }, []); + + const refresh = async () => { + const { settings } = await Admin.systemPreferences(); + setFeatureFlags(settings?.feature_flags ?? {}); + }; + + if (loading) { + return ( +
+ +
+ ); + } + + return ( + +
+ {/* Feature settings nav */} +
+
+ +

Experimental Features

+
+ {/* Feature list */} + featureFlags[flag] + )} + /> +
+ + {/* Selected feature setting panel */} + +
+
+ {selectedFeature ? ( + + ) : ( +
+ +

Select an experimental feature

+
+ )} +
+
+
+
+
+ ); +} + +function FeatureLayout({ children }) { + return ( +
+ +
+ {children} +
+
+ ); +} + +function FeatureList({ + features = [], + selectedFeature = null, + handleClick = null, + activeFeatures = [], +}) { + if (Object.keys(features).length === 0) return null; + + return ( +
+ {Object.entries(features).map(([feature, settings], index) => ( +
handleClick?.(feature)} + > +
{settings.title}
+
+
+ {activeFeatures.includes(settings.key) ? "On" : "Off"} +
+ +
+
+ ))} +
+ ); +} + +function SelectedFeatureComponent({ feature, settings, refresh }) { + const Component = feature?.component; + return Component ? ( + + ) : null; +} + +function FeatureVerification({ children }) { + if ( + !window.localStorage.getItem("anythingllm_tos_experimental_feature_set") + ) { + function acceptTos(e) { + e.preventDefault(); + + window.localStorage.setItem( + "anythingllm_tos_experimental_feature_set", + "accepted" + ); + showToast( + "Experimental Feature set enabled. Reloading the page.", + "success" + ); + setTimeout(() => { + window.location.reload(); + }, 2_500); + return; + } + + return ( + <> + +
+
+
+

+ Terms of use for experimental features +

+
+
+
+

+ Experimental features of AnythingLLM are features that we + are piloting and are opt-in. We will proactively + warn you of any potential concerns should any + exist prior to approval of any feature.

+ +
+

+ Use of any feature on this page can result in, but is not + limited to, the following possibilities.

+
    +
  • Loss of data.
  • +
  • Change in quality of results.
  • +
  • Increased storage.
  • +
  • Increased resource consumption.
  • +
  • + Increased cost or use of any connected LLM or embedding + provider. +
  • +
  • Potential bugs or issues using AnythingLLM.
  • +
+
+ +
+

+ Use of an experimental feature also comes with the + following non-exhaustive list of conditions.

+
    +
  • Feature may not exist in future updates.
  • +
  • The feature being used is not currently stable.
  • +
  • + The feature may not be available in future versions, + configurations, or subscriptions of AnythingLLM. +
  • +
  • + Your privacy settings will be honored with use of + any beta feature. +
  • +
  • These conditions may change in future updates.
  • +
+
+ +

+ Access to any features requires approval of this modal. If + you would like to read more you can refer to{" "} + + docs.useanything.com + {" "} + or email{" "} + + team@mintplexlabs.com + +

+
+
+
+ + Reject & Close + + +
+
+
+
+ {children} + + ); + } + return <>{children}; +} diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js index 6fd29534..3f637617 100644 --- a/frontend/src/utils/constants.js +++ b/frontend/src/utils/constants.js @@ -5,6 +5,7 @@ export const AUTH_TOKEN = "anythingllm_authToken"; export const AUTH_TIMESTAMP = "anythingllm_authTimestamp"; export const COMPLETE_QUESTIONNAIRE = "anythingllm_completed_questionnaire"; export const SEEN_DOC_PIN_ALERT = "anythingllm_pinned_document_alert"; +export const SEEN_WATCH_ALERT = "anythingllm_watched_document_alert"; export const USER_BACKGROUND_COLOR = "bg-historical-msg-user"; export const AI_BACKGROUND_COLOR = "bg-historical-msg-system"; diff --git a/frontend/src/utils/paths.js b/frontend/src/utils/paths.js index b1ffcb2b..8ee924fc 100644 --- a/frontend/src/utils/paths.js +++ b/frontend/src/utils/paths.js @@ -135,5 +135,13 @@ export default { embedChats: () => { return `/settings/embed-chats`; }, + experimental: () => { + return `/settings/beta-features`; + }, + }, + experimental: { + liveDocumentSync: { + manage: () => `/settings/beta-features/live-document-sync/manage`, + }, }, }; diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 82d68d02..348541b8 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -2464,6 +2464,11 @@ minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2: dependencies: brace-expansion "^1.1.7" +moment@^2.30.1: + version "2.30.1" + resolved "https://registry.yarnpkg.com/moment/-/moment-2.30.1.tgz#f8c91c07b7a786e30c59926df530b4eac96974ae" + integrity sha512-uEmtNhbDOrWPFS+hdjFCBfy9f2YoyzRpwcl+DqpC6taX21FzsTLQVbMV/W7PzNSX6x/bhC1zA3c2UQ5NzH6how== + ms@2.1.2: version "2.1.2" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" diff --git a/server/.env.example b/server/.env.example index 3a4fb072..145e00da 100644 --- a/server/.env.example +++ b/server/.env.example @@ -1,5 +1,7 @@ SERVER_PORT=3001 JWT_SECRET="my-random-string-for-seeding" # Please generate random string at least 12 chars long. +SIG_KEY='passphrase' # Please generate random string at least 32 chars long. +SIG_SALT='salt' # Please generate random string at least 32 chars long. ########################################### ######## LLM API SElECTION ################ diff --git a/server/endpoints/admin.js b/server/endpoints/admin.js index 1bdfd8b1..67d7210f 100644 --- a/server/endpoints/admin.js +++ b/server/endpoints/admin.js @@ -358,6 +358,7 @@ function adminEndpoints(app) { custom_app_name: (await SystemSettings.get({ label: "custom_app_name" }))?.value || null, + feature_flags: (await SystemSettings.getFeatureFlags()) || {}, }; response.status(200).json({ settings }); } catch (e) { diff --git a/server/endpoints/experimental/index.js b/server/endpoints/experimental/index.js new file mode 100644 index 00000000..e452aff3 --- /dev/null +++ b/server/endpoints/experimental/index.js @@ -0,0 +1,10 @@ +const { liveSyncEndpoints } = require("./liveSync"); + +// All endpoints here are not stable and can move around - have breaking changes +// or are opt-in features that are not fully released. +// When a feature is promoted it should be removed from here and added to the appropriate scope. 
+function experimentalEndpoints(router) { + liveSyncEndpoints(router); +} + +module.exports = { experimentalEndpoints }; diff --git a/server/endpoints/experimental/liveSync.js b/server/endpoints/experimental/liveSync.js new file mode 100644 index 00000000..2a22d9a9 --- /dev/null +++ b/server/endpoints/experimental/liveSync.js @@ -0,0 +1,114 @@ +const { DocumentSyncQueue } = require("../../models/documentSyncQueue"); +const { Document } = require("../../models/documents"); +const { EventLogs } = require("../../models/eventLogs"); +const { SystemSettings } = require("../../models/systemSettings"); +const { Telemetry } = require("../../models/telemetry"); +const { reqBody } = require("../../utils/http"); +const { + featureFlagEnabled, +} = require("../../utils/middleware/featureFlagEnabled"); +const { + flexUserRoleValid, + ROLES, +} = require("../../utils/middleware/multiUserProtected"); +const { validWorkspaceSlug } = require("../../utils/middleware/validWorkspace"); +const { validatedRequest } = require("../../utils/middleware/validatedRequest"); + +function liveSyncEndpoints(app) { + if (!app) return; + + app.post( + "/experimental/toggle-live-sync", + [validatedRequest, flexUserRoleValid([ROLES.admin])], + async (request, response) => { + try { + const { updatedStatus = false } = reqBody(request); + const newStatus = + SystemSettings.validations.experimental_live_file_sync(updatedStatus); + const currentStatus = + (await SystemSettings.get({ label: "experimental_live_file_sync" })) + ?.value || "disabled"; + if (currentStatus === newStatus) + return response + .status(200) + .json({ liveSyncEnabled: newStatus === "enabled" }); + + // Already validated earlier - so can hot update. + await SystemSettings._updateSettings({ + experimental_live_file_sync: newStatus, + }); + if (newStatus === "enabled") { + await Telemetry.sendTelemetry("experimental_feature_enabled", { + feature: "live_file_sync", + }); + await EventLogs.logEvent("experimental_feature_enabled", { + feature: "live_file_sync", + }); + DocumentSyncQueue.bootWorkers(); + } else { + DocumentSyncQueue.killWorkers(); + } + + response.status(200).json({ liveSyncEnabled: newStatus === "enabled" }); + } catch (e) { + console.error(e); + response.status(500).end(); + } + } + ); + + app.get( + "/experimental/live-sync/queues", + [ + validatedRequest, + flexUserRoleValid([ROLES.admin]), + featureFlagEnabled(DocumentSyncQueue.featureKey), + ], + async (_, response) => { + const queues = await DocumentSyncQueue.where( + {}, + null, + { createdAt: "asc" }, + { + workspaceDoc: { + include: { + workspace: true, + }, + }, + } + ); + response.status(200).json({ queues }); + } + ); + + // Should be in workspace routes, but is here for now. 
+ app.post( + "/workspace/:slug/update-watch-status", + [ + validatedRequest, + flexUserRoleValid([ROLES.admin, ROLES.manager]), + validWorkspaceSlug, + featureFlagEnabled(DocumentSyncQueue.featureKey), + ], + async (request, response) => { + try { + const { docPath, watchStatus = false } = reqBody(request); + const workspace = response.locals.workspace; + + const document = await Document.get({ + workspaceId: workspace.id, + docpath: docPath, + }); + if (!document) return response.sendStatus(404).end(); + + await DocumentSyncQueue.toggleWatchStatus(document, watchStatus); + return response.status(200).end(); + } catch (error) { + console.error("Error processing the watch status update:", error); + return response.status(500).end(); + } + } + ); +} + +module.exports = { liveSyncEndpoints }; diff --git a/server/index.js b/server/index.js index 59d8fec6..987a0cab 100644 --- a/server/index.js +++ b/server/index.js @@ -22,6 +22,7 @@ const { bootHTTP, bootSSL } = require("./utils/boot"); const { workspaceThreadEndpoints } = require("./endpoints/workspaceThreads"); const { documentEndpoints } = require("./endpoints/document"); const { agentWebsocket } = require("./endpoints/agentWebsocket"); +const { experimentalEndpoints } = require("./endpoints/experimental"); const app = express(); const apiRouter = express.Router(); const FILE_LIMIT = "3GB"; @@ -54,6 +55,7 @@ embedManagementEndpoints(apiRouter); utilEndpoints(apiRouter); documentEndpoints(apiRouter); agentWebsocket(apiRouter); +experimentalEndpoints(apiRouter); developerEndpoints(app, apiRouter); // Externally facing embedder endpoints diff --git a/server/jobs/helpers/index.js b/server/jobs/helpers/index.js new file mode 100644 index 00000000..c2dae213 --- /dev/null +++ b/server/jobs/helpers/index.js @@ -0,0 +1,30 @@ +const path = require('node:path'); +const fs = require('node:fs'); +const { parentPort } = require('node:worker_threads'); +const documentsPath = + process.env.NODE_ENV === "development" + ? path.resolve(__dirname, `../../storage/documents`) + : path.resolve(process.env.STORAGE_DIR, `documents`); + +function log(stringContent = '') { + if (parentPort) parentPort.postMessage(stringContent); + else console.log(`parentPort is undefined. 
Debug: ${stringContent}`) +} + +function conclude() { + if (parentPort) parentPort.postMessage('done'); + else process.exit(0); +} + +function updateSourceDocument(docPath = null, jsonContent = {}) { + const destinationFilePath = path.resolve(documentsPath, docPath); + fs.writeFileSync(destinationFilePath, JSON.stringify(jsonContent, null, 4), { + encoding: "utf-8", + }); +} + +module.exports = { + log, + conclude, + updateSourceDocument, +} \ No newline at end of file diff --git a/server/jobs/sync-watched-documents.js b/server/jobs/sync-watched-documents.js new file mode 100644 index 00000000..c4f235a0 --- /dev/null +++ b/server/jobs/sync-watched-documents.js @@ -0,0 +1,153 @@ +const { Document } = require('../models/documents.js'); +const { DocumentSyncQueue } = require('../models/documentSyncQueue.js'); +const { CollectorApi } = require('../utils/collectorApi'); +const { fileData } = require("../utils/files"); +const { log, conclude, updateSourceDocument } = require('./helpers/index.js'); +const { getVectorDbClass } = require('../utils/helpers/index.js'); +const { DocumentSyncRun } = require('../models/documentSyncRun.js'); + +(async () => { + try { + const queuesToProcess = await DocumentSyncQueue.staleDocumentQueues(); + if (queuesToProcess.length === 0) { + log('No outstanding documents to sync. Exiting.'); + return; + } + + const collector = new CollectorApi(); + if (!(await collector.online())) { + log('Could not reach collector API. Exiting.'); + return; + } + + log(`${queuesToProcess.length} watched documents have been found to be stale and will be updated now.`) + for (const queue of queuesToProcess) { + let newContent = null; + const document = queue.workspaceDoc; + const workspace = document.workspace; + const { metadata, type, source } = Document.parseDocumentTypeAndSource(document); + + if (!metadata || !DocumentSyncQueue.validFileTypes.includes(type)) { + // Document is either broken, invalid, or not supported so drop it from future queues. + log(`Document ${document.filename} has no metadata, is broken, or invalid and has been removed from all future runs.`) + await DocumentSyncQueue.unwatch(document); + continue; + } + + if (type === 'link' || type === 'youtube') { + const response = await collector.forwardExtensionRequest({ + endpoint: "/ext/resync-source-document", + method: "POST", + body: JSON.stringify({ + type, + options: { link: source } + }) + }); + newContent = response?.content; + } + + if (type === 'confluence' || type === 'github') { + const response = await collector.forwardExtensionRequest({ + endpoint: "/ext/resync-source-document", + method: "POST", + body: JSON.stringify({ + type, + options: { chunkSource: metadata.chunkSource } + }) + }); + newContent = response?.content; + } + + if (!newContent) { + // Check if the last "x" runs were all failures (not exits!). If so - remove the job entirely since it is broken. + const failedRunCount = (await DocumentSyncRun.where({ queueId: queue.id }, DocumentSyncQueue.maxRepeatFailures, { createdAt: 'desc' })).filter((run) => run.status === DocumentSyncRun.statuses.failed).length; + if (failedRunCount >= DocumentSyncQueue.maxRepeatFailures) { + log(`Document ${document.filename} has failed to refresh ${failedRunCount} times continuously and will now be removed from the watched document set.`) + await DocumentSyncQueue.unwatch(document); + continue; + } + + log(`Failed to get a new content response from collector for source ${source}. Skipping, but will retry next worker interval. Attempt ${failedRunCount === 0 ? 
1 : failedRunCount}/${DocumentSyncQueue.maxRepeatFailures}`); + await DocumentSyncQueue.saveRun(queue.id, DocumentSyncRun.statuses.failed, { filename: document.filename, workspacesModified: [], reason: 'No content found.' }) + continue; + } + + const currentDocumentData = await fileData(document.docpath) + if (currentDocumentData.pageContent === newContent) { + const nextSync = DocumentSyncQueue.calcNextSync(queue) + log(`Source ${source} is unchanged and will be skipped. Next sync will be ${nextSync.toLocaleString()}.`); + await DocumentSyncQueue._update( + queue.id, + { + lastSyncedAt: new Date().toISOString(), + nextSyncAt: nextSync.toISOString(), + } + ); + await DocumentSyncQueue.saveRun(queue.id, DocumentSyncRun.statuses.exited, { filename: document.filename, workspacesModified: [], reason: 'Content unchanged.' }) + continue; + } + + // update the defined document and workspace vectorDB with the latest information + // it will skip cache and create a new vectorCache file. + const vectorDatabase = getVectorDbClass(); + await vectorDatabase.deleteDocumentFromNamespace(workspace.slug, document.docId); + await vectorDatabase.addDocumentToNamespace( + workspace.slug, + { ...currentDocumentData, pageContent: newContent, docId: document.docId }, + document.docpath, + true + ); + updateSourceDocument( + document.docpath, + { + ...currentDocumentData, + pageContent: newContent, + docId: document.docId, + published: (new Date).toLocaleString(), + // Todo: Update word count and token_estimate? + } + ) + log(`Workspace "${workspace.name}" vectors of ${source} updated. Document and vector cache updated.`) + + + // Now we can bloom the results to all matching documents in all other workspaces + const workspacesModified = [workspace.slug]; + const moreReferences = await Document.where({ + id: { not: document.id }, + filename: document.filename + }, null, null, { workspace: true }); + + if (moreReferences.length !== 0) { + log(`${source} is referenced in ${moreReferences.length} other workspaces. Updating those workspaces as well...`) + for (const additionalDocumentRef of moreReferences) { + const additionalWorkspace = additionalDocumentRef.workspace; + workspacesModified.push(additionalWorkspace.slug); + + await vectorDatabase.deleteDocumentFromNamespace(additionalWorkspace.slug, additionalDocumentRef.docId); + await vectorDatabase.addDocumentToNamespace( + additionalWorkspace.slug, + { ...currentDocumentData, pageContent: newContent, docId: additionalDocumentRef.docId }, + additionalDocumentRef.docpath, + ); + log(`Workspace "${additionalWorkspace.name}" vectors for ${source} was also updated with the new content from cache.`) + } + } + + const nextRefresh = DocumentSyncQueue.calcNextSync(queue); + log(`${source} has been refreshed in all workspaces it is currently referenced in. 
Next refresh will be ${nextRefresh.toLocaleString()}.`) + await DocumentSyncQueue._update( + queue.id, + { + lastSyncedAt: new Date().toISOString(), + nextSyncAt: nextRefresh.toISOString(), + } + ); + await DocumentSyncQueue.saveRun(queue.id, DocumentSyncRun.statuses.success, { filename: document.filename, workspacesModified }) + } + } catch (e) { + console.error(e) + log(`errored with ${e.message}`) + } finally { + conclude(); + } +})(); diff --git a/server/models/documentSyncQueue.js b/server/models/documentSyncQueue.js new file mode 100644 index 00000000..b034643c --- /dev/null +++ b/server/models/documentSyncQueue.js @@ -0,0 +1,237 @@ +const { BackgroundService } = require("../utils/BackgroundWorkers"); +const prisma = require("../utils/prisma"); +const { SystemSettings } = require("./systemSettings"); +const { Telemetry } = require("./telemetry"); + +/** + * @typedef {('link'|'youtube'|'confluence'|'github')} validFileType + */ + +const DocumentSyncQueue = { + featureKey: "experimental_live_file_sync", + // update the validFileTypes and .canWatch properties when adding elements here. + validFileTypes: ["link", "youtube", "confluence", "github"], + defaultStaleAfter: 604800000, + maxRepeatFailures: 5, // How many times a run can fail in a row before pruning. + writable: [], + + bootWorkers: function () { + new BackgroundService().boot(); + }, + + killWorkers: function () { + new BackgroundService().stop(); + }, + + /** Check is the Document Sync/Watch feature is enabled and can be used. */ + enabled: async function () { + return ( + (await SystemSettings.get({ label: this.featureKey }))?.value === + "enabled" + ); + }, + + /** + * @param {import("@prisma/client").document_sync_queues} queueRecord - queue record to calculate for + */ + calcNextSync: function (queueRecord) { + return new Date(Number(new Date()) + queueRecord.staleAfterMs); + }, + + canWatch: function ({ title, chunkSource = null } = {}) { + if (chunkSource.startsWith("link://") && title.endsWith(".html")) + return true; // If is web-link material (prior to feature most chunkSources were links://) + if (chunkSource.startsWith("youtube://")) return true; // If is a youtube link + if (chunkSource.startsWith("confluence://")) return true; // If is a confluence document link + if (chunkSource.startsWith("github://")) return true; // If is a Github file reference + return false; + }, + + /** + * Creates Queue record and updates document watch status to true on Document record + * @param {import("@prisma/client").workspace_documents} document - document record to watch, must have `id` + */ + watch: async function (document = null) { + if (!document) return false; + try { + const { Document } = require("./documents"); + + // Get all documents that are watched and share the same unique filename. If this value is + // non-zero then we exit early so that we do not have duplicated watch queues for the same file + // across many workspaces. 
+ const workspaceDocIds = ( + await Document.where({ filename: document.filename, watched: true }) + ).map((rec) => rec.id); + const hasRecords = + (await this.count({ workspaceDocId: { in: workspaceDocIds } })) > 0; + if (hasRecords) + throw new Error( + `Cannot watch this document again - it already has a queue set.` + ); + + const queue = await prisma.document_sync_queues.create({ + data: { + workspaceDocId: document.id, + nextSyncAt: new Date(Number(new Date()) + this.defaultStaleAfter), + }, + }); + await Document._updateAll( + { filename: document.filename }, + { watched: true } + ); + return queue || null; + } catch (error) { + console.error(error.message); + return null; + } + }, + + /** + * Deletes Queue record and updates document watch status to false on Document record + * @param {import("@prisma/client").workspace_documents} document - document record to unwatch, must have `id` + */ + unwatch: async function (document = null) { + if (!document) return false; + try { + const { Document } = require("./documents"); + + // We could have been given a document to unwatch which is a clone of one that is already being watched but by another workspaceDocument id. + // so in this instance we need to delete any queues related to this document by any WorkspaceDocumentId it is referenced by. + const workspaceDocIds = ( + await Document.where({ filename: document.filename, watched: true }) + ).map((rec) => rec.id); + await this.delete({ workspaceDocId: { in: workspaceDocIds } }); + await Document._updateAll( + { filename: document.filename }, + { watched: false } + ); + return true; + } catch (error) { + console.error(error.message); + return false; + } + }, + + _update: async function (id = null, data = {}) { + if (!id) throw new Error("No id provided for update"); + + try { + await prisma.document_sync_queues.update({ + where: { id }, + data, + }); + return true; + } catch (error) { + console.error(error.message); + return false; + } + }, + + get: async function (clause = {}) { + try { + const queue = await prisma.document_sync_queues.findFirst({ + where: clause, + }); + return queue || null; + } catch (error) { + console.error(error.message); + return null; + } + }, + + where: async function ( + clause = {}, + limit = null, + orderBy = null, + include = {} + ) { + try { + const results = await prisma.document_sync_queues.findMany({ + where: clause, + ...(limit !== null ? { take: limit } : {}), + ...(orderBy !== null ? { orderBy } : {}), + ...(include !== null ? { include } : {}), + }); + return results; + } catch (error) { + console.error(error.message); + return []; + } + }, + + count: async function (clause = {}, limit = null) { + try { + const count = await prisma.document_sync_queues.count({ + where: clause, + ...(limit !== null ? 
{ take: limit } : {}), + }); + return count; + } catch (error) { + console.error("FAILED TO COUNT DOCUMENTS.", error.message); + return 0; + } + }, + + delete: async function (clause = {}) { + try { + await prisma.document_sync_queues.deleteMany({ where: clause }); + return true; + } catch (error) { + console.error(error.message); + return false; + } + }, + + /** + * Gets the "stale" queues where the queue's nextSyncAt is less than the current time + * @returns {Promise<( + * import("@prisma/client").document_sync_queues & + * { workspaceDoc: import("@prisma/client").workspace_documents & + * { workspace: import("@prisma/client").workspaces } + * })[]}>} + */ + staleDocumentQueues: async function () { + const queues = await this.where( + { + nextSyncAt: { + lte: new Date().toISOString(), + }, + }, + null, + null, + { + workspaceDoc: { + include: { + workspace: true, + }, + }, + } + ); + return queues; + }, + + saveRun: async function (queueId = null, status = null, result = {}) { + const { DocumentSyncRun } = require("./documentSyncRun"); + return DocumentSyncRun.save(queueId, status, result); + }, + + /** + * Updates document to be watched/unwatched & creates or deletes any queue records and updated Document record `watched` status + * @param {import("@prisma/client").workspace_documents} documentRecord + * @param {boolean} watchStatus - indicate if queue record should be created or not. + * @returns + */ + toggleWatchStatus: async function (documentRecord, watchStatus = false) { + if (!watchStatus) { + await Telemetry.sendTelemetry("document_unwatched"); + await this.unwatch(documentRecord); + return; + } + + await this.watch(documentRecord); + await Telemetry.sendTelemetry("document_watched"); + return; + }, +}; + +module.exports = { DocumentSyncQueue }; diff --git a/server/models/documentSyncRun.js b/server/models/documentSyncRun.js new file mode 100644 index 00000000..94fcf3ff --- /dev/null +++ b/server/models/documentSyncRun.js @@ -0,0 +1,88 @@ +const prisma = require("../utils/prisma"); +const DocumentSyncRun = { + statuses: { + unknown: "unknown", + exited: "exited", + failed: "failed", + success: "success", + }, + + save: async function (queueId = null, status = null, result = {}) { + try { + if (!this.statuses.hasOwnProperty(status)) + throw new Error( + `DocumentSyncRun status ${status} is not a valid status.` + ); + + const run = await prisma.document_sync_executions.create({ + data: { + queueId: Number(queueId), + status: String(status), + result: JSON.stringify(result), + }, + }); + return run || null; + } catch (error) { + console.error(error.message); + return null; + } + }, + + get: async function (clause = {}) { + try { + const queue = await prisma.document_sync_executions.findFirst({ + where: clause, + }); + return queue || null; + } catch (error) { + console.error(error.message); + return null; + } + }, + + where: async function ( + clause = {}, + limit = null, + orderBy = null, + include = {} + ) { + try { + const results = await prisma.document_sync_executions.findMany({ + where: clause, + ...(limit !== null ? { take: limit } : {}), + ...(orderBy !== null ? { orderBy } : {}), + ...(include !== null ? { include } : {}), + }); + return results; + } catch (error) { + console.error(error.message); + return []; + } + }, + + count: async function (clause = {}, limit = null, orderBy = {}) { + try { + const count = await prisma.document_sync_executions.count({ + where: clause, + ...(limit !== null ? { take: limit } : {}), + ...(orderBy !== null ? 
{ orderBy } : {}), + }); + return count; + } catch (error) { + console.error("FAILED TO COUNT DOCUMENTS.", error.message); + return 0; + } + }, + + delete: async function (clause = {}) { + try { + await prisma.document_sync_executions.deleteMany({ where: clause }); + return true; + } catch (error) { + console.error(error.message); + return false; + } + }, +}; + +module.exports = { DocumentSyncRun }; diff --git a/server/models/documents.js b/server/models/documents.js index 6c09651c..80d4fd85 100644 --- a/server/models/documents.js +++ b/server/models/documents.js @@ -3,9 +3,30 @@ const { getVectorDbClass } = require("../utils/helpers"); const prisma = require("../utils/prisma"); const { Telemetry } = require("./telemetry"); const { EventLogs } = require("./eventLogs"); +const { safeJsonParse } = require("../utils/http"); const Document = { - writable: ["pinned"], + writable: ["pinned", "watched", "lastUpdatedAt"], + /** + * @param {import("@prisma/client").workspace_documents} document - Document PrismaRecord + * @returns {{ + * metadata: (null|object), + * type: import("./documentSyncQueue.js").validFileType, + * source: string + * }} + */ + parseDocumentTypeAndSource: function (document) { + const metadata = safeJsonParse(document.metadata, null); + if (!metadata) return { metadata: null, type: null, source: null }; + + // Parse the correct type of source and its original source path. + const idx = metadata.chunkSource.indexOf("://"); + const [type, source] = [ + metadata.chunkSource.slice(0, idx), + metadata.chunkSource.slice(idx + 3), + ]; + return { metadata, type, source: this._stripSource(source, type) }; + }, forWorkspace: async function (workspaceId = null) { if (!workspaceId) return []; @@ -36,7 +57,7 @@ const Document = { } }, - getPins: async function (clause = {}) { + getOnlyWorkspaceIds: async function (clause = {}) { try { const workspaceIds = await prisma.workspace_documents.findMany({ where: clause, @@ -44,19 +65,25 @@ const Document = { workspaceId: true, }, }); - return workspaceIds.map((pin) => pin.workspaceId) || []; + return workspaceIds.map((record) => record.workspaceId) || []; } catch (error) { console.error(error.message); return []; } }, - where: async function (clause = {}, limit = null, orderBy = null) { + where: async function ( + clause = {}, + limit = null, + orderBy = null, + include = null + ) { try { const results = await prisma.workspace_documents.findMany({ where: clause, ...(limit !== null ? { take: limit } : {}), ...(orderBy !== null ? { orderBy } : {}), + ...(include !== null ? { include } : {}), }); return results; } catch (error) { @@ -202,6 +229,18 @@ const Document = { return { document: null, message: error.message }; } }, + _updateAll: async function (clause = {}, data = {}) { + try { + await prisma.workspace_documents.updateMany({ + where: clause, + data, + }); + return true; + } catch (error) { + console.error(error.message); + return false; + } + }, content: async function (docId) { if (!docId) throw new Error("No workspace docId provided!"); const document = await this.get({ docId: String(docId) }); @@ -211,6 +250,22 @@ const Document = { const data = await fileData(document.docpath); return { title: data.title, content: data.pageContent }; }, + contentByDocPath: async function (docPath) { + const { fileData } = require("../utils/files"); + const data = await fileData(docPath); + return { title: data.title, content: data.pageContent }; + }, + + // Some data sources have encoded params in them we don't want to log - so strip those details. 
+ _stripSource: function (sourceString, type) { + if (["confluence", "github"].includes(type)) { + const _src = new URL(sourceString); + _src.search = ""; // remove all search params that are encoded for resync. + return _src.toString(); + } + + return sourceString; + }, }; module.exports = { Document }; diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 4d998e81..eae75d9c 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -28,6 +28,9 @@ const SystemSettings = { "default_agent_skills", "agent_sql_connections", "custom_app_name", + + // beta feature flags + "experimental_live_file_sync", ], validations: { footer_data: (updates) => { @@ -114,6 +117,12 @@ const SystemSettings = { return JSON.stringify(existingConnections ?? []); } }, + experimental_live_file_sync: (update) => { + if (typeof update === "boolean") + return update === true ? "enabled" : "disabled"; + if (!["enabled", "disabled"].includes(update)) return "disabled"; + return String(update); + }, }, currentSettings: async function () { const { hasVectorCachedFiles } = require("../utils/files"); @@ -459,6 +468,13 @@ const SystemSettings = { }); }, }, + getFeatureFlags: async function () { + return { + experimental_live_file_sync: + (await SystemSettings.get({ label: "experimental_live_file_sync" })) + ?.value === "enabled", + }; + }, }; function mergeConnections(existingConnections = [], updates = []) { diff --git a/server/package.json b/server/package.json index 9cc27c8b..77120297 100644 --- a/server/package.json +++ b/server/package.json @@ -25,6 +25,7 @@ "@datastax/astra-db-ts": "^0.1.3", "@google/generative-ai": "^0.7.1", "@googleapis/youtube": "^9.0.0", + "@ladjs/graceful": "^3.2.2", "@langchain/anthropic": "0.1.16", "@langchain/community": "0.0.53", "@langchain/core": "0.1.61", @@ -38,6 +39,7 @@ "archiver": "^5.3.1", "bcrypt": "^5.1.0", "body-parser": "^1.20.2", + "bree": "^9.2.3", "chalk": "^4", "check-disk-space": "^3.4.0", "chromadb": "^1.5.2", @@ -80,7 +82,8 @@ "uuid": "^9.0.0", "uuid-apikey": "^1.5.3", "vectordb": "0.4.11", - "weaviate-ts-client": "^1.4.0" + "weaviate-ts-client": "^1.4.0", + "winston": "^3.13.0" }, "devDependencies": { "@inquirer/prompts": "^4.3.1", diff --git a/server/prisma/migrations/20240618224346_init/migration.sql b/server/prisma/migrations/20240618224346_init/migration.sql new file mode 100644 index 00000000..cce17134 --- /dev/null +++ b/server/prisma/migrations/20240618224346_init/migration.sql @@ -0,0 +1,26 @@ +-- AlterTable +ALTER TABLE "workspace_documents" ADD COLUMN "watched" BOOLEAN DEFAULT false; + +-- CreateTable +CREATE TABLE "document_sync_queues" ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "staleAfterMs" INTEGER NOT NULL DEFAULT 604800000, + "nextSyncAt" DATETIME NOT NULL, + "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + "lastSyncedAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + "workspaceDocId" INTEGER NOT NULL, + CONSTRAINT "document_sync_queues_workspaceDocId_fkey" FOREIGN KEY ("workspaceDocId") REFERENCES "workspace_documents" ("id") ON DELETE CASCADE ON UPDATE CASCADE +); + +-- CreateTable +CREATE TABLE "document_sync_executions" ( + "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT, + "queueId" INTEGER NOT NULL, + "status" TEXT NOT NULL DEFAULT 'unknown', + "result" TEXT, + "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT "document_sync_executions_queueId_fkey" FOREIGN KEY ("queueId") REFERENCES "document_sync_queues" ("id") ON DELETE CASCADE ON UPDATE CASCADE +); + 
+-- CreateIndex +CREATE UNIQUE INDEX "document_sync_queues_workspaceDocId_key" ON "document_sync_queues"("workspaceDocId"); diff --git a/server/prisma/schema.prisma b/server/prisma/schema.prisma index 0ded65be..f385e66f 100644 --- a/server/prisma/schema.prisma +++ b/server/prisma/schema.prisma @@ -24,16 +24,18 @@ model api_keys { } model workspace_documents { - id Int @id @default(autoincrement()) - docId String @unique - filename String - docpath String - workspaceId Int - metadata String? - pinned Boolean? @default(false) - createdAt DateTime @default(now()) - lastUpdatedAt DateTime @default(now()) - workspace workspaces @relation(fields: [workspaceId], references: [id]) + id Int @id @default(autoincrement()) + docId String @unique + filename String + docpath String + workspaceId Int + metadata String? + pinned Boolean? @default(false) + watched Boolean? @default(false) + createdAt DateTime @default(now()) + lastUpdatedAt DateTime @default(now()) + workspace workspaces @relation(fields: [workspaceId], references: [id]) + document_sync_queues document_sync_queues? } model invites { @@ -275,3 +277,23 @@ model slash_command_presets { @@unique([uid, command]) } + +model document_sync_queues { + id Int @id @default(autoincrement()) + staleAfterMs Int @default(604800000) // 7 days + nextSyncAt DateTime + createdAt DateTime @default(now()) + lastSyncedAt DateTime @default(now()) + workspaceDocId Int @unique + workspaceDoc workspace_documents? @relation(fields: [workspaceDocId], references: [id], onDelete: Cascade) + runs document_sync_executions[] +} + +model document_sync_executions { + id Int @id @default(autoincrement()) + queueId Int + status String @default("unknown") + result String? + createdAt DateTime @default(now()) + queue document_sync_queues @relation(fields: [queueId], references: [id], onDelete: Cascade) +} diff --git a/server/utils/BackgroundWorkers/index.js b/server/utils/BackgroundWorkers/index.js new file mode 100644 index 00000000..97445bd5 --- /dev/null +++ b/server/utils/BackgroundWorkers/index.js @@ -0,0 +1,88 @@ +const path = require("path"); +const Graceful = require("@ladjs/graceful"); +const Bree = require("bree"); + +class BackgroundService { + name = "BackgroundWorkerService"; + static _instance = null; + #root = path.resolve(__dirname, "../../jobs"); + + constructor() { + if (BackgroundService._instance) { + this.#log("SINGLETON LOCK: Using existing BackgroundService."); + return BackgroundService._instance; + } + + this.logger = this.getLogger(); + BackgroundService._instance = this; + } + + #log(text, ...args) { + console.log(`\x1b[36m[${this.name}]\x1b[0m ${text}`, ...args); + } + + async boot() { + const { DocumentSyncQueue } = require("../../models/documentSyncQueue"); + if (!(await DocumentSyncQueue.enabled())) { + this.#log("Feature is not enabled and will not be started."); + return; + } + + this.#log("Starting..."); + this.bree = new Bree({ + logger: this.logger, + root: this.#root, + jobs: this.jobs(), + errorHandler: this.onError, + workerMessageHandler: this.onWorkerMessageHandler, + }); + this.graceful = new Graceful({ brees: [this.bree], logger: this.logger }); + this.graceful.listen(); + this.bree.start(); + this.#log("Service started"); + } + + async stop() { + this.#log("Stopping..."); + if (!!this.graceful && !!this.bree) this.graceful.stopBree(this.bree, 0); + this.bree = null; + this.graceful = null; + this.#log("Service stopped"); + } + + jobs() { + return [ + // Job for auto-sync of documents + // https://github.com/breejs/bree + { + name: 
"sync-watched-documents", + interval: "1hr", + }, + ]; + } + + getLogger() { + const { format, createLogger, transports } = require("winston"); + return new createLogger({ + level: "info", + format: format.combine( + format.colorize(), + format.printf(({ level, message, service }) => { + return `\x1b[36m[${service}]\x1b[0m ${level}: ${message}`; + }) + ), + defaultMeta: { service: this.name }, + transports: [new transports.Console()], + }); + } + + onError(error, _workerMetadata) { + this.logger.error(`[${error.name}]: ${error.message}`); + } + + onWorkerMessageHandler(message, _workerMetadata) { + this.logger.info(`[${message.name}]: ${message.message}`); + } +} + +module.exports.BackgroundService = BackgroundService; diff --git a/server/utils/EncryptionManager/index.js b/server/utils/EncryptionManager/index.js new file mode 100644 index 00000000..8ef5619e --- /dev/null +++ b/server/utils/EncryptionManager/index.js @@ -0,0 +1,85 @@ +const crypto = require("crypto"); +const { dumpENV } = require("../helpers/updateENV"); + +// Class that is used to arbitrarily encrypt/decrypt string data via a persistent passphrase/salt that +// is either user defined or is created and saved to the ENV on creation. +class EncryptionManager { + #keyENV = "SIG_KEY"; + #saltENV = "SIG_SALT"; + #encryptionKey; + #encryptionSalt; + + constructor({ key = null, salt = null } = {}) { + this.#loadOrCreateKeySalt(key, salt); + this.key = crypto.scryptSync(this.#encryptionKey, this.#encryptionSalt, 32); + this.algorithm = "aes-256-cbc"; + this.separator = ":"; + + // Used to send key to collector process to be able to decrypt data since they do not share ENVs + // this value should use the CommunicationKey.encrypt process before sending anywhere outside the + // server process so it is never sent in its raw format. + this.xPayload = this.key.toString("base64"); + } + + log(text, ...args) { + console.log(`\x1b[36m[EncryptionManager]\x1b[0m ${text}`, ...args); + } + + #loadOrCreateKeySalt(_key = null, _salt = null) { + if (!!_key && !!_salt) { + this.log( + "Pre-assigned key & salt for encrypting arbitrary data was used." 
+ ); + this.#encryptionKey = _key; + this.#encryptionSalt = _salt; + return; + } + + if (!process.env[this.#keyENV] || !process.env[this.#saltENV]) { + this.log("Self-assigning key & salt for encrypting arbitrary data."); + process.env[this.#keyENV] = crypto.randomBytes(32).toString("hex"); + process.env[this.#saltENV] = crypto.randomBytes(32).toString("hex"); + if (process.env.NODE_ENV === "production") dumpENV(); + } else + this.log("Loaded existing key & salt for encrypting arbitrary data."); + + this.#encryptionKey = process.env[this.#keyENV]; + this.#encryptionSalt = process.env[this.#saltENV]; + return; + } + + encrypt(plainTextString = null) { + try { + if (!plainTextString) + throw new Error("Empty string is not valid for this method."); + const iv = crypto.randomBytes(16); + const cipher = crypto.createCipheriv(this.algorithm, this.key, iv); + const encrypted = cipher.update(plainTextString, "utf8", "hex"); + return [ + encrypted + cipher.final("hex"), + Buffer.from(iv).toString("hex"), + ].join(this.separator); + } catch (e) { + this.log(e); + return null; + } + } + + decrypt(encryptedString) { + try { + const [encrypted, iv] = encryptedString.split(this.separator); + if (!iv) throw new Error("IV not found"); + const decipher = crypto.createDecipheriv( + this.algorithm, + this.key, + Buffer.from(iv, "hex") + ); + return decipher.update(encrypted, "hex", "utf8") + decipher.final("utf8"); + } catch (e) { + this.log(e); + return null; + } + } +} + +module.exports = { EncryptionManager }; diff --git a/server/utils/boot/index.js b/server/utils/boot/index.js index 2022f66e..8a3dcbd2 100644 --- a/server/utils/boot/index.js +++ b/server/utils/boot/index.js @@ -1,4 +1,6 @@ const { Telemetry } = require("../../models/telemetry"); +const { BackgroundService } = require("../BackgroundWorkers"); +const { EncryptionManager } = require("../EncryptionManager"); const { CommunicationKey } = require("../comKey"); const setupTelemetry = require("../telemetry"); @@ -18,6 +20,8 @@ function bootSSL(app, port = 3001) { .listen(port, async () => { await setupTelemetry(); new CommunicationKey(true); + new EncryptionManager(); + new BackgroundService().boot(); console.log(`Primary server in HTTPS mode listening on port ${port}`); }) .on("error", catchSigTerms); @@ -45,6 +49,8 @@ function bootHTTP(app, port = 3001) { .listen(port, async () => { await setupTelemetry(); new CommunicationKey(true); + new EncryptionManager(); + new BackgroundService().boot(); console.log(`Primary server in HTTP mode listening on port ${port}`); }) .on("error", catchSigTerms); diff --git a/server/utils/collectorApi/index.js b/server/utils/collectorApi/index.js index 6971f640..7f578191 100644 --- a/server/utils/collectorApi/index.js +++ b/server/utils/collectorApi/index.js @@ -1,8 +1,9 @@ +const { EncryptionManager } = require("../EncryptionManager"); + // When running locally will occupy the 0.0.0.0 hostname space but when deployed inside // of docker this endpoint is not exposed so it is only on the Docker instances internal network // so no additional security is needed on the endpoint directly. 
Auth is done however by the express // middleware prior to leaving the node-side of the application so that is good enough >:) - class CollectorApi { constructor() { const { CommunicationKey } = require("../comKey"); @@ -54,6 +55,9 @@ class CollectorApi { headers: { "Content-Type": "application/json", "X-Integrity": this.comkey.sign(data), + "X-Payload-Signer": this.comkey.encrypt( + new EncryptionManager().xPayload + ), }, body: data, }) @@ -77,6 +81,9 @@ class CollectorApi { headers: { "Content-Type": "application/json", "X-Integrity": this.comkey.sign(data), + "X-Payload-Signer": this.comkey.encrypt( + new EncryptionManager().xPayload + ), }, body: data, }) @@ -98,6 +105,9 @@ class CollectorApi { headers: { "Content-Type": "application/json", "X-Integrity": this.comkey.sign(data), + "X-Payload-Signer": this.comkey.encrypt( + new EncryptionManager().xPayload + ), }, body: data, }) @@ -122,6 +132,9 @@ class CollectorApi { headers: { "Content-Type": "application/json", "X-Integrity": this.comkey.sign(body), + "X-Payload-Signer": this.comkey.encrypt( + new EncryptionManager().xPayload + ), }, }) .then((res) => { @@ -144,6 +157,9 @@ class CollectorApi { headers: { "Content-Type": "application/json", "X-Integrity": this.comkey.sign(data), + "X-Payload-Signer": this.comkey.encrypt( + new EncryptionManager().xPayload + ), }, body: data, }) diff --git a/server/utils/comKey/index.js b/server/utils/comKey/index.js index aec436bb..5cc6b0c0 100644 --- a/server/utils/comKey/index.js +++ b/server/utils/comKey/index.js @@ -73,6 +73,14 @@ class CommunicationKey { .sign("RSA-SHA256", Buffer.from(textData), this.#readPrivateKey()) .toString("hex"); } + + // Use the rolling priv-key to encrypt arbitrary data that is text + // returns the encrypted content as a base64 string. + encrypt(textData = "") { + return crypto + .privateEncrypt(this.#readPrivateKey(), Buffer.from(textData, "utf-8")) + .toString("base64"); + } } module.exports = { CommunicationKey }; diff --git a/server/utils/files/index.js b/server/utils/files/index.js index fea6f7f7..58bdf807 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -2,6 +2,7 @@ const fs = require("fs"); const path = require("path"); const { v5: uuidv5 } = require("uuid"); const { Document } = require("../../models/documents"); +const { DocumentSyncQueue } = require("../../models/documentSyncQueue"); const documentsPath = process.env.NODE_ENV === "development" ? path.resolve(__dirname, `../../storage/documents`) @@ -25,7 +26,7 @@ async function fileData(filePath = null) { async function viewLocalFiles() { if (!fs.existsSync(documentsPath)) fs.mkdirSync(documentsPath); - + const liveSyncAvailable = await DocumentSyncQueue.enabled(); const directory = { name: "documents", type: "folder", @@ -50,16 +51,28 @@ async function viewLocalFiles() { const rawData = fs.readFileSync(filePath, "utf8"); const cachefilename = `${file}/${subfile}`; const { pageContent, ...metadata } = JSON.parse(rawData); + const pinnedInWorkspaces = await Document.getOnlyWorkspaceIds({ + docpath: cachefilename, + pinned: true, + }); + const watchedInWorkspaces = liveSyncAvailable + ? await Document.getOnlyWorkspaceIds({ + docpath: cachefilename, + watched: true, + }) + : []; subdocs.items.push({ name: subfile, type: "file", ...metadata, cached: await cachedVectorInformation(cachefilename, true), - pinnedWorkspaces: await Document.getPins({ - docpath: cachefilename, - pinned: true, - }), + pinnedWorkspaces: pinnedInWorkspaces, + canWatch: liveSyncAvailable + ? 
DocumentSyncQueue.canWatch(metadata) + : false, + // Is file watched in any workspace since sync updates all workspaces where file is referenced + watched: watchedInWorkspaces.length !== 0, }); } directory.items.push(subdocs); diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 785c45ef..6abd6408 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -760,6 +760,9 @@ function dumpENV() { // and are either managed or manually set ENV key:values. "STORAGE_DIR", "SERVER_PORT", + // For persistent data encryption + "SIG_KEY", + "SIG_SALT", // Password Schema Keys if present. "PASSWORDMINCHAR", "PASSWORDMAXCHAR", diff --git a/server/utils/middleware/featureFlagEnabled.js b/server/utils/middleware/featureFlagEnabled.js new file mode 100644 index 00000000..f13a8886 --- /dev/null +++ b/server/utils/middleware/featureFlagEnabled.js @@ -0,0 +1,24 @@ +const { SystemSettings } = require("../../models/systemSettings"); + +// Explicitly check that a specific feature flag is enabled. +// This should match the key in the SystemSetting label. +function featureFlagEnabled(featureFlagKey = null) { + return async (_, response, next) => { + if (!featureFlagKey) return response.sendStatus(401).end(); + + const flagValue = ( + await SystemSettings.get({ label: String(featureFlagKey) }) + )?.value; + if (!flagValue) return response.sendStatus(401).end(); + + if (flagValue === "enabled") { + next(); + return; + } + + return response.sendStatus(401).end(); + }; +} +module.exports = { + featureFlagEnabled, +}; diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js index 30ff2bbf..efaaa135 100644 --- a/server/utils/vectorDbProviders/astra/index.js +++ b/server/utils/vectorDbProviders/astra/index.js @@ -100,7 +100,8 @@ const AstraDB = { addDocumentToNamespace: async function ( namespace, documentData = {}, - fullFilePath = null + fullFilePath = null, + skipCache = false ) { const { DocumentVectors } = require("../../../models/vectors"); try { @@ -109,40 +110,42 @@ const AstraDB = { if (!pageContent || pageContent.length == 0) return false; console.log("Adding new vectorized document into namespace", namespace); - const cacheResult = await cachedVectorInformation(fullFilePath); - if (cacheResult.exists) { - const { client } = await this.connect(); - const { chunks } = cacheResult; - const documentVectors = []; - vectorDimension = chunks[0][0].values.length || null; + if (!skipCache) { + const cacheResult = await cachedVectorInformation(fullFilePath); + if (cacheResult.exists) { + const { client } = await this.connect(); + const { chunks } = cacheResult; + const documentVectors = []; + vectorDimension = chunks[0][0].values.length || null; - const collection = await this.getOrCreateCollection( - client, - namespace, - vectorDimension - ); - if (!(await this.isRealCollection(collection))) - throw new Error("Failed to create new AstraDB collection!", { + const collection = await this.getOrCreateCollection( + client, namespace, - }); + vectorDimension + ); + if (!(await this.isRealCollection(collection))) + throw new Error("Failed to create new AstraDB collection!", { + namespace, + }); - for (const chunk of chunks) { - // Before sending to Astra and saving the records to our db - // we need to assign the id of each chunk that is stored in the cached file. 
- const newChunks = chunk.map((chunk) => { - const _id = uuidv4(); - documentVectors.push({ docId, vectorId: _id }); - return { - _id: _id, - $vector: chunk.values, - metadata: chunk.metadata || {}, - }; - }); + for (const chunk of chunks) { + // Before sending to Astra and saving the records to our db + // we need to assign the id of each chunk that is stored in the cached file. + const newChunks = chunk.map((chunk) => { + const _id = uuidv4(); + documentVectors.push({ docId, vectorId: _id }); + return { + _id: _id, + $vector: chunk.values, + metadata: chunk.metadata || {}, + }; + }); - await collection.insertMany(newChunks); + await collection.insertMany(newChunks); + } + await DocumentVectors.bulkInsert(documentVectors); + return { vectorized: true, error: null }; } - await DocumentVectors.bulkInsert(documentVectors); - return { vectorized: true, error: null }; } const EmbedderEngine = getEmbeddingEngineSelection(); diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index 5bea32bf..a79d4fc4 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -185,7 +185,8 @@ const Chroma = { addDocumentToNamespace: async function ( namespace, documentData = {}, - fullFilePath = null + fullFilePath = null, + skipCache = false ) { const { DocumentVectors } = require("../../../models/vectors"); try { @@ -193,43 +194,45 @@ const Chroma = { if (!pageContent || pageContent.length == 0) return false; console.log("Adding new vectorized document into namespace", namespace); - const cacheResult = await cachedVectorInformation(fullFilePath); - if (cacheResult.exists) { - const { client } = await this.connect(); - const collection = await client.getOrCreateCollection({ - name: this.normalize(namespace), - metadata: { "hnsw:space": "cosine" }, - }); - const { chunks } = cacheResult; - const documentVectors = []; - - for (const chunk of chunks) { - const submission = { - ids: [], - embeddings: [], - metadatas: [], - documents: [], - }; - - // Before sending to Chroma and saving the records to our db - // we need to assign the id of each chunk that is stored in the cached file. - chunk.forEach((chunk) => { - const id = uuidv4(); - const { id: _id, ...metadata } = chunk.metadata; - documentVectors.push({ docId, vectorId: id }); - submission.ids.push(id); - submission.embeddings.push(chunk.values); - submission.metadatas.push(metadata); - submission.documents.push(metadata.text); + if (skipCache) { + const cacheResult = await cachedVectorInformation(fullFilePath); + if (cacheResult.exists) { + const { client } = await this.connect(); + const collection = await client.getOrCreateCollection({ + name: this.normalize(namespace), + metadata: { "hnsw:space": "cosine" }, }); + const { chunks } = cacheResult; + const documentVectors = []; - const additionResult = await collection.add(submission); - if (!additionResult) - throw new Error("Error embedding into ChromaDB", additionResult); + for (const chunk of chunks) { + const submission = { + ids: [], + embeddings: [], + metadatas: [], + documents: [], + }; + + // Before sending to Chroma and saving the records to our db + // we need to assign the id of each chunk that is stored in the cached file. 
+ chunk.forEach((chunk) => { + const id = uuidv4(); + const { id: _id, ...metadata } = chunk.metadata; + documentVectors.push({ docId, vectorId: id }); + submission.ids.push(id); + submission.embeddings.push(chunk.values); + submission.metadatas.push(metadata); + submission.documents.push(metadata.text); + }); + + const additionResult = await collection.add(submission); + if (!additionResult) + throw new Error("Error embedding into ChromaDB", additionResult); + } + + await DocumentVectors.bulkInsert(documentVectors); + return { vectorized: true, error: null }; } - - await DocumentVectors.bulkInsert(documentVectors); - return { vectorized: true, error: null }; } // If we are here then we are going to embed and store a novel document. diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js index 54c12c04..e1e6a5e6 100644 --- a/server/utils/vectorDbProviders/lance/index.js +++ b/server/utils/vectorDbProviders/lance/index.js @@ -153,7 +153,8 @@ const LanceDb = { addDocumentToNamespace: async function ( namespace, documentData = {}, - fullFilePath = null + fullFilePath = null, + skipCache = false ) { const { DocumentVectors } = require("../../../models/vectors"); try { @@ -161,25 +162,27 @@ const LanceDb = { if (!pageContent || pageContent.length == 0) return false; console.log("Adding new vectorized document into namespace", namespace); - const cacheResult = await cachedVectorInformation(fullFilePath); - if (cacheResult.exists) { - const { client } = await this.connect(); - const { chunks } = cacheResult; - const documentVectors = []; - const submissions = []; + if (!skipCache) { + const cacheResult = await cachedVectorInformation(fullFilePath); + if (cacheResult.exists) { + const { client } = await this.connect(); + const { chunks } = cacheResult; + const documentVectors = []; + const submissions = []; - for (const chunk of chunks) { - chunk.forEach((chunk) => { - const id = uuidv4(); - const { id: _id, ...metadata } = chunk.metadata; - documentVectors.push({ docId, vectorId: id }); - submissions.push({ id: id, vector: chunk.values, ...metadata }); - }); + for (const chunk of chunks) { + chunk.forEach((chunk) => { + const id = uuidv4(); + const { id: _id, ...metadata } = chunk.metadata; + documentVectors.push({ docId, vectorId: id }); + submissions.push({ id: id, vector: chunk.values, ...metadata }); + }); + } + + await this.updateOrCreateCollection(client, submissions, namespace); + await DocumentVectors.bulkInsert(documentVectors); + return { vectorized: true, error: null }; } - - await this.updateOrCreateCollection(client, submissions, namespace); - await DocumentVectors.bulkInsert(documentVectors); - return { vectorized: true, error: null }; } // If we are here then we are going to embed and store a novel document. 
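For context, this is how the new fourth argument is exercised end to end: the sync worker above deletes the stale vectors for a watched document and then re-adds it with skipCache = true so the provider rebuilds the vector cache from the fresh content. A minimal sketch, not part of the patch itself; the wrapper function, its parameter names, and the require path are illustrative, while getVectorDbClass, deleteDocumentFromNamespace, and addDocumentToNamespace are taken from the diff above:

// Illustrative sketch: re-embed a watched document once new content arrives.
const { getVectorDbClass } = require("../utils/helpers"); // path assumed relative to server/jobs

async function refreshDocumentVectors(workspace, document, currentDocumentData, newContent) {
  const vectorDatabase = getVectorDbClass();
  // Drop the stale vectors for this doc from the workspace namespace first...
  await vectorDatabase.deleteDocumentFromNamespace(workspace.slug, document.docId);
  // ...then re-add it with skipCache = true so the provider ignores the existing
  // vector cache file and writes a fresh one from the updated page content.
  await vectorDatabase.addDocumentToNamespace(
    workspace.slug,
    { ...currentDocumentData, pageContent: newContent, docId: document.docId },
    document.docpath,
    true // skipCache
  );
}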
diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js
index d720c265..14d54d6e 100644
--- a/server/utils/vectorDbProviders/milvus/index.js
+++ b/server/utils/vectorDbProviders/milvus/index.js
@@ -137,7 +137,8 @@ const Milvus = {
   addDocumentToNamespace: async function (
     namespace,
     documentData = {},
-    fullFilePath = null
+    fullFilePath = null,
+    skipCache = false
   ) {
     const { DocumentVectors } = require("../../../models/vectors");
     try {
@@ -146,38 +147,40 @@
       if (!pageContent || pageContent.length == 0) return false;
       console.log("Adding new vectorized document into namespace", namespace);
-      const cacheResult = await cachedVectorInformation(fullFilePath);
-      if (cacheResult.exists) {
-        const { client } = await this.connect();
-        const { chunks } = cacheResult;
-        const documentVectors = [];
-        vectorDimension = chunks[0][0].values.length || null;
+      if (!skipCache) {
+        const cacheResult = await cachedVectorInformation(fullFilePath);
+        if (cacheResult.exists) {
+          const { client } = await this.connect();
+          const { chunks } = cacheResult;
+          const documentVectors = [];
+          vectorDimension = chunks[0][0].values.length || null;
-        await this.getOrCreateCollection(client, namespace, vectorDimension);
-        for (const chunk of chunks) {
-          // Before sending to Pinecone and saving the records to our db
-          // we need to assign the id of each chunk that is stored in the cached file.
-          const newChunks = chunk.map((chunk) => {
-            const id = uuidv4();
-            documentVectors.push({ docId, vectorId: id });
-            return { id, vector: chunk.values, metadata: chunk.metadata };
-          });
-          const insertResult = await client.insert({
-            collection_name: this.normalize(namespace),
-            data: newChunks,
-          });
+          await this.getOrCreateCollection(client, namespace, vectorDimension);
+          for (const chunk of chunks) {
+            // Before sending to Pinecone and saving the records to our db
+            // we need to assign the id of each chunk that is stored in the cached file.
+            const newChunks = chunk.map((chunk) => {
+              const id = uuidv4();
+              documentVectors.push({ docId, vectorId: id });
+              return { id, vector: chunk.values, metadata: chunk.metadata };
+            });
+            const insertResult = await client.insert({
+              collection_name: this.normalize(namespace),
+              data: newChunks,
+            });
-          if (insertResult?.status.error_code !== "Success") {
-            throw new Error(
-              `Error embedding into Milvus! Reason:${insertResult?.status.reason}`
-            );
+            if (insertResult?.status.error_code !== "Success") {
+              throw new Error(
+                `Error embedding into Milvus! 
Reason:${insertResult?.status.reason}` + ); + } } + await DocumentVectors.bulkInsert(documentVectors); + await client.flushSync({ + collection_names: [this.normalize(namespace)], + }); + return { vectorized: true, error: null }; } - await DocumentVectors.bulkInsert(documentVectors); - await client.flushSync({ - collection_names: [this.normalize(namespace)], - }); - return { vectorized: true, error: null }; } const EmbedderEngine = getEmbeddingEngineSelection(); diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index d1aeb2f6..040f41d3 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -96,7 +96,8 @@ const PineconeDB = { addDocumentToNamespace: async function ( namespace, documentData = {}, - fullFilePath = null + fullFilePath = null, + skipCache = false ) { const { DocumentVectors } = require("../../../models/vectors"); try { @@ -104,26 +105,28 @@ const PineconeDB = { if (!pageContent || pageContent.length == 0) return false; console.log("Adding new vectorized document into namespace", namespace); - const cacheResult = await cachedVectorInformation(fullFilePath); - if (cacheResult.exists) { - const { pineconeIndex } = await this.connect(); - const pineconeNamespace = pineconeIndex.namespace(namespace); - const { chunks } = cacheResult; - const documentVectors = []; + if (!skipCache) { + const cacheResult = await cachedVectorInformation(fullFilePath); + if (cacheResult.exists) { + const { pineconeIndex } = await this.connect(); + const pineconeNamespace = pineconeIndex.namespace(namespace); + const { chunks } = cacheResult; + const documentVectors = []; - for (const chunk of chunks) { - // Before sending to Pinecone and saving the records to our db - // we need to assign the id of each chunk that is stored in the cached file. - const newChunks = chunk.map((chunk) => { - const id = uuidv4(); - documentVectors.push({ docId, vectorId: id }); - return { ...chunk, id }; - }); - await pineconeNamespace.upsert([...newChunks]); + for (const chunk of chunks) { + // Before sending to Pinecone and saving the records to our db + // we need to assign the id of each chunk that is stored in the cached file. + const newChunks = chunk.map((chunk) => { + const id = uuidv4(); + documentVectors.push({ docId, vectorId: id }); + return { ...chunk, id }; + }); + await pineconeNamespace.upsert([...newChunks]); + } + + await DocumentVectors.bulkInsert(documentVectors); + return { vectorized: true, error: null }; } - - await DocumentVectors.bulkInsert(documentVectors); - return { vectorized: true, error: null }; } // If we are here then we are going to embed and store a novel document. 
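Each provider applies the same guard around its cached-vector fast path: the on-disk cache is only consulted when the caller has not asked to skip it, and a resync (which passes skipCache = true) falls through to a full re-embed that writes a fresh cache file. A rough sketch with the provider-specific insert logic elided; cachedVectorInformation is the providers' existing file-cache helper and the placeholder comments stand in for each provider's own upsert code:

// Sketch of the shared guard pattern used by every addDocumentToNamespace above and below.
async function addDocumentToNamespace(namespace, documentData = {}, fullFilePath = null, skipCache = false) {
  if (!skipCache) {
    const cacheResult = await cachedVectorInformation(fullFilePath);
    if (cacheResult.exists) {
      // ...re-insert the cached chunks into this namespace and return early...
      return { vectorized: true, error: null };
    }
  }
  // Otherwise embed the document from scratch and write a new vector cache.
}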
diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js
index 77945915..36550f09 100644
--- a/server/utils/vectorDbProviders/qdrant/index.js
+++ b/server/utils/vectorDbProviders/qdrant/index.js
@@ -137,7 +137,8 @@ const QDrant = {
   addDocumentToNamespace: async function (
     namespace,
     documentData = {},
-    fullFilePath = null
+    fullFilePath = null,
+    skipCache = false
   ) {
     const { DocumentVectors } = require("../../../models/vectors");
     try {
@@ -146,59 +147,63 @@
       if (!pageContent || pageContent.length == 0) return false;
       console.log("Adding new vectorized document into namespace", namespace);
-      const cacheResult = await cachedVectorInformation(fullFilePath);
-      if (cacheResult.exists) {
-        const { client } = await this.connect();
-        const { chunks } = cacheResult;
-        const documentVectors = [];
-        vectorDimension =
-          chunks[0][0]?.vector?.length ?? chunks[0][0]?.values?.length ?? null;
+      if (!skipCache) {
+        const cacheResult = await cachedVectorInformation(fullFilePath);
+        if (cacheResult.exists) {
+          const { client } = await this.connect();
+          const { chunks } = cacheResult;
+          const documentVectors = [];
+          vectorDimension =
+            chunks[0][0]?.vector?.length ??
+            chunks[0][0]?.values?.length ??
+            null;
-        const collection = await this.getOrCreateCollection(
-          client,
-          namespace,
-          vectorDimension
-        );
-        if (!collection)
-          throw new Error("Failed to create new QDrant collection!", {
+          const collection = await this.getOrCreateCollection(
+            client,
            namespace,
-          });
+            vectorDimension
+          );
+          if (!collection)
+            throw new Error("Failed to create new QDrant collection!", {
+              namespace,
+            });
-        for (const chunk of chunks) {
-          const submission = {
-            ids: [],
-            vectors: [],
-            payloads: [],
-          };
+          for (const chunk of chunks) {
+            const submission = {
+              ids: [],
+              vectors: [],
+              payloads: [],
+            };
-          // Before sending to Qdrant and saving the records to our db
-          // we need to assign the id of each chunk that is stored in the cached file.
-          // The id property must be defined or else it will be unable to be managed by ALLM.
-          chunk.forEach((chunk) => {
-            const id = uuidv4();
-            if (chunk?.payload?.hasOwnProperty("id")) {
-              const { id: _id, ...payload } = chunk.payload;
-              documentVectors.push({ docId, vectorId: id });
-              submission.ids.push(id);
-              submission.vectors.push(chunk.vector);
-              submission.payloads.push(payload);
-            } else {
-              console.error(
-                "The 'id' property is not defined in chunk.payload - it will be omitted from being inserted in QDrant collection."
-              );
-            }
-          });
+            // Before sending to Qdrant and saving the records to our db
+            // we need to assign the id of each chunk that is stored in the cached file.
+            // The id property must be defined or else it will be unable to be managed by ALLM.
+            chunk.forEach((chunk) => {
+              const id = uuidv4();
+              if (chunk?.payload?.hasOwnProperty("id")) {
+                const { id: _id, ...payload } = chunk.payload;
+                documentVectors.push({ docId, vectorId: id });
+                submission.ids.push(id);
+                submission.vectors.push(chunk.vector);
+                submission.payloads.push(payload);
+              } else {
+                console.error(
+                  "The 'id' property is not defined in chunk.payload - it will be omitted from being inserted in QDrant collection."
+ ); + } + }); - const additionResult = await client.upsert(namespace, { - wait: true, - batch: { ...submission }, - }); - if (additionResult?.status !== "completed") - throw new Error("Error embedding into QDrant", additionResult); + const additionResult = await client.upsert(namespace, { + wait: true, + batch: { ...submission }, + }); + if (additionResult?.status !== "completed") + throw new Error("Error embedding into QDrant", additionResult); + } + + await DocumentVectors.bulkInsert(documentVectors); + return { vectorized: true, error: null }; } - - await DocumentVectors.bulkInsert(documentVectors); - return { vectorized: true, error: null }; } // If we are here then we are going to embed and store a novel document. diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js index 978e2557..176c56d6 100644 --- a/server/utils/vectorDbProviders/weaviate/index.js +++ b/server/utils/vectorDbProviders/weaviate/index.js @@ -179,7 +179,8 @@ const Weaviate = { addDocumentToNamespace: async function ( namespace, documentData = {}, - fullFilePath = null + fullFilePath = null, + skipCache = false ) { const { DocumentVectors } = require("../../../models/vectors"); try { @@ -192,55 +193,57 @@ const Weaviate = { if (!pageContent || pageContent.length == 0) return false; console.log("Adding new vectorized document into namespace", namespace); - const cacheResult = await cachedVectorInformation(fullFilePath); - if (cacheResult.exists) { - const { client } = await this.connect(); - const weaviateClassExits = await this.hasNamespace(namespace); - if (!weaviateClassExits) { - await client.schema - .classCreator() - .withClass({ - class: camelCase(namespace), - description: `Class created by AnythingLLM named ${camelCase( - namespace - )}`, - vectorizer: "none", - }) - .do(); - } - - const { chunks } = cacheResult; - const documentVectors = []; - const vectors = []; - - for (const chunk of chunks) { - // Before sending to Weaviate and saving the records to our db - // we need to assign the id of each chunk that is stored in the cached file. - chunk.forEach((chunk) => { - const id = uuidv4(); - const flattenedMetadata = this.flattenObjectForWeaviate( - chunk.properties ?? 
chunk.metadata - ); - documentVectors.push({ docId, vectorId: id }); - const vectorRecord = { - id, - class: camelCase(namespace), - vector: chunk.vector || chunk.values || [], - properties: { ...flattenedMetadata }, - }; - vectors.push(vectorRecord); - }); - - const { success: additionResult, errors = [] } = - await this.addVectors(client, vectors); - if (!additionResult) { - console.error("Weaviate::addVectors failed to insert", errors); - throw new Error("Error embedding into Weaviate"); + if (skipCache) { + const cacheResult = await cachedVectorInformation(fullFilePath); + if (cacheResult.exists) { + const { client } = await this.connect(); + const weaviateClassExits = await this.hasNamespace(namespace); + if (!weaviateClassExits) { + await client.schema + .classCreator() + .withClass({ + class: camelCase(namespace), + description: `Class created by AnythingLLM named ${camelCase( + namespace + )}`, + vectorizer: "none", + }) + .do(); } - } - await DocumentVectors.bulkInsert(documentVectors); - return { vectorized: true, error: null }; + const { chunks } = cacheResult; + const documentVectors = []; + const vectors = []; + + for (const chunk of chunks) { + // Before sending to Weaviate and saving the records to our db + // we need to assign the id of each chunk that is stored in the cached file. + chunk.forEach((chunk) => { + const id = uuidv4(); + const flattenedMetadata = this.flattenObjectForWeaviate( + chunk.properties ?? chunk.metadata + ); + documentVectors.push({ docId, vectorId: id }); + const vectorRecord = { + id, + class: camelCase(namespace), + vector: chunk.vector || chunk.values || [], + properties: { ...flattenedMetadata }, + }; + vectors.push(vectorRecord); + }); + + const { success: additionResult, errors = [] } = + await this.addVectors(client, vectors); + if (!additionResult) { + console.error("Weaviate::addVectors failed to insert", errors); + throw new Error("Error embedding into Weaviate"); + } + } + + await DocumentVectors.bulkInsert(documentVectors); + return { vectorized: true, error: null }; + } } // If we are here then we are going to embed and store a novel document. 
diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js
index ebb59157..cb60d2e3 100644
--- a/server/utils/vectorDbProviders/zilliz/index.js
+++ b/server/utils/vectorDbProviders/zilliz/index.js
@@ -138,7 +138,8 @@ const Zilliz = {
   addDocumentToNamespace: async function (
     namespace,
     documentData = {},
-    fullFilePath = null
+    fullFilePath = null,
+    skipCache = false
   ) {
     const { DocumentVectors } = require("../../../models/vectors");
     try {
@@ -147,38 +148,40 @@
       if (!pageContent || pageContent.length == 0) return false;
       console.log("Adding new vectorized document into namespace", namespace);
-      const cacheResult = await cachedVectorInformation(fullFilePath);
-      if (cacheResult.exists) {
-        const { client } = await this.connect();
-        const { chunks } = cacheResult;
-        const documentVectors = [];
-        vectorDimension = chunks[0][0].values.length || null;
+      if (!skipCache) {
+        const cacheResult = await cachedVectorInformation(fullFilePath);
+        if (cacheResult.exists) {
+          const { client } = await this.connect();
+          const { chunks } = cacheResult;
+          const documentVectors = [];
+          vectorDimension = chunks[0][0].values.length || null;
-        await this.getOrCreateCollection(client, namespace, vectorDimension);
-        for (const chunk of chunks) {
-          // Before sending to Pinecone and saving the records to our db
-          // we need to assign the id of each chunk that is stored in the cached file.
-          const newChunks = chunk.map((chunk) => {
-            const id = uuidv4();
-            documentVectors.push({ docId, vectorId: id });
-            return { id, vector: chunk.values, metadata: chunk.metadata };
-          });
-          const insertResult = await client.insert({
-            collection_name: this.normalize(namespace),
-            data: newChunks,
-          });
+          await this.getOrCreateCollection(client, namespace, vectorDimension);
+          for (const chunk of chunks) {
+            // Before sending to Pinecone and saving the records to our db
+            // we need to assign the id of each chunk that is stored in the cached file.
+            const newChunks = chunk.map((chunk) => {
+              const id = uuidv4();
+              documentVectors.push({ docId, vectorId: id });
+              return { id, vector: chunk.values, metadata: chunk.metadata };
+            });
+            const insertResult = await client.insert({
+              collection_name: this.normalize(namespace),
+              data: newChunks,
+            });
-          if (insertResult?.status.error_code !== "Success") {
-            throw new Error(
-              `Error embedding into Zilliz! Reason:${insertResult?.status.reason}`
-            );
+            if (insertResult?.status.error_code !== "Success") {
+              throw new Error(
+                `Error embedding into Zilliz! 
Reason:${insertResult?.status.reason}` + ); + } } + await DocumentVectors.bulkInsert(documentVectors); + await client.flushSync({ + collection_names: [this.normalize(namespace)], + }); + return { vectorized: true, error: null }; } - await DocumentVectors.bulkInsert(documentVectors); - await client.flushSync({ - collection_names: [this.normalize(namespace)], - }); - return { vectorized: true, error: null }; } const EmbedderEngine = getEmbeddingEngineSelection(); diff --git a/server/yarn.lock b/server/yarn.lock index a05c62fc..6c9de0e2 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -272,6 +272,18 @@ "@azure/logger" "^1.0.3" tslib "^2.4.0" +"@babel/runtime@^7.10.5": + version "7.24.7" + resolved "https://registry.yarnpkg.com/@babel/runtime/-/runtime-7.24.7.tgz#f4f0d5530e8dbdf59b3451b9b3e594b6ba082e12" + integrity sha512-UwgBRMjJP+xv857DCngvqXI3Iq6J4v0wXmwc6sapg+zyhbwmQX67LUEFrkK5tbyJ30jGuG3ZvWpBiB9LCy1kWw== + dependencies: + regenerator-runtime "^0.14.0" + +"@breejs/later@^4.2.0": + version "4.2.0" + resolved "https://registry.yarnpkg.com/@breejs/later/-/later-4.2.0.tgz#669661f3a02535ef900f360c74e48c3f5483c786" + integrity sha512-EVMD0SgJtOuFeg0lAVbCwa+qeTKILb87jqvLyUtQswGD9+ce2nB52Y5zbTF1Hc0MDFfbydcMcxb47jSdhikVHA== + "@colors/colors@1.6.0", "@colors/colors@^1.6.0": version "1.6.0" resolved "https://registry.yarnpkg.com/@colors/colors/-/colors-1.6.0.tgz#ec6cd237440700bc23ca23087f513c75508958b0" @@ -567,6 +579,14 @@ resolved "https://registry.yarnpkg.com/@kwsites/promise-deferred/-/promise-deferred-1.1.1.tgz#8ace5259254426ccef57f3175bc64ed7095ed919" integrity sha512-GaHYm+c0O9MjZRu0ongGBRbinu8gVAMd2UZjji6jVmqKtZluZnptXGWhz1E8j8D2HJ3f/yMxKAUC0b+57wncIw== +"@ladjs/graceful@^3.2.2": + version "3.2.2" + resolved "https://registry.yarnpkg.com/@ladjs/graceful/-/graceful-3.2.2.tgz#1b141a9dc2604df99177d6714dbe4a0bff5e2ddf" + integrity sha512-GyL5Cpgh2RlndFW2e4AUHrEDe0tzyXKpAs92wrAQhNKcY0y++qfK8PC+6TOHzN9zvxPY9j1KAU29Gfa9vxWzDg== + dependencies: + lil-http-terminator "^1.2.2" + p-is-promise "3" + "@lancedb/vectordb-darwin-arm64@0.4.11": version "0.4.11" resolved "https://registry.yarnpkg.com/@lancedb/vectordb-darwin-arm64/-/vectordb-darwin-arm64-0.4.11.tgz#390549891e03f28ba0c1b741f30730b2d09227da" @@ -1114,6 +1134,11 @@ dependencies: "@types/node" "*" +"@types/lodash@^4.14.165": + version "4.17.5" + resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.17.5.tgz#e6c29b58e66995d57cd170ce3e2a61926d55ee04" + integrity sha512-MBIOHVZqVqgfro1euRDWX7OO0fBVUUMrN6Pwm8LQsz8cWhEpihlvR70ENj3f40j58TNxZaWv2ndSkInykNBBJw== + "@types/long@^4.0.1": version "4.0.2" resolved "https://registry.yarnpkg.com/@types/long/-/long-4.0.2.tgz#b74129719fc8d11c01868010082d483b7545591a" @@ -1708,6 +1733,11 @@ boolbase@^1.0.0: resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e" integrity sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww== +boolean@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/boolean/-/boolean-3.2.0.tgz#9e5294af4e98314494cbb17979fa54ca159f116b" + integrity sha512-d0II/GO9uf9lfUHH2BQsjxzRJZBdsjgsBiW4BvhWk/3qoKwQFjIDVN19PfX8F2D/r9PCMTtLWjYVCFrpeYUzsw== + bottleneck@^2.15.3: version "2.19.5" resolved "https://registry.yarnpkg.com/bottleneck/-/bottleneck-2.19.5.tgz#5df0b90f59fd47656ebe63c78a98419205cadd91" @@ -1735,6 +1765,22 @@ braces@~3.0.2: dependencies: fill-range "^7.0.1" +bree@^9.2.3: + version "9.2.3" + resolved 
"https://registry.yarnpkg.com/bree/-/bree-9.2.3.tgz#8c47402efcc79ed6da31637f84092ef59743d395" + integrity sha512-iCVyLVcqql8rFogVX5gzkofdo6OZu8mxe5dUSkAZyaR43UdNfP0DOj3jJk31yogy6lfnRMhGvO5Gj1ypLeInuA== + dependencies: + "@breejs/later" "^4.2.0" + boolean "^3.2.0" + combine-errors "^3.0.3" + cron-validate "^1.4.5" + human-interval "^2.0.1" + is-string-and-not-blank "^0.0.2" + is-valid-path "^0.1.1" + ms "^2.1.3" + p-wait-for "3" + safe-timers "^1.1.0" + bson@^6.2.0: version "6.6.0" resolved "https://registry.yarnpkg.com/bson/-/bson-6.6.0.tgz#f225137eb49fe19bee4d87949a0515c05176e2ad" @@ -2040,6 +2086,14 @@ colorspace@1.1.x: color "^3.1.3" text-hex "1.0.x" +combine-errors@^3.0.3: + version "3.0.3" + resolved "https://registry.yarnpkg.com/combine-errors/-/combine-errors-3.0.3.tgz#f4df6740083e5703a3181110c2b10551f003da86" + integrity sha512-C8ikRNRMygCwaTx+Ek3Yr+OuZzgZjduCOfSQBjbM8V3MfgcjSTeto/GXP6PAwKvJz/v15b7GHZvx5rOlczFw/Q== + dependencies: + custom-error-instance "2.1.1" + lodash.uniqby "4.5.0" + combined-stream@^1.0.8: version "1.0.8" resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f" @@ -2165,6 +2219,13 @@ crc32-stream@^4.0.2: crc-32 "^1.2.0" readable-stream "^3.4.0" +cron-validate@^1.4.5: + version "1.4.5" + resolved "https://registry.yarnpkg.com/cron-validate/-/cron-validate-1.4.5.tgz#eceb221f7558e6302e5f84c7b3a454fdf4d064c3" + integrity sha512-nKlOJEnYKudMn/aNyNH8xxWczlfpaazfWV32Pcx/2St51r2bxWbGhZD7uwzMcRhunA/ZNL+Htm/i0792Z59UMQ== + dependencies: + yup "0.32.9" + cross-env@^7.0.3: version "7.0.3" resolved "https://registry.yarnpkg.com/cross-env/-/cross-env-7.0.3.tgz#865264b29677dc015ba8418918965dd232fc54cf" @@ -2209,6 +2270,11 @@ css-what@^6.1.0: resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4" integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw== +custom-error-instance@2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/custom-error-instance/-/custom-error-instance-2.1.1.tgz#3cf6391487a6629a6247eb0ca0ce00081b7e361a" + integrity sha512-p6JFxJc3M4OTD2li2qaHkDCw9SfMw82Ldr6OC9Je1aXiGfhx2W8p3GaoeaGrPJTUN9NirTM/KTxHWMUdR1rsUg== + data-view-buffer@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/data-view-buffer/-/data-view-buffer-1.0.1.tgz#8ea6326efec17a2e42620696e671d7d5a8bc66b2" @@ -3521,6 +3587,13 @@ https-proxy-agent@^7.0.0: agent-base "^7.0.2" debug "4" +human-interval@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/human-interval/-/human-interval-2.0.1.tgz#655baf606c7067bb26042dcae14ec777b099af15" + integrity sha512-r4Aotzf+OtKIGQCB3odUowy4GfUDTy3aTWTfLd7ZF2gBCy3XW3v/dJLRefZnOFFnjqs5B1TypvS8WarpBkYUNQ== + dependencies: + numbered "^1.1.0" + human-signals@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" @@ -3708,6 +3781,11 @@ is-docker@^2.0.0, is-docker@^2.1.1: resolved "https://registry.yarnpkg.com/is-docker/-/is-docker-2.2.1.tgz#33eeabe23cfe86f14bde4408a02c0cfb853acdaa" integrity sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ== +is-extglob@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-1.0.0.tgz#ac468177c4943405a092fc8f29760c6ffc6206c0" + integrity sha512-7Q+VbVafe6x2T+Tu6NcOf6sRklazEPmBoB3IWk3WdGZM2iGUwU/Oe3Wtq5lSEkDTTlpp8yx+5t4pzO/i9Ty1ww== + is-extglob@^2.1.1: 
version "2.1.1" resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" @@ -3732,6 +3810,13 @@ is-generator-function@^1.0.10: dependencies: has-tostringtag "^1.0.0" +is-glob@^2.0.0: + version "2.0.1" + resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-2.0.1.tgz#d096f926a3ded5600f3fdfd91198cb0888c2d863" + integrity sha512-a1dBeB19NXsf/E0+FHqkagizel/LQw2DjSQpvQrj3zT+jYPpaUCryPnrQajXKFLCMuf4I6FhRpaGtw4lPrG6Eg== + dependencies: + is-extglob "^1.0.0" + is-glob@^4.0.0, is-glob@^4.0.1, is-glob@^4.0.3, is-glob@~4.0.1: version "4.0.3" resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" @@ -3744,6 +3829,13 @@ is-interactive@^2.0.0: resolved "https://registry.yarnpkg.com/is-interactive/-/is-interactive-2.0.0.tgz#40c57614593826da1100ade6059778d597f16e90" integrity sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ== +is-invalid-path@^0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/is-invalid-path/-/is-invalid-path-0.1.0.tgz#307a855b3cf1a938b44ea70d2c61106053714f34" + integrity sha512-aZMG0T3F34mTg4eTdszcGXx54oiZ4NtHSft3hWNJMGJXUUqdIj3cOZuHcU0nCWWcY3jd7yRe/3AEm3vSNTpBGQ== + dependencies: + is-glob "^2.0.0" + is-lambda@^1.0.1: version "1.0.1" resolved "https://registry.yarnpkg.com/is-lambda/-/is-lambda-1.0.1.tgz#3d9877899e6a53efc0160504cde15f82e6f061d5" @@ -3806,6 +3898,18 @@ is-stream@^2.0.0: resolved "https://registry.yarnpkg.com/is-stream/-/is-stream-2.0.1.tgz#fac1e3d53b97ad5a9d0ae9cef2389f5810a5c077" integrity sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg== +is-string-and-not-blank@^0.0.2: + version "0.0.2" + resolved "https://registry.yarnpkg.com/is-string-and-not-blank/-/is-string-and-not-blank-0.0.2.tgz#cd19eded2ca4a514f79ca528915f1fb28e5dd38a" + integrity sha512-FyPGAbNVyZpTeDCTXnzuwbu9/WpNXbCfbHXLpCRpN4GANhS00eEIP5Ef+k5HYSNIzIhdN9zRDoBj6unscECvtQ== + dependencies: + is-string-blank "^1.0.1" + +is-string-blank@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/is-string-blank/-/is-string-blank-1.0.1.tgz#866dca066d41d2894ebdfd2d8fe93e586e583a03" + integrity sha512-9H+ZBCVs3L9OYqv8nuUAzpcT9OTgMD1yAWrG7ihlnibdkbtB850heAmYWxHuXc4CHy4lKeK69tN+ny1K7gBIrw== + is-string@^1.0.5, is-string@^1.0.7: version "1.0.7" resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.7.tgz#0dd12bf2006f255bb58f695110eff7491eebc0fd" @@ -3832,6 +3936,13 @@ is-unicode-supported@^1.1.0, is-unicode-supported@^1.3.0: resolved "https://registry.yarnpkg.com/is-unicode-supported/-/is-unicode-supported-1.3.0.tgz#d824984b616c292a2e198207d4a609983842f714" integrity sha512-43r2mRvz+8JRIKnWJ+3j8JtjRKZ6GmjzfaE/qiBJnikNnYv/6bagRJ1kUhNk8R5EX/GkobD+r+sfxCPJsiKBLQ== +is-valid-path@^0.1.1: + version "0.1.1" + resolved "https://registry.yarnpkg.com/is-valid-path/-/is-valid-path-0.1.1.tgz#110f9ff74c37f663e1ec7915eb451f2db93ac9df" + integrity sha512-+kwPrVDu9Ms03L90Qaml+79+6DZHqHyRoANI6IsZJ/g8frhnfchDOBCa0RbQ6/kdHt5CS5OeIEyrYznNuVN+8A== + dependencies: + is-invalid-path "^0.1.0" + is-weakmap@^2.0.2: version "2.0.2" resolved "https://registry.yarnpkg.com/is-weakmap/-/is-weakmap-2.0.2.tgz#bf72615d649dfe5f699079c54b83e47d1ae19cfd" @@ -4157,6 +4268,11 @@ levn@^0.4.1: prelude-ls "^1.2.1" type-check "~0.4.0" +lil-http-terminator@^1.2.2: + version "1.2.3" + resolved "https://registry.yarnpkg.com/lil-http-terminator/-/lil-http-terminator-1.2.3.tgz#594ef0f3c2b2f7d43a8f2989b2b3de611bf507eb" + integrity 
sha512-vQcHSwAFq/kTR2cG6peOVS7SjgksGgSPeH0G2lkw+buue33thE/FCHdn10wJXXshc5RswFy0Iaz48qA2Busw5Q== + locate-path@^6.0.0: version "6.0.0" resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-6.0.0.tgz#55321eb309febbc59c4801d931a72452a681d286" @@ -4164,6 +4280,48 @@ locate-path@^6.0.0: dependencies: p-locate "^5.0.0" +lodash-es@^4.17.15: + version "4.17.21" + resolved "https://registry.yarnpkg.com/lodash-es/-/lodash-es-4.17.21.tgz#43e626c46e6591b7750beb2b50117390c609e3ee" + integrity sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw== + +lodash._baseiteratee@~4.7.0: + version "4.7.0" + resolved "https://registry.yarnpkg.com/lodash._baseiteratee/-/lodash._baseiteratee-4.7.0.tgz#34a9b5543572727c3db2e78edae3c0e9e66bd102" + integrity sha512-nqB9M+wITz0BX/Q2xg6fQ8mLkyfF7MU7eE+MNBNjTHFKeKaZAPEzEg+E8LWxKWf1DQVflNEn9N49yAuqKh2mWQ== + dependencies: + lodash._stringtopath "~4.8.0" + +lodash._basetostring@~4.12.0: + version "4.12.0" + resolved "https://registry.yarnpkg.com/lodash._basetostring/-/lodash._basetostring-4.12.0.tgz#9327c9dc5158866b7fa4b9d42f4638e5766dd9df" + integrity sha512-SwcRIbyxnN6CFEEK4K1y+zuApvWdpQdBHM/swxP962s8HIxPO3alBH5t3m/dl+f4CMUug6sJb7Pww8d13/9WSw== + +lodash._baseuniq@~4.6.0: + version "4.6.0" + resolved "https://registry.yarnpkg.com/lodash._baseuniq/-/lodash._baseuniq-4.6.0.tgz#0ebb44e456814af7905c6212fa2c9b2d51b841e8" + integrity sha512-Ja1YevpHZctlI5beLA7oc5KNDhGcPixFhcqSiORHNsp/1QTv7amAXzw+gu4YOvErqVlMVyIJGgtzeepCnnur0A== + dependencies: + lodash._createset "~4.0.0" + lodash._root "~3.0.0" + +lodash._createset@~4.0.0: + version "4.0.3" + resolved "https://registry.yarnpkg.com/lodash._createset/-/lodash._createset-4.0.3.tgz#0f4659fbb09d75194fa9e2b88a6644d363c9fe26" + integrity sha512-GTkC6YMprrJZCYU3zcqZj+jkXkrXzq3IPBcF/fIPpNEAB4hZEtXU8zp/RwKOvZl43NUmwDbyRk3+ZTbeRdEBXA== + +lodash._root@~3.0.0: + version "3.0.1" + resolved "https://registry.yarnpkg.com/lodash._root/-/lodash._root-3.0.1.tgz#fba1c4524c19ee9a5f8136b4609f017cf4ded692" + integrity sha512-O0pWuFSK6x4EXhM1dhZ8gchNtG7JMqBtrHdoUFUWXD7dJnNSUze1GuyQr5sOs0aCvgGeI3o/OJW8f4ca7FDxmQ== + +lodash._stringtopath@~4.8.0: + version "4.8.0" + resolved "https://registry.yarnpkg.com/lodash._stringtopath/-/lodash._stringtopath-4.8.0.tgz#941bcf0e64266e5fc1d66fed0a6959544c576824" + integrity sha512-SXL66C731p0xPDC5LZg4wI5H+dJo/EO4KTqOMwLYCH3+FmmfAKJEZCm6ohGpI+T1xwsDsJCfL4OnhorllvlTPQ== + dependencies: + lodash._basetostring "~4.12.0" + lodash.assignwith@^4.2.0: version "4.2.0" resolved "https://registry.yarnpkg.com/lodash.assignwith/-/lodash.assignwith-4.2.0.tgz#127a97f02adc41751a954d24b0de17e100e038eb" @@ -4234,7 +4392,15 @@ lodash.union@^4.6.0: resolved "https://registry.yarnpkg.com/lodash.union/-/lodash.union-4.6.0.tgz#48bb5088409f16f1821666641c44dd1aaae3cd88" integrity sha512-c4pB2CdGrGdjMKYLA+XiRDO7Y0PRQbm/Gzg8qMj+QH+pFVAoTp5sBpO0odL3FjoPCGjK96p6qsP+yQoiLoOBcw== -lodash@^4.17.21: +lodash.uniqby@4.5.0: + version "4.5.0" + resolved "https://registry.yarnpkg.com/lodash.uniqby/-/lodash.uniqby-4.5.0.tgz#a3a17bbf62eeb6240f491846e97c1c4e2a5e1e21" + integrity sha512-IRt7cfTtHy6f1aRVA5n7kT8rgN3N1nH6MOWLcHfpWG2SH19E3JksLK38MktLxZDhlAjCP9jpIXkOnRXlu6oByQ== + dependencies: + lodash._baseiteratee "~4.7.0" + lodash._baseuniq "~4.6.0" + +lodash@^4.17.20, lodash@^4.17.21: version "4.17.21" resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" integrity 
sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== @@ -4546,7 +4712,7 @@ ms@2.1.2: resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.2.tgz#d09d1f357b443f493382a8eb3ccd183872ae6009" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== -ms@2.1.3, ms@^2.0.0, ms@^2.1.1: +ms@2.1.3, ms@^2.0.0, ms@^2.1.1, ms@^2.1.3: version "2.1.3" resolved "https://registry.yarnpkg.com/ms/-/ms-2.1.3.tgz#574c8138ce1d2b5861f0b44579dbadd60c6615b2" integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== @@ -4607,6 +4773,11 @@ named-placeholders@^1.1.3: dependencies: lru-cache "^7.14.1" +nanoclone@^0.2.1: + version "0.2.1" + resolved "https://registry.yarnpkg.com/nanoclone/-/nanoclone-0.2.1.tgz#dd4090f8f1a110d26bb32c49ed2f5b9235209ed4" + integrity sha512-wynEP02LmIbLpcYw8uBKpcfF6dmg2vcpKqxeH5UcoKEYdExslsdUA4ugFauuaeYdTB76ez6gJW8XAZ6CgkXYxA== + napi-build-utils@^1.0.1: version "1.0.2" resolved "https://registry.yarnpkg.com/napi-build-utils/-/napi-build-utils-1.0.2.tgz#b1fddc0b2c46e380a0b7a76f984dd47c41a13806" @@ -4808,6 +4979,11 @@ num-sort@^2.0.0: resolved "https://registry.yarnpkg.com/num-sort/-/num-sort-2.1.0.tgz#1cbb37aed071329fdf41151258bc011898577a9b" integrity sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg== +numbered@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/numbered/-/numbered-1.1.0.tgz#9fcd79564c73a84b9574e8370c3d8e58fe3c133c" + integrity sha512-pv/ue2Odr7IfYOO0byC1KgBI10wo5YDauLhxY6/saNzAdAs0r1SotGCPzzCLNPL0xtrAwWRialLu23AAu9xO1g== + object-assign@^4, object-assign@^4.1.1: version "4.1.1" resolved "https://registry.yarnpkg.com/object-assign/-/object-assign-4.1.1.tgz#2109adc7965887cfc05cbbd442cac8bfbb360863" @@ -5031,6 +5207,11 @@ p-finally@^1.0.0: resolved "https://registry.yarnpkg.com/p-finally/-/p-finally-1.0.0.tgz#3fbcfb15b899a44123b34b6dcc18b724336a2cae" integrity sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow== +p-is-promise@3: + version "3.0.0" + resolved "https://registry.yarnpkg.com/p-is-promise/-/p-is-promise-3.0.0.tgz#58e78c7dfe2e163cf2a04ff869e7c1dba64a5971" + integrity sha512-Wo8VsW4IRQSKVXsJCn7TomUaVtyfjVDn3nUP7kE967BQk0CwFpdbZs0X0uk5sW9mkBa9eNM7hCMaG93WUAwxYQ== + p-limit@^3.0.2: version "3.1.0" resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b" @@ -5068,13 +5249,20 @@ p-retry@4: "@types/retry" "0.12.0" retry "^0.13.1" -p-timeout@^3.2.0: +p-timeout@^3.0.0, p-timeout@^3.2.0: version "3.2.0" resolved "https://registry.yarnpkg.com/p-timeout/-/p-timeout-3.2.0.tgz#c7e17abc971d2a7962ef83626b35d635acf23dfe" integrity sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg== dependencies: p-finally "^1.0.0" +p-wait-for@3: + version "3.2.0" + resolved "https://registry.yarnpkg.com/p-wait-for/-/p-wait-for-3.2.0.tgz#640429bcabf3b0dd9f492c31539c5718cb6a3f1f" + integrity sha512-wpgERjNkLrBiFmkMEjuZJEWKKDrNfHCKA1OhyN1wg1FrLkULbviEy6py1AyJUgZ72YWFbZ38FIpnqvVqAlDUwA== + dependencies: + p-timeout "^3.0.0" + pad-left@^2.1.0: version "2.1.0" resolved "https://registry.yarnpkg.com/pad-left/-/pad-left-2.1.0.tgz#16e6a3b2d44a8e138cb0838cc7cb403a4fc9e994" @@ -5318,6 +5506,11 @@ prop-types@^15.8.1: object-assign "^4.1.1" react-is "^16.13.1" +property-expr@^2.0.4: + version "2.0.6" + resolved 
"https://registry.yarnpkg.com/property-expr/-/property-expr-2.0.6.tgz#f77bc00d5928a6c748414ad12882e83f24aec1e8" + integrity sha512-SVtmxhRE/CGkn3eZY1T6pC8Nln6Fr/lu1mKSgRud0eC73whjGfoAogbn78LkD8aFL0zz3bAFerKSnOl7NlErBA== + protobufjs@^6.8.8: version "6.11.4" resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-6.11.4.tgz#29a412c38bf70d89e537b6d02d904a6f448173aa" @@ -5507,6 +5700,11 @@ reflect.getprototypeof@^1.0.4: globalthis "^1.0.3" which-builtin-type "^1.1.3" +regenerator-runtime@^0.14.0: + version "0.14.1" + resolved "https://registry.yarnpkg.com/regenerator-runtime/-/regenerator-runtime-0.14.1.tgz#356ade10263f685dda125100cd862c1db895327f" + integrity sha512-dYnhHh0nJoMfnkZs6GmmhFknAGRrLznOu5nc9ML+EJxGvrx6H7teuevqVqCuPcPK//3eDrrjQhehXVx9cnkGdw== + regexp.prototype.flags@^1.5.2: version "1.5.2" resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.5.2.tgz#138f644a3350f981a858c44f6bb1a61ff59be334" @@ -5627,6 +5825,11 @@ safe-stable-stringify@^2.3.1: resolved "https://registry.yarnpkg.com/safe-stable-stringify/-/safe-stable-stringify-2.4.3.tgz#138c84b6f6edb3db5f8ef3ef7115b8f55ccbf886" integrity sha512-e2bDA2WJT0wxseVd4lsDP4+3ONX6HpMXQa1ZhFQ7SU+GjvORCmShbCMltrtIDfkYhVHrOcPtj+KhmDBdPdZD1g== +safe-timers@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/safe-timers/-/safe-timers-1.1.0.tgz#c58ae8325db8d3b067322f0a4ef3a0cad67aad83" + integrity sha512-9aqY+v5eMvmRaluUEtdRThV1EjlSElzO7HuCj0sTW9xvp++8iJ9t/RWGNWV6/WHcUJLHpyT2SNf/apoKTU2EpA== + "safer-buffer@>= 2.1.2 < 3", "safer-buffer@>= 2.1.2 < 3.0.0": version "2.1.2" resolved "https://registry.yarnpkg.com/safer-buffer/-/safer-buffer-2.1.2.tgz#44fa161b0187b9549dd84bb91802f9bd8385cd6a" @@ -6194,6 +6397,11 @@ toidentifier@1.0.1: resolved "https://registry.yarnpkg.com/toidentifier/-/toidentifier-1.0.1.tgz#3be34321a88a820ed1bd80dfaa33e479fbb8dd35" integrity sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA== +toposort@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/toposort/-/toposort-2.0.2.tgz#ae21768175d1559d48bef35420b2f4962f09c330" + integrity sha512-0a5EOkAUp8D4moMi2W8ZF8jcga7BgZd91O/yabJCFY8az+XSzeGyTKs0Aoo897iV1Nj6guFq8orWDS96z91oGg== + touch@^3.1.0: version "3.1.0" resolved "https://registry.yarnpkg.com/touch/-/touch-3.1.0.tgz#fe365f5f75ec9ed4e56825e0bb76d24ab74af83b" @@ -6571,7 +6779,7 @@ winston-transport@^4.7.0: readable-stream "^3.6.0" triple-beam "^1.3.0" -winston@^3.7.2, winston@^3.9.0: +winston@^3.13.0, winston@^3.7.2, winston@^3.9.0: version "3.13.0" resolved "https://registry.yarnpkg.com/winston/-/winston-3.13.0.tgz#e76c0d722f78e04838158c61adc1287201de7ce3" integrity sha512-rwidmA1w3SE4j0E5MuIufFhyJPBDG7Nu71RkZor1p2+qHvJSZ9GYDA81AyleQcZbh/+V6HjeBdfnTZJm9rSeQQ== @@ -6677,6 +6885,19 @@ yocto-queue@^0.1.0: resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b" integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q== +yup@0.32.9: + version "0.32.9" + resolved "https://registry.yarnpkg.com/yup/-/yup-0.32.9.tgz#9367bec6b1b0e39211ecbca598702e106019d872" + integrity sha512-Ci1qN+i2H0XpY7syDQ0k5zKQ/DoxO0LzPg8PAR/X4Mpj6DqaeCoIYEEjDJwhArh3Fa7GWbQQVDZKeXYlSH4JMg== + dependencies: + "@babel/runtime" "^7.10.5" + "@types/lodash" "^4.14.165" + lodash "^4.17.20" + lodash-es "^4.17.15" + nanoclone "^0.2.1" + property-expr "^2.0.4" + toposort "^2.0.2" + zip-stream@^4.1.0: version "4.1.1" resolved 
"https://registry.yarnpkg.com/zip-stream/-/zip-stream-4.1.1.tgz#1337fe974dbaffd2fa9a1ba09662a66932bd7135"