mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-14 18:40:11 +01:00
dc4ad6b5a9
* wip bg workers for live document sync * Add ability to re-embed specific documents across many workspaces via background queue bgworkers is gated behind experimental system setting flag that needs to be explicitly enabled UI for watching/unwatching documents that are embedded. TODO: UI to easily manage all bg tasks and see run results TODO: UI to enable this feature and background endpoints to manage it * create frontend views and paths Move elements to correct experimental scope * update migration to delete runs on removal of watched document * Add watch support to YouTube transcripts (#1716) * Add watch support to YouTube transcripts refactor how sync is done for supported types * Watch specific files in Confluence space (#1718) Add failure-prune check for runs * create tmp workflow modifications for beta image * create tmp workflow modifications for beta image * create tmp workflow modifications for beta image * dual build update copy of alert modals * update job interval * Add support for live-sync of Github files * update copy for document sync feature * hide Experimental features from UI * update docs links * [FEAT] Implement new settings menu for experimental features (#1735) * implement new settings menu for experimental features * remove unused context save bar --------- Co-authored-by: timothycarambat <rambat1010@gmail.com> * don't run job on boot * unset workflow changes * Add persistent encryption service Relay key to collector so persistent encryption can be used Encrypt any private data in chunkSources used for replay during resync jobs * update JSDoc * Linting and organization * update modal copy for feature --------- Co-authored-by: Sean Hatfield <seanhatfield5@gmail.com>
113 lines
4.5 KiB
JavaScript
113 lines
4.5 KiB
JavaScript
const { getLinkText } = require("../../processLink");
|
|
|
|
/**
 * Fetches the content of a raw link. Returns the content as a text string of the link in question.
 *
 * Always answers with HTTP 200 and a `{ success, content }` JSON body. Any error raised while
 * fetching is logged via `console.error` and reported as `{ success: false, content: null }`.
 * @param {object} data - metadata from document (eg: link)
 * @param {string} data.link - link handed to `getLinkText` (presumably a URL - confirm against caller)
 * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
 * @returns {Promise<void>}
 * @throws {Error} when `link` is missing; thrown before any response is written.
 */
async function resyncLink({ link }, response) {
  if (!link) throw new Error('Invalid link provided');
  try {
    // `reason` has to be pulled out of the result here: it is interpolated into the
    // failure message below and would otherwise raise a ReferenceError that hides
    // the real cause of the failed fetch.
    const { success, reason, content = null } = await getLinkText(link);
    if (!success) throw new Error(`Failed to sync link content. ${reason}`);
    response.status(200).json({ success, content });
  } catch (e) {
    console.error(e);
    response.status(200).json({
      success: false,
      content: null,
    });
  }
}
|
|
|
|
/**
 * Fetches the content of a YouTube link. Returns the content as a text string of the video in question.
 * We offer this as there may be some videos where a transcription could be manually edited after initial scraping
 * but in general - transcriptions often never change.
 *
 * Responds with HTTP 200 in every handled case: `{ success, content }` when the transcript
 * was fetched, `{ success: false, content: null }` when anything inside the fetch fails.
 * @param {object} data - metadata from document (eg: link)
 * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
 * @returns {Promise<void>}
 * @throws {Error} when `link` is missing; thrown before any response is written.
 */
async function resyncYouTube({ link }, response) {
  if (!link) throw new Error('Invalid link provided');

  try {
    // Loaded on demand, only when a YouTube resync is actually requested.
    const { fetchVideoTranscriptContent } = require("../../utils/extensions/YoutubeTranscript");
    const transcript = await fetchVideoTranscriptContent({ url: link });

    if (!transcript.success) {
      throw new Error(`Failed to sync YouTube video transcript. ${transcript.reason}`);
    }

    response
      .status(200)
      .json({ success: transcript.success, content: transcript.content });
  } catch (error) {
    // Log the cause, but keep the response shape uniform for the caller.
    console.error(error);
    const failure = { success: false, content: null };
    response.status(200).json(failure);
  }
}
|
|
|
|
/**
 * Fetches the content of a specific confluence page via its chunkSource.
 * Returns the content as a text string of the page in question and only that page.
 *
 * Responds with HTTP 200 in every handled case: `{ success, content }` when the page
 * was fetched, `{ success: false, content: null }` when anything inside the fetch fails.
 * @param {object} data - metadata from document (eg: chunkSource)
 * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
 * @returns {Promise<void>}
 * @throws {Error} when `chunkSource` is missing; thrown before any response is written.
 */
async function resyncConfluence({ chunkSource }, response) {
  if (!chunkSource) throw new Error('Invalid source property provided');

  try {
    // Confluence data is `payload` encrypted, so the encrypted payload is expanded back
    // into query params to re-fetch the page with the same access token/params.
    const { encryptionWorker } = response.locals;
    const source = encryptionWorker.expandPayload(chunkSource);
    const { fetchConfluencePage } = require("../../utils/extensions/Confluence");

    // The pathname lacks the real protocol, so it is added back here.
    const pageUrl = `https:${source.pathname}`;
    const query = source.searchParams;
    const page = await fetchConfluencePage({
      pageUrl,
      baseUrl: query.get('baseUrl'),
      accessToken: query.get('token'),
      username: query.get('username'),
    });

    if (!page.success) {
      throw new Error(`Failed to sync Confluence page content. ${page.reason}`);
    }

    response.status(200).json({ success: page.success, content: page.content });
  } catch (error) {
    // Log the cause, but keep the response shape uniform for the caller.
    console.error(error);
    const failure = { success: false, content: null };
    response.status(200).json(failure);
  }
}
|
|
|
|
/**
 * Fetches the content of a specific Github file via its chunkSource.
 * The chunkSource is expanded to recover the repo URL, branch, access token and file path
 * that are passed to `fetchGithubFile`.
 *
 * Responds with HTTP 200 in every handled case: `{ success, content }` when the file
 * was fetched, `{ success: false, content: null }` when anything inside the fetch fails.
 * @param {object} data - metadata from document (eg: chunkSource)
 * @param {import("../../middleware/setDataSigner").ResponseWithSigner} response
 * @returns {Promise<void>}
 * @throws {Error} when `chunkSource` is missing; thrown before any response is written.
 */
async function resyncGithub({ chunkSource }, response) {
  if (!chunkSource) throw new Error('Invalid source property provided');

  try {
    // Github file data is `payload` encrypted (might contain PAT), so the encrypted payload
    // is expanded back into query params to re-fetch the file with the same access token/params.
    const { encryptionWorker } = response.locals;
    const source = encryptionWorker.expandPayload(chunkSource);
    const { fetchGithubFile } = require("../../utils/extensions/GithubRepo");

    // The pathname lacks the real protocol, so it is added back here.
    const repoUrl = `https:${source.pathname}`;
    const query = source.searchParams;
    const file = await fetchGithubFile({
      repoUrl,
      branch: query.get('branch'),
      accessToken: query.get('pat'),
      sourceFilePath: query.get('path'),
    });

    if (!file.success) {
      throw new Error(`Failed to sync Github file content. ${file.reason}`);
    }

    response.status(200).json({ success: file.success, content: file.content });
  } catch (error) {
    // Log the cause, but keep the response shape uniform for the caller.
    console.error(error);
    const failure = { success: false, content: null };
    response.status(200).json(failure);
  }
}
|
|
|
|
module.exports = {
|
|
link: resyncLink,
|
|
youtube: resyncYouTube,
|
|
confluence: resyncConfluence,
|
|
github: resyncGithub,
|
|
} |