diff --git a/collector/package.json b/collector/package.json index 7c82014a4..4a5a99fff 100644 --- a/collector/package.json +++ b/collector/package.json @@ -34,6 +34,7 @@ "mime": "^3.0.0", "moment": "^2.29.4", "multer": "^1.4.5-lts.1", + "node-html-parser": "^6.1.13", "officeparser": "^4.0.5", "openai": "^3.2.1", "pdf-parse": "^1.1.1", @@ -42,11 +43,10 @@ "url-pattern": "^1.0.3", "uuid": "^9.0.0", "wavefile": "^11.0.0", - "youtube-transcript": "^1.0.6", "youtubei.js": "^9.1.0" }, "devDependencies": { "nodemon": "^2.0.22", "prettier": "^2.4.1" } -} +} \ No newline at end of file diff --git a/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/index.js b/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/index.js new file mode 100644 index 000000000..aac94eb48 --- /dev/null +++ b/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/index.js @@ -0,0 +1,90 @@ +/* + * This is just a custom implementation of the Langchain JS YouTubeLoader class + * as the dependency for YoutubeTranscript is quite fickle and it's a rat race to keep it up + * and instead of waiting for patches we can just bring this simple script in-house and at least + * be able to patch it since it's so flaky. When we have more connectors we can kill this because + * it will be a pain to maintain over time. + */ +class YoutubeLoader { + #videoId; + #language; + #addVideoInfo; + + constructor({ videoId = null, language = null, addVideoInfo = false } = {}) { + if (!videoId) throw new Error("Invalid video id!"); + this.#videoId = videoId; + this.#language = language; + this.#addVideoInfo = addVideoInfo; + } + + /** + * Extracts the videoId from a YouTube video URL. + * @param url The URL of the YouTube video. + * @returns The videoId of the YouTube video. 
+ */ + static getVideoID(url) { + const match = url.match( + /.*(?:youtu.be\/|v\/|u\/\w\/|embed\/|watch\?v=)([^#&?]*).*/ + ); + if (match !== null && match[1].length === 11) { + return match[1]; + } else { + throw new Error("Failed to get youtube video id from the url"); + } + } + + /** + * Creates a new instance of the YoutubeLoader class from a YouTube video + * URL. + * @param url The URL of the YouTube video. + * @param config Optional configuration options for the YoutubeLoader instance, excluding the videoId. + * @returns A new instance of the YoutubeLoader class. + */ + static createFromUrl(url, config = {}) { + const videoId = YoutubeLoader.getVideoID(url); + return new YoutubeLoader({ ...config, videoId }); + } + + /** + * Loads the transcript and video metadata from the specified YouTube + * video. It uses the in-house ./youtube-transcript module to fetch the transcript + * and the youtubei.js library to fetch the video metadata. + * @returns Langchain-like doc array with 1 element containing pageContent and metadata. + */ + async load() { + let transcript; + const metadata = { + source: this.#videoId, + }; + try { + const { YoutubeTranscript } = require("./youtube-transcript"); + transcript = await YoutubeTranscript.fetchTranscript(this.#videoId, { + lang: this.#language, + }); + if (!transcript) { + throw new Error("Transcription not found"); + } + if (this.#addVideoInfo) { + const { Innertube } = require("youtubei.js"); + const youtube = await Innertube.create(); + const info = (await youtube.getBasicInfo(this.#videoId)).basic_info; + metadata.description = info.short_description; + metadata.title = info.title; + metadata.view_count = info.view_count; + metadata.author = info.author; + } + } catch (e) { + throw new Error( + `Failed to get YouTube video transcription: ${e?.message}` + ); + } + return [ + { + pageContent: transcript, + metadata, + }, + ]; + } +} + +module.exports.YoutubeLoader = YoutubeLoader; diff --git 
a/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js b/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js new file mode 100644 index 000000000..c81c0ec56 --- /dev/null +++ b/collector/utils/extensions/YoutubeTranscript/YoutubeLoader/youtube-transcript.js @@ -0,0 +1,115 @@ +const { parse } = require("node-html-parser"); +const RE_YOUTUBE = + /(?:youtube\.com\/(?:[^\/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?\/\s]{11})/i; +const USER_AGENT = + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36,gzip(gfe)"; + +class YoutubeTranscriptError extends Error { + constructor(message) { + super(`[YoutubeTranscript] ${message}`); + } +} + +/** + * Class to retrieve a transcript if one exists + */ +class YoutubeTranscript { + /** + * Fetch transcript from a YouTube video + * @param videoId Video url or video identifier + * @param config Object with lang param (eg: en, es, hk, uk) format. + * Will just grab the first caption it can find if none matches lang, so no special lang caption support. + */ + static async fetchTranscript(videoId, config = {}) { + const identifier = this.retrieveVideoId(videoId); + const lang = config?.lang ?? "en"; + try { + const transcriptUrl = await fetch( + `https://www.youtube.com/watch?v=${identifier}`, + { + headers: { + "User-Agent": USER_AGENT, + }, + } + ) + .then((res) => res.text()) + .then((html) => parse(html)) + .then((html) => this.#parseTranscriptEndpoint(html, lang)); + + if (!transcriptUrl) + throw new Error("Failed to locate a transcript for this video!"); + + // Result is hopefully some XML. 
+ const transcriptXML = await fetch(transcriptUrl) + .then((res) => res.text()) + .then((xml) => parse(xml)); + + let transcript = ""; + const chunks = transcriptXML.getElementsByTagName("text"); + for (const chunk of chunks) { + transcript += chunk.textContent; + } + + return transcript; + } catch (e) { + throw new YoutubeTranscriptError(e); + } + } + + static #parseTranscriptEndpoint(document, langCode = null) { + try { + // Get all script tags on document page + const scripts = document.getElementsByTagName("script"); + + // find the player data script. + const playerScript = scripts.find((script) => + script.textContent.includes("var ytInitialPlayerResponse = {") + ); + + const dataString = + playerScript.textContent + ?.split("var ytInitialPlayerResponse = ")?.[1] //get the start of the object {.... + ?.split("};")?.[0] + // chunk off any code after object closure. + "}"; // add back that curly brace we just cut. + + const data = JSON.parse(dataString.trim()); // Attempt a JSON parse + const availableCaptions = + data?.captions?.playerCaptionsTracklistRenderer?.captionTracks || []; + + // If languageCode was specified then search for its code, otherwise get the first. + let captionTrack = availableCaptions?.[0]; + if (langCode) + captionTrack = + availableCaptions.find((track) => + track.languageCode.includes(langCode) + ) ?? availableCaptions?.[0]; + + return captionTrack?.baseUrl; + } catch (e) { + console.error(`YoutubeTranscript.#parseTranscriptEndpoint ${e.message}`); + return null; + } + } + + /** + * Retrieve video id from url or string + * @param videoId video url or video id + */ + static retrieveVideoId(videoId) { + if (videoId.length === 11) { + return videoId; + } + const matchId = videoId.match(RE_YOUTUBE); + if (matchId && matchId.length) { + return matchId[1]; + } + throw new YoutubeTranscriptError( + "Impossible to retrieve Youtube video ID." 
+ ); + } +} + +module.exports = { + YoutubeTranscript, + YoutubeTranscriptError, +}; diff --git a/collector/utils/extensions/YoutubeTranscript/index.js b/collector/utils/extensions/YoutubeTranscript/index.js index a44fe9b1e..b1622870c 100644 --- a/collector/utils/extensions/YoutubeTranscript/index.js +++ b/collector/utils/extensions/YoutubeTranscript/index.js @@ -1,17 +1,17 @@ -const { YoutubeLoader } = require("langchain/document_loaders/web/youtube"); const fs = require("fs"); const path = require("path"); const { default: slugify } = require("slugify"); const { v4 } = require("uuid"); const { writeToServerDocuments, documentsFolder } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); +const { YoutubeLoader } = require("./YoutubeLoader"); function validYoutubeVideoUrl(link) { const UrlPattern = require("url-pattern"); const opts = new URL(link); - const url = `${opts.protocol}//${opts.host}${ - opts.pathname - }?v=${opts.searchParams.get("v")}`; + const url = `${opts.protocol}//${opts.host}${opts.pathname}${ + opts.searchParams.has("v") ? 
`?v=${opts.searchParams.get("v")}` : "" + }`; const shortPatternMatch = new UrlPattern( "https\\://(www.)youtu.be/(:videoId)" @@ -56,9 +56,7 @@ async function loadYouTubeTranscript({ url }) { } const metadata = docs[0].metadata; - let content = ""; - docs.forEach((doc) => (content = content.concat(doc.pageContent))); - + const content = docs[0].pageContent; if (!content.length) { return { success: false, diff --git a/collector/utils/files/index.js b/collector/utils/files/index.js index 4dcc8e39c..1263a59d0 100644 --- a/collector/utils/files/index.js +++ b/collector/utils/files/index.js @@ -84,7 +84,7 @@ async function wipeCollectorStorage() { if (file === "__HOTDIR__.md") continue; try { fs.rmSync(path.join(directory, file)); - } catch { } + } catch {} } resolve(); }); @@ -99,7 +99,7 @@ async function wipeCollectorStorage() { if (file === ".placeholder") continue; try { fs.rmSync(path.join(directory, file)); - } catch { } + } catch {} } resolve(); }); diff --git a/collector/yarn.lock b/collector/yarn.lock index f7b7b696c..0938c995f 100644 --- a/collector/yarn.lock +++ b/collector/yarn.lock @@ -503,6 +503,11 @@ body-parser@^1.20.2: type-is "~1.6.18" unpipe "1.0.0" +boolbase@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/boolbase/-/boolbase-1.0.0.tgz#68dff5fbe60c51eb37725ea9e3ed310dcc1e776e" + integrity sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww== + brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-1.1.11.tgz#3c7fcbf529d87226f3d2f52b966ff5271eb441dd" @@ -589,11 +594,6 @@ camelcase@6: resolved "https://registry.yarnpkg.com/camelcase/-/camelcase-6.3.0.tgz#5685b95eb209ac9c0c177467778c9c84df58ba9a" integrity sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA== -centra@^2.6.0: - version "2.6.0" - resolved 
"https://registry.yarnpkg.com/centra/-/centra-2.6.0.tgz#79117998ee6908642258db263871381aa5d1204a" - integrity sha512-dgh+YleemrT8u85QL11Z6tYhegAs3MMxsaWAq/oXeAmYJ7VxL3SI9TZtnfaEvNDMAPolj25FXIb3S+HCI4wQaQ== - chalk@^2.4.2: version "2.4.2" resolved "https://registry.yarnpkg.com/chalk/-/chalk-2.4.2.tgz#cd42541677a54333cf541a49108c1432b44c9424" @@ -796,6 +796,22 @@ crypt@0.0.2: resolved "https://registry.yarnpkg.com/crypt/-/crypt-0.0.2.tgz#88d7ff7ec0dfb86f713dc87bbb42d044d3e6c41b" integrity sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow== +css-select@^5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6" + integrity sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg== + dependencies: + boolbase "^1.0.0" + css-what "^6.1.0" + domhandler "^5.0.2" + domutils "^3.0.1" + nth-check "^2.0.1" + +css-what@^6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/css-what/-/css-what-6.1.0.tgz#fb5effcf76f1ddea2c81bdfaa4de44e79bac70f4" + integrity sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw== + data-uri-to-buffer@^6.0.0: version "6.0.1" resolved "https://registry.yarnpkg.com/data-uri-to-buffer/-/data-uri-to-buffer-6.0.1.tgz#540bd4c8753a25ee129035aebdedf63b078703c7" @@ -2244,6 +2260,14 @@ node-forge@^1.3.1: resolved "https://registry.yarnpkg.com/node-forge/-/node-forge-1.3.1.tgz#be8da2af243b2417d5f646a770663a92b7e9ded3" integrity sha512-dPEtOeMvF9VMcYV/1Wb8CPoVAXtp6MKMlcbAt4ddqmGqUJ6fQZFXkNZNkNlfevtNkGtaSoXf/vNNNSvgrdXwtA== +node-html-parser@^6.1.13: + version "6.1.13" + resolved "https://registry.yarnpkg.com/node-html-parser/-/node-html-parser-6.1.13.tgz#a1df799b83df5c6743fcd92740ba14682083b7e4" + integrity sha512-qIsTMOY4C/dAa5Q5vsobRpOOvPfC4pB61UVW2uSwZNUp0QU/jCekTal1vMmbO0DgdHeLUJpv/ARmDqErVxA3Sg== + dependencies: + css-select "^5.1.0" + he 
"1.2.0" + nodemailer@6.9.3: version "6.9.3" resolved "https://registry.yarnpkg.com/nodemailer/-/nodemailer-6.9.3.tgz#e4425b85f05d83c43c5cd81bf84ab968f8ef5cbe" @@ -2294,6 +2318,13 @@ npmlog@^5.0.1: gauge "^3.0.0" set-blocking "^2.0.0" +nth-check@^2.0.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/nth-check/-/nth-check-2.1.1.tgz#c9eab428effce36cd6b92c924bdb000ef1f1ed1d" + integrity sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w== + dependencies: + boolbase "^1.0.0" + num-sort@^2.0.0: version "2.1.0" resolved "https://registry.yarnpkg.com/num-sort/-/num-sort-2.1.0.tgz#1cbb37aed071329fdf41151258bc011898577a9b" @@ -2522,13 +2553,6 @@ pend@~1.2.0: resolved "https://registry.yarnpkg.com/pend/-/pend-1.2.0.tgz#7a57eb550a6783f9115331fcf4663d5c8e007a50" integrity sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg== -phin@^3.5.0: - version "3.7.0" - resolved "https://registry.yarnpkg.com/phin/-/phin-3.7.0.tgz#eeeff7660408515d8cf0c6252901012d4ab7153b" - integrity sha512-DqnVNrpYhKGBZppNKprD+UJylMeEKOZxHgPB+ZP6mGzf3uA2uox4Ep9tUm+rUc8WLIdHT3HcAE4X8fhwQA9JKg== - dependencies: - centra "^2.6.0" - picomatch@^2.0.4, picomatch@^2.2.1: version "2.3.1" resolved "https://registry.yarnpkg.com/picomatch/-/picomatch-2.3.1.tgz#3ba3833733646d9d3e4995946c1365a67fb07a42" @@ -3421,13 +3445,6 @@ yauzl@^2.10.0, yauzl@^2.4.2: buffer-crc32 "~0.2.3" fd-slicer "~1.1.0" -youtube-transcript@^1.0.6: - version "1.0.6" - resolved "https://registry.yarnpkg.com/youtube-transcript/-/youtube-transcript-1.0.6.tgz#8414c04380d3ef1102bd00ca3729e94c46ae7a14" - integrity sha512-k/6uxB9voj/5astl6+q+VArX/aWHhnmle8BucvUCTYTQQEOSVlBiXkrI0KD3o8A0b44MV6q0bmVNiJFIpTlcZA== - dependencies: - phin "^3.5.0" - youtubei.js@^9.1.0: version "9.1.0" resolved "https://registry.yarnpkg.com/youtubei.js/-/youtubei.js-9.1.0.tgz#bcf154c9fa21d3c8c1d00a5e10360d0a065c660e" diff --git a/frontend/.gitignore b/frontend/.gitignore index 
196c8f691..787206034 100644 --- a/frontend/.gitignore +++ b/frontend/.gitignore @@ -12,6 +12,7 @@ dist lib dist-ssr *.local +!frontend/components/lib # Editor directories and files .vscode/* diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 9ef160e72..0a5ed65fc 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -35,16 +35,13 @@ const GeneralTranscriptionPreference = lazy( const GeneralEmbeddingPreference = lazy( () => import("@/pages/GeneralSettings/EmbeddingPreference") ); +const EmbeddingTextSplitterPreference = lazy( + () => import("@/pages/GeneralSettings/EmbeddingTextSplitterPreference") +); const GeneralVectorDatabase = lazy( () => import("@/pages/GeneralSettings/VectorDatabase") ); const GeneralSecurity = lazy(() => import("@/pages/GeneralSettings/Security")); -const DataConnectors = lazy( - () => import("@/pages/GeneralSettings/DataConnectors") -); -const DataConnectorSetup = lazy( - () => import("@/pages/GeneralSettings/DataConnectors/Connectors") -); const WorkspaceSettings = lazy(() => import("@/pages/WorkspaceSettings")); const EmbedConfigSetup = lazy( () => import("@/pages/GeneralSettings/EmbedConfigs") @@ -92,6 +89,12 @@ export default function App() { path="/settings/embedding-preference" element={} /> + + } + /> } @@ -145,15 +148,6 @@ export default function App() { path="/settings/workspaces" element={} /> - } - /> - } - /> - {/* Onboarding Flow */} } /> } /> diff --git a/frontend/src/components/DataConnectorOption/index.jsx b/frontend/src/components/DataConnectorOption/index.jsx index df7fad0f6..038624acf 100644 --- a/frontend/src/components/DataConnectorOption/index.jsx +++ b/frontend/src/components/DataConnectorOption/index.jsx @@ -1,6 +1,3 @@ -import paths from "@/utils/paths"; -import ConnectorImages from "./media"; - export default function DataConnectorOption({ slug }) { if (!DATA_CONNECTORS.hasOwnProperty(slug)) return null; const { path, image, name, description, link } = DATA_CONNECTORS[slug]; @@ -26,22 +23,3 @@ 
export default function DataConnectorOption({ slug }) { ); } - -export const DATA_CONNECTORS = { - github: { - name: "GitHub Repo", - path: paths.settings.dataConnectors.github(), - image: ConnectorImages.github, - description: - "Import an entire public or private Github repository in a single click.", - link: "https://github.com", - }, - "youtube-transcript": { - name: "YouTube Transcript", - path: paths.settings.dataConnectors.youtubeTranscript(), - image: ConnectorImages.youtube, - description: - "Import the transcription of an entire YouTube video from a link.", - link: "https://youtube.com", - }, -}; diff --git a/frontend/src/components/DataConnectorOption/media/github.png b/frontend/src/components/DataConnectorOption/media/github.png deleted file mode 100644 index 835221bab..000000000 Binary files a/frontend/src/components/DataConnectorOption/media/github.png and /dev/null differ diff --git a/frontend/src/components/DataConnectorOption/media/github.svg b/frontend/src/components/DataConnectorOption/media/github.svg new file mode 100644 index 000000000..e01722151 --- /dev/null +++ b/frontend/src/components/DataConnectorOption/media/github.svg @@ -0,0 +1,4 @@ + + + + diff --git a/frontend/src/components/DataConnectorOption/media/index.js b/frontend/src/components/DataConnectorOption/media/index.js index b3bacc1de..543bed5f7 100644 --- a/frontend/src/components/DataConnectorOption/media/index.js +++ b/frontend/src/components/DataConnectorOption/media/index.js @@ -1,5 +1,5 @@ -import Github from "./github.png"; -import YouTube from "./youtube.png"; +import Github from "./github.svg"; +import YouTube from "./youtube.svg"; const ConnectorImages = { github: Github, diff --git a/frontend/src/components/DataConnectorOption/media/youtube.png b/frontend/src/components/DataConnectorOption/media/youtube.png deleted file mode 100644 index aed2b0475..000000000 Binary files a/frontend/src/components/DataConnectorOption/media/youtube.png and /dev/null differ diff --git 
a/frontend/src/components/DataConnectorOption/media/youtube.svg b/frontend/src/components/DataConnectorOption/media/youtube.svg new file mode 100644 index 000000000..5fd97768a --- /dev/null +++ b/frontend/src/components/DataConnectorOption/media/youtube.svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx index e8c288d60..9fe283ffe 100644 --- a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx @@ -1,26 +1,6 @@ -import { Info } from "@phosphor-icons/react"; -import paths from "@/utils/paths"; - -export default function AnthropicAiOptions({ settings, showAlert = false }) { +export default function AnthropicAiOptions({ settings }) { return (
- {showAlert && ( -
-
- -

- Anthropic as your LLM requires you to set an embedding service to - use. -

-
- - Manage embedding → - -
- )}
-
- - -
+ {!settings?.credentialsOnly && ( +
+ + +
+ )}
); diff --git a/frontend/src/components/LLMSelection/GeminiLLMOptions/index.jsx b/frontend/src/components/LLMSelection/GeminiLLMOptions/index.jsx index 3b53ccc1e..a46e51329 100644 --- a/frontend/src/components/LLMSelection/GeminiLLMOptions/index.jsx +++ b/frontend/src/components/LLMSelection/GeminiLLMOptions/index.jsx @@ -18,25 +18,27 @@ export default function GeminiLLMOptions({ settings }) { /> -
- - -
+ {!settings?.credentialsOnly && ( +
+ + +
+ )} ); diff --git a/frontend/src/components/LLMSelection/GroqAiOptions/index.jsx b/frontend/src/components/LLMSelection/GroqAiOptions/index.jsx index cc6fbbcc0..c85f0f1e0 100644 --- a/frontend/src/components/LLMSelection/GroqAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/GroqAiOptions/index.jsx @@ -17,25 +17,27 @@ export default function GroqAiOptions({ settings }) { /> -
- - -
+ {!settings?.credentialsOnly && ( +
+ + +
+ )} ); } diff --git a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx index 200c77a6e..9a1c59bc7 100644 --- a/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx +++ b/frontend/src/components/LLMSelection/LMStudioOptions/index.jsx @@ -21,7 +21,7 @@ export default function LMStudioOptions({ settings, showAlert = false }) {

Manage embedding → @@ -46,23 +46,27 @@ export default function LMStudioOptions({ settings, showAlert = false }) { onBlur={() => setBasePath(basePathValue)} /> - -
- - e.target.blur()} - defaultValue={settings?.LMStudioTokenLimit} - required={true} - autoComplete="off" - /> -
+ {!settings?.credentialsOnly && ( + <> + +
+ + e.target.blur()} + defaultValue={settings?.LMStudioTokenLimit} + required={true} + autoComplete="off" + /> +
+ + )} ); diff --git a/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx b/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx index 91e386702..1304c9e1b 100644 --- a/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/LocalAiOptions/index.jsx @@ -21,7 +21,7 @@ export default function LocalAiOptions({ settings, showAlert = false }) {

Manage embedding → @@ -46,27 +46,31 @@ export default function LocalAiOptions({ settings, showAlert = false }) { onBlur={() => setBasePath(basePathValue)} /> - -
- - e.target.blur()} - defaultValue={settings?.LocalAiTokenLimit} - required={true} - autoComplete="off" - /> -
+ {!settings?.credentialsOnly && ( + <> + +
+ + e.target.blur()} + defaultValue={settings?.LocalAiTokenLimit} + required={true} + autoComplete="off" + /> +
+ + )}
diff --git a/frontend/src/components/LLMSelection/MistralOptions/index.jsx b/frontend/src/components/LLMSelection/MistralOptions/index.jsx index a143436ee..4daadcff1 100644 --- a/frontend/src/components/LLMSelection/MistralOptions/index.jsx +++ b/frontend/src/components/LLMSelection/MistralOptions/index.jsx @@ -24,7 +24,9 @@ export default function MistralOptions({ settings }) { onBlur={() => setMistralKey(inputValue)} />
- + {!settings?.credentialsOnly && ( + + )}
); } diff --git a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx index ddfd7a81b..b08f29447 100644 --- a/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx +++ b/frontend/src/components/LLMSelection/OllamaLLMOptions/index.jsx @@ -27,23 +27,27 @@ export default function OllamaLLMOptions({ settings }) { onBlur={() => setBasePath(basePathValue)} /> - -
- - e.target.blur()} - defaultValue={settings?.OllamaLLMTokenLimit} - required={true} - autoComplete="off" - /> -
+ {!settings?.credentialsOnly && ( + <> + +
+ + e.target.blur()} + defaultValue={settings?.OllamaLLMTokenLimit} + required={true} + autoComplete="off" + /> +
+ + )} ); diff --git a/frontend/src/components/LLMSelection/OpenAiOptions/index.jsx b/frontend/src/components/LLMSelection/OpenAiOptions/index.jsx index 1e3493096..c5ec337d0 100644 --- a/frontend/src/components/LLMSelection/OpenAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/OpenAiOptions/index.jsx @@ -24,7 +24,9 @@ export default function OpenAiOptions({ settings }) { onBlur={() => setOpenAIKey(inputValue)} /> - + {!settings?.credentialsOnly && ( + + )} ); } diff --git a/frontend/src/components/LLMSelection/OpenRouterOptions/index.jsx b/frontend/src/components/LLMSelection/OpenRouterOptions/index.jsx index ff2a1d8f0..94ae320a2 100644 --- a/frontend/src/components/LLMSelection/OpenRouterOptions/index.jsx +++ b/frontend/src/components/LLMSelection/OpenRouterOptions/index.jsx @@ -19,7 +19,9 @@ export default function OpenRouterOptions({ settings }) { spellCheck={false} /> - + {!settings?.credentialsOnly && ( + + )} ); } @@ -84,7 +86,7 @@ function OpenRouterModelSelection({ settings }) { diff --git a/frontend/src/components/LLMSelection/PerplexityOptions/index.jsx b/frontend/src/components/LLMSelection/PerplexityOptions/index.jsx index 6c4522495..9b53cd191 100644 --- a/frontend/src/components/LLMSelection/PerplexityOptions/index.jsx +++ b/frontend/src/components/LLMSelection/PerplexityOptions/index.jsx @@ -19,7 +19,9 @@ export default function PerplexityOptions({ settings }) { spellCheck={false} /> - + {!settings?.credentialsOnly && ( + + )} ); } diff --git a/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx b/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx index 2c816339f..a0eefc83a 100644 --- a/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx @@ -19,7 +19,9 @@ export default function TogetherAiOptions({ settings }) { spellCheck={false} /> - + {!settings?.credentialsOnly && ( + + )} ); } @@ -84,7 +86,7 @@ function 
TogetherAiModelSelection({ settings }) { diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/ConnectorOption/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/ConnectorOption/index.jsx new file mode 100644 index 000000000..e0b10e050 --- /dev/null +++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/ConnectorOption/index.jsx @@ -0,0 +1,25 @@ +export default function ConnectorOption({ + slug, + selectedConnector, + setSelectedConnector, + image, + name, + description, +}) { + return ( + + ); +} diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Github/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Github/index.jsx new file mode 100644 index 000000000..de6ed77e1 --- /dev/null +++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Github/index.jsx @@ -0,0 +1,271 @@ +import React, { useEffect, useState } from "react"; +import System from "@/models/system"; +import showToast from "@/utils/toast"; +import pluralize from "pluralize"; +import { TagsInput } from "react-tag-input-component"; +import { Warning } from "@phosphor-icons/react"; +import { Tooltip } from "react-tooltip"; + +const DEFAULT_BRANCHES = ["main", "master"]; +export default function GithubOptions() { + const [loading, setLoading] = useState(false); + const [repo, setRepo] = useState(null); + const [accessToken, setAccessToken] = useState(null); + const [ignores, setIgnores] = useState([]); + + const [settings, setSettings] = useState({ + repo: null, + accessToken: null, + }); + + const handleSubmit = async (e) => { + e.preventDefault(); + const form = new FormData(e.target); + + try { + setLoading(true); + showToast( + "Fetching all files for repo - this may take a while.", + "info", + { clear: true, autoClose: false } + ); + const { data, error } = await System.dataConnectors.github.collect({ + repo: form.get("repo"), + accessToken: 
form.get("accessToken"), + branch: form.get("branch"), + ignorePaths: ignores, + }); + + if (!!error) { + showToast(error, "error", { clear: true }); + setLoading(false); + return; + } + + showToast( + `${data.files} ${pluralize("file", data.files)} collected from ${ + data.author + }/${data.repo}:${data.branch}. Output folder is ${data.destination}.`, + "success", + { clear: true } + ); + e.target.reset(); + setLoading(false); + return; + } catch (e) { + console.error(e); + showToast(e.message, "error", { clear: true }); + setLoading(false); + } + }; + + return ( +
+
+
+
+ + +
+
+ +

+ List in .gitignore format to ignore specific files during + collection. Press enter after each entry you want to save. +

+
+ +
+
+ +
+ + {loading && ( +

+ Once complete, all files will be available for embedding into + workspaces in the document picker. +

+ )} +
+
+
+
+ ); +} + +function GitHubBranchSelection({ repo, accessToken }) { + const [allBranches, setAllBranches] = useState(DEFAULT_BRANCHES); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function fetchAllBranches() { + if (!repo) { + setAllBranches(DEFAULT_BRANCHES); + setLoading(false); + return; + } + + setLoading(true); + const { branches } = await System.dataConnectors.github.branches({ + repo, + accessToken, + }); + setAllBranches(branches.length > 0 ? branches : DEFAULT_BRANCHES); + setLoading(false); + } + fetchAllBranches(); + }, [repo, accessToken]); + + if (loading) { + return ( +
+
+ +

+ Branch you wish to collect files from. +

+
+ +
+ ); + } + + return ( +
+
+ +

+ Branch you wish to collect files from. +

+
+ +
+ ); +} diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Youtube/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Youtube/index.jsx new file mode 100644 index 000000000..ed18dcd42 --- /dev/null +++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/Connectors/Youtube/index.jsx @@ -0,0 +1,91 @@ +import React, { useState } from "react"; +import System from "@/models/system"; +import showToast from "@/utils/toast"; + +export default function YoutubeOptions() { + const [loading, setLoading] = useState(false); + + const handleSubmit = async (e) => { + e.preventDefault(); + const form = new FormData(e.target); + + try { + setLoading(true); + showToast("Fetching transcript for YouTube video.", "info", { + clear: true, + autoClose: false, + }); + + const { data, error } = await System.dataConnectors.youtube.transcribe({ + url: form.get("url"), + }); + + if (!!error) { + showToast(error, "error", { clear: true }); + setLoading(false); + return; + } + + showToast( + `${data.title} by ${data.author} transcription completed. Output folder is ${data.destination}.`, + "success", + { clear: true } + ); + e.target.reset(); + setLoading(false); + return; + } catch (e) { + console.error(e); + showToast(e.message, "error", { clear: true }); + setLoading(false); + } + }; + + return ( +
+
+
+
+
+
+
+ +

+ URL of the YouTube video you wish to transcribe. +

+
+ +
+
+
+ +
+ + {loading && ( +

+ Once complete, the transcription will be available for embedding + into workspaces in the document picker. +

+ )} +
+
+
+
+ ); +} diff --git a/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx new file mode 100644 index 000000000..419fc1fc9 --- /dev/null +++ b/frontend/src/components/Modals/MangeWorkspace/DataConnectors/index.jsx @@ -0,0 +1,77 @@ +import ConnectorImages from "@/components/DataConnectorOption/media"; +import { MagnifyingGlass } from "@phosphor-icons/react"; +import GithubOptions from "./Connectors/Github"; +import YoutubeOptions from "./Connectors/Youtube"; +import { useState } from "react"; +import ConnectorOption from "./ConnectorOption"; + +export const DATA_CONNECTORS = { + github: { + name: "GitHub Repo", + image: ConnectorImages.github, + description: + "Import an entire public or private Github repository in a single click.", + options: , + }, + "youtube-transcript": { + name: "YouTube Transcript", + image: ConnectorImages.youtube, + description: + "Import the transcription of an entire YouTube video from a link.", + options: , + }, +}; + +export default function DataConnectors() { + const [selectedConnector, setSelectedConnector] = useState("github"); + const [searchQuery, setSearchQuery] = useState(""); + + const filteredConnectors = Object.keys(DATA_CONNECTORS).filter((slug) => + DATA_CONNECTORS[slug].name.toLowerCase().includes(searchQuery.toLowerCase()) + ); + + return ( +
+
+
+ + setSearchQuery(e.target.value)} + /> +
+
+ {filteredConnectors.length > 0 ? ( + filteredConnectors.map((slug, index) => ( + + )) + ) : ( +
+ No data connectors found. +
+ )} +
+
+
+
+ {DATA_CONNECTORS[selectedConnector].options} +
+
+ ); +} diff --git a/frontend/src/components/Modals/MangeWorkspace/index.jsx b/frontend/src/components/Modals/MangeWorkspace/index.jsx index 4898f531f..2c6e658b0 100644 --- a/frontend/src/components/Modals/MangeWorkspace/index.jsx +++ b/frontend/src/components/Modals/MangeWorkspace/index.jsx @@ -6,12 +6,15 @@ import System from "../../../models/system"; import { isMobile } from "react-device-detect"; import useUser from "../../../hooks/useUser"; import DocumentSettings from "./Documents"; +import DataConnectors from "./DataConnectors"; const noop = () => {}; const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => { const { slug } = useParams(); + const { user } = useUser(); const [workspace, setWorkspace] = useState(null); const [settings, setSettings] = useState({}); + const [selectedTab, setSelectedTab] = useState("documents"); useEffect(() => { async function getSettings() { @@ -67,7 +70,6 @@ const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => {
-
- + + {user?.role !== "default" && ( + + )} + + {selectedTab === "documents" ? ( + + ) : ( + + )}
@@ -84,6 +98,35 @@ const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => { }; export default memo(ManageWorkspace); + +const ModalTabSwitcher = ({ selectedTab, setSelectedTab }) => { + return ( +
+
+ + +
+
+ ); +}; export function useManageWorkspaceModal() { const { user } = useUser(); const [showing, setShowing] = useState(false); diff --git a/frontend/src/components/SettingsSidebar/index.jsx b/frontend/src/components/SettingsSidebar/index.jsx index 66f881ff6..67797d266 100644 --- a/frontend/src/components/SettingsSidebar/index.jsx +++ b/frontend/src/components/SettingsSidebar/index.jsx @@ -15,12 +15,12 @@ import { House, List, FileCode, - Plugs, Notepad, CodeBlock, Barcode, ClosedCaptioning, EyeSlash, + SplitVertical, } from "@phosphor-icons/react"; import useUser from "@/hooks/useUser"; import { USER_BACKGROUND_COLOR } from "@/utils/constants"; @@ -289,12 +289,25 @@ const SidebarOptions = ({ user = null }) => ( allowedRole={["admin"]} />