diff --git a/.github/workflows/dev-build.yaml b/.github/workflows/dev-build.yaml index 40f4971c5..860ea5f6f 100644 --- a/.github/workflows/dev-build.yaml +++ b/.github/workflows/dev-build.yaml @@ -6,7 +6,7 @@ concurrency: on: push: - branches: ['1915-docker-perms'] # master branch only. Do not modify. + branches: ['-dev'] # put your current branch to create a build. Core team only. paths-ignore: - '**.md' - 'cloud-deployments/*' diff --git a/.vscode/settings.json b/.vscode/settings.json index 5e26e4778..60ff747fd 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -31,6 +31,7 @@ "Mintplex", "moderations", "numpages", + "odbc", "Ollama", "Oobabooga", "openai", diff --git a/collector/extensions/index.js b/collector/extensions/index.js index a88b38eee..30beaa3e7 100644 --- a/collector/extensions/index.js +++ b/collector/extensions/index.js @@ -1,5 +1,6 @@ const { setDataSigner } = require("../middleware/setDataSigner"); const { verifyPayloadIntegrity } = require("../middleware/verifyIntegrity"); +const { resolveRepoLoader, resolveRepoLoaderFunction } = require("../utils/extensions/RepoLoader"); const { reqBody } = require("../utils/http"); const { validURL } = require("../utils/url"); const RESYNC_METHODS = require("./resync"); @@ -28,15 +29,16 @@ function extensions(app) { ) app.post( - "/ext/github-repo", + "/ext/:repo_platform-repo", [verifyPayloadIntegrity, setDataSigner], async function (request, response) { try { - const { loadGithubRepo } = require("../utils/extensions/GithubRepo"); - const { success, reason, data } = await loadGithubRepo( + const loadRepo = resolveRepoLoaderFunction(request.params.repo_platform); + const { success, reason, data } = await loadRepo( reqBody(request), response, ); + console.log({ success, reason, data }) response.status(200).json({ success, reason, @@ -56,12 +58,12 @@ function extensions(app) { // gets all branches for a specific repo app.post( - "/ext/github-repo/branches", + "/ext/:repo_platform-repo/branches", [verifyPayloadIntegrity], async function (request, response) { try { - const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader"); - const allBranches = await new GithubRepoLoader( + const RepoLoader = resolveRepoLoader(request.params.repo_platform); + const allBranches = await new RepoLoader( reqBody(request) ).getRepoBranches(); response.status(200).json({ diff --git a/collector/extensions/resync/index.js b/collector/extensions/resync/index.js index ba967962e..66882ba7a 100644 --- a/collector/extensions/resync/index.js +++ b/collector/extensions/resync/index.js @@ -86,7 +86,7 @@ async function resyncGithub({ chunkSource }, response) { // Github file data is `payload` encrypted (might contain PAT). So we need to expand its // encrypted payload back into query params so we can reFetch the page with same access token/params. const source = response.locals.encryptionWorker.expandPayload(chunkSource); - const { fetchGithubFile } = require("../../utils/extensions/GithubRepo"); + const { fetchGithubFile } = require("../../utils/extensions/RepoLoader/GithubRepo"); const { success, reason, content } = await fetchGithubFile({ repoUrl: `https:${source.pathname}`, // need to add back the real protocol branch: source.searchParams.get('branch'), diff --git a/collector/package.json b/collector/package.json index 5e3873d1e..cbc5ceed0 100644 --- a/collector/package.json +++ b/collector/package.json @@ -32,6 +32,7 @@ "mammoth": "^1.6.0", "mbox-parser": "^1.0.1", "mime": "^3.0.0", + "minimatch": "5.1.0", "moment": "^2.29.4", "multer": "^1.4.5-lts.1", "node-html-parser": "^6.1.13", @@ -50,4 +51,4 @@ "nodemon": "^2.0.22", "prettier": "^2.4.1" } -} +} \ No newline at end of file diff --git a/collector/utils/extensions/GithubRepo/RepoLoader/index.js b/collector/utils/extensions/RepoLoader/GithubRepo/RepoLoader/index.js similarity index 80% rename from collector/utils/extensions/GithubRepo/RepoLoader/index.js rename to collector/utils/extensions/RepoLoader/GithubRepo/RepoLoader/index.js index af8a1dfc3..08121f44f 100644 --- a/collector/utils/extensions/GithubRepo/RepoLoader/index.js +++ b/collector/utils/extensions/RepoLoader/GithubRepo/RepoLoader/index.js @@ -1,4 +1,21 @@ -class RepoLoader { +/** + * @typedef {Object} RepoLoaderArgs + * @property {string} repo - The GitHub repository URL. + * @property {string} [branch] - The branch to load from (optional). + * @property {string} [accessToken] - GitHub access token for authentication (optional). + * @property {string[]} [ignorePaths] - Array of paths to ignore when loading (optional). + */ + +/** + * @class + * @classdesc Loads and manages GitHub repository content. + */ +class GitHubRepoLoader { + /** + * Creates an instance of RepoLoader. + * @param {RepoLoaderArgs} [args] - The configuration options. + * @returns {GitHubRepoLoader} + */ constructor(args = {}) { this.ready = false; this.repo = args?.repo; @@ -67,6 +84,10 @@ class RepoLoader { return; } + /** + * Initializes the RepoLoader instance. + * @returns {Promise} The initialized RepoLoader instance. + */ async init() { if (!this.#validGithubUrl()) return; await this.#validBranch(); @@ -75,6 +96,11 @@ class RepoLoader { return this; } + /** + * Recursively loads the repository content. + * @returns {Promise>} An array of loaded documents. + * @throws {Error} If the RepoLoader is not in a ready state. + */ async recursiveLoader() { if (!this.ready) throw new Error("[Github Loader]: not in ready state!"); const { @@ -109,7 +135,10 @@ class RepoLoader { }, []); } - // Get all branches for a given repo. + /** + * Retrieves all branches for the repository. + * @returns {Promise} An array of branch names. + */ async getRepoBranches() { if (!this.#validGithubUrl() || !this.author || !this.project) return []; await this.#validateAccessToken(); // Ensure API access token is valid for pre-flight @@ -151,6 +180,11 @@ class RepoLoader { return this.#branchPrefSort(this.branches); } + /** + * Fetches the content of a single file from the repository. + * @param {string} sourceFilePath - The path to the file in the repository. + * @returns {Promise} The content of the file, or null if fetching fails. + */ async fetchSingleFile(sourceFilePath) { try { return fetch( @@ -182,4 +216,4 @@ class RepoLoader { } } -module.exports = RepoLoader; +module.exports = GitHubRepoLoader; diff --git a/collector/utils/extensions/GithubRepo/index.js b/collector/utils/extensions/RepoLoader/GithubRepo/index.js similarity index 98% rename from collector/utils/extensions/GithubRepo/index.js rename to collector/utils/extensions/RepoLoader/GithubRepo/index.js index 40f5a6922..10b408584 100644 --- a/collector/utils/extensions/GithubRepo/index.js +++ b/collector/utils/extensions/RepoLoader/GithubRepo/index.js @@ -3,8 +3,8 @@ const fs = require("fs"); const path = require("path"); const { default: slugify } = require("slugify"); const { v4 } = require("uuid"); -const { writeToServerDocuments, documentsFolder } = require("../../files"); -const { tokenizeString } = require("../../tokenizer"); +const { writeToServerDocuments, documentsFolder } = require("../../../files"); +const { tokenizeString } = require("../../../tokenizer"); /** * Load in a Github Repo recursively or just the top level if no PAT is provided @@ -37,6 +37,7 @@ async function loadGithubRepo(args, response) { const outFolder = slugify( `${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}` ).toLowerCase(); + const outFolderPath = path.resolve(documentsFolder, outFolder); if (!fs.existsSync(outFolderPath)) fs.mkdirSync(outFolderPath, { recursive: true }); diff --git a/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js b/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js new file mode 100644 index 000000000..c90932986 --- /dev/null +++ b/collector/utils/extensions/RepoLoader/GitlabRepo/RepoLoader/index.js @@ -0,0 +1,289 @@ +const minimatch = require("minimatch"); + +/** + * @typedef {Object} RepoLoaderArgs + * @property {string} repo - The GitLab repository URL. + * @property {string} [branch] - The branch to load from (optional). + * @property {string} [accessToken] - GitLab access token for authentication (optional). + * @property {string[]} [ignorePaths] - Array of paths to ignore when loading (optional). + */ + +/** + * @typedef {Object} FileTreeObject + * @property {string} id - The file object ID. + * @property {string} name - name of file. + * @property {('blob'|'tree')} type - type of file object. + * @property {string} path - path + name of file. + * @property {string} mode - Linux permission code. + */ + +/** + * @class + * @classdesc Loads and manages GitLab repository content. + */ +class GitLabRepoLoader { + /** + * Creates an instance of RepoLoader. + * @param {RepoLoaderArgs} [args] - The configuration options. + * @returns {GitLabRepoLoader} + */ + constructor(args = {}) { + this.ready = false; + this.repo = args?.repo; + this.branch = args?.branch; + this.accessToken = args?.accessToken || null; + this.ignorePaths = args?.ignorePaths || []; + + this.projectId = null; + this.apiBase = "https://gitlab.com"; + this.author = null; + this.project = null; + this.branches = []; + } + + #validGitlabUrl() { + const UrlPattern = require("url-pattern"); + const validPatterns = [ + new UrlPattern("https\\://gitlab.com/(:projectId(*))", { + segmentValueCharset: "a-zA-Z0-9-._~%/+", + }), + // This should even match the regular hosted URL, but we may want to know + // if this was a hosted GitLab (above) or a self-hosted (below) instance + // since the API interface could be different. + new UrlPattern( + "(:protocol(http|https))\\://(:hostname*)/(:projectId(*))", + { + segmentValueCharset: "a-zA-Z0-9-._~%/+", + } + ), + ]; + + let match = null; + for (const pattern of validPatterns) { + if (match !== null) continue; + match = pattern.match(this.repo); + } + if (!match) return false; + const [author, project] = match.projectId.split("/"); + + this.projectId = encodeURIComponent(match.projectId); + this.apiBase = new URL(this.repo).origin; + this.author = author; + this.project = project; + return true; + } + + async #validBranch() { + await this.getRepoBranches(); + if (!!this.branch && this.branches.includes(this.branch)) return; + + console.log( + "[Gitlab Loader]: Branch not set! Auto-assigning to a default branch." + ); + this.branch = this.branches.includes("main") ? "main" : "master"; + console.log(`[Gitlab Loader]: Branch auto-assigned to ${this.branch}.`); + return; + } + + async #validateAccessToken() { + if (!this.accessToken) return; + try { + await fetch(`${this.apiBase}/api/v4/user`, { + method: "GET", + headers: this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {}, + }).then((res) => res.ok); + } catch (e) { + console.error( + "Invalid Gitlab Access Token provided! Access token will not be used", + e.message + ); + this.accessToken = null; + } + } + + /** + * Initializes the RepoLoader instance. + * @returns {Promise} The initialized RepoLoader instance. + */ + async init() { + if (!this.#validGitlabUrl()) return; + await this.#validBranch(); + await this.#validateAccessToken(); + this.ready = true; + return this; + } + + /** + * Recursively loads the repository content. + * @returns {Promise>} An array of loaded documents. + * @throws {Error} If the RepoLoader is not in a ready state. + */ + async recursiveLoader() { + if (!this.ready) throw new Error("[Gitlab Loader]: not in ready state!"); + + if (this.accessToken) + console.log( + `[Gitlab Loader]: Access token set! Recursive loading enabled!` + ); + + const files = await this.fetchFilesRecursive(); + const docs = []; + + for (const file of files) { + if (this.ignorePaths.some((path) => file.path.includes(path))) continue; + + const content = await this.fetchSingleFileContents(file.path); + if (content) { + docs.push({ + pageContent: content, + metadata: { source: file.path }, + }); + } + } + + return docs; + } + + #branchPrefSort(branches = []) { + const preferredSort = ["main", "master"]; + return branches.reduce((acc, branch) => { + if (preferredSort.includes(branch)) return [branch, ...acc]; + return [...acc, branch]; + }, []); + } + + /** + * Retrieves all branches for the repository. + * @returns {Promise} An array of branch names. + */ + async getRepoBranches() { + if (!this.#validGitlabUrl() || !this.projectId) return []; + await this.#validateAccessToken(); + + try { + this.branches = await fetch( + `${this.apiBase}/api/v4/projects/${this.projectId}/repository/branches`, + { + method: "GET", + headers: { + Accepts: "application/json", + ...(this.accessToken ? { "PRIVATE-TOKEN": this.accessToken } : {}), + }, + } + ) + .then((res) => res.json()) + .then((branches) => { + return branches.map((b) => b.name); + }) + .catch((e) => { + console.error(e); + return []; + }); + + return this.#branchPrefSort(this.branches); + } catch (err) { + console.log(`RepoLoader.getRepoBranches`, err); + this.branches = []; + return []; + } + } + + /** + * Returns list of all file objects from tree API for GitLab + * @returns {Promise} + */ + async fetchFilesRecursive() { + const files = []; + let perPage = 100; + let fetching = true; + let page = 1; + + while (fetching) { + try { + const params = new URLSearchParams({ + ref: this.branch, + recursive: true, + per_page: perPage, + page, + }); + const queryUrl = `${this.apiBase}/api/v4/projects/${ + this.projectId + }/repository/tree?${params.toString()}`; + const response = await fetch(queryUrl, { + method: "GET", + headers: this.accessToken + ? { "PRIVATE-TOKEN": this.accessToken } + : {}, + }); + const totalPages = Number(response.headers.get("x-total-pages")); + const nextPage = Number(response.headers.get("x-next-page")); + const data = await response.json(); + + /** @type {FileTreeObject[]} */ + const objects = Array.isArray(data) + ? data.filter((item) => item.type === "blob") + : []; // only get files, not paths or submodules + if (objects.length === 0) { + fetching = false; + break; + } + + // Apply ignore path rules to found objects. If any rules match it is an invalid file path. + console.log( + `Found ${objects.length} blobs from repo from pg ${page}/${totalPages}` + ); + for (const file of objects) { + const isIgnored = this.ignorePaths.some((ignorePattern) => + minimatch(file.path, ignorePattern, { matchBase: true }) + ); + if (!isIgnored) files.push(file); + } + + if (page === totalPages) { + fetching = false; + break; + } + + page = Number(nextPage); + } catch (e) { + console.error(`RepoLoader.getRepositoryTree`, e); + fetching = false; + break; + } + } + return files; + } + + /** + * Fetches the content of a single file from the repository. + * @param {string} sourceFilePath - The path to the file in the repository. + * @returns {Promise} The content of the file, or null if fetching fails. + */ + async fetchSingleFileContents(sourceFilePath) { + try { + const data = await fetch( + `${this.apiBase}/api/v4/projects/${ + this.projectId + }/repository/files/${encodeURIComponent(sourceFilePath)}/raw?ref=${ + this.branch + }`, + { + method: "GET", + headers: this.accessToken + ? { "PRIVATE-TOKEN": this.accessToken } + : {}, + } + ).then((res) => { + if (res.ok) return res.text(); + throw new Error(`Failed to fetch single file ${sourceFilePath}`); + }); + + return data; + } catch (e) { + console.error(`RepoLoader.fetchSingleFileContents`, e); + return null; + } + } +} + +module.exports = GitLabRepoLoader; diff --git a/collector/utils/extensions/RepoLoader/GitlabRepo/index.js b/collector/utils/extensions/RepoLoader/GitlabRepo/index.js new file mode 100644 index 000000000..450763bbf --- /dev/null +++ b/collector/utils/extensions/RepoLoader/GitlabRepo/index.js @@ -0,0 +1,138 @@ +const RepoLoader = require("./RepoLoader"); +const fs = require("fs"); +const path = require("path"); +const { default: slugify } = require("slugify"); +const { v4 } = require("uuid"); +const { writeToServerDocuments, documentsFolder } = require("../../../files"); +const { tokenizeString } = require("../../../tokenizer"); + +/** + * Load in a Gitlab Repo recursively or just the top level if no PAT is provided + * @param {object} args - forwarded request body params + * @param {import("../../../middleware/setDataSigner").ResponseWithSigner} response - Express response object with encryptionWorker + * @returns + */ +async function loadGitlabRepo(args, response) { + const repo = new RepoLoader(args); + await repo.init(); + + if (!repo.ready) + return { + success: false, + reason: "Could not prepare Gitlab repo for loading! Check URL", + }; + + console.log( + `-- Working GitLab ${repo.author}/${repo.project}:${repo.branch} --` + ); + const docs = await repo.recursiveLoader(); + if (!docs.length) { + return { + success: false, + reason: "No files were found for those settings.", + }; + } + + console.log(`[GitLab Loader]: Found ${docs.length} source files. Saving...`); + const outFolder = slugify( + `${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}` + ).toLowerCase(); + + const outFolderPath = path.resolve(documentsFolder, outFolder); + if (!fs.existsSync(outFolderPath)) + fs.mkdirSync(outFolderPath, { recursive: true }); + + for (const doc of docs) { + if (!doc.pageContent) continue; + const data = { + id: v4(), + url: "gitlab://" + doc.metadata.source, + title: doc.metadata.source, + docAuthor: repo.author, + description: "No description found.", + docSource: doc.metadata.source, + chunkSource: generateChunkSource( + repo, + doc, + response.locals.encryptionWorker + ), + published: new Date().toLocaleString(), + wordCount: doc.pageContent.split(" ").length, + pageContent: doc.pageContent, + token_count_estimate: tokenizeString(doc.pageContent).length, + }; + console.log( + `[GitLab Loader]: Saving ${doc.metadata.source} to ${outFolder}` + ); + writeToServerDocuments( + data, + `${slugify(doc.metadata.source)}-${data.id}`, + outFolderPath + ); + } + + return { + success: true, + reason: null, + data: { + author: repo.author, + repo: repo.project, + projectId: repo.projectId, + branch: repo.branch, + files: docs.length, + destination: outFolder, + }, + }; +} + +async function fetchGitlabFile({ + repoUrl, + branch, + accessToken = null, + sourceFilePath, +}) { + const repo = new RepoLoader({ + repo: repoUrl, + branch, + accessToken, + }); + await repo.init(); + + if (!repo.ready) + return { + success: false, + content: null, + reason: "Could not prepare GitLab repo for loading! Check URL or PAT.", + }; + console.log( + `-- Working GitLab ${repo.author}/${repo.project}:${repo.branch} file:${sourceFilePath} --` + ); + const fileContent = await repo.fetchSingleFile(sourceFilePath); + if (!fileContent) { + return { + success: false, + reason: "Target file returned a null content response.", + content: null, + }; + } + + return { + success: true, + reason: null, + content: fileContent, + }; +} + +function generateChunkSource(repo, doc, encryptionWorker) { + const payload = { + projectId: decodeURIComponent(repo.projectId), + branch: repo.branch, + path: doc.metadata.source, + pat: !!repo.accessToken ? repo.accessToken : null, + }; + return `gitlab://${repo.repo}?payload=${encryptionWorker.encrypt( + JSON.stringify(payload) + )}`; +} + +module.exports = { loadGitlabRepo, fetchGitlabFile }; diff --git a/collector/utils/extensions/RepoLoader/index.js b/collector/utils/extensions/RepoLoader/index.js new file mode 100644 index 000000000..6395e889e --- /dev/null +++ b/collector/utils/extensions/RepoLoader/index.js @@ -0,0 +1,41 @@ +/** + * Dynamically load the correct repository loader from a specific platform + * by default will return Github. + * @param {('github'|'gitlab')} platform + * @returns {import("./GithubRepo/RepoLoader")|import("./GitlabRepo/RepoLoader")} the repo loader class for provider + */ +function resolveRepoLoader(platform = "github") { + switch (platform) { + case "github": + console.log(`Loading GitHub RepoLoader...`); + return require("./GithubRepo/RepoLoader"); + case "gitlab": + console.log(`Loading GitLab RepoLoader...`); + return require("./GitlabRepo/RepoLoader"); + default: + console.log(`Loading GitHub RepoLoader...`); + return require("./GithubRepo/RepoLoader"); + } +} + +/** + * Dynamically load the correct repository loader function from a specific platform + * by default will return Github. + * @param {('github'|'gitlab')} platform + * @returns {import("./GithubRepo")['fetchGithubFile'] | import("./GitlabRepo")['fetchGitlabFile']} the repo loader class for provider + */ +function resolveRepoLoaderFunction(platform = "github") { + switch (platform) { + case "github": + console.log(`Loading GitHub loader function...`); + return require("./GithubRepo").loadGithubRepo; + case "gitlab": + console.log(`Loading GitLab loader function...`); + return require("./GitlabRepo").loadGitlabRepo; + default: + console.log(`Loading GitHub loader function...`); + return require("./GithubRepo").loadGithubRepo; + } +} + +module.exports = { resolveRepoLoader, resolveRepoLoaderFunction }; diff --git a/collector/yarn.lock b/collector/yarn.lock index 24dfd435f..68d0181a6 100644 --- a/collector/yarn.lock +++ b/collector/yarn.lock @@ -581,6 +581,13 @@ brace-expansion@^1.1.7: balanced-match "^1.0.0" concat-map "0.0.1" +brace-expansion@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-2.0.1.tgz#1edc459e0f0c548486ecf9fc99f2221364b9a0ae" + integrity sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA== + dependencies: + balanced-match "^1.0.0" + braces@~3.0.2: version "3.0.2" resolved "https://registry.yarnpkg.com/braces/-/braces-3.0.2.tgz#3454e1a462ee8d599e236df336cd9ea4f8afe107" @@ -2226,6 +2233,13 @@ mimic-response@^3.1.0: resolved "https://registry.yarnpkg.com/mimic-response/-/mimic-response-3.1.0.tgz#2d1d59af9c1b129815accc2c46a022a5ce1fa3c9" integrity sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ== +minimatch@5.1.0: + version "5.1.0" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-5.1.0.tgz#1717b464f4971b144f6aabe8f2d0b8e4511e09c7" + integrity sha512-9TPBGGak4nHfGZsPBohm9AWg6NoT7QTCehS3BIJABslyZbzxfV78QM2Y6+i741OPZIafFAaiiEMh5OyIrJPgtg== + dependencies: + brace-expansion "^2.0.1" + minimatch@^3.1.1, minimatch@^3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" diff --git a/frontend/src/components/DataConnectorOption/media/gitlab.svg b/frontend/src/components/DataConnectorOption/media/gitlab.svg new file mode 100644 index 000000000..0d48a00cb --- /dev/null +++ b/frontend/src/components/DataConnectorOption/media/gitlab.svg @@ -0,0 +1,7 @@ + + + + + + + diff --git a/frontend/src/components/DataConnectorOption/media/index.js b/frontend/src/components/DataConnectorOption/media/index.js index dee46a12b..cbc80b642 100644 --- a/frontend/src/components/DataConnectorOption/media/index.js +++ b/frontend/src/components/DataConnectorOption/media/index.js @@ -1,10 +1,12 @@ import Github from "./github.svg"; +import Gitlab from "./gitlab.svg"; import YouTube from "./youtube.svg"; import Link from "./link.svg"; import Confluence from "./confluence.jpeg"; const ConnectorImages = { github: Github, + gitlab: Gitlab, youtube: YouTube, websiteDepth: Link, confluence: Confluence, diff --git a/frontend/src/components/Modals/ManageWorkspace/DataConnectors/Connectors/Gitlab/index.jsx b/frontend/src/components/Modals/ManageWorkspace/DataConnectors/Connectors/Gitlab/index.jsx new file mode 100644 index 000000000..f3c34dc8a --- /dev/null +++ b/frontend/src/components/Modals/ManageWorkspace/DataConnectors/Connectors/Gitlab/index.jsx @@ -0,0 +1,310 @@ +import React, { useEffect, useState } from "react"; +import System from "@/models/system"; +import showToast from "@/utils/toast"; +import pluralize from "pluralize"; +import { TagsInput } from "react-tag-input-component"; +import { Info, Warning } from "@phosphor-icons/react"; +import { Tooltip } from "react-tooltip"; + +const DEFAULT_BRANCHES = ["main", "master"]; +export default function GitlabOptions() { + const [loading, setLoading] = useState(false); + const [repo, setRepo] = useState(null); + const [accessToken, setAccessToken] = useState(null); + const [ignores, setIgnores] = useState([]); + const [settings, setSettings] = useState({ + repo: null, + accessToken: null, + }); + + const handleSubmit = async (e) => { + e.preventDefault(); + const form = new FormData(e.target); + + try { + setLoading(true); + showToast( + `Fetching all files for repo ${repo} - this may take a while.`, + "info", + { clear: true, autoClose: false } + ); + + const { data, error } = await System.dataConnectors.gitlab.collect({ + repo: form.get("repo"), + accessToken: form.get("accessToken"), + branch: form.get("branch"), + ignorePaths: ignores, + }); + + if (!!error) { + showToast(error, "error", { clear: true }); + setLoading(false); + return; + } + + showToast( + `${data.files} ${pluralize("file", data.files)} collected from ${ + data.author + }/${data.repo}:${data.branch}. Output folder is ${data.destination}.`, + "success", + { clear: true } + ); + e.target.reset(); + setLoading(false); + return; + } catch (e) { + console.error(e); + showToast(e.message, "error", { clear: true }); + setLoading(false); + } + }; + + return ( +
+
+
+
+
+
+
+ +

+ URL of the GitLab repo you wish to collect. +

+
+ setRepo(e.target.value)} + onBlur={() => setSettings({ ...settings, repo })} + spellCheck={false} + rows={2} + /> +
+
+
+ +

+ Access Token to prevent rate limiting. +

+
+ setAccessToken(e.target.value)} + onBlur={() => setSettings({ ...settings, accessToken })} + /> +
+ +
+ +
+
+ +

+ List in .gitignore format to ignore specific files during + collection. Press enter after each entry you want to save. +

+
+ +
+
+ +
+ + + {loading && ( +

+ Once complete, all files will be available for embedding into + workspaces in the document picker. +

+ )} +
+
+
+
+ ); +} + +function GitLabBranchSelection({ repo, accessToken }) { + const [allBranches, setAllBranches] = useState(DEFAULT_BRANCHES); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function fetchAllBranches() { + if (!repo) { + setAllBranches(DEFAULT_BRANCHES); + setLoading(false); + return; + } + + setLoading(true); + const { branches } = await System.dataConnectors.gitlab.branches({ + repo, + accessToken, + }); + setAllBranches(branches.length > 0 ? branches : DEFAULT_BRANCHES); + setLoading(false); + } + fetchAllBranches(); + }, [repo, accessToken]); + + if (loading) { + return ( +
+
+ +

+ Branch you wish to collect files from. +

+
+ +
+ ); + } + + return ( +
+
+ +

+ Branch you wish to collect files from. +

+
+ +
+ ); +} + +function PATAlert({ accessToken }) { + if (!!accessToken) return null; + return ( +
+
+ +

+ Without filling out the GitLab Access Token this data connector + will only be able to collect the top-level files of the repo + due to GitLab's public API rate-limits. +
+
+ e.stopPropagation()} + > + {" "} + Get a free Personal Access Token with a GitLab account here. + +

+
+
+ ); +} + +function PATTooltip({ accessToken }) { + if (!!accessToken) return null; + return ( + <> + {!accessToken && ( + + )} + +

+ Without a{" "} + e.stopPropagation()} + > + Personal Access Token + + , the GitLab API may limit the number of files that can be collected + due to rate limits. You can{" "} + e.stopPropagation()} + > + create a temporary Access Token + {" "} + to avoid this issue. +

+
+ + ); +} diff --git a/frontend/src/components/Modals/ManageWorkspace/DataConnectors/index.jsx b/frontend/src/components/Modals/ManageWorkspace/DataConnectors/index.jsx index c2c14dff3..9df6dd7d8 100644 --- a/frontend/src/components/Modals/ManageWorkspace/DataConnectors/index.jsx +++ b/frontend/src/components/Modals/ManageWorkspace/DataConnectors/index.jsx @@ -1,6 +1,7 @@ import ConnectorImages from "@/components/DataConnectorOption/media"; import { MagnifyingGlass } from "@phosphor-icons/react"; import GithubOptions from "./Connectors/Github"; +import GitlabOptions from "./Connectors/Gitlab"; import YoutubeOptions from "./Connectors/Youtube"; import ConfluenceOptions from "./Connectors/Confluence"; import { useState } from "react"; @@ -15,6 +16,13 @@ export const DATA_CONNECTORS = { "Import an entire public or private Github repository in a single click.", options: , }, + gitlab: { + name: "GitLab Repo", + image: ConnectorImages.gitlab, + description: + "Import an entire public or private GitLab repository in a single click.", + options: , + }, "youtube-transcript": { name: "YouTube Transcript", image: ConnectorImages.youtube, diff --git a/frontend/src/models/dataConnector.js b/frontend/src/models/dataConnector.js index d01c3c8b8..c363835c8 100644 --- a/frontend/src/models/dataConnector.js +++ b/frontend/src/models/dataConnector.js @@ -42,6 +42,45 @@ const DataConnector = { }); }, }, + gitlab: { + branches: async ({ repo, accessToken }) => { + return await fetch(`${API_BASE}/ext/gitlab/branches`, { + method: "POST", + headers: baseHeaders(), + cache: "force-cache", + body: JSON.stringify({ repo, accessToken }), + }) + .then((res) => res.json()) + .then((res) => { + if (!res.success) throw new Error(res.reason); + return res.data; + }) + .then((data) => { + return { branches: data?.branches || [], error: null }; + }) + .catch((e) => { + console.error(e); + showToast(e.message, "error"); + return { branches: [], error: e.message }; + }); + }, + collect: async function ({ repo, accessToken, branch, ignorePaths = [] }) { + return await fetch(`${API_BASE}/ext/gitlab/repo`, { + method: "POST", + headers: baseHeaders(), + body: JSON.stringify({ repo, accessToken, branch, ignorePaths }), + }) + .then((res) => res.json()) + .then((res) => { + if (!res.success) throw new Error(res.reason); + return { data: res.data, error: null }; + }) + .catch((e) => { + console.error(e); + return { data: null, error: e.message }; + }); + }, + }, youtube: { transcribe: async ({ url }) => { return await fetch(`${API_BASE}/ext/youtube/transcript`, { diff --git a/frontend/src/pages/Admin/Agents/SQLConnectorSelection/DBConnection.jsx b/frontend/src/pages/Admin/Agents/SQLConnectorSelection/DBConnection.jsx index 9d7b35b0a..d7361baea 100644 --- a/frontend/src/pages/Admin/Agents/SQLConnectorSelection/DBConnection.jsx +++ b/frontend/src/pages/Admin/Agents/SQLConnectorSelection/DBConnection.jsx @@ -1,12 +1,14 @@ import PostgreSQLLogo from "./icons/postgresql.png"; import MySQLLogo from "./icons/mysql.png"; import MSSQLLogo from "./icons/mssql.png"; +import ODBCLogo from "./icons/odbc.png"; import { X } from "@phosphor-icons/react"; export const DB_LOGOS = { postgresql: PostgreSQLLogo, mysql: MySQLLogo, "sql-server": MSSQLLogo, + odbc: ODBCLogo, }; export default function DBConnection({ connection, onRemove, setHasChanges }) { diff --git a/frontend/src/pages/Admin/Agents/SQLConnectorSelection/NewConnectionModal.jsx b/frontend/src/pages/Admin/Agents/SQLConnectorSelection/NewConnectionModal.jsx index e5f4c3016..f6b1c21e3 100644 --- a/frontend/src/pages/Admin/Agents/SQLConnectorSelection/NewConnectionModal.jsx +++ b/frontend/src/pages/Admin/Agents/SQLConnectorSelection/NewConnectionModal.jsx @@ -11,6 +11,7 @@ function assembleConnectionString({ host = "", port = "", database = "", + driver = "", }) { if ([username, password, host, database].every((i) => !!i) === false) return `Please fill out all the fields above.`; @@ -21,6 +22,9 @@ function assembleConnectionString({ return `mysql://${username}:${password}@${host}:${port}/${database}`; case "sql-server": return `mssql://${username}:${password}@${host}:${port}/${database}`; + case "odbc": + if (!driver) return `Please fill out the driver field.`; + return `Driver={${driver}};Server=${host};Port=${port};Database=${database};UID=${username};PWD=${password}`; default: return null; } @@ -33,6 +37,7 @@ const DEFAULT_CONFIG = { host: null, port: null, database: null, + driver: null, }; export default function NewSQLConnection({ isOpen, closeModal, onSubmit }) { @@ -48,12 +53,14 @@ export default function NewSQLConnection({ isOpen, closeModal, onSubmit }) { function onFormChange() { const form = new FormData(document.getElementById("sql-connection-form")); + setConfig({ username: form.get("username").trim(), password: form.get("password"), host: form.get("host").trim(), port: form.get("port").trim(), database: form.get("database").trim(), + driver: form.get("driver")?.trim(), }); } @@ -74,7 +81,7 @@ export default function NewSQLConnection({ isOpen, closeModal, onSubmit }) { // to the parent container form so we don't have nested forms. return createPortal( -
+

@@ -114,7 +121,7 @@ export default function NewSQLConnection({ isOpen, closeModal, onSubmit }) { -
+
setEngine("sql-server")} /> + setEngine("odbc")} + />
@@ -224,6 +236,23 @@ export default function NewSQLConnection({ isOpen, closeModal, onSubmit }) { spellCheck={false} />

+ + {engine === "odbc" && ( +
+ + +
+ )}

{assembleConnectionString({ engine, ...config })}

diff --git a/frontend/src/pages/Admin/Agents/SQLConnectorSelection/icons/odbc.png b/frontend/src/pages/Admin/Agents/SQLConnectorSelection/icons/odbc.png new file mode 100644 index 000000000..c28755887 Binary files /dev/null and b/frontend/src/pages/Admin/Agents/SQLConnectorSelection/icons/odbc.png differ diff --git a/server/endpoints/extensions/index.js b/server/endpoints/extensions/index.js index cf8e1191c..8f836ce07 100644 --- a/server/endpoints/extensions/index.js +++ b/server/endpoints/extensions/index.js @@ -5,18 +5,26 @@ const { ROLES, } = require("../../utils/middleware/multiUserProtected"); const { validatedRequest } = require("../../utils/middleware/validatedRequest"); +const { + isSupportedRepoProvider, +} = require("../../utils/middleware/isSupportedRepoProviders"); function extensionEndpoints(app) { if (!app) return; app.post( - "/ext/github/branches", - [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])], + "/ext/:repo_platform/branches", + [ + validatedRequest, + flexUserRoleValid([ROLES.admin, ROLES.manager]), + isSupportedRepoProvider, + ], async (request, response) => { try { + const { repo_platform } = request.params; const responseFromProcessor = await new CollectorApi().forwardExtensionRequest({ - endpoint: "/ext/github-repo/branches", + endpoint: `/ext/${repo_platform}-repo/branches`, method: "POST", body: request.body, }); @@ -29,18 +37,23 @@ function extensionEndpoints(app) { ); app.post( - "/ext/github/repo", - [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])], + "/ext/:repo_platform/repo", + [ + validatedRequest, + flexUserRoleValid([ROLES.admin, ROLES.manager]), + isSupportedRepoProvider, + ], async (request, response) => { try { + const { repo_platform } = request.params; const responseFromProcessor = await new CollectorApi().forwardExtensionRequest({ - endpoint: "/ext/github-repo", + endpoint: `/ext/${repo_platform}-repo`, method: "POST", body: request.body, }); await Telemetry.sendTelemetry("extension_invoked", { - type: "github_repo", + type: `${repo_platform}_repo`, }); response.status(200).json(responseFromProcessor); } catch (e) { diff --git a/server/package.json b/server/package.json index 4f07e68b5..0b9982402 100644 --- a/server/package.json +++ b/server/package.json @@ -66,6 +66,7 @@ "mysql2": "^3.9.8", "node-html-markdown": "^1.3.0", "node-llama-cpp": "^2.8.0", + "odbc": "^2.4.8", "ollama": "^0.5.0", "openai": "4.38.5", "pg": "^8.11.5", @@ -101,4 +102,4 @@ "nodemon": "^2.0.22", "prettier": "^3.0.3" } -} +} \ No newline at end of file diff --git a/server/utils/agents/aibitat/plugins/sql-agent/SQLConnectors/ODBC.js b/server/utils/agents/aibitat/plugins/sql-agent/SQLConnectors/ODBC.js new file mode 100644 index 000000000..d4f58464e --- /dev/null +++ b/server/utils/agents/aibitat/plugins/sql-agent/SQLConnectors/ODBC.js @@ -0,0 +1,60 @@ +const odbc = require("odbc"); +const UrlPattern = require("url-pattern"); + +class ODBCConnector { + #connected = false; + database_id = ""; + constructor( + config = { + connectionString: null, + } + ) { + this.connectionString = config.connectionString; + this._client = null; + this.database_id = this.#parseDatabase(); + } + + #parseDatabase() { + const regex = /Database=([^;]+)/; + const match = this.connectionString.match(regex); + return match ? match[1] : null; + } + + async connect() { + this._client = await odbc.connect(this.connectionString); + this.#connected = true; + return this._client; + } + + /** + * + * @param {string} queryString the SQL query to be run + * @returns {import(".").QueryResult} + */ + async runQuery(queryString = "") { + const result = { rows: [], count: 0, error: null }; + try { + if (!this.#connected) await this.connect(); + const query = await this._client.query(queryString); + result.rows = query; + result.count = query.length; + } catch (err) { + console.log(this.constructor.name, err); + result.error = err.message; + } finally { + await this._client.close(); + this.#connected = false; + } + return result; + } + + getTablesSql() { + return `SELECT table_name FROM information_schema.tables WHERE table_schema = '${this.database_id}'`; + } + + getTableSchemaSql(table_name) { + return `SHOW COLUMNS FROM ${this.database_id}.${table_name};`; + } +} + +module.exports.ODBCConnector = ODBCConnector; diff --git a/server/utils/agents/aibitat/plugins/sql-agent/SQLConnectors/index.js b/server/utils/agents/aibitat/plugins/sql-agent/SQLConnectors/index.js index 9cf1e1ff4..2e153b7e7 100644 --- a/server/utils/agents/aibitat/plugins/sql-agent/SQLConnectors/index.js +++ b/server/utils/agents/aibitat/plugins/sql-agent/SQLConnectors/index.js @@ -2,7 +2,7 @@ const { SystemSettings } = require("../../../../../../models/systemSettings"); const { safeJsonParse } = require("../../../../../http"); /** - * @typedef {('postgresql'|'mysql'|'sql-server')} SQLEngine + * @typedef {('postgresql'|'mysql'|'sql-server'|'odbc')} SQLEngine */ /** @@ -36,6 +36,9 @@ function getDBClient(identifier = "", connectionConfig = {}) { case "sql-server": const { MSSQLConnector } = require("./MSSQL"); return new MSSQLConnector(connectionConfig); + case "odbc": + const { ODBCConnector } = require("./ODBC"); + return new ODBCConnector(connectionConfig); default: throw new Error( `There is no supported database connector for ${identifier}` diff --git a/server/utils/middleware/isSupportedRepoProviders.js b/server/utils/middleware/isSupportedRepoProviders.js new file mode 100644 index 000000000..6a5cfb169 --- /dev/null +++ b/server/utils/middleware/isSupportedRepoProviders.js @@ -0,0 +1,12 @@ +// Middleware to validate that a repo provider URL is supported. +const REPO_PLATFORMS = ["github", "gitlab"]; + +function isSupportedRepoProvider(request, response, next) { + const { repo_platform = null } = request.params; + if (!repo_platform || !REPO_PLATFORMS.includes(repo_platform)) + return response + .status(500) + .text(`Unsupported repo platform ${repo_platform}`); + next(); +} +module.exports = { isSupportedRepoProvider }; diff --git a/server/yarn.lock b/server/yarn.lock index 3c5484d4b..96df39c4a 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -673,7 +673,7 @@ "@langchain/core" "~0.1" js-tiktoken "^1.0.11" -"@mapbox/node-pre-gyp@^1.0.11": +"@mapbox/node-pre-gyp@^1.0.11", "@mapbox/node-pre-gyp@^1.0.5": version "1.0.11" resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.11.tgz#417db42b7f5323d79e93b34a6d7a2a12c0df43fa" integrity sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ== @@ -1588,7 +1588,7 @@ arrify@^2.0.0: resolved "https://registry.yarnpkg.com/arrify/-/arrify-2.0.1.tgz#c9655e9331e0abcd588d2a7cad7e9956f66701fa" integrity sha512-3duEwti880xqi4eAMN8AyR4a0ByT90zoYdLlevfrvU43vb0YZwZVfxOgxWrLXXXpyugL0hNZc9G6BiB5B3nUug== -async@^3.2.3, async@^3.2.4: +async@^3.0.1, async@^3.2.3, async@^3.2.4: version "3.2.5" resolved "https://registry.yarnpkg.com/async/-/async-3.2.5.tgz#ebd52a8fdaf7a2289a24df399f8d8485c8a46b66" integrity sha512-baNZyqaaLhyLVKm/DlvdW051MSgO6b8eVfIezl9E5PqWxFgzLm/wQntEW4zOytVburDEr0JlALEpdOFwvErLsg== @@ -4813,6 +4813,11 @@ node-abort-controller@^3.1.1: resolved "https://registry.yarnpkg.com/node-abort-controller/-/node-abort-controller-3.1.1.tgz#a94377e964a9a37ac3976d848cb5c765833b8548" integrity sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ== +node-addon-api@^3.0.2: + version "3.2.1" + resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-3.2.1.tgz#81325e0a2117789c0128dab65e7e38f07ceba161" + integrity sha512-mmcei9JghVNDYydghQmeDX8KoAm0FAiYyIcUt/N4nhyAipB17pllZQDOJD2fotxABnt4Mdz+dKTO7eftLg4d0A== + node-addon-api@^5.0.0: version "5.1.0" resolved "https://registry.yarnpkg.com/node-addon-api/-/node-addon-api-5.1.0.tgz#49da1ca055e109a23d537e9de43c09cca21eb762" @@ -5065,6 +5070,15 @@ octokit@^3.1.0: "@octokit/request-error" "^5.0.0" "@octokit/types" "^12.0.0" +odbc@^2.4.8: + version "2.4.8" + resolved "https://registry.yarnpkg.com/odbc/-/odbc-2.4.8.tgz#56e34a1cafbaf1c2c53eec229b3a7604f890e3bf" + integrity sha512-W4VkBcr8iSe8hqpp2GoFPybCAJefC7eK837XThJkYCW4tBzyQisqkciwt1UYidU1OpKy1589y9dMN0tStiVB1Q== + dependencies: + "@mapbox/node-pre-gyp" "^1.0.5" + async "^3.0.1" + node-addon-api "^3.0.2" + ollama@^0.5.0: version "0.5.0" resolved "https://registry.yarnpkg.com/ollama/-/ollama-0.5.0.tgz#cb9bc709d4d3278c9f484f751b0d9b98b06f4859"