[FEAT] Confluence data connector (#1181)

* WIP Confluence data connector backend

* confluence data connector complete

* confluence citations

* fix citation for confluence

* Patch Confluence integration

* fix Citation Icon for confluence

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2024-04-25 17:53:38 -07:00 committed by GitHub
parent 11f6419c3c
commit 348b36bf85
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 458 additions and 68 deletions

View File

@ -4,69 +4,112 @@ const { reqBody } = require("../utils/http");
function extensions(app) {
if (!app) return;
app.post("/ext/github-repo", [verifyPayloadIntegrity], async function (request, response) {
try {
const loadGithubRepo = require("../utils/extensions/GithubRepo");
const { success, reason, data } = await loadGithubRepo(reqBody(request));
response.status(200).json({
success,
reason,
data
});
} catch (e) {
console.error(e);
response.status(200).json({
success: false,
reason: e.message || "A processing error occurred.",
data: {},
});
app.post(
"/ext/github-repo",
[verifyPayloadIntegrity],
async function (request, response) {
try {
const loadGithubRepo = require("../utils/extensions/GithubRepo");
const { success, reason, data } = await loadGithubRepo(
reqBody(request)
);
response.status(200).json({
success,
reason,
data,
});
} catch (e) {
console.error(e);
response.status(200).json({
success: false,
reason: e.message || "A processing error occurred.",
data: {},
});
}
return;
}
return;
});
);
// gets all branches for a specific repo
app.post("/ext/github-repo/branches", [verifyPayloadIntegrity], async function (request, response) {
try {
const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader");
const allBranches = await (new GithubRepoLoader(reqBody(request))).getRepoBranches()
response.status(200).json({
success: true,
reason: null,
data: {
branches: allBranches
}
});
} catch (e) {
console.error(e);
response.status(400).json({
success: false,
reason: e.message,
data: {
branches: []
}
});
app.post(
"/ext/github-repo/branches",
[verifyPayloadIntegrity],
async function (request, response) {
try {
const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader");
const allBranches = await new GithubRepoLoader(
reqBody(request)
).getRepoBranches();
response.status(200).json({
success: true,
reason: null,
data: {
branches: allBranches,
},
});
} catch (e) {
console.error(e);
response.status(400).json({
success: false,
reason: e.message,
data: {
branches: [],
},
});
}
return;
}
return;
});
);
app.post("/ext/youtube-transcript", [verifyPayloadIntegrity], async function (request, response) {
try {
const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript");
const { success, reason, data } = await loadYouTubeTranscript(reqBody(request));
response.status(200).json({ success, reason, data });
} catch (e) {
console.error(e);
response.status(400).json({
success: false,
reason: e.message,
data: {
title: null,
author: null
}
});
app.post(
"/ext/youtube-transcript",
[verifyPayloadIntegrity],
async function (request, response) {
try {
const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript");
const { success, reason, data } = await loadYouTubeTranscript(
reqBody(request)
);
response.status(200).json({ success, reason, data });
} catch (e) {
console.error(e);
response.status(400).json({
success: false,
reason: e.message,
data: {
title: null,
author: null,
},
});
}
return;
}
return;
});
);
// Collector route: runs the Confluence loader against the request body and
// relays its { success, reason, data } result to the caller.
app.post(
  "/ext/confluence",
  [verifyPayloadIntegrity],
  async function (request, response) {
    try {
      // Loaded on demand, in the same way as the other extension routes.
      const loadConfluence = require("../utils/extensions/Confluence");
      const body = reqBody(request);
      const { success, reason, data } = await loadConfluence(body);
      response.status(200).json({ success, reason, data });
    } catch (e) {
      console.error(e);
      // A thrown error is reported as a 400 with an empty data payload.
      response.status(400).json({
        success: false,
        reason: e.message,
        data: { title: null, author: null },
      });
    }
  }
);
}
module.exports = extensions;

View File

@ -49,4 +49,4 @@
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
}
}
}

View File

@ -0,0 +1,110 @@
const fs = require("fs");
const path = require("path");
const { default: slugify } = require("slugify");
const { v4 } = require("uuid");
const { writeToServerDocuments } = require("../../files");
const { tokenizeString } = require("../../tokenizer");
const {
ConfluencePagesLoader,
} = require("langchain/document_loaders/web/confluence");
/**
 * Matches a URL against the atlassian.net Confluence space URL pattern.
 *
 * @param {string} [spaceUrl=""] - URL to test.
 * @returns {{valid: boolean, result: Object|null}} `result` is whatever the
 *   pattern match produced (read by the caller for `subdomain` and
 *   `spaceKey`), or null when the URL does not match.
 */
function validSpaceUrl(spaceUrl = "") {
  const UrlPattern = require("url-pattern");
  const result = new UrlPattern(
    "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
  ).match(spaceUrl);

  return result ? { valid: true, result } : { valid: false, result: null };
}
/**
 * Loads the pages of an atlassian.net Confluence space and stores each page
 * as a document, via `writeToServerDocuments`, inside a per-import folder.
 *
 * @param {Object} args
 * @param {string} args.pageUrl - URL inside the space; the subdomain and
 *   space key are parsed from it.
 * @param {string} args.username - Username handed to the Confluence loader.
 * @param {string} args.accessToken - Access token handed to the Confluence loader.
 * @returns {Promise<{success: boolean, reason: string|null, data?: {spaceKey: string, destination: string}}>}
 *   `data` is only present when `success` is true.
 */
async function loadConfluence({ pageUrl, username, accessToken }) {
  if (!pageUrl || !username || !accessToken) {
    return {
      success: false,
      reason:
        "You need either a username and access token, or a personal access token (PAT), to use the Confluence connector.",
    };
  }

  const validSpace = validSpaceUrl(pageUrl);
  if (!validSpace.result) {
    return {
      success: false,
      // The path segment is "spaces", matching what validSpaceUrl accepts.
      reason:
        "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/spaces/~SPACEID/*",
    };
  }

  const { subdomain, spaceKey } = validSpace.result;
  console.log(`-- Working Confluence ${subdomain}.atlassian.net --`);
  const loader = new ConfluencePagesLoader({
    baseUrl: `https://${subdomain}.atlassian.net/wiki`,
    spaceKey,
    username,
    accessToken,
  });

  let docs = [];
  let error = null;
  try {
    docs = (await loader.load()) ?? [];
  } catch (e) {
    // Keep only the text after an "Error:" prefix when the message has one.
    error = e.message?.split("Error:")?.[1] || e.message;
  }

  if (!docs.length || !!error) {
    return {
      success: false,
      reason: error ?? "No pages found for that Confluence space.",
    };
  }

  const outFolder = slugify(
    `${subdomain}-confluence-${v4().slice(0, 4)}`
  ).toLowerCase();
  const outFolderPath = path.resolve(
    __dirname,
    `../../../../server/storage/documents/${outFolder}`
  );
  // recursive: creates missing parent folders and does not throw when the
  // folder already exists (the random suffix is only four characters long).
  fs.mkdirSync(outFolderPath, { recursive: true });

  for (const doc of docs) {
    // slugify throws on non-string input, so the file name must use the same
    // fallback chain as the stored title instead of the raw metadata title.
    const title = doc.metadata.title || doc.metadata.source || "untitled";
    const data = {
      id: v4(),
      url: doc.metadata.url + ".page",
      title,
      docAuthor: subdomain,
      description: doc.metadata.title,
      docSource: `${subdomain} Confluence`,
      chunkSource: `confluence://${doc.metadata.url}`,
      published: new Date().toLocaleString(),
      wordCount: doc.pageContent.split(" ").length,
      pageContent: doc.pageContent,
      token_count_estimate: tokenizeString(doc.pageContent).length,
    };

    console.log(`[Confluence Loader]: Saving ${title} to ${outFolder}`);
    writeToServerDocuments(
      data,
      `${slugify(title)}-${data.id}`,
      outFolderPath
    );
  }

  return {
    success: true,
    reason: null,
    data: {
      spaceKey,
      destination: outFolder,
    },
  };
}
module.exports = loadConfluence;

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

View File

@ -1,9 +1,11 @@
import Github from "./github.svg";
import YouTube from "./youtube.svg";
import Confluence from "./confluence.jpeg";
// Maps each data connector key to its imported logo asset.
const ConnectorImages = { github: Github, youtube: YouTube, confluence: Confluence };
export default ConnectorImages;

View File

@ -0,0 +1,164 @@
import { useState } from "react";
import System from "@/models/system";
import showToast from "@/utils/toast";
import { Warning } from "@phosphor-icons/react";
import { Tooltip } from "react-tooltip";
export default function ConfluenceOptions() {
const [loading, setLoading] = useState(false);
const handleSubmit = async (e) => {
e.preventDefault();
const form = new FormData(e.target);
try {
setLoading(true);
showToast(
"Fetching all pages for Confluence space - this may take a while.",
"info",
{
clear: true,
autoClose: false,
}
);
const { data, error } = await System.dataConnectors.confluence.collect({
pageUrl: form.get("pageUrl"),
username: form.get("username"),
accessToken: form.get("accessToken"),
});
if (!!error) {
showToast(error, "error", { clear: true });
setLoading(false);
return;
}
showToast(
`Pages collected from Confluence space ${data.spaceKey}. Output folder is ${data.destination}.`,
"success",
{ clear: true }
);
e.target.reset();
setLoading(false);
} catch (e) {
console.error(e);
showToast(e.message, "error", { clear: true });
setLoading(false);
}
};
return (
<div className="flex w-full">
<div className="flex flex-col w-full px-1 md:pb-6 pb-16">
<form className="w-full" onSubmit={handleSubmit}>
<div className="w-full flex flex-col py-2">
<div className="w-full flex flex-col gap-4">
<div className="flex flex-col pr-10">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-bold flex gap-x-2 items-center">
<p className="font-bold text-white">Confluence Page URL</p>
</label>
<p className="text-xs font-normal text-white/50">
URL of a page in the Confluence space.
</p>
</div>
<input
type="url"
name="pageUrl"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="https://example.atlassian.net/wiki/spaces/~7120208c08555d52224113949698b933a3bb56/pages/851969/Test+anythingLLM+page"
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col pr-10">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-bold">
Confluence Username
</label>
<p className="text-xs font-normal text-white/50">
Your Confluence username.
</p>
</div>
<input
type="email"
name="username"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="jdoe@example.com"
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col pr-10">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-bold flex gap-x-2 items-center">
<p className="font-bold text-white">
Confluence Access Token
</p>
<Warning
size={14}
className="ml-1 text-orange-500 cursor-pointer"
data-tooltip-id="access-token-tooltip"
data-tooltip-place="right"
/>
<Tooltip
delayHide={300}
id="access-token-tooltip"
className="max-w-xs"
clickable={true}
>
<p className="text-sm">
You need to provide an access token for authentication.
You can generate an access token{" "}
<a
href="https://id.atlassian.com/manage-profile/security/api-tokens"
target="_blank"
rel="noopener noreferrer"
className="underline"
onClick={(e) => e.stopPropagation()}
>
here
</a>
.
</p>
</Tooltip>
</label>
<p className="text-xs font-normal text-white/50">
Access token for authentication.
</p>
</div>
<input
type="password"
name="accessToken"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="abcd1234"
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
</div>
</div>
<div className="flex flex-col gap-y-2 w-full pr-10">
<button
type="submit"
disabled={loading}
className="mt-2 w-full justify-center border border-slate-200 px-4 py-2 rounded-lg text-[#222628] text-sm font-bold items-center flex gap-x-2 bg-slate-200 hover:bg-slate-300 hover:text-slate-800 disabled:bg-slate-300 disabled:cursor-not-allowed"
>
{loading ? "Collecting pages..." : "Submit"}
</button>
{loading && (
<p className="text-xs text-white/50">
Once complete, all pages will be available for embedding into
workspaces.
</p>
)}
</div>
</form>
</div>
</div>
);
}

View File

@ -2,6 +2,7 @@ import ConnectorImages from "@/components/DataConnectorOption/media";
import { MagnifyingGlass } from "@phosphor-icons/react";
import GithubOptions from "./Connectors/Github";
import YoutubeOptions from "./Connectors/Youtube";
import ConfluenceOptions from "./Connectors/Confluence";
import { useState } from "react";
import ConnectorOption from "./ConnectorOption";
@ -20,6 +21,12 @@ export const DATA_CONNECTORS = {
"Import the transcription of an entire YouTube video from a link.",
options: <YoutubeOptions />,
},
confluence: {
name: "Confluence",
image: ConnectorImages.confluence,
description: "Import an entire Confluence page in a single click.",
options: <ConfluenceOptions />,
},
};
export default function DataConnectors() {

View File

@ -1,4 +1,4 @@
import { memo, useState } from "react";
import React, { memo, useState } from "react";
import { v4 } from "uuid";
import { decode as HTMLDecode } from "he";
import truncate from "truncate";
@ -14,6 +14,7 @@ import {
X,
YoutubeLogo,
} from "@phosphor-icons/react";
import ConfluenceLogo from "@/media/dataConnectors/confluence.png";
import { Tooltip } from "react-tooltip";
import { toPercentString } from "@/utils/numbers";
@ -202,13 +203,6 @@ function CitationDetailModal({ source, onClose }) {
);
}
const ICONS = {
file: FileText,
link: Link,
youtube: YoutubeLogo,
github: GithubLogo,
};
// Show the correct title and/or display text for citations
// which contain valid outbound links that can be clicked by the
// user when viewing a citation. Optionally allows various icons
@ -221,10 +215,17 @@ function parseChunkSource({ title = "", chunks = [] }) {
icon: "file",
};
if (!chunks.length || !chunks[0].chunkSource.startsWith("link://"))
if (
!chunks.length ||
(!chunks[0].chunkSource.startsWith("link://") &&
!chunks[0].chunkSource.startsWith("confluence://"))
)
return nullResponse;
try {
const url = new URL(chunks[0].chunkSource.split("link://")[1]);
const url = new URL(
chunks[0].chunkSource.split("link://")[1] ||
chunks[0].chunkSource.split("confluence://")[1]
);
let text = url.host + url.pathname;
let icon = "link";
@ -238,6 +239,11 @@ function parseChunkSource({ title = "", chunks = [] }) {
icon = "github";
}
if (url.host.includes("atlassian.net")) {
text = title;
icon = "confluence";
}
return {
isUrl: true,
href: url.toString(),
@ -247,3 +253,16 @@ function parseChunkSource({ title = "", chunks = [] }) {
} catch {}
return nullResponse;
}
// Patch to render Confluence icon as a element like we do with Phosphor
const ConfluenceIcon = ({ ...props }) => (
<img src={ConfluenceLogo} {...props} />
);
// Maps an icon key to the component used to render it.
const ICONS = {
  file: FileText, link: Link,
  youtube: YoutubeLogo, github: GithubLogo,
  confluence: ConfluenceIcon,
};

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.4 KiB

View File

@ -60,6 +60,29 @@ const DataConnector = {
});
},
},
confluence: {
collect: async function ({ pageUrl, username, accessToken }) {
return await fetch(`${API_BASE}/ext/confluence`, {
method: "POST",
headers: baseHeaders(),
body: JSON.stringify({
pageUrl,
username,
accessToken,
}),
})
.then((res) => res.json())
.then((res) => {
if (!res.success) throw new Error(res.reason);
return { data: res.data, error: null };
})
.catch((e) => {
console.error(e);
return { data: null, error: e.message };
});
},
},
};
export default DataConnector;

View File

@ -71,6 +71,28 @@ function extensionEndpoints(app) {
}
}
);
// Forwards a Confluence import request to the collector's /ext/confluence
// endpoint. Restricted to the admin and manager roles.
app.post(
  "/ext/confluence",
  [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])],
  async (request, response) => {
    try {
      const collector = new CollectorApi();
      const processorResult = await collector.forwardExtensionRequest({
        endpoint: "/ext/confluence",
        method: "POST",
        body: request.body,
      });

      // Telemetry is sent once the collector call has returned.
      await Telemetry.sendTelemetry("extension_invoked", {
        type: "confluence",
      });
      response.status(200).json(processorResult);
    } catch (error) {
      console.error(error);
      response.sendStatus(500).end();
    }
  }
);
}
module.exports = { extensionEndpoints };