[FEAT] Confluence Data Connector handles custom Confluence urls (#1362)

* chore: confluence data connector can now handle custom urls, in addition to default {subdomain}.atlassian.net ones

* chore: formatting as per yarn lint
This commit is contained in:
Predrag Stojadinović 2024-05-14 19:21:04 +02:00 committed by GitHub
parent 95f6000a67
commit 78e3e35d27
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -2,6 +2,7 @@ const fs = require("fs");
const path = require("path");
const { default: slugify } = require("slugify");
const { v4 } = require("uuid");
const UrlPattern = require("url-pattern");
const { writeToServerDocuments } = require("../../files");
const { tokenizeString } = require("../../tokenizer");
const {
@ -9,13 +10,29 @@ const {
} = require("langchain/document_loaders/web/confluence");
function validSpaceUrl(spaceUrl = "") {
const UrlPattern = require("url-pattern");
const pattern = new UrlPattern(
"https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
// Atlassian default URL match
const atlassianPattern = new UrlPattern(
"https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)/*"
);
const match = pattern.match(spaceUrl);
if (!match) return { valid: false, result: null };
return { valid: true, result: match };
const atlassianMatch = atlassianPattern.match(spaceUrl);
if (atlassianMatch) {
return { valid: true, result: atlassianMatch };
}
// Custom Confluence URL match
const customPattern = new UrlPattern(
"https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)/*"
);
const customMatch = customPattern.match(spaceUrl);
if (customMatch) {
customMatch.customDomain =
(customMatch.subdomain ? `${customMatch.subdomain}.` : "") + //
`${customMatch.domain}.${customMatch.tld}`;
return { valid: true, result: customMatch, custom: true };
}
// No match
return { valid: false, result: null };
}
async function loadConfluence({ pageUrl, username, accessToken }) {
@ -32,14 +49,19 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
return {
success: false,
reason:
"Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/*",
"Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*",
};
}
const { subdomain, spaceKey } = validSpace.result;
console.log(`-- Working Confluence ${subdomain}.atlassian.net --`);
const { subdomain, customDomain, spaceKey } = validSpace.result;
let baseUrl = `https://${subdomain}.atlassian.net/wiki`;
if (customDomain) {
baseUrl = `https://${customDomain}/wiki`;
}
console.log(`-- Working Confluence ${baseUrl} --`);
const loader = new ConfluencePagesLoader({
baseUrl: `https://${subdomain}.atlassian.net/wiki`,
baseUrl,
spaceKey,
username,
accessToken,