chore: confluence data connector can now handle custom urls, in addition to default {subdomain}.atlassian.net ones

This commit is contained in:
Predrag Stojadinovic 2024-05-11 18:43:55 +02:00
parent 948ac8a3dd
commit 6369d5f462

View File

@ -2,20 +2,30 @@ const fs = require("fs");
const path = require("path");
const { default: slugify } = require("slugify");
const { v4 } = require("uuid");
const UrlPattern = require("url-pattern");
const { writeToServerDocuments } = require("../../files");
const { tokenizeString } = require("../../tokenizer");
const {
ConfluencePagesLoader,
} = require("langchain/document_loaders/web/confluence");
const { ConfluencePagesLoader } = require("langchain/document_loaders/web/confluence");
/**
 * Validates a Confluence space URL and extracts the pieces needed to reach it.
 *
 * Two URL shapes are recognised, tried in this order:
 *   1. Atlassian cloud: https://{subdomain}.atlassian.net/wiki/spaces/{spaceKey}...
 *   2. Custom host:     https://[{subdomain}.]{domain}.{tld}/wiki/spaces/{spaceKey}...
 *
 * @param {string} [spaceUrl=""] - The space URL to validate.
 * @returns {{valid: boolean, result: object|null, custom?: boolean}}
 *   `result` is the url-pattern match object (named segments such as
 *   `subdomain` and `spaceKey`). For custom hosts it additionally carries
 *   `customDomain` (the reassembled host name) and `custom` is `true`.
 *   When neither pattern matches, `{ valid: false, result: null }`.
 */
function validSpaceUrl(spaceUrl = "") {
  // Atlassian default URL match. This must fall through on a miss instead of
  // returning "invalid", otherwise the custom-host check below never runs.
  const atlassianPattern = new UrlPattern(
    "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
  );
  const atlassianMatch = atlassianPattern.match(spaceUrl);
  if (atlassianMatch) {
    return { valid: true, result: atlassianMatch };
  }

  // Custom Confluence URL match. Uses the same trailing wildcard as the
  // Atlassian pattern so both kinds of host accept the same path shapes.
  const customPattern = new UrlPattern(
    "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*"
  );
  const customMatch = customPattern.match(spaceUrl);
  if (customMatch) {
    const { subdomain, domain, tld } = customMatch;
    // The subdomain segment is optional in the pattern; only prefix it when present.
    const hostPrefix = subdomain ? `${subdomain}.` : "";
    const customDomain = `${hostPrefix}${domain}.${tld}`;
    return {
      valid: true,
      result: { ...customMatch, customDomain },
      custom: true,
    };
  }

  // No match
  return { valid: false, result: null };
}
async function loadConfluence({ pageUrl, username, accessToken }) {
@ -31,15 +41,19 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
if (!validSpace.result) {
return {
success: false,
reason:
"Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/*",
reason: "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*",
};
}
const { subdomain, spaceKey } = validSpace.result;
console.log(`-- Working Confluence ${subdomain}.atlassian.net --`);
const { subdomain, customDomain, spaceKey } = validSpace.result;
let baseUrl = `https://${subdomain}.atlassian.net/wiki`;
if (customDomain) {
baseUrl = `https://${customDomain}/wiki`;
}
console.log(`-- Working Confluence ${baseUrl} --`);
const loader = new ConfluencePagesLoader({
baseUrl: `https://${subdomain}.atlassian.net/wiki`,
baseUrl,
spaceKey,
username,
accessToken,