From 6369d5f4627065f5285feecb54adbdde16bfd998 Mon Sep 17 00:00:00 2001 From: Predrag Stojadinovic Date: Sat, 11 May 2024 18:43:55 +0200 Subject: [PATCH] chore: confluence data connector can now handle custom urls, in addition to default {subdomain}.atlassian.net ones --- .../utils/extensions/Confluence/index.js | 44 ++++++++++++------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index 5a473f65..63094a1f 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -2,20 +2,30 @@ const fs = require("fs"); const path = require("path"); const { default: slugify } = require("slugify"); const { v4 } = require("uuid"); +const UrlPattern = require("url-pattern"); const { writeToServerDocuments } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); -const { - ConfluencePagesLoader, -} = require("langchain/document_loaders/web/confluence"); +const { ConfluencePagesLoader } = require("langchain/document_loaders/web/confluence"); function validSpaceUrl(spaceUrl = "") { - const UrlPattern = require("url-pattern"); - const pattern = new UrlPattern( - "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*" - ); - const match = pattern.match(spaceUrl); - if (!match) return { valid: false, result: null }; - return { valid: true, result: match }; + // Atlassian default URL match + const atlassianPattern = new UrlPattern("https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)/*"); + const atlassianMatch = atlassianPattern.match(spaceUrl); + if (atlassianMatch) { + return { valid: true, result: atlassianMatch }; + } + + // Custom Confluence URL match + const customPattern = new UrlPattern("https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)/*"); + const customMatch = customPattern.match(spaceUrl); + if (customMatch) { + customMatch.customDomain = (customMatch.subdomain ? `${customMatch.subdomain}.` : "") + // + (`${customMatch.domain}.${customMatch.tld}`); + return { valid: true, result: customMatch, custom: true }; + } + + // No match + return { valid: false, result: null }; } async function loadConfluence({ pageUrl, username, accessToken }) { @@ -31,15 +41,19 @@ async function loadConfluence({ pageUrl, username, accessToken }) { if (!validSpace.result) { return { success: false, - reason: - "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/*", + reason: "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*", }; } - const { subdomain, spaceKey } = validSpace.result; - console.log(`-- Working Confluence ${subdomain}.atlassian.net --`); + const { subdomain, customDomain, spaceKey } = validSpace.result; + let baseUrl = `https://${subdomain}.atlassian.net/wiki`; + if (customDomain) { + baseUrl = `https://${customDomain}/wiki`; + } + + console.log(`-- Working Confluence ${baseUrl} --`); const loader = new ConfluencePagesLoader({ - baseUrl: `https://${subdomain}.atlassian.net/wiki`, + baseUrl, spaceKey, username, accessToken,