From 78e3e35d270fcc1c3c0c1ee98092c37d2fcda623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Predrag=20Stojadinovi=C4=87?= Date: Tue, 14 May 2024 19:21:04 +0200 Subject: [PATCH] [FEAT] Confluence Data Connector handles custom Confluence urls (#1362) * chore: confluence data connector can now handle custom urls, in addition to default {subdomain}.atlassian.net ones * chore: formatting as per yarn lint --- .../utils/extensions/Confluence/index.js | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/collector/utils/extensions/Confluence/index.js b/collector/utils/extensions/Confluence/index.js index 5a473f65..5bb44dee 100644 --- a/collector/utils/extensions/Confluence/index.js +++ b/collector/utils/extensions/Confluence/index.js @@ -2,6 +2,7 @@ const fs = require("fs"); const path = require("path"); const { default: slugify } = require("slugify"); const { v4 } = require("uuid"); +const UrlPattern = require("url-pattern"); const { writeToServerDocuments } = require("../../files"); const { tokenizeString } = require("../../tokenizer"); const { @@ -9,13 +10,29 @@ const { } = require("langchain/document_loaders/web/confluence"); function validSpaceUrl(spaceUrl = "") { - const UrlPattern = require("url-pattern"); - const pattern = new UrlPattern( - "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*" + // Atlassian default URL match + const atlassianPattern = new UrlPattern( + "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)/*" ); - const match = pattern.match(spaceUrl); - if (!match) return { valid: false, result: null }; - return { valid: true, result: match }; + const atlassianMatch = atlassianPattern.match(spaceUrl); + if (atlassianMatch) { + return { valid: true, result: atlassianMatch }; + } + + // Custom Confluence URL match + const customPattern = new UrlPattern( + "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)/*" + ); + const customMatch = customPattern.match(spaceUrl); + if (customMatch) { + customMatch.customDomain = + (customMatch.subdomain ? `${customMatch.subdomain}.` : "") + // + `${customMatch.domain}.${customMatch.tld}`; + return { valid: true, result: customMatch, custom: true }; + } + + // No match + return { valid: false, result: null }; } async function loadConfluence({ pageUrl, username, accessToken }) { @@ -32,14 +49,19 @@ async function loadConfluence({ pageUrl, username, accessToken }) { return { success: false, reason: - "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/*", + "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*", }; } - const { subdomain, spaceKey } = validSpace.result; - console.log(`-- Working Confluence ${subdomain}.atlassian.net --`); + const { subdomain, customDomain, spaceKey } = validSpace.result; + let baseUrl = `https://${subdomain}.atlassian.net/wiki`; + if (customDomain) { + baseUrl = `https://${customDomain}/wiki`; + } + + console.log(`-- Working Confluence ${baseUrl} --`); const loader = new ConfluencePagesLoader({ - baseUrl: `https://${subdomain}.atlassian.net/wiki`, + baseUrl, spaceKey, username, accessToken,