mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-10 17:00:11 +01:00
1347 human readable confluence url (#1706)
* chore: confluence data connector can now handle custom urls, in addition to default {subdomain}.atlassian.net ones
* chore: formatting as per yarn lint
* chore: fixing the human readable confluence url fetch baseUrl
* chore: fixing the human readable confluence url fetch baseUrl
* chore: fixing the human readable confluence url fetch baseUrl
* chore: fixing the human readable confluence url fetch baseUrl
* chore: fixing the human readable confluence url fetch baseUrl
* refactor implementation of various types of Confluence URL patterns

---------

Co-authored-by: Predrag Stojadinovic <predrag@stojadinovic.net>
Co-authored-by: Predrag Stojadinović <cope@users.noreply.github.com>
Co-authored-by: Predrag Stojadinovic <predrags@nvidia.com>
This commit is contained in:
parent
c8c618137f
commit
a598c8e04c
@ -9,37 +9,6 @@ const {
|
||||
ConfluencePagesLoader,
|
||||
} = require("langchain/document_loaders/web/confluence");
|
||||
|
||||
/**
 * Checks a Confluence space URL against the supported URL shapes and returns
 * the parsed match.
 *
 * The Atlassian-hosted shape is tried first. If it does not match, the
 * self-hosted shapes are tried in order and the first one that matches wins.
 * For a self-hosted match, a `customDomain` property is attached to the match
 * object, built from the captured `subdomain` (when present), `domain` and `tld`.
 *
 * @param {string} spaceUrl - the Confluence space URL to check
 * @returns {{valid: boolean, result: (Object|null), custom?: boolean}}
 *   `valid` is true when any pattern matched; `result` is the match object
 *   (or null); `custom` is only set for self-hosted matches.
 */
function validSpaceUrl(spaceUrl = "") {
  // Atlassian default URL match
  const hostedMatch = new UrlPattern(
    "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
  ).match(spaceUrl);
  if (hostedMatch) {
    return { valid: true, result: hostedMatch };
  }

  const selfHostedShapes = [
    "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*", // Custom Confluence space
    "https\\://(:subdomain.):domain.:tld/display/(:spaceKey)*", // Custom Confluence space + Human-readable space tag.
  ];

  for (const shape of selfHostedShapes) {
    const found = new UrlPattern(shape).match(spaceUrl);
    if (!found) continue;

    // Rebuild the full host so callers do not have to reassemble it.
    const hostPrefix = found.subdomain ? `${found.subdomain}.` : "";
    found.customDomain = hostPrefix + `${found.domain}.${found.tld}`;
    return { valid: true, result: found, custom: true };
  }

  // No match
  return { valid: false, result: null };
}
|
||||
|
||||
async function loadConfluence({ pageUrl, username, accessToken }) {
|
||||
if (!pageUrl || !username || !accessToken) {
|
||||
return {
|
||||
@ -49,21 +18,16 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
|
||||
};
|
||||
}
|
||||
|
||||
const validSpace = validSpaceUrl(pageUrl);
|
||||
if (!validSpace.result) {
|
||||
const { valid, result } = validSpaceUrl(pageUrl);
|
||||
if (!valid) {
|
||||
return {
|
||||
success: false,
|
||||
reason:
|
||||
"Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/*",
|
||||
"Confluence space URL is not in the expected format of one of https://domain.atlassian.net/wiki/space/~SPACEID/* or https://customDomain/wiki/space/~SPACEID/* or https://customDomain/display/~SPACEID/*",
|
||||
};
|
||||
}
|
||||
|
||||
const { subdomain, customDomain, spaceKey } = validSpace.result;
|
||||
let baseUrl = `https://${subdomain}.atlassian.net/wiki`;
|
||||
if (customDomain) {
|
||||
baseUrl = `https://${customDomain}/wiki`;
|
||||
}
|
||||
|
||||
const { apiBase: baseUrl, spaceKey, subdomain } = result;
|
||||
console.log(`-- Working Confluence ${baseUrl} --`);
|
||||
const loader = new ConfluencePagesLoader({
|
||||
baseUrl,
|
||||
@ -142,4 +106,93 @@ async function loadConfluence({ pageUrl, username, accessToken }) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* A match result for a url-pattern of a Confluence URL
|
||||
* @typedef {Object} ConfluenceMatchResult
|
||||
* @property {string} subdomain - the subdomain of an organization's Confluence space
|
||||
* @property {string} spaceKey - the spaceKey of an organization that determines the documents to collect.
|
||||
* @property {string} apiBase - the correct REST API url to use for loader.
|
||||
*/
|
||||
|
||||
/**
 * Generates the correct API base URL for interfacing with the Confluence REST API
 * depending on the URL pattern being used since there are various ways to host/access a
 * Confluence space.
 *
 * The host is derived from the match result itself:
 *  - if the match carries a `customDomain`, or both `domain` and `tld`
 *    (a self-hosted URL), the base is built from that host;
 *  - otherwise the base is `https://{subdomain}.atlassian.net`.
 *
 * `isCustomDomain` only controls the subpath: when true nothing is appended,
 * otherwise `/wiki` is appended.
 *
 * @param {ConfluenceMatchResult} matchResult - result from `url-pattern`.match
 * @param {boolean} isCustomDomain - determines if we need to coerce the subpath of the provided URL
 * @returns {string} - the resulting REST API URL
 * @throws {Error} when `isCustomDomain` is true but the match result carries no
 *   domain information to build the host from.
 */
function generateAPIBaseUrl(matchResult = {}, isCustomDomain = false) {
  const { subdomain, domain, tld, customDomain } = matchResult;
  const subpath = isCustomDomain ? "" : "/wiki";

  // Self-hosted matches expose `domain` and `tld` (plus an optional `subdomain`).
  // Rebuild the host from those unless a ready-made `customDomain` was attached.
  let selfHostedDomain = customDomain || null;
  if (!selfHostedDomain && domain && tld) {
    const hostPrefix = subdomain ? `${subdomain}.` : "";
    selfHostedDomain = `${hostPrefix}${domain}.${tld}`;
  }

  if (selfHostedDomain) return `https://${selfHostedDomain}${subpath}`;

  // A custom-domain base was requested but there is nothing to build it from;
  // fail loudly instead of emitting an atlassian.net URL for a self-hosted site.
  if (isCustomDomain) {
    throw new Error(
      "Cannot generate Confluence API base URL: match result has no custom domain information."
    );
  }

  return `https://${subdomain}.atlassian.net${subpath}`;
}
|
||||
|
||||
/**
 * Validates and parses the correct information from a given Confluence URL.
 *
 * The URL is tested against an ordered list of supported URL shapes and the
 * first shape that matches is used. The returned `result` is the match object
 * with an extra `apiBase` property produced by `generateAPIBaseUrl`.
 * `valid` is true only when the match object has its own `spaceKey` property,
 * so `result` can be non-null while `valid` is false.
 *
 * @param {string} spaceUrl - The organization's Confluence URL to parse
 * @returns {{
 *  valid: boolean,
 *  result: (ConfluenceMatchResult|null),
 * }}
 */
function validSpaceUrl(spaceUrl = "") {
  // Order matters: the first pattern that matches wins.
  const candidates = [
    {
      // Default Atlassian Confluence URL pattern.
      // The Library/API can use this base url scheme directly.
      pattern: new UrlPattern(
        "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
      ),
      isCustomDomain: false,
    },
    {
      // Custom subdomain Confluence URL pattern (still served under /wiki/spaces/).
      pattern: new UrlPattern(
        "https\\://(:subdomain.):domain.:tld/wiki/spaces/(:spaceKey)*"
      ),
      isCustomDomain: false,
    },
    {
      // Base FQDN Confluence URL pattern; /display/ is basically a URL mask,
      // so the API base is requested in custom-domain mode.
      pattern: new UrlPattern(
        "https\\://(:subdomain.):domain.:tld/display/(:spaceKey)*"
      ),
      isCustomDomain: true,
    },
  ];

  for (const { pattern, isCustomDomain } of candidates) {
    const match = pattern.match(spaceUrl);
    if (!match) continue;

    return {
      valid: match.hasOwnProperty("spaceKey"),
      result: {
        ...match,
        apiBase: generateAPIBaseUrl(match, isCustomDomain),
      },
    };
  }

  // No match
  return { valid: false, result: null };
}
|
||||
|
||||
module.exports = loadConfluence;
|
||||
|
Loading…
Reference in New Issue
Block a user