mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-09 00:10:10 +01:00
150 lines
4.3 KiB
JavaScript
150 lines
4.3 KiB
JavaScript
|
/**
 * Prepares and loads the files of a GitHub repository as documents.
 *
 * Typical use: construct with `{ repo, branch, accessToken, ignorePaths }`,
 * `await init()`, check `ready`, then `await recursiveLoader()`.
 */
class RepoLoader {
  // The access token that most recently passed validation. Lets repeated
  // validation calls (init() triggers two) skip a redundant API request.
  #validatedToken = null;

  /**
   * @param {Object} [args]
   * @param {string} [args.repo] - Repository URL, e.g. https://github.com/author/project
   * @param {string} [args.branch] - Branch to load; auto-assigned during init() when missing or unknown.
   * @param {string|null} [args.accessToken] - Optional GitHub access token.
   * @param {string[]} [args.ignorePaths] - Paths passed to the underlying loader as `ignorePaths`.
   */
  constructor(args = {}) {
    this.ready = false;
    this.repo = args?.repo;
    this.branch = args?.branch;
    this.accessToken = args?.accessToken || null;
    this.ignorePaths = args?.ignorePaths || [];

    this.author = null;
    this.project = null;
    this.branches = [];
  }

  /**
   * Checks that `this.repo` is an https://github.com/{author}/{project} URL
   * and, when it is, stores the author and project on the instance.
   *
   * Uses the built-in URL parser so that project names containing dots
   * (e.g. `next.js`) are accepted, and requires both path segments so a URL
   * without a project is never reported as valid.
   *
   * @returns {boolean} true when both author and project were extracted.
   */
  #validGithubUrl() {
    let parsed;
    try {
      parsed = new URL(this.repo);
    } catch {
      // Missing or malformed repo URL.
      return false;
    }

    if (parsed.protocol !== "https:" || parsed.hostname !== "github.com")
      return false;

    // Extra trailing segments (e.g. /tree/main) are ignored here.
    const [author, project] = parsed.pathname.split("/").filter(Boolean);
    if (!author || !project) return false;

    this.author = author;
    this.project = project;
    return true;
  }

  // Ensure the branch provided actually exists
  // and if it does not or has not been set auto-assign to primary branch.
  async #validBranch() {
    await this.getRepoBranches();
    if (!!this.branch && this.branches.includes(this.branch)) return;

    console.log(
      "[Github Loader]: Branch not set! Auto-assigning to a default branch."
    );
    this.branch = this.branches.includes("main") ? "main" : "master";
    console.log(`[Github Loader]: Branch auto-assigned to ${this.branch}.`);
  }

  /**
   * Verifies the access token with an authenticated request to the GitHub API.
   * A token that fails (non-OK response or network error) is discarded by
   * setting `this.accessToken` to null so later requests go unauthenticated.
   * A token that already passed validation on this instance is not re-checked.
   *
   * @returns {Promise<void>}
   */
  async #validateAccessToken() {
    if (!this.accessToken) return;
    if (this.accessToken === this.#validatedToken) return;

    let valid = false;
    try {
      const res = await fetch("https://api.github.com/octocat", {
        method: "GET",
        headers: {
          Authorization: `Bearer ${this.accessToken}`,
          "X-GitHub-Api-Version": "2022-11-28",
        },
      });
      if (!res.ok) throw new Error(res.statusText);
      valid = true;
    } catch (e) {
      console.error(
        "Invalid Github Access Token provided! Access token will not be used",
        e.message
      );
    }

    if (valid) this.#validatedToken = this.accessToken;
    else this.accessToken = null;
  }

  /**
   * Validates the repo URL, branch and access token, then marks the loader ready.
   *
   * @returns {Promise<RepoLoader|undefined>} this instance once ready; undefined
   *   (with `ready` left false) when the repo URL is not a valid GitHub URL.
   */
  async init() {
    if (!this.#validGithubUrl()) return;
    await this.#validBranch();
    await this.#validateAccessToken();
    this.ready = true;
    return this;
  }

  /**
   * Loads the repository's files as documents using LangChain's GithubRepoLoader.
   * Recursive loading is only enabled when an access token is set.
   *
   * @returns {Promise<Array>} every document yielded by the underlying loader.
   * @throws {Error} when called before init() has put the loader in a ready state.
   */
  async recursiveLoader() {
    if (!this.ready) throw new Error("[Github Loader]: not in ready state!");
    const {
      GithubRepoLoader: LCGithubLoader,
    } = require("langchain/document_loaders/web/github");

    if (this.accessToken)
      console.log(
        `[Github Loader]: Access token set! Recursive loading enabled!`
      );

    const loader = new LCGithubLoader(this.repo, {
      accessToken: this.accessToken,
      branch: this.branch,
      recursive: !!this.accessToken, // Recursive will hit rate limits.
      maxConcurrency: 5,
      unknown: "ignore",
      ignorePaths: this.ignorePaths,
    });

    const docs = [];
    for await (const doc of loader.loadAsStream()) docs.push(doc);
    return docs;
  }

  // Sort branches to always show main, then master, at the top of the result.
  // Remaining branches keep their incoming order.
  #branchPrefSort(branches = []) {
    const preferredSort = ["main", "master"];
    const preferred = preferredSort.filter((name) => branches.includes(name));
    const others = branches.filter((name) => !preferredSort.includes(name));
    return [...preferred, ...others];
  }

  /**
   * Get all branches for a given repo.
   *
   * Pages through the GitHub branches endpoint, stores the de-duplicated
   * branch names on `this.branches`, and returns them with main/master first.
   * A failed request stops paging; whatever was collected so far is kept.
   *
   * @returns {Promise<string[]>} branch names, or [] when the repo URL is invalid.
   */
  async getRepoBranches() {
    if (!this.#validGithubUrl() || !this.author || !this.project) return [];
    await this.#validateAccessToken(); // Ensure API access token is valid for pre-flight

    const perPage = 100;
    const names = [];
    let page = 1; // GitHub pagination is 1-based; page 0 just repeats page 1.
    let polling = true;

    while (polling) {
      console.log(`Fetching page ${page} of branches for ${this.project}`);
      try {
        const res = await fetch(
          `https://api.github.com/repos/${this.author}/${this.project}/branches?per_page=${perPage}&page=${page}`,
          {
            method: "GET",
            headers: {
              ...(this.accessToken
                ? { Authorization: `Bearer ${this.accessToken}` }
                : {}),
              "X-GitHub-Api-Version": "2022-11-28",
            },
          }
        );
        if (!res.ok)
          throw new Error(`Invalid request to Github API: ${res.statusText}`);

        const branchObjects = await res.json();
        names.push(...branchObjects.map((branch) => branch.name));
        // A short page is the last page, so no further request is needed.
        polling = branchObjects.length === perPage;
        page++;
      } catch (err) {
        polling = false;
        console.log(`RepoLoader.branches`, err);
      }
    }

    this.branches = [...new Set(names)];
    return this.#branchPrefSort(this.branches);
  }
}
|
||
|
|
||
|
// Expose the RepoLoader class as this CommonJS module's export.
module.exports = RepoLoader;
|