mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-14 10:30:10 +01:00
d52f8aafd4
* Include links in citations force ChunkSource key to retain this information old links will be unsupported * show special icons depending on source * remove console log * reset server documents writeTo
79 lines
2.2 KiB
JavaScript
79 lines
2.2 KiB
JavaScript
const RepoLoader = require("./RepoLoader");
|
|
const fs = require("fs");
|
|
const path = require("path");
|
|
const { default: slugify } = require("slugify");
|
|
const { v4 } = require("uuid");
|
|
const { writeToServerDocuments } = require("../../files");
|
|
const { tokenizeString } = require("../../tokenizer");
|
|
|
|
/**
 * Loads the files of a Github repository through `RepoLoader` and hands every
 * non-empty file to `writeToServerDocuments` as a document record.
 *
 * Records are written into a per-run folder named
 * `<author>-<project>-<branch>-<first 4 uuid chars>` (slugified, lower-cased),
 * resolved against `../../../../server/storage/documents` relative to this
 * file's directory.
 *
 * @param {object} args - Passed unchanged to the `RepoLoader` constructor.
 * @returns {Promise<{success: boolean, reason: (string|null), data?: object}>}
 *   `success: false` plus a `reason` when the repo is not ready after `init()`
 *   or the loader returns no documents. Otherwise `success: true` with `data`
 *   holding `author`, `repo`, `branch`, `files` (the number of documents the
 *   loader returned, including any skipped for having no content) and
 *   `destination` (the output folder name).
 */
async function loadGithubRepo(args) {
  const repo = new RepoLoader(args);
  await repo.init();

  if (!repo.ready) {
    return {
      success: false,
      reason: "Could not prepare Github repo for loading! Check URL",
    };
  }

  console.log(
    `-- Working Github ${repo.author}/${repo.project}:${repo.branch} --`
  );
  const docs = await repo.recursiveLoader();
  if (!docs.length) {
    return {
      success: false,
      reason: "No files were found for those settings.",
    };
  }

  console.log(`[Github Loader]: Found ${docs.length} source files. Saving...`);
  // A short uuid fragment keeps repeated imports of the same branch in
  // separate folders.
  const outFolder = slugify(
    `${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}`
  ).toLowerCase();
  const outFolderPath = path.resolve(
    __dirname,
    `../../../../server/storage/documents/${outFolder}`
  );
  // `recursive: true` also creates any missing parent directories (e.g. the
  // documents storage folder on a fresh install) and does not throw when the
  // target already exists; a plain mkdirSync fails with ENOENT/EEXIST there.
  fs.mkdirSync(outFolderPath, { recursive: true });

  for (const doc of docs) {
    // Files without content produce no document record.
    if (!doc.pageContent) continue;

    const data = {
      id: v4(),
      url: `github://${doc.metadata.source}`,
      title: doc.metadata.source,
      docAuthor: repo.author,
      description: "No description found.",
      docSource: doc.metadata.source,
      chunkSource: `link://${doc.metadata.repository}/blob/${doc.metadata.branch}/${doc.metadata.source}`,
      published: new Date().toLocaleString(),
      wordCount: doc.pageContent.split(" ").length,
      pageContent: doc.pageContent,
      token_count_estimate: tokenizeString(doc.pageContent).length,
    };

    console.log(
      `[Github Loader]: Saving ${doc.metadata.source} to ${outFolder}`
    );
    writeToServerDocuments(
      data,
      `${slugify(doc.metadata.source)}-${data.id}`,
      outFolderPath
    );
  }

  return {
    success: true,
    reason: null,
    data: {
      author: repo.author,
      repo: repo.project,
      branch: repo.branch,
      files: docs.length,
      destination: outFolder,
    },
  };
}
|
|
|
|
// The loader function is this module's sole export.
module.exports = loadGithubRepo;
|