Generic upload fallback as text file. (#808)

* Do not block any file upload;
fall back to text for unknown/unsupported types if possible

* reduce call for frontend

* patch
This commit is contained in:
Timothy Carambat 2024-02-26 13:43:54 -08:00 committed by GitHub
parent ca2e0f8e6f
commit 6d18d79bb7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 54 additions and 42 deletions

View File

@ -4,7 +4,7 @@ const {
WATCH_DIRECTORY,
SUPPORTED_FILETYPE_CONVERTERS,
} = require("../utils/constants");
const { trashFile } = require("../utils/files");
const { trashFile, isTextType } = require("../utils/files");
const RESERVED_FILES = ["__HOTDIR__.md"];
async function processSingleFile(targetFilename) {
@ -31,17 +31,25 @@ async function processSingleFile(targetFilename) {
};
}
if (!Object.keys(SUPPORTED_FILETYPE_CONVERTERS).includes(fileExtension)) {
trashFile(fullFilePath);
return {
success: false,
reason: `File extension ${fileExtension} not supported for parsing.`,
documents: [],
};
let processFileAs = fileExtension;
if (!SUPPORTED_FILETYPE_CONVERTERS.hasOwnProperty(fileExtension)) {
if (isTextType(fullFilePath)) {
console.log(
`\x1b[33m[Collector]\x1b[0m The provided filetype of ${fileExtension} does not have a preset and will be processed as .txt.`
);
processFileAs = ".txt";
} else {
trashFile(fullFilePath);
return {
success: false,
reason: `File extension ${fileExtension} not supported for parsing and cannot be assumed as text file type.`,
documents: [],
};
}
}
const FileTypeProcessor = require(SUPPORTED_FILETYPE_CONVERTERS[
fileExtension
processFileAs
]);
return await FileTypeProcessor({
fullFilePath,

View File

@ -1,5 +1,33 @@
const fs = require("fs");
const path = require("path");
const { getType } = require("mime");
/**
 * Decide whether the file at `filepath` may be handled as a plain-text
 * document, judging by the mime type derived from its path.
 *
 * @param {string} filepath - Path of the file to inspect.
 * @returns {boolean} `true` when the file exists and its mime type is on
 *   neither deny-list below; `false` otherwise, including on any error.
 */
function isTextType(filepath) {
  // Primary mime classes that can never be coerced into a text type.
  const blockedFamilies = ["multipart", "image", "model", "audio", "video"];

  // Complete mime strings we cannot parse or interpret as text documents.
  const blockedMimes = [
    "application/octet-stream",
    "application/zip",
    "application/pkcs8",
    "application/vnd.microsoft.portable-executable",
    "application/x-msdownload",
  ];

  const fileIsPresent = fs.existsSync(filepath);
  if (!fileIsPresent) {
    return false;
  }

  try {
    const detected = getType(filepath);
    if (blockedMimes.includes(detected)) {
      return false;
    }

    // Anything that throws here - including a missing mime result, which has
    // no `.split` - lands in the catch and is treated as "not text".
    const [family] = detected.split("/");
    return !blockedFamilies.includes(family);
  } catch {
    return false;
  }
}
function trashFile(filepath) {
if (!fs.existsSync(filepath)) return;
@ -94,6 +122,7 @@ async function wipeCollectorStorage() {
module.exports = {
trashFile,
isTextType,
createdDate,
writeToServerDocuments,
wipeCollectorStorage,

View File

@ -8,7 +8,6 @@ function Directory({
files,
loading,
setLoading,
fileTypes,
workspace,
fetchKeys,
selectedItems,
@ -135,9 +134,7 @@ function Directory({
</div>
)}
</div>
<UploadFile
fileTypes={fileTypes}
workspace={workspace}
fetchKeys={fetchKeys}
setLoading={setLoading}

View File

@ -61,7 +61,7 @@ function FileUploadProgressComponent({
if (status === "failed") {
return (
<div className="h-14 px-2 py-2 flex items-center gap-x-4 rounded-lg bg-white/5 border border-white/40 overflow-y-auto">
<div className="w-6 h-6">
<div className="w-6 h-6 flex-shrink-0">
<XCircle className="w-6 h-6 stroke-white bg-red-500 rounded-full p-1 w-full h-full" />
</div>
<div className="flex flex-col">
@ -76,7 +76,7 @@ function FileUploadProgressComponent({
return (
<div className="h-14 px-2 py-2 flex items-center gap-x-4 rounded-lg bg-white/5 border border-white/40">
<div className="w-6 h-6">
<div className="w-6 h-6 flex-shrink-0">
{status !== "complete" ? (
<div className="flex items-center justify-center">
<PreLoader size="6" />

View File

@ -7,12 +7,7 @@ import { v4 } from "uuid";
import FileUploadProgress from "./FileUploadProgress";
import Workspace from "../../../../../models/workspace";
export default function UploadFile({
workspace,
fileTypes,
fetchKeys,
setLoading,
}) {
export default function UploadFile({ workspace, fetchKeys, setLoading }) {
const [ready, setReady] = useState(false);
const [files, setFiles] = useState([]);
const [fetchingUrl, setFetchingUrl] = useState(false);
@ -76,9 +71,6 @@ export default function UploadFile({
const { getRootProps, getInputProps } = useDropzone({
onDrop,
accept: {
...fileTypes,
},
disabled: !ready,
});
@ -109,9 +101,7 @@ export default function UploadFile({
Click to upload or drag and drop
</div>
<div className="text-white text-opacity-60 text-xs font-medium py-1">
{Object.values(fileTypes ?? [])
.flat()
.join(" ")}
supports text files, csv's, spreadsheets, audio files, and more!
</div>
</div>
) : (

View File

@ -15,11 +15,7 @@ const MODEL_COSTS = {
"text-embedding-3-large": 0.00000013, // $0.00013 / 1K tokens
};
export default function DocumentSettings({
workspace,
fileTypes,
systemSettings,
}) {
export default function DocumentSettings({ workspace, systemSettings }) {
const [highlightWorkspace, setHighlightWorkspace] = useState(false);
const [availableDocs, setAvailableDocs] = useState([]);
const [loading, setLoading] = useState(true);
@ -201,7 +197,6 @@ export default function DocumentSettings({
loading={loading}
loadingMessage={loadingMessage}
setLoading={setLoading}
fileTypes={fileTypes}
workspace={workspace}
fetchKeys={fetchKeys}
selectedItems={selectedItems}

View File

@ -11,17 +11,14 @@ const noop = () => {};
const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => {
const { slug } = useParams();
const [workspace, setWorkspace] = useState(null);
const [fileTypes, setFileTypes] = useState(null);
const [settings, setSettings] = useState({});
useEffect(() => {
async function checkSupportedFiletypes() {
const acceptedTypes = await System.acceptedDocumentTypes();
async function getSettings() {
const _settings = await System.keys();
setFileTypes(acceptedTypes ?? {});
setSettings(_settings ?? {});
}
checkSupportedFiletypes();
getSettings();
}, []);
useEffect(() => {
@ -78,11 +75,7 @@ const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => {
<X className="text-gray-300 text-lg" />
</button>
</div>
<DocumentSettings
workspace={workspace}
fileTypes={fileTypes}
systemSettings={settings}
/>
<DocumentSettings workspace={workspace} systemSettings={settings} />
</div>
</div>
</div>