diff --git a/README.md b/README.md
index ff50a8587..68653bddc 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,8 @@ Some cool features of AnythingLLM
- [LM Studio (all models)](https://lmstudio.ai)
- [LocalAi (all models)](https://localai.io/)
- [Together AI (chat models)](https://www.together.ai/)
+- [Perplexity (chat models)](https://www.perplexity.ai/)
+- [OpenRouter (chat models)](https://openrouter.ai/)
- [Mistral](https://mistral.ai/)
**Supported Embedding models:**
@@ -80,6 +82,7 @@ Some cool features of AnythingLLM
- [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
- [LM Studio (all)](https://lmstudio.ai)
- [LocalAi (all)](https://localai.io/)
+- [Ollama (all)](https://ollama.ai/)
**Supported Vector Databases:**
@@ -108,8 +111,8 @@ Mintplex Labs & the community maintain a number of deployment methods, scripts,
|----------------------------------------|----:|-----|---------------|------------|
| [![Deploy on Docker][docker-btn]][docker-deploy] | [![Deploy on AWS][aws-btn]][aws-deploy] | [![Deploy on GCP][gcp-btn]][gcp-deploy] | [![Deploy on DigitalOcean][do-btn]][aws-deploy] | [![Deploy on Render.com][render-btn]][render-deploy] |
-| Railway |
-|----------------------------------------|
+| Railway |
+| --------------------------------------------------- |
| [![Deploy on Railway][railway-btn]][railway-deploy] |
[or set up a production AnythingLLM instance without Docker →](./BARE_METAL.md)
diff --git a/collector/processSingleFile/index.js b/collector/processSingleFile/index.js
index 9efd3a70f..569a2cde2 100644
--- a/collector/processSingleFile/index.js
+++ b/collector/processSingleFile/index.js
@@ -4,7 +4,7 @@ const {
WATCH_DIRECTORY,
SUPPORTED_FILETYPE_CONVERTERS,
} = require("../utils/constants");
-const { trashFile } = require("../utils/files");
+const { trashFile, isTextType } = require("../utils/files");
const RESERVED_FILES = ["__HOTDIR__.md"];
async function processSingleFile(targetFilename) {
@@ -31,17 +31,25 @@ async function processSingleFile(targetFilename) {
};
}
- if (!Object.keys(SUPPORTED_FILETYPE_CONVERTERS).includes(fileExtension)) {
- trashFile(fullFilePath);
- return {
- success: false,
- reason: `File extension ${fileExtension} not supported for parsing.`,
- documents: [],
- };
+ let processFileAs = fileExtension;
+ if (!SUPPORTED_FILETYPE_CONVERTERS.hasOwnProperty(fileExtension)) {
+ if (isTextType(fullFilePath)) {
+ console.log(
+ `\x1b[33m[Collector]\x1b[0m The provided filetype of ${fileExtension} does not have a preset and will be processed as .txt.`
+ );
+ processFileAs = ".txt";
+ } else {
+ trashFile(fullFilePath);
+ return {
+ success: false,
+ reason: `File extension ${fileExtension} not supported for parsing and cannot be assumed as text file type.`,
+ documents: [],
+ };
+ }
}
const FileTypeProcessor = require(SUPPORTED_FILETYPE_CONVERTERS[
- fileExtension
+ processFileAs
]);
return await FileTypeProcessor({
fullFilePath,
diff --git a/collector/utils/files/index.js b/collector/utils/files/index.js
index fc4e2d9cc..269567c40 100644
--- a/collector/utils/files/index.js
+++ b/collector/utils/files/index.js
@@ -1,10 +1,38 @@
const fs = require("fs");
const path = require("path");
+const { getType } = require("mime");
const documentsFolder =
process.env.NODE_ENV === "production"
? path.resolve("/storage/documents") // hardcoded to Render storage mount.
: path.resolve(__dirname, "../../../server/storage/documents");
+function isTextType(filepath) { // true only if the file exists and its MIME type passes both deny-lists below
+ if (!fs.existsSync(filepath)) return false;
+ // Primary MIME classes (the part before the "/") that for sure
+ // cannot be forced into a text type.
+ const nonTextTypes = ["multipart", "image", "model", "audio", "video"];
+ // Full MIME types that we for sure cannot parse or interpret as text
+ // documents.
+ const BAD_MIMES = [
+ "application/octet-stream",
+ "application/zip",
+ "application/pkcs8",
+ "application/vnd.microsoft.portable-executable",
+ "application/x-msdownload",
+ ];
+
+ try {
+ const mime = getType(filepath);
+ if (BAD_MIMES.includes(mime)) return false;
+
+ const type = mime.split("/")[0];
+ if (nonTextTypes.includes(type)) return false;
+ return true;
+ } catch { // any error above (e.g. mime.split on a non-string result) is treated as "not text"
+ return false;
+ }
+}
+
function trashFile(filepath) {
if (!fs.existsSync(filepath)) return;
@@ -97,6 +125,7 @@ async function wipeCollectorStorage() {
module.exports = {
documentsFolder,
trashFile,
+ isTextType,
createdDate,
writeToServerDocuments,
wipeCollectorStorage,
diff --git a/docker/.env.example b/docker/.env.example
index b14d3c6ed..ba33bd5c0 100644
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -48,6 +48,14 @@ GID='1000'
# MISTRAL_API_KEY='example-mistral-ai-api-key'
# MISTRAL_MODEL_PREF='mistral-tiny'
+# LLM_PROVIDER='perplexity'
+# PERPLEXITY_API_KEY='my-perplexity-key'
+# PERPLEXITY_MODEL_PREF='codellama-34b-instruct'
+
+# LLM_PROVIDER='openrouter'
+# OPENROUTER_API_KEY='my-openrouter-key'
+# OPENROUTER_MODEL_PREF='openrouter/auto'
+
# LLM_PROVIDER='huggingface'
# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx
@@ -71,6 +79,11 @@ GID='1000'
# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
# EMBEDDING_MODEL_MAX_CHUNK_LENGTH=1000 # The max chunk size in chars a string to embed can be
+# EMBEDDING_ENGINE='ollama'
+# EMBEDDING_BASE_PATH='http://127.0.0.1:11434'
+# EMBEDDING_MODEL_PREF='nomic-embed-text:latest'
+# EMBEDDING_MODEL_MAX_CHUNK_LENGTH=8192
+
###########################################
######## Vector Database Selection ########
###########################################
diff --git a/frontend/src/components/EmbeddingSelection/AzureAiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/AzureAiOptions/index.jsx
index c782c51f3..209c0aa21 100644
--- a/frontend/src/components/EmbeddingSelection/AzureAiOptions/index.jsx
+++ b/frontend/src/components/EmbeddingSelection/AzureAiOptions/index.jsx
@@ -9,7 +9,7 @@ export default function AzureAiOptions({ settings }) {
-
+
{name}
-
- {description}
-
+
{description}
diff --git a/frontend/src/components/EmbeddingSelection/LocalAiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/LocalAiOptions/index.jsx
index 651d3e950..8c611cd31 100644
--- a/frontend/src/components/EmbeddingSelection/LocalAiOptions/index.jsx
+++ b/frontend/src/components/EmbeddingSelection/LocalAiOptions/index.jsx
@@ -19,7 +19,7 @@ export default function LocalAiOptions({ settings }) {
setBasePathValue(e.target.value)}
@@ -41,7 +41,7 @@ export default function LocalAiOptions({ settings }) {
e.target.blur()}
@@ -62,7 +62,7 @@ export default function LocalAiOptions({ settings }) {