diff --git a/.vscode/settings.json b/.vscode/settings.json index d60238c72..14efd3fae 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,6 +5,7 @@ "AIbitat", "allm", "anythingllm", + "Apipie", "Astra", "Chartable", "cleancss", @@ -18,6 +19,7 @@ "elevenlabs", "Embeddable", "epub", + "fireworksai", "GROQ", "hljs", "huggingface", @@ -40,17 +42,18 @@ "pagerender", "Qdrant", "royalblue", - "searxng", "SearchApi", + "searxng", "Serper", "Serply", "streamable", "textgenwebui", "togetherai", - "fireworksai", "Unembed", + "uuidv", "vectordbs", "Weaviate", + "XAILLM", "Zilliz" ], "eslint.experimental.useFlatConfig": true, diff --git a/README.md b/README.md index 68c21e4b5..b1f308a14 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,8 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace - [KoboldCPP](https://github.com/LostRuins/koboldcpp) - [LiteLLM](https://github.com/BerriAI/litellm) - [Text Generation Web UI](https://github.com/oobabooga/text-generation-webui) +- [Apipie](https://apipie.ai/) +- [xAI](https://x.ai/) **Embedder models:** @@ -116,6 +118,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace - [PiperTTSLocal - runs in browser](https://github.com/rhasspy/piper) - [OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/voice-options) - [ElevenLabs](https://elevenlabs.io/) +- Any OpenAI Compatible TTS service. 
**STT (speech-to-text) support:** diff --git a/collector/index.js b/collector/index.js index 2893754af..7c41002da 100644 --- a/collector/index.js +++ b/collector/index.js @@ -16,12 +16,14 @@ const extensions = require("./extensions"); const { processRawText } = require("./processRawText"); const { verifyPayloadIntegrity } = require("./middleware/verifyIntegrity"); const app = express(); +const FILE_LIMIT = "3GB"; app.use(cors({ origin: true })); app.use( - bodyParser.text(), - bodyParser.json(), + bodyParser.text({ limit: FILE_LIMIT }), + bodyParser.json({ limit: FILE_LIMIT }), bodyParser.urlencoded({ + limit: FILE_LIMIT, extended: true, }) ); diff --git a/collector/package.json b/collector/package.json index 4ce85e68e..bf6498c06 100644 --- a/collector/package.json +++ b/collector/package.json @@ -33,6 +33,7 @@ "mime": "^3.0.0", "moment": "^2.29.4", "node-html-parser": "^6.1.13", + "node-xlsx": "^0.24.0", "officeparser": "^4.0.5", "openai": "4.38.5", "pdf-parse": "^1.1.1", @@ -48,4 +49,4 @@ "nodemon": "^2.0.22", "prettier": "^2.4.1" } -} \ No newline at end of file +} diff --git a/collector/processLink/convert/generic.js b/collector/processLink/convert/generic.js index 2f88a4b32..393f421d5 100644 --- a/collector/processLink/convert/generic.js +++ b/collector/processLink/convert/generic.js @@ -27,7 +27,8 @@ async function scrapeGenericUrl(link, textOnly = false) { } const url = new URL(link); - const filename = (url.host + "-" + url.pathname).replace(".", "_"); + const decodedPathname = decodeURIComponent(url.pathname); + const filename = `${url.hostname}${decodedPathname.replace(/\//g, "_")}`; const data = { id: v4(), diff --git a/collector/processSingleFile/convert/asXlsx.js b/collector/processSingleFile/convert/asXlsx.js new file mode 100644 index 000000000..f21c6f1d9 --- /dev/null +++ b/collector/processSingleFile/convert/asXlsx.js @@ -0,0 +1,113 @@ +const { v4 } = require("uuid"); +const xlsx = require("node-xlsx").default; +const path = require("path"); +const 
fs = require("fs"); +const { + createdDate, + trashFile, + writeToServerDocuments, +} = require("../../utils/files"); +const { tokenizeString } = require("../../utils/tokenizer"); +const { default: slugify } = require("slugify"); + +function convertToCSV(data) { + return data + .map((row) => + row + .map((cell) => { + if (cell === null || cell === undefined) return ""; + if (typeof cell === "string" && cell.includes(",")) + return `"${cell}"`; + return cell; + }) + .join(",") + ) + .join("\n"); +} + +async function asXlsx({ fullFilePath = "", filename = "" }) { + const documents = []; + const folderName = slugify(`${path.basename(filename)}-${v4().slice(0, 4)}`, { + lower: true, + trim: true, + }); + + const outFolderPath = + process.env.NODE_ENV === "development" + ? path.resolve( + __dirname, + `../../../server/storage/documents/${folderName}` + ) + : path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`); + + try { + const workSheetsFromFile = xlsx.parse(fullFilePath); + if (!fs.existsSync(outFolderPath)) + fs.mkdirSync(outFolderPath, { recursive: true }); + + for (const sheet of workSheetsFromFile) { + try { + const { name, data } = sheet; + const content = convertToCSV(data); + + if (!content?.length) { + console.warn(`Sheet "${name}" is empty. 
Skipping.`); + continue; + } + + console.log(`-- Processing sheet: ${name} --`); + const sheetData = { + id: v4(), + url: `file://${path.join(outFolderPath, `${slugify(name)}.csv`)}`, + title: `${filename} - Sheet:${name}`, + docAuthor: "Unknown", + description: `Spreadsheet data from sheet: ${name}`, + docSource: "an xlsx file uploaded by the user.", + chunkSource: "", + published: createdDate(fullFilePath), + wordCount: content.split(/\s+/).length, + pageContent: content, + token_count_estimate: tokenizeString(content).length, + }; + + const document = writeToServerDocuments( + sheetData, + `sheet-${slugify(name)}`, + outFolderPath + ); + documents.push(document); + console.log( + `[SUCCESS]: Sheet "${name}" converted & ready for embedding.` + ); + } catch (err) { + console.error(`Error processing sheet "${name}":`, err); + continue; + } + } + } catch (err) { + console.error("Could not process xlsx file!", err); + return { + success: false, + reason: `Error processing ${filename}: ${err.message}`, + documents: [], + }; + } finally { + trashFile(fullFilePath); + } + + if (documents.length === 0) { + console.error(`No valid sheets found in ${filename}.`); + return { + success: false, + reason: `No valid sheets found in ${filename}.`, + documents: [], + }; + } + + console.log( + `[SUCCESS]: ${filename} fully processed. Created ${documents.length} document(s).\n` + ); + return { success: true, reason: null, documents }; +} + +module.exports = asXlsx; diff --git a/collector/processSingleFile/index.js b/collector/processSingleFile/index.js index bdefb79e0..a00b139ed 100644 --- a/collector/processSingleFile/index.js +++ b/collector/processSingleFile/index.js @@ -38,7 +38,7 @@ async function processSingleFile(targetFilename, options = {}) { }; const fileExtension = path.extname(fullFilePath).toLowerCase(); - if (!fileExtension) { + if (fullFilePath.includes(".") && !fileExtension) { return { success: false, reason: `No file extension found. 
This file cannot be processed.`, diff --git a/collector/utils/constants.js b/collector/utils/constants.js index ee9ad22ae..c7beeb4b2 100644 --- a/collector/utils/constants.js +++ b/collector/utils/constants.js @@ -11,6 +11,10 @@ const ACCEPTED_MIMES = { ".pptx", ], + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": [ + ".xlsx", + ], + "application/vnd.oasis.opendocument.text": [".odt"], "application/vnd.oasis.opendocument.presentation": [".odp"], @@ -41,6 +45,8 @@ const SUPPORTED_FILETYPE_CONVERTERS = { ".odt": "./convert/asOfficeMime.js", ".odp": "./convert/asOfficeMime.js", + ".xlsx": "./convert/asXlsx.js", + ".mbox": "./convert/asMbox.js", ".epub": "./convert/asEPub.js", diff --git a/collector/utils/extensions/RepoLoader/GithubRepo/RepoLoader/index.js b/collector/utils/extensions/RepoLoader/GithubRepo/RepoLoader/index.js index 61f208742..61ef2036e 100644 --- a/collector/utils/extensions/RepoLoader/GithubRepo/RepoLoader/index.js +++ b/collector/utils/extensions/RepoLoader/GithubRepo/RepoLoader/index.js @@ -29,20 +29,36 @@ class GitHubRepoLoader { } #validGithubUrl() { - const UrlPattern = require("url-pattern"); - const pattern = new UrlPattern( - "https\\://github.com/(:author)/(:project(*))", - { - // fixes project names with special characters (.github) - segmentValueCharset: "a-zA-Z0-9-._~%/+", - } - ); - const match = pattern.match(this.repo); - if (!match) return false; + try { + const url = new URL(this.repo); - this.author = match.author; - this.project = match.project; - return true; + // Not a github url at all. + if (url.hostname !== "github.com") { + console.log( + `[Github Loader]: Invalid Github URL provided! Hostname must be 'github.com'. Got ${url.hostname}` + ); + return false; + } + + // Assume the url is in the format of github.com/{author}/{project} + // Remove the first slash from the pathname so we can split it properly. 
+ const [author, project, ..._rest] = url.pathname.slice(1).split("/"); + if (!author || !project) { + console.log( + `[Github Loader]: Invalid Github URL provided! URL must be in the format of 'github.com/{author}/{project}'. Got ${url.pathname}` + ); + return false; + } + + this.author = author; + this.project = project; + return true; + } catch (e) { + console.log( + `[Github Loader]: Invalid Github URL provided! Error: ${e.message}` + ); + return false; + } } // Ensure the branch provided actually exists diff --git a/collector/utils/extensions/WebsiteDepth/index.js b/collector/utils/extensions/WebsiteDepth/index.js index 2a9994aa5..80be0a1d8 100644 --- a/collector/utils/extensions/WebsiteDepth/index.js +++ b/collector/utils/extensions/WebsiteDepth/index.js @@ -108,7 +108,8 @@ async function bulkScrapePages(links, outFolderPath) { } const url = new URL(link); - const filename = (url.host + "-" + url.pathname).replace(".", "_"); + const decodedPathname = decodeURIComponent(url.pathname); + const filename = `${url.hostname}${decodedPathname.replace(/\//g, "_")}`; const data = { id: v4(), diff --git a/collector/utils/files/mime.js b/collector/utils/files/mime.js index b747d5975..ad3ff5782 100644 --- a/collector/utils/files/mime.js +++ b/collector/utils/files/mime.js @@ -1,5 +1,5 @@ const MimeLib = require("mime"); - +const path = require("path"); class MimeDetector { nonTextTypes = ["multipart", "image", "model", "audio", "video"]; badMimes = [ @@ -44,8 +44,26 @@ class MimeDetector { ); } + // These are file types that are not detected by the mime library and need to be processed as text files. + // You should only add file types that are not detected by the mime library, are parsable as text, and are files + // with no extension. Otherwise, their extension should be added to the overrides array. + #specialTextFileTypes = ["dockerfile", "jenkinsfile"]; + + /** + * Returns the MIME type of the file. 
If the mime library cannot determine a type, only the known extensionless text files (Dockerfile, Jenkinsfile) are reported as text/plain; any other file yields null. + * @param {string} filepath + * @returns {string|null} + */ getType(filepath) { - return this.lib.getType(filepath); + const parsedMime = this.lib.getType(filepath); + if (!!parsedMime) return parsedMime; + + // If the mime could not be parsed, it could be a special file type like Dockerfile or Jenkinsfile + // which we can reliably process as text files. + const baseName = path.basename(filepath)?.toLowerCase(); + if (this.#specialTextFileTypes.includes(baseName)) return "text/plain"; + + return null; } } diff --git a/collector/yarn.lock b/collector/yarn.lock index 2786692e0..f991b43fa 100644 --- a/collector/yarn.lock +++ b/collector/yarn.lock @@ -2326,6 +2326,13 @@ node-html-parser@^6.1.13: css-select "^5.1.0" he "1.2.0" +node-xlsx@^0.24.0: + version "0.24.0" + resolved "https://registry.yarnpkg.com/node-xlsx/-/node-xlsx-0.24.0.tgz#a6a365acb18ad37c66c2b254b6ebe0c22dc9dc6f" + integrity sha512-1olwK48XK9nXZsyH/FCltvGrQYvXXZuxVitxXXv2GIuRm51aBi1+5KwR4rWM4KeO61sFU+00913WLZTD+AcXEg== + dependencies: + xlsx "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz" + nodemailer@6.9.13: version "6.9.13" resolved "https://registry.yarnpkg.com/nodemailer/-/nodemailer-6.9.13.tgz#5b292bf1e92645f4852ca872c56a6ba6c4a3d3d6" @@ -3528,6 +3535,10 @@ ws@8.14.2: resolved "https://registry.yarnpkg.com/ws/-/ws-8.14.2.tgz#6c249a806eb2db7a20d26d51e7709eab7b2e6c7f" integrity sha512-wEBG1ftX4jcglPxgFCMJmZ2PLtSbJ2Peg6TmpJFTbe9GZYOQCDPdMYu/Tm0/bGZkw8paZnJY45J4K2PZrLYq8g== +"xlsx@https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz": + version "0.20.2" + resolved "https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz#0f64eeed3f1a46e64724620c3553f2dbd3cd2d7d" + xml2js@^0.6.2: version "0.6.2" resolved "https://registry.yarnpkg.com/xml2js/-/xml2js-0.6.2.tgz#dd0b630083aa09c161e25a4d0901e2b2a929b499" diff --git a/docker/.env.example b/docker/.env.example index e67ac5ddd..7bb07ebef 100644 --- a/docker/.env.example +++ 
b/docker/.env.example @@ -105,6 +105,14 @@ GID='1000' # FIREWORKS_AI_LLM_API_KEY='my-fireworks-ai-key' # FIREWORKS_AI_LLM_MODEL_PREF='accounts/fireworks/models/llama-v3p1-8b-instruct' +# LLM_PROVIDER='apipie' +# APIPIE_LLM_API_KEY='sk-123abc' +# APIPIE_LLM_MODEL_PREF='openrouter/llama-3.1-8b-instruct' + +# LLM_PROVIDER='xai' +# XAI_LLM_API_KEY='xai-your-api-key-here' +# XAI_LLM_MODEL_PREF='grok-beta' + ########################################### ######## Embedding API SElECTION ########## ########################################### @@ -215,6 +223,11 @@ GID='1000' # TTS_OPEN_AI_KEY=sk-example # TTS_OPEN_AI_VOICE_MODEL=nova +# TTS_PROVIDER="generic-openai" +# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example +# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova +# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1" + # TTS_PROVIDER="elevenlabs" # TTS_ELEVEN_LABS_KEY= # TTS_ELEVEN_LABS_VOICE_MODEL=21m00Tcm4TlvDq8ikWAM # Rachel @@ -270,4 +283,12 @@ GID='1000' # AGENT_SERPLY_API_KEY= #------ SearXNG ----------- https://github.com/searxng/searxng -# AGENT_SEARXNG_API_URL= \ No newline at end of file +# AGENT_SEARXNG_API_URL= + +########################################### +######## Other Configurations ############ +########################################### + +# Disable viewing chat history from the UI and frontend APIs. +# See https://docs.anythingllm.com/configuration#disable-view-chat-history for more information. 
+# DISABLE_VIEW_CHAT_HISTORY=1 \ No newline at end of file diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index c6cac66db..cb3bac7f7 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -22,7 +22,6 @@ const WorkspaceChat = lazy(() => import("@/pages/WorkspaceChat")); const AdminUsers = lazy(() => import("@/pages/Admin/Users")); const AdminInvites = lazy(() => import("@/pages/Admin/Invitations")); const AdminWorkspaces = lazy(() => import("@/pages/Admin/Workspaces")); -const AdminSystem = lazy(() => import("@/pages/Admin/System")); const AdminLogs = lazy(() => import("@/pages/Admin/Logging")); const AdminAgents = lazy(() => import("@/pages/Admin/Agents")); const GeneralChats = lazy(() => import("@/pages/GeneralSettings/Chats")); @@ -168,10 +167,6 @@ export default function App() { path="/settings/workspace-chats" element={} /> - } - /> } diff --git a/frontend/src/components/CanViewChatHistory/index.jsx b/frontend/src/components/CanViewChatHistory/index.jsx new file mode 100644 index 000000000..44e753531 --- /dev/null +++ b/frontend/src/components/CanViewChatHistory/index.jsx @@ -0,0 +1,50 @@ +import { useEffect, useState } from "react"; +import { FullScreenLoader } from "@/components/Preloader"; +import System from "@/models/system"; +import paths from "@/utils/paths"; + +/** + * Protects the view from system set ups who cannot view chat history. + * If the user cannot view chat history, they are redirected to the home page. + * @param {React.ReactNode} children + */ +export function CanViewChatHistory({ children }) { + const { loading, viewable } = useCanViewChatHistory(); + if (loading) return ; + if (!viewable) { + window.location.href = paths.home(); + return ; + } + + return <>{children}; +} + +/** + * Provides the `viewable` state to the children. 
+ * @returns {React.ReactNode} + */ +export function CanViewChatHistoryProvider({ children }) { + const { loading, viewable } = useCanViewChatHistory(); + if (loading) return null; + return <>{children({ viewable })}; +} + +/** + * Hook that fetches the can view chat history state from local storage or the system settings. + * @returns {{loading: boolean, viewable: boolean}} `loading` is true until the fetch resolves; `viewable` is false until then. + */ +export function useCanViewChatHistory() { + const [loading, setLoading] = useState(true); + const [viewable, setViewable] = useState(false); + + useEffect(() => { + async function fetchViewable() { + const { viewable } = await System.fetchCanViewChatHistory(); + setViewable(viewable); + setLoading(false); + } + fetchViewable(); + }, []); + + return { loading, viewable }; +} diff --git a/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx index 252cb0a7b..b55fc6743 100644 --- a/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx +++ b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx @@ -36,6 +36,8 @@ export default function VoyageAiOptions({ settings }) { "voyage-code-2", "voyage-large-2", "voyage-2", + "voyage-3", + "voyage-3-lite", ].map((model) => { return ( - -
- - -
-
); diff --git a/frontend/src/components/LLMSelection/XAiLLMOptions/index.jsx b/frontend/src/components/LLMSelection/XAiLLMOptions/index.jsx new file mode 100644 index 000000000..d760a8ba4 --- /dev/null +++ b/frontend/src/components/LLMSelection/XAiLLMOptions/index.jsx @@ -0,0 +1,114 @@ +import { useState, useEffect } from "react"; +import System from "@/models/system"; + +export default function XAILLMOptions({ settings }) { + const [inputValue, setInputValue] = useState(settings?.XAIApiKey); + const [apiKey, setApiKey] = useState(settings?.XAIApiKey); + + return ( +
+
+ + setInputValue(e.target.value)} + onBlur={() => setApiKey(inputValue)} + /> +
+ + {!settings?.credentialsOnly && ( + + )} +
+ ); +} + +function XAIModelSelection({ apiKey, settings }) { + const [customModels, setCustomModels] = useState([]); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findCustomModels() { + if (!apiKey) { + setCustomModels([]); + setLoading(true); + return; + } + + try { + setLoading(true); + const { models } = await System.customModels("xai", apiKey); + setCustomModels(models || []); + } catch (error) { + console.error("Failed to fetch custom models:", error); + setCustomModels([]); + } finally { + setLoading(false); + } + } + findCustomModels(); + }, [apiKey]); + + if (loading) { + return ( +
+ + +

+ Enter a valid API key to view all available models for your account. +

+
+ ); + } + + return ( +
+ + +

+ Select the xAI model you want to use for your conversations. +

+
+ ); +} diff --git a/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/FileRow/index.jsx b/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/FileRow/index.jsx index ea34b33a4..fc3546c17 100644 --- a/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/FileRow/index.jsx +++ b/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/FileRow/index.jsx @@ -31,7 +31,7 @@ export default function FileRow({ item, selected, toggleSelection }) { className="shrink-0 text-base font-bold w-4 h-4 mr-[3px]" weight="fill" /> -

+

{middleTruncate(item.title, 55)}

diff --git a/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/FolderRow/index.jsx b/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/FolderRow/index.jsx index bf1581e14..7e2dfffee 100644 --- a/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/FolderRow/index.jsx +++ b/frontend/src/components/Modals/ManageWorkspace/Documents/Directory/FolderRow/index.jsx @@ -51,7 +51,7 @@ export default function FolderRow({ className="shrink-0 text-base font-bold w-4 h-4 mr-[3px]" weight="fill" /> -

+

{middleTruncate(item.name, 35)}

diff --git a/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx b/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx index cc267170b..505c4c22c 100644 --- a/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx +++ b/frontend/src/components/Modals/ManageWorkspace/Documents/WorkspaceDirectory/WorkspaceFileRow/index.jsx @@ -83,7 +83,7 @@ export default function WorkspaceFileRow({ className="shrink-0 text-base font-bold w-4 h-4 mr-[3px] ml-1" weight="fill" /> -

+

{middleTruncate(item.title, 50)}

diff --git a/frontend/src/components/SettingsButton/index.jsx b/frontend/src/components/SettingsButton/index.jsx index 19a4a17aa..f53e675f1 100644 --- a/frontend/src/components/SettingsButton/index.jsx +++ b/frontend/src/components/SettingsButton/index.jsx @@ -29,9 +29,7 @@ export default function SettingsButton() { return ( - isVisible({ roles: opt.roles, user, flex: opt.flex }) + isVisible({ roles: opt.roles, user, flex: opt.flex, hidden: opt.hidden }) ); } diff --git a/frontend/src/components/SettingsSidebar/index.jsx b/frontend/src/components/SettingsSidebar/index.jsx index 97b088eca..46eba5db9 100644 --- a/frontend/src/components/SettingsSidebar/index.jsx +++ b/frontend/src/components/SettingsSidebar/index.jsx @@ -21,6 +21,7 @@ import { useTranslation } from "react-i18next"; import showToast from "@/utils/toast"; import System from "@/models/system"; import Option from "./MenuOption"; +import { CanViewChatHistoryProvider } from "../CanViewChatHistory"; export default function SettingsSidebar() { const { t } = useTranslation(); @@ -208,156 +209,157 @@ function SupportEmail() { } const SidebarOptions = ({ user = null, t }) => ( - <> -