+  const [textSize, setTextSize] = useState("normal");
+
+  const getTextSizeClass = (size) => {
+ switch (size) {
+ case "small":
+ return "text-[12px]";
+ case "large":
+ return "text-[18px]";
+ default:
+ return "text-[14px]";
+ }
+ };
useEffect(() => {
- scrollToBottom();
+ const storedTextSize = window.localStorage.getItem("anythingllm_text_size");
+ if (storedTextSize) {
+ setTextSize(getTextSizeClass(storedTextSize));
+ }
+
+ const handleTextSizeChange = (event) => {
+ const size = event.detail;
+ setTextSize(getTextSizeClass(size));
+ };
+
+ window.addEventListener("textSizeChange", handleTextSizeChange);
+
+ return () => {
+ window.removeEventListener("textSizeChange", handleTextSizeChange);
+ };
+ }, []);
+
+ useEffect(() => {
+ if (isAtBottom) scrollToBottom();
}, [history]);
const handleScroll = () => {
@@ -91,7 +121,7 @@ export default function ChatHistory({ history = [], workspace, sendCommand }) {
return (
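// Editorial note — TextSizeMenu and ChatHistory communicate through a window-level
// CustomEvent plus localStorage, so neither component imports the other. A minimal
// sketch of that contract (names match the diff; the listener mirrors the useEffect above):

// Publisher side (TextSizeMenu): persist the keyword, then broadcast it.
window.localStorage.setItem("anythingllm_text_size", "large");
window.dispatchEvent(new CustomEvent("textSizeChange", { detail: "large" }));

// Subscriber side (ChatHistory): event.detail carries the raw keyword, which
// getTextSizeClass() maps to a Tailwind arbitrary-value class.
window.addEventListener("textSizeChange", (event) => {
  console.log(getTextSizeClass(event.detail)); // => "text-[18px]"
});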
diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/TextSizeMenu/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/TextSizeMenu/index.jsx
new file mode 100644
index 000000000..645e943c0
--- /dev/null
+++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/TextSizeMenu/index.jsx
@@ -0,0 +1,124 @@
+import { useState, useRef, useEffect } from "react";
+import { TextT } from "@phosphor-icons/react";
+import { Tooltip } from "react-tooltip";
+
+export default function TextSizeButton() {
+ const [showTextSizeMenu, setShowTextSizeMenu] = useState(false);
+ const buttonRef = useRef(null);
+
+ return (
+ <>
+      <div
+        id="text-size-btn"
+        ref={buttonRef}
+        data-tooltip-id="tooltip-text-size-btn"
+        data-tooltip-content="Change text size"
+        onClick={() => setShowTextSizeMenu(!showTextSizeMenu)}
+        className={`relative flex justify-center items-center opacity-60 hover:opacity-100 cursor-pointer ${
+          showTextSizeMenu ? "!opacity-100" : ""
+        }`}
+      >
+        <TextT weight="fill" className="w-6 h-6 pointer-events-none text-white" />
+        <Tooltip id="tooltip-text-size-btn" place="top" delayShow={300} className="tooltip !text-xs" />
+      </div>
+      <TextSizeMenu
+        showing={showTextSizeMenu}
+        setShowing={setShowTextSizeMenu}
+        buttonRef={buttonRef}
+      />
+    </>
+ );
+}
+
+function TextSizeMenu({ showing, setShowing, buttonRef }) {
+ const formRef = useRef(null);
+ const [selectedSize, setSelectedSize] = useState(
+ window.localStorage.getItem("anythingllm_text_size") || "normal"
+ );
+
+ useEffect(() => {
+ function listenForOutsideClick() {
+ if (!showing || !formRef.current) return false;
+ document.addEventListener("click", closeIfOutside);
+ }
+ listenForOutsideClick();
+ }, [showing, formRef.current]);
+
+ const closeIfOutside = ({ target }) => {
+ if (target.id === "text-size-btn") return;
+ const isOutside = !formRef?.current?.contains(target);
+ if (!isOutside) return;
+ setShowing(false);
+ };
+
+ const handleTextSizeChange = (size) => {
+ setSelectedSize(size);
+ window.localStorage.setItem("anythingllm_text_size", size);
+ window.dispatchEvent(new CustomEvent("textSizeChange", { detail: size }));
+ };
+
+ if (!buttonRef.current) return null;
+
+  return (
+    <div hidden={!showing}>
+      <div
+        ref={formRef}
+        className="absolute bottom-16 -ml-8 w-[140px] p-2 bg-zinc-800 rounded-lg shadow-md flex flex-col justify-center items-start gap-2"
+      >
+        {["small", "normal", "large"].map((size) => (
+          <button
+            key={size}
+            onClick={() => handleTextSizeChange(size)}
+            className={`w-full text-left px-2 py-1 rounded-md text-sm text-white hover:bg-zinc-700 ${
+              selectedSize === size ? "bg-zinc-700" : ""
+            }`}
+          >
+            {size.charAt(0).toUpperCase() + size.slice(1)}
+          </button>
+        ))}
+      </div>
+    </div>
+  );
+}
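// Editorial note — the outside-click effect above attaches a document-level "click"
// listener but never detaches it, so listeners can pile up across open/close cycles.
// A hedged sketch of the same pattern with cleanup (a suggestion, not what the diff ships):
useEffect(() => {
  if (!showing || !formRef.current) return;
  document.addEventListener("click", closeIfOutside);
  return () => document.removeEventListener("click", closeIfOutside);
}, [showing]);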
diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx
index cf460f3b0..859f84174 100644
--- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx
+++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx
@@ -11,6 +11,7 @@ import AvailableAgentsButton, {
AvailableAgents,
useAvailableAgents,
} from "./AgentMenu";
+import TextSizeButton from "./TextSizeMenu";
export default function PromptInput({
message,
submit,
@@ -137,6 +138,7 @@ export default function PromptInput({
showing={showAgents}
setShowAgents={setShowAgents}
/>
+          <TextSizeButton />
diff --git a/frontend/src/hooks/useGetProvidersModels.js b/frontend/src/hooks/useGetProvidersModels.js
index b6cb403e1..cdfe78214 100644
--- a/frontend/src/hooks/useGetProvidersModels.js
+++ b/frontend/src/hooks/useGetProvidersModels.js
@@ -19,7 +19,13 @@ const PROVIDER_DEFAULT_MODELS = {
localai: [],
ollama: [],
togetherai: [],
- groq: ["llama2-70b-4096", "mixtral-8x7b-32768"],
+ groq: [
+ "llama2-70b-4096",
+ "mixtral-8x7b-32768",
+ "llama3-8b-8192",
+ "llama3-70b-8192",
+ "gemma-7b-it",
+ ],
native: [],
};
@@ -34,7 +40,7 @@ function groupModels(models) {
}, {});
}
-const groupedProviders = ["togetherai", "openai"];
+const groupedProviders = ["togetherai", "openai", "openrouter"];
export default function useGetProviderModels(provider = null) {
const [defaultModels, setDefaultModels] = useState([]);
const [customModels, setCustomModels] = useState([]);
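// Editorial note — adding "openrouter" to groupedProviders works because the new
// OpenRouter model cache (see server/utils/AiProviders/openRouter below) stores an
// `organization` field per model, which groupModels() presumably buckets on. A
// hypothetical sketch of the grouped shape:
const grouped = groupModels([
  { id: "openai/gpt-4", name: "OpenAI: GPT-4", organization: "Openai" },
  { id: "google/gemma-7b-it", name: "Google: Gemma 7B", organization: "Google" },
]);
// => { Openai: [{ id: "openai/gpt-4", ... }], Google: [{ id: "google/gemma-7b-it", ... }] }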
diff --git a/frontend/src/media/llmprovider/generic-openai.png b/frontend/src/media/llmprovider/generic-openai.png
new file mode 100644
index 000000000..302f5dbee
Binary files /dev/null and b/frontend/src/media/llmprovider/generic-openai.png differ
diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
index 25dcd62d3..d8e790f39 100644
--- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx
@@ -30,7 +30,7 @@ const EMBEDDERS = [
logo: AnythingLLMIcon,
     options: (settings) => <NativeEmbeddingOptions settings={settings} />,
description:
- "Use the built-in embedding engine for AnythingLLM. Zero setup!",
+ "Use the built-in embedding provider for AnythingLLM. Zero setup!",
},
{
name: "OpenAI",
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
index 61c843680..64526ba92 100644
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ -4,6 +4,7 @@ import { isMobile } from "react-device-detect";
import System from "@/models/system";
import showToast from "@/utils/toast";
import OpenAiLogo from "@/media/llmprovider/openai.png";
+import GenericOpenAiLogo from "@/media/llmprovider/generic-openai.png";
import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
import AnthropicLogo from "@/media/llmprovider/anthropic.png";
import GeminiLogo from "@/media/llmprovider/gemini.png";
@@ -18,6 +19,7 @@ import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import GroqLogo from "@/media/llmprovider/groq.png";
import PreLoader from "@/components/Preloader";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
+import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
import AnthropicAiOptions from "@/components/LLMSelection/AnthropicAiOptions";
import LMStudioOptions from "@/components/LLMSelection/LMStudioOptions";
@@ -148,6 +150,20 @@ export const AVAILABLE_LLM_PROVIDERS = [
"The fastest LLM inferencing available for real-time AI applications.",
requiredConfig: ["GroqApiKey"],
},
+ {
+ name: "Generic OpenAI",
+ value: "generic-openai",
+ logo: GenericOpenAiLogo,
+    options: (settings) => <GenericOpenAiOptions settings={settings} />,
+    description:
+      "Connect to any OpenAI-compatible service via a custom configuration.",
+ requiredConfig: [
+ "GenericOpenAiBasePath",
+ "GenericOpenAiModelPref",
+ "GenericOpenAiTokenLimit",
+ "GenericOpenAiKey",
+ ],
+ },
// {
// name: "Native",
// value: "native",
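// Editorial note — the four requiredConfig keys map onto environment variables via the
// KEY_MAPPING additions in server/utils/helpers/updateENV.js at the end of this diff.
// A hypothetical .env sketch for pointing AnythingLLM at a local OpenAI-compatible server:
//
//   GENERIC_OPEN_AI_BASE_PATH="http://localhost:8080/v1"  # must pass the isValidURL check
//   GENERIC_OPEN_AI_MODEL_PREF="my-local-model"           # any non-empty model id
//   GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=4096                # must be non-zero
//   GENERIC_OPEN_AI_API_KEY="sk-optional"                 # optional; no checks applied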
diff --git a/frontend/src/pages/GeneralSettings/PrivacyAndData/index.jsx b/frontend/src/pages/GeneralSettings/PrivacyAndData/index.jsx
index c0cd2b199..4075c89a6 100644
--- a/frontend/src/pages/GeneralSettings/PrivacyAndData/index.jsx
+++ b/frontend/src/pages/GeneralSettings/PrivacyAndData/index.jsx
@@ -88,7 +88,9 @@ function ThirdParty({ settings }) {
-            <div className="text-white text-base font-bold">Embedding Engine</div>
+            <div className="text-white text-base font-bold">
+              Embedding Preference
+            </div>
{
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index 11612b99b..548272fe0 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -2,6 +2,7 @@ import PreLoader from "@/components/Preloader";
import System from "@/models/system";
import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
import OpenAiLogo from "@/media/llmprovider/openai.png";
+import GenericOpenAiLogo from "@/media/llmprovider/generic-openai.png";
import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
import AnthropicLogo from "@/media/llmprovider/anthropic.png";
import GeminiLogo from "@/media/llmprovider/gemini.png";
@@ -136,6 +137,13 @@ export const LLM_SELECTION_PRIVACY = {
],
logo: GroqLogo,
},
+ "generic-openai": {
+ name: "Generic OpenAI compatible service",
+ description: [
+ "Data is shared according to the terms of service applicable with your generic endpoint provider.",
+ ],
+ logo: GenericOpenAiLogo,
+ },
};
export const VECTOR_DB_PRIVACY = {
@@ -305,7 +313,9 @@ export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
-            <div className="text-white text-base font-bold">Embedding Engine</div>
+            <div className="text-white text-base font-bold">
+              Embedding Preference
+            </div>
+          <p className="text-white/60 text-sm font-medium py-1">
+            These settings can be reconfigured at any time in the settings.
+          </p>
);
}
diff --git a/frontend/src/pages/OnboardingFlow/Steps/EmbeddingPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/EmbeddingPreference/index.jsx
deleted file mode 100644
index fc44a68de..000000000
--- a/frontend/src/pages/OnboardingFlow/Steps/EmbeddingPreference/index.jsx
+++ /dev/null
@@ -1,186 +0,0 @@
-import { MagnifyingGlass } from "@phosphor-icons/react";
-import { useEffect, useState, useRef } from "react";
-import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
-import OpenAiLogo from "@/media/llmprovider/openai.png";
-import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
-import LocalAiLogo from "@/media/llmprovider/localai.png";
-import OllamaLogo from "@/media/llmprovider/ollama.png";
-import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
-import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbeddingOptions";
-import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions";
-import AzureAiOptions from "@/components/EmbeddingSelection/AzureAiOptions";
-import LocalAiOptions from "@/components/EmbeddingSelection/LocalAiOptions";
-import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions";
-import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions";
-import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem";
-import System from "@/models/system";
-import paths from "@/utils/paths";
-import showToast from "@/utils/toast";
-import { useNavigate } from "react-router-dom";
-
-const TITLE = "Embedding Preference";
-const DESCRIPTION =
- "AnythingLLM can work with many embedding models. This will be the model which turns documents into vectors.";
-const EMBEDDERS = [
- {
- name: "AnythingLLM Embedder",
- value: "native",
- logo: AnythingLLMIcon,
-    options: (settings) => <NativeEmbeddingOptions settings={settings} />,
- description:
- "Use the built-in embedding engine for AnythingLLM. Zero setup!",
- },
- {
- name: "OpenAI",
- value: "openai",
- logo: OpenAiLogo,
-    options: (settings) => <OpenAiOptions settings={settings} />,
- description: "The standard option for most non-commercial use.",
- },
- {
- name: "Azure OpenAI",
- value: "azure",
- logo: AzureOpenAiLogo,
-    options: (settings) => <AzureAiOptions settings={settings} />,
- description: "The enterprise option of OpenAI hosted on Azure services.",
- },
- {
- name: "Local AI",
- value: "localai",
- logo: LocalAiLogo,
-    options: (settings) => <LocalAiOptions settings={settings} />,
- description: "Run embedding models locally on your own machine.",
- },
- {
- name: "Ollama",
- value: "ollama",
- logo: OllamaLogo,
-    options: (settings) => <OllamaEmbeddingOptions settings={settings} />,
- description: "Run embedding models locally on your own machine.",
- },
- {
- name: "LM Studio",
- value: "lmstudio",
- logo: LMStudioLogo,
-    options: (settings) => <LMStudioEmbeddingOptions settings={settings} />,
- description:
- "Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
- },
-];
-
-export default function EmbeddingPreference({
- setHeader,
- setForwardBtn,
- setBackBtn,
-}) {
- const [searchQuery, setSearchQuery] = useState("");
- const [filteredEmbedders, setFilteredEmbedders] = useState([]);
- const [selectedEmbedder, setSelectedEmbedder] = useState(null);
- const [settings, setSettings] = useState(null);
- const formRef = useRef(null);
- const hiddenSubmitButtonRef = useRef(null);
- const navigate = useNavigate();
-
- useEffect(() => {
- async function fetchKeys() {
- const _settings = await System.keys();
- setSettings(_settings);
- setSelectedEmbedder(_settings?.EmbeddingEngine || "native");
- }
- fetchKeys();
- }, []);
-
- function handleForward() {
- if (hiddenSubmitButtonRef.current) {
- hiddenSubmitButtonRef.current.click();
- }
- }
-
- function handleBack() {
- navigate(paths.onboarding.llmPreference());
- }
-
- const handleSubmit = async (e) => {
- e.preventDefault();
- const form = e.target;
- const data = {};
- const formData = new FormData(form);
- data.EmbeddingEngine = selectedEmbedder;
- for (var [key, value] of formData.entries()) data[key] = value;
-
- const { error } = await System.updateSystem(data);
- if (error) {
- showToast(`Failed to save embedding settings: ${error}`, "error");
- return;
- }
- navigate(paths.onboarding.vectorDatabase());
- };
-
- useEffect(() => {
- setHeader({ title: TITLE, description: DESCRIPTION });
- setForwardBtn({ showing: true, disabled: false, onClick: handleForward });
- setBackBtn({ showing: true, disabled: false, onClick: handleBack });
- }, []);
-
- useEffect(() => {
- const filtered = EMBEDDERS.filter((embedder) =>
- embedder.name.toLowerCase().includes(searchQuery.toLowerCase())
- );
- setFilteredEmbedders(filtered);
- }, [searchQuery, selectedEmbedder]);
-
- return (
-
- );
-}
diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
index adf892a96..d7fa55cc5 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
@@ -1,6 +1,7 @@
import { MagnifyingGlass } from "@phosphor-icons/react";
import { useEffect, useState, useRef } from "react";
import OpenAiLogo from "@/media/llmprovider/openai.png";
+import GenericOpenAiLogo from "@/media/llmprovider/generic-openai.png";
import AzureOpenAiLogo from "@/media/llmprovider/azure.png";
import AnthropicLogo from "@/media/llmprovider/anthropic.png";
import GeminiLogo from "@/media/llmprovider/gemini.png";
@@ -14,6 +15,7 @@ import PerplexityLogo from "@/media/llmprovider/perplexity.png";
import OpenRouterLogo from "@/media/llmprovider/openrouter.jpeg";
import GroqLogo from "@/media/llmprovider/groq.png";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
+import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
import AnthropicAiOptions from "@/components/LLMSelection/AnthropicAiOptions";
import LMStudioOptions from "@/components/LLMSelection/LMStudioOptions";
@@ -36,6 +38,120 @@ const TITLE = "LLM Preference";
const DESCRIPTION =
"AnythingLLM can work with many LLM providers. This will be the service which handles chatting.";
+const LLMS = [
+ {
+ name: "OpenAI",
+ value: "openai",
+ logo: OpenAiLogo,
+    options: (settings) => <OpenAiOptions settings={settings} />,
+ description: "The standard option for most non-commercial use.",
+ },
+ {
+ name: "Azure OpenAI",
+ value: "azure",
+ logo: AzureOpenAiLogo,
+    options: (settings) => <AzureAiOptions settings={settings} />,
+ description: "The enterprise option of OpenAI hosted on Azure services.",
+ },
+ {
+ name: "Anthropic",
+ value: "anthropic",
+ logo: AnthropicLogo,
+    options: (settings) => <AnthropicAiOptions settings={settings} />,
+ description: "A friendly AI Assistant hosted by Anthropic.",
+ },
+ {
+ name: "Gemini",
+ value: "gemini",
+ logo: GeminiLogo,
+    options: (settings) => <GeminiLLMOptions settings={settings} />,
+ description: "Google's largest and most capable AI model",
+ },
+ {
+ name: "HuggingFace",
+ value: "huggingface",
+ logo: HuggingFaceLogo,
+    options: (settings) => <HuggingFaceOptions settings={settings} />,
+ description:
+ "Access 150,000+ open-source LLMs and the world's AI community",
+ },
+ {
+ name: "Ollama",
+ value: "ollama",
+ logo: OllamaLogo,
+    options: (settings) => <OllamaLLMOptions settings={settings} />,
+ description: "Run LLMs locally on your own machine.",
+ },
+ {
+ name: "LM Studio",
+ value: "lmstudio",
+ logo: LMStudioLogo,
+    options: (settings) => <LMStudioOptions settings={settings} />,
+ description:
+ "Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
+ },
+ {
+ name: "Local AI",
+ value: "localai",
+ logo: LocalAiLogo,
+    options: (settings) => <LocalAiOptions settings={settings} />,
+ description: "Run LLMs locally on your own machine.",
+ },
+ {
+ name: "Together AI",
+ value: "togetherai",
+ logo: TogetherAILogo,
+    options: (settings) => <TogetherAiOptions settings={settings} />,
+ description: "Run open source models from Together AI.",
+ },
+ {
+ name: "Mistral",
+ value: "mistral",
+ logo: MistralLogo,
+    options: (settings) => <MistralOptions settings={settings} />,
+ description: "Run open source models from Mistral AI.",
+ },
+ {
+ name: "Perplexity AI",
+ value: "perplexity",
+ logo: PerplexityLogo,
+    options: (settings) => <PerplexityOptions settings={settings} />,
+ description:
+ "Run powerful and internet-connected models hosted by Perplexity AI.",
+ },
+ {
+ name: "OpenRouter",
+ value: "openrouter",
+ logo: OpenRouterLogo,
+    options: (settings) => <OpenRouterOptions settings={settings} />,
+ description: "A unified interface for LLMs.",
+ },
+ {
+ name: "Groq",
+ value: "groq",
+ logo: GroqLogo,
+    options: (settings) => <GroqAiOptions settings={settings} />,
+ description:
+ "The fastest LLM inferencing available for real-time AI applications.",
+ },
+ {
+ name: "Generic OpenAI",
+ value: "generic-openai",
+ logo: GenericOpenAiLogo,
+    options: (settings) => <GenericOpenAiOptions settings={settings} />,
+    description:
+      "Connect to any OpenAI-compatible service via a custom configuration.",
+ },
+ // {
+ // name: "Native",
+ // value: "native",
+ // logo: AnythingLLMIcon,
+  //   options: (settings) => <NativeLLMOptions settings={settings} />,
+ // description:
+ // "Use a downloaded custom Llama model for chatting on this AnythingLLM instance.",
+ // },
+];
+
export default function LLMPreference({
setHeader,
setForwardBtn,
@@ -59,104 +175,6 @@ export default function LLMPreference({
fetchKeys();
}, []);
- const LLMS = [
- {
- name: "OpenAI",
- value: "openai",
- logo: OpenAiLogo,
-      options: <OpenAiOptions settings={settings} />,
- description: "The standard option for most non-commercial use.",
- },
- {
- name: "Azure OpenAI",
- value: "azure",
- logo: AzureOpenAiLogo,
-      options: <AzureAiOptions settings={settings} />,
- description: "The enterprise option of OpenAI hosted on Azure services.",
- },
- {
- name: "Anthropic",
- value: "anthropic",
- logo: AnthropicLogo,
-      options: <AnthropicAiOptions settings={settings} />,
- description: "A friendly AI Assistant hosted by Anthropic.",
- },
- {
- name: "Gemini",
- value: "gemini",
- logo: GeminiLogo,
-      options: <GeminiLLMOptions settings={settings} />,
- description: "Google's largest and most capable AI model",
- },
- {
- name: "HuggingFace",
- value: "huggingface",
- logo: HuggingFaceLogo,
-      options: <HuggingFaceOptions settings={settings} />,
- description:
- "Access 150,000+ open-source LLMs and the world's AI community",
- },
- {
- name: "Ollama",
- value: "ollama",
- logo: OllamaLogo,
-      options: <OllamaLLMOptions settings={settings} />,
- description: "Run LLMs locally on your own machine.",
- },
- {
- name: "LM Studio",
- value: "lmstudio",
- logo: LMStudioLogo,
-      options: <LMStudioOptions settings={settings} />,
- description:
- "Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
- },
- {
- name: "Local AI",
- value: "localai",
- logo: LocalAiLogo,
-      options: <LocalAiOptions settings={settings} />,
- description: "Run LLMs locally on your own machine.",
- },
- {
- name: "Together AI",
- value: "togetherai",
- logo: TogetherAILogo,
-      options: <TogetherAiOptions settings={settings} />,
- description: "Run open source models from Together AI.",
- },
- {
- name: "Mistral",
- value: "mistral",
- logo: MistralLogo,
-      options: <MistralOptions settings={settings} />,
- description: "Run open source models from Mistral AI.",
- },
- {
- name: "Perplexity AI",
- value: "perplexity",
- logo: PerplexityLogo,
-      options: <PerplexityOptions settings={settings} />,
- description:
- "Run powerful and internet-connected models hosted by Perplexity AI.",
- },
- {
- name: "OpenRouter",
- value: "openrouter",
- logo: OpenRouterLogo,
-      options: <OpenRouterOptions settings={settings} />,
- description: "A unified interface for LLMs.",
- },
- {
- name: "Groq",
- value: "groq",
- logo: GroqLogo,
-      options: <GroqAiOptions settings={settings} />,
- description:
- "The fastest LLM inferencing available for real-time AI applications.",
- },
- ];
-
function handleForward() {
if (hiddenSubmitButtonRef.current) {
hiddenSubmitButtonRef.current.click();
@@ -172,6 +190,9 @@ export default function LLMPreference({
const data = {};
const formData = new FormData(form);
data.LLMProvider = selectedLLM;
+ // Default to AnythingLLM embedder and LanceDB
+ data.EmbeddingEngine = "native";
+ data.VectorDB = "lancedb";
for (var [key, value] of formData.entries()) data[key] = value;
const { error } = await System.updateSystem(data);
@@ -179,7 +200,7 @@ export default function LLMPreference({
showToast(`Failed to save LLM settings: ${error}`, "error");
return;
}
- navigate(paths.onboarding.embeddingPreference());
+ navigate(paths.onboarding.customLogo());
};
useEffect(() => {
@@ -236,7 +257,7 @@ export default function LLMPreference({
{selectedLLM &&
- LLMS.find((llm) => llm.value === selectedLLM)?.options}
+ LLMS.find((llm) => llm.value === selectedLLM)?.options(settings)}
)}
- {selectedLLM !== "default" && (
+ {!NO_MODEL_SELECTION.includes(selectedLLM) && (
v.startsWith("@"));
},
diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore
index 556246672..6ed579fa3 100644
--- a/server/storage/models/.gitignore
+++ b/server/storage/models/.gitignore
@@ -1,3 +1,4 @@
Xenova
downloaded/*
-!downloaded/.placeholder
\ No newline at end of file
+!downloaded/.placeholder
+openrouter
\ No newline at end of file
diff --git a/server/utils/AiProviders/genericOpenAi/index.js b/server/utils/AiProviders/genericOpenAi/index.js
new file mode 100644
index 000000000..61e7bccf0
--- /dev/null
+++ b/server/utils/AiProviders/genericOpenAi/index.js
@@ -0,0 +1,193 @@
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const { chatPrompt } = require("../../chats");
+const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
+
+class GenericOpenAiLLM {
+ constructor(embedder = null, modelPreference = null) {
+ const { Configuration, OpenAIApi } = require("openai");
+ if (!process.env.GENERIC_OPEN_AI_BASE_PATH)
+ throw new Error(
+ "GenericOpenAI must have a valid base path to use for the api."
+ );
+
+ this.basePath = process.env.GENERIC_OPEN_AI_BASE_PATH;
+ const config = new Configuration({
+ basePath: this.basePath,
+ apiKey: process.env.GENERIC_OPEN_AI_API_KEY ?? null,
+ });
+ this.openai = new OpenAIApi(config);
+ this.model =
+ modelPreference ?? process.env.GENERIC_OPEN_AI_MODEL_PREF ?? null;
+ if (!this.model)
+ throw new Error("GenericOpenAI must have a valid model set.");
+ this.limits = {
+ history: this.promptWindowLimit() * 0.15,
+ system: this.promptWindowLimit() * 0.15,
+ user: this.promptWindowLimit() * 0.7,
+ };
+
+ if (!embedder)
+ console.warn(
+ "No embedding provider defined for GenericOpenAiLLM - falling back to NativeEmbedder for embedding!"
+ );
+ this.embedder = !embedder ? new NativeEmbedder() : embedder;
+ this.defaultTemp = 0.7;
+ this.log(`Inference API: ${this.basePath} Model: ${this.model}`);
+ }
+
+ log(text, ...args) {
+ console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+ }
+
+ #appendContext(contextTexts = []) {
+ if (!contextTexts || !contextTexts.length) return "";
+ return (
+ "\nContext:\n" +
+ contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")
+ );
+ }
+
+ streamingEnabled() {
+ return "streamChat" in this && "streamGetChatCompletion" in this;
+ }
+
+ // Ensure the user set a value for the token limit
+ // and if undefined - assume 4096 window.
+ promptWindowLimit() {
+ const limit = process.env.GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT || 4096;
+ if (!limit || isNaN(Number(limit)))
+ throw new Error("No token context limit was set.");
+ return Number(limit);
+ }
+
+ // Short circuit since we have no idea if the model is valid or not
+ // in pre-flight for generic endpoints
+ isValidChatCompletionModel(_modelName = "") {
+ return true;
+ }
+
+ constructPrompt({
+ systemPrompt = "",
+ contextTexts = [],
+ chatHistory = [],
+ userPrompt = "",
+ }) {
+ const prompt = {
+ role: "system",
+ content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+ };
+ return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
+ }
+
+ async isSafe(_input = "") {
+ // Not implemented so must be stubbed
+ return { safe: true, reasons: [] };
+ }
+
+ async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+ const textResponse = await this.openai
+ .createChatCompletion({
+ model: this.model,
+ temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
+ n: 1,
+ messages: await this.compressMessages(
+ {
+ systemPrompt: chatPrompt(workspace),
+ userPrompt: prompt,
+ chatHistory,
+ },
+ rawHistory
+ ),
+ })
+ .then((json) => {
+ const res = json.data;
+ if (!res.hasOwnProperty("choices"))
+ throw new Error("GenericOpenAI chat: No results!");
+ if (res.choices.length === 0)
+ throw new Error("GenericOpenAI chat: No results length!");
+ return res.choices[0].message.content;
+ })
+ .catch((error) => {
+ throw new Error(
+ `GenericOpenAI::createChatCompletion failed with: ${error.message}`
+ );
+ });
+
+ return textResponse;
+ }
+
+ async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
+ const streamRequest = await this.openai.createChatCompletion(
+ {
+ model: this.model,
+ stream: true,
+ temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
+ n: 1,
+ messages: await this.compressMessages(
+ {
+ systemPrompt: chatPrompt(workspace),
+ userPrompt: prompt,
+ chatHistory,
+ },
+ rawHistory
+ ),
+ },
+ { responseType: "stream" }
+ );
+ return streamRequest;
+ }
+
+ async getChatCompletion(messages = null, { temperature = 0.7 }) {
+ const { data } = await this.openai
+ .createChatCompletion({
+ model: this.model,
+ messages,
+ temperature,
+ })
+ .catch((e) => {
+ throw new Error(e.response.data.error.message);
+ });
+
+ if (!data.hasOwnProperty("choices")) return null;
+ return data.choices[0].message.content;
+ }
+
+ async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+ const streamRequest = await this.openai.createChatCompletion(
+ {
+ model: this.model,
+ stream: true,
+ messages,
+ temperature,
+ },
+ { responseType: "stream" }
+ );
+ return streamRequest;
+ }
+
+ handleStream(response, stream, responseProps) {
+ return handleDefaultStreamResponse(response, stream, responseProps);
+ }
+
+ // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+ async embedTextInput(textInput) {
+ return await this.embedder.embedTextInput(textInput);
+ }
+ async embedChunks(textChunks = []) {
+ return await this.embedder.embedChunks(textChunks);
+ }
+
+ async compressMessages(promptArgs = {}, rawHistory = []) {
+ const { messageArrayCompressor } = require("../../helpers/chat");
+ const messageArray = this.constructPrompt(promptArgs);
+ return await messageArrayCompressor(this, messageArray, rawHistory);
+ }
+}
+
+module.exports = {
+ GenericOpenAiLLM,
+};
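// Editorial note — a minimal usage sketch for the new provider, assuming the
// GENERIC_OPEN_AI_* env vars are set (the getLLMProvider wiring is added in
// server/utils/helpers/index.js later in this diff):
async function demo() {
  const { getLLMProvider } = require("../../helpers");
  const llm = getLLMProvider({ provider: "generic-openai" });
  const messages = llm.constructPrompt({
    systemPrompt: "You are a helpful assistant.",
    contextTexts: [],
    chatHistory: [],
    userPrompt: "Hello!",
  });
  const reply = await llm.getChatCompletion(messages, { temperature: 0.7 });
  console.log(reply);
}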
diff --git a/server/utils/AiProviders/groq/index.js b/server/utils/AiProviders/groq/index.js
index 1b15fe1fe..c556d0357 100644
--- a/server/utils/AiProviders/groq/index.js
+++ b/server/utils/AiProviders/groq/index.js
@@ -40,20 +40,31 @@ class GroqLLM {
streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this;
}
-
promptWindowLimit() {
switch (this.model) {
case "llama2-70b-4096":
return 4096;
case "mixtral-8x7b-32768":
return 32_768;
+ case "llama3-8b-8192":
+ return 8192;
+ case "llama3-70b-8192":
+ return 8192;
+ case "gemma-7b-it":
+ return 8192;
default:
return 4096;
}
}
async isValidChatCompletionModel(modelName = "") {
- const validModels = ["llama2-70b-4096", "mixtral-8x7b-32768"];
+ const validModels = [
+ "llama2-70b-4096",
+ "mixtral-8x7b-32768",
+ "llama3-8b-8192",
+ "llama3-70b-8192",
+ "gemma-7b-it",
+ ];
const isPreset = validModels.some((model) => modelName === model);
if (isPreset) return true;
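// Editorial note — the three new cases all return 8192, so the switch could collapse
// into a lookup table. A behavior-identical sketch (a suggestion, not what the diff ships):
const GROQ_CONTEXT_WINDOWS = {
  "llama2-70b-4096": 4096,
  "mixtral-8x7b-32768": 32_768,
  "llama3-8b-8192": 8192,
  "llama3-70b-8192": 8192,
  "gemma-7b-it": 8192,
};
// promptWindowLimit() { return GROQ_CONTEXT_WINDOWS[this.model] ?? 4096; }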
diff --git a/server/utils/AiProviders/openRouter/index.js b/server/utils/AiProviders/openRouter/index.js
index a1f606f60..dbed87fa3 100644
--- a/server/utils/AiProviders/openRouter/index.js
+++ b/server/utils/AiProviders/openRouter/index.js
@@ -5,11 +5,9 @@ const {
writeResponseChunk,
clientAbortedHandler,
} = require("../../helpers/chat/responses");
-
-function openRouterModels() {
- const { MODELS } = require("./models.js");
- return MODELS || {};
-}
+const fs = require("fs");
+const path = require("path");
+const { safeJsonParse } = require("../../http");
class OpenRouterLLM {
constructor(embedder = null, modelPreference = null) {
@@ -17,8 +15,9 @@ class OpenRouterLLM {
if (!process.env.OPENROUTER_API_KEY)
throw new Error("No OpenRouter API key was set.");
+ this.basePath = "https://openrouter.ai/api/v1";
const config = new Configuration({
- basePath: "https://openrouter.ai/api/v1",
+ basePath: this.basePath,
apiKey: process.env.OPENROUTER_API_KEY,
baseOptions: {
headers: {
@@ -38,6 +37,81 @@ class OpenRouterLLM {
this.embedder = !embedder ? new NativeEmbedder() : embedder;
this.defaultTemp = 0.7;
+
+ const cacheFolder = path.resolve(
+ process.env.STORAGE_DIR
+ ? path.resolve(process.env.STORAGE_DIR, "models", "openrouter")
+ : path.resolve(__dirname, `../../../storage/models/openrouter`)
+ );
+ fs.mkdirSync(cacheFolder, { recursive: true });
+ this.cacheModelPath = path.resolve(cacheFolder, "models.json");
+ this.cacheAtPath = path.resolve(cacheFolder, ".cached_at");
+ }
+
+ log(text, ...args) {
+ console.log(`\x1b[36m[${this.constructor.name}]\x1b[0m ${text}`, ...args);
+ }
+
+ async init() {
+ await this.#syncModels();
+ return this;
+ }
+
+  // This checks if the .cached_at file has a timestamp more than one week (in ms)
+  // older than the current date. If it does, we refetch from the API so that all
+  // the models are up to date.
+ #cacheIsStale() {
+ const MAX_STALE = 6.048e8; // 1 Week in MS
+ if (!fs.existsSync(this.cacheAtPath)) return true;
+ const now = Number(new Date());
+ const timestampMs = Number(fs.readFileSync(this.cacheAtPath));
+ return now - timestampMs > MAX_STALE;
+ }
+
+  // The OpenRouter model API lists a lot of models, so we cache the list locally in the
+  // storage directory; if the cached JSON file is stale or does not exist, we fetch it from
+  // the API and store it. This might slow down the first request, but we need the proper
+  // token context window for each model, and since that is read in the constructor we can
+  // only get it reliably from this cache. We used to maintain this list as a chore, but
+  // given there is an API for the info, that made little sense.
+ async #syncModels() {
+ if (fs.existsSync(this.cacheModelPath) && !this.#cacheIsStale())
+ return false;
+
+ this.log(
+ "Model cache is not present or stale. Fetching from OpenRouter API."
+ );
+ await fetch(`${this.basePath}/models`, {
+ method: "GET",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ })
+ .then((res) => res.json())
+ .then(({ data = [] }) => {
+ const models = {};
+ data.forEach((model) => {
+ models[model.id] = {
+ id: model.id,
+ name: model.name,
+ organization:
+ model.id.split("/")[0].charAt(0).toUpperCase() +
+ model.id.split("/")[0].slice(1),
+ maxLength: model.context_length,
+ };
+ });
+ fs.writeFileSync(this.cacheModelPath, JSON.stringify(models), {
+ encoding: "utf-8",
+ });
+ fs.writeFileSync(this.cacheAtPath, String(Number(new Date())), {
+ encoding: "utf-8",
+ });
+ return models;
+ })
+ .catch((e) => {
+ console.error(e);
+ return {};
+ });
+ return;
}
#appendContext(contextTexts = []) {
@@ -52,8 +126,12 @@ class OpenRouterLLM {
);
}
- allModelInformation() {
- return openRouterModels();
+ models() {
+ if (!fs.existsSync(this.cacheModelPath)) return {};
+ return safeJsonParse(
+ fs.readFileSync(this.cacheModelPath, { encoding: "utf-8" }),
+ {}
+ );
}
streamingEnabled() {
@@ -61,12 +139,13 @@ class OpenRouterLLM {
}
promptWindowLimit() {
- const availableModels = this.allModelInformation();
+ const availableModels = this.models();
return availableModels[this.model]?.maxLength || 4096;
}
async isValidChatCompletionModel(model = "") {
- const availableModels = this.allModelInformation();
+ await this.#syncModels();
+ const availableModels = this.models();
return availableModels.hasOwnProperty(model);
}
@@ -343,5 +422,4 @@ class OpenRouterLLM {
module.exports = {
OpenRouterLLM,
- openRouterModels,
};
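// Editorial note — a quick check of the staleness constants above: MAX_STALE of 6.048e8 ms
// is exactly one week (7 * 24 * 60 * 60 * 1000 = 604,800,000), and .cached_at stores
// Number(new Date()), i.e. an epoch-millisecond timestamp. The staleness test reduces to:
const MAX_STALE = 7 * 24 * 60 * 60 * 1000; // 604800000 === 6.048e8
const cachedAt = Number(fs.readFileSync(cacheAtPath)); // hypothetical path variable
const isStale = Date.now() - cachedAt > MAX_STALE;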
diff --git a/server/utils/AiProviders/openRouter/models.js b/server/utils/AiProviders/openRouter/models.js
deleted file mode 100644
index 4c2d7946c..000000000
--- a/server/utils/AiProviders/openRouter/models.js
+++ /dev/null
@@ -1,778 +0,0 @@
-const MODELS = {
- "openrouter/auto": {
- id: "openrouter/auto",
- name: "Auto (best for prompt)",
- organization: "Openrouter",
- maxLength: 128000,
- },
- "nousresearch/nous-capybara-7b:free": {
- id: "nousresearch/nous-capybara-7b:free",
- name: "Nous: Capybara 7B (free)",
- organization: "Nousresearch",
- maxLength: 4096,
- },
- "mistralai/mistral-7b-instruct:free": {
- id: "mistralai/mistral-7b-instruct:free",
- name: "Mistral 7B Instruct (free)",
- organization: "Mistralai",
- maxLength: 32768,
- },
- "openchat/openchat-7b:free": {
- id: "openchat/openchat-7b:free",
- name: "OpenChat 3.5 (free)",
- organization: "Openchat",
- maxLength: 8192,
- },
- "gryphe/mythomist-7b:free": {
- id: "gryphe/mythomist-7b:free",
- name: "MythoMist 7B (free)",
- organization: "Gryphe",
- maxLength: 32768,
- },
- "undi95/toppy-m-7b:free": {
- id: "undi95/toppy-m-7b:free",
- name: "Toppy M 7B (free)",
- organization: "Undi95",
- maxLength: 4096,
- },
- "openrouter/cinematika-7b:free": {
- id: "openrouter/cinematika-7b:free",
- name: "Cinematika 7B (alpha) (free)",
- organization: "Openrouter",
- maxLength: 8000,
- },
- "google/gemma-7b-it:free": {
- id: "google/gemma-7b-it:free",
- name: "Google: Gemma 7B (free)",
- organization: "Google",
- maxLength: 8192,
- },
- "jebcarter/psyfighter-13b": {
- id: "jebcarter/psyfighter-13b",
- name: "Psyfighter 13B",
- organization: "Jebcarter",
- maxLength: 4096,
- },
- "koboldai/psyfighter-13b-2": {
- id: "koboldai/psyfighter-13b-2",
- name: "Psyfighter v2 13B",
- organization: "Koboldai",
- maxLength: 4096,
- },
- "intel/neural-chat-7b": {
- id: "intel/neural-chat-7b",
- name: "Neural Chat 7B v3.1",
- organization: "Intel",
- maxLength: 4096,
- },
- "haotian-liu/llava-13b": {
- id: "haotian-liu/llava-13b",
- name: "Llava 13B",
- organization: "Haotian-liu",
- maxLength: 2048,
- },
- "nousresearch/nous-hermes-2-vision-7b": {
- id: "nousresearch/nous-hermes-2-vision-7b",
- name: "Nous: Hermes 2 Vision 7B (alpha)",
- organization: "Nousresearch",
- maxLength: 4096,
- },
- "meta-llama/llama-2-13b-chat": {
- id: "meta-llama/llama-2-13b-chat",
- name: "Meta: Llama v2 13B Chat",
- organization: "Meta-llama",
- maxLength: 4096,
- },
- "migtissera/synthia-70b": {
- id: "migtissera/synthia-70b",
- name: "Synthia 70B",
- organization: "Migtissera",
- maxLength: 8192,
- },
- "pygmalionai/mythalion-13b": {
- id: "pygmalionai/mythalion-13b",
- name: "Pygmalion: Mythalion 13B",
- organization: "Pygmalionai",
- maxLength: 8192,
- },
- "xwin-lm/xwin-lm-70b": {
- id: "xwin-lm/xwin-lm-70b",
- name: "Xwin 70B",
- organization: "Xwin-lm",
- maxLength: 8192,
- },
- "alpindale/goliath-120b": {
- id: "alpindale/goliath-120b",
- name: "Goliath 120B",
- organization: "Alpindale",
- maxLength: 6144,
- },
- "neversleep/noromaid-20b": {
- id: "neversleep/noromaid-20b",
- name: "Noromaid 20B",
- organization: "Neversleep",
- maxLength: 8192,
- },
- "gryphe/mythomist-7b": {
- id: "gryphe/mythomist-7b",
- name: "MythoMist 7B",
- organization: "Gryphe",
- maxLength: 32768,
- },
- "sophosympatheia/midnight-rose-70b": {
- id: "sophosympatheia/midnight-rose-70b",
- name: "Midnight Rose 70B",
- organization: "Sophosympatheia",
- maxLength: 4096,
- },
- "undi95/remm-slerp-l2-13b:extended": {
- id: "undi95/remm-slerp-l2-13b:extended",
- name: "ReMM SLERP 13B (extended)",
- organization: "Undi95",
- maxLength: 6144,
- },
- "mancer/weaver": {
- id: "mancer/weaver",
- name: "Mancer: Weaver (alpha)",
- organization: "Mancer",
- maxLength: 8000,
- },
- "nousresearch/nous-hermes-llama2-13b": {
- id: "nousresearch/nous-hermes-llama2-13b",
- name: "Nous: Hermes 13B",
- organization: "Nousresearch",
- maxLength: 4096,
- },
- "nousresearch/nous-capybara-7b": {
- id: "nousresearch/nous-capybara-7b",
- name: "Nous: Capybara 7B",
- organization: "Nousresearch",
- maxLength: 4096,
- },
- "meta-llama/codellama-34b-instruct": {
- id: "meta-llama/codellama-34b-instruct",
- name: "Meta: CodeLlama 34B Instruct",
- organization: "Meta-llama",
- maxLength: 8192,
- },
- "codellama/codellama-70b-instruct": {
- id: "codellama/codellama-70b-instruct",
- name: "Meta: CodeLlama 70B Instruct",
- organization: "Codellama",
- maxLength: 2048,
- },
- "phind/phind-codellama-34b": {
- id: "phind/phind-codellama-34b",
- name: "Phind: CodeLlama 34B v2",
- organization: "Phind",
- maxLength: 4096,
- },
- "teknium/openhermes-2-mistral-7b": {
- id: "teknium/openhermes-2-mistral-7b",
- name: "OpenHermes 2 Mistral 7B",
- organization: "Teknium",
- maxLength: 4096,
- },
- "teknium/openhermes-2.5-mistral-7b": {
- id: "teknium/openhermes-2.5-mistral-7b",
- name: "OpenHermes 2.5 Mistral 7B",
- organization: "Teknium",
- maxLength: 4096,
- },
- "undi95/remm-slerp-l2-13b": {
- id: "undi95/remm-slerp-l2-13b",
- name: "ReMM SLERP 13B",
- organization: "Undi95",
- maxLength: 4096,
- },
- "openrouter/cinematika-7b": {
- id: "openrouter/cinematika-7b",
- name: "Cinematika 7B (alpha)",
- organization: "Openrouter",
- maxLength: 8000,
- },
- "01-ai/yi-34b-chat": {
- id: "01-ai/yi-34b-chat",
- name: "Yi 34B Chat",
- organization: "01-ai",
- maxLength: 4096,
- },
- "01-ai/yi-34b": {
- id: "01-ai/yi-34b",
- name: "Yi 34B (base)",
- organization: "01-ai",
- maxLength: 4096,
- },
- "01-ai/yi-6b": {
- id: "01-ai/yi-6b",
- name: "Yi 6B (base)",
- organization: "01-ai",
- maxLength: 4096,
- },
- "togethercomputer/stripedhyena-nous-7b": {
- id: "togethercomputer/stripedhyena-nous-7b",
- name: "StripedHyena Nous 7B",
- organization: "Togethercomputer",
- maxLength: 32768,
- },
- "togethercomputer/stripedhyena-hessian-7b": {
- id: "togethercomputer/stripedhyena-hessian-7b",
- name: "StripedHyena Hessian 7B (base)",
- organization: "Togethercomputer",
- maxLength: 32768,
- },
- "mistralai/mixtral-8x7b": {
- id: "mistralai/mixtral-8x7b",
- name: "Mixtral 8x7B (base)",
- organization: "Mistralai",
- maxLength: 32768,
- },
- "nousresearch/nous-hermes-yi-34b": {
- id: "nousresearch/nous-hermes-yi-34b",
- name: "Nous: Hermes 2 Yi 34B",
- organization: "Nousresearch",
- maxLength: 4096,
- },
- "nousresearch/nous-hermes-2-mixtral-8x7b-sft": {
- id: "nousresearch/nous-hermes-2-mixtral-8x7b-sft",
- name: "Nous: Hermes 2 Mixtral 8x7B SFT",
- organization: "Nousresearch",
- maxLength: 32000,
- },
- "nousresearch/nous-hermes-2-mistral-7b-dpo": {
- id: "nousresearch/nous-hermes-2-mistral-7b-dpo",
- name: "Nous: Hermes 2 Mistral 7B DPO",
- organization: "Nousresearch",
- maxLength: 8192,
- },
- "open-orca/mistral-7b-openorca": {
- id: "open-orca/mistral-7b-openorca",
- name: "Mistral OpenOrca 7B",
- organization: "Open-orca",
- maxLength: 8192,
- },
- "huggingfaceh4/zephyr-7b-beta": {
- id: "huggingfaceh4/zephyr-7b-beta",
- name: "Hugging Face: Zephyr 7B",
- organization: "Huggingfaceh4",
- maxLength: 4096,
- },
- "openai/gpt-3.5-turbo": {
- id: "openai/gpt-3.5-turbo",
- name: "OpenAI: GPT-3.5 Turbo",
- organization: "Openai",
- maxLength: 16385,
- },
- "openai/gpt-3.5-turbo-0125": {
- id: "openai/gpt-3.5-turbo-0125",
- name: "OpenAI: GPT-3.5 Turbo 16k",
- organization: "Openai",
- maxLength: 16385,
- },
- "openai/gpt-3.5-turbo-1106": {
- id: "openai/gpt-3.5-turbo-1106",
- name: "OpenAI: GPT-3.5 Turbo 16k (older v1106)",
- organization: "Openai",
- maxLength: 16385,
- },
- "openai/gpt-3.5-turbo-0613": {
- id: "openai/gpt-3.5-turbo-0613",
- name: "OpenAI: GPT-3.5 Turbo (older v0613)",
- organization: "Openai",
- maxLength: 4095,
- },
- "openai/gpt-3.5-turbo-0301": {
- id: "openai/gpt-3.5-turbo-0301",
- name: "OpenAI: GPT-3.5 Turbo (older v0301)",
- organization: "Openai",
- maxLength: 4095,
- },
- "openai/gpt-3.5-turbo-16k": {
- id: "openai/gpt-3.5-turbo-16k",
- name: "OpenAI: GPT-3.5 Turbo 16k",
- organization: "Openai",
- maxLength: 16385,
- },
- "openai/gpt-4-turbo": {
- id: "openai/gpt-4-turbo",
- name: "OpenAI: GPT-4 Turbo",
- organization: "Openai",
- maxLength: 128000,
- },
- "openai/gpt-4-turbo-preview": {
- id: "openai/gpt-4-turbo-preview",
- name: "OpenAI: GPT-4 Turbo Preview",
- organization: "Openai",
- maxLength: 128000,
- },
- "openai/gpt-4-1106-preview": {
- id: "openai/gpt-4-1106-preview",
- name: "OpenAI: GPT-4 Turbo (older v1106)",
- organization: "Openai",
- maxLength: 128000,
- },
- "openai/gpt-4": {
- id: "openai/gpt-4",
- name: "OpenAI: GPT-4",
- organization: "Openai",
- maxLength: 8191,
- },
- "openai/gpt-4-0314": {
- id: "openai/gpt-4-0314",
- name: "OpenAI: GPT-4 (older v0314)",
- organization: "Openai",
- maxLength: 8191,
- },
- "openai/gpt-4-32k": {
- id: "openai/gpt-4-32k",
- name: "OpenAI: GPT-4 32k",
- organization: "Openai",
- maxLength: 32767,
- },
- "openai/gpt-4-32k-0314": {
- id: "openai/gpt-4-32k-0314",
- name: "OpenAI: GPT-4 32k (older v0314)",
- organization: "Openai",
- maxLength: 32767,
- },
- "openai/gpt-4-vision-preview": {
- id: "openai/gpt-4-vision-preview",
- name: "OpenAI: GPT-4 Vision",
- organization: "Openai",
- maxLength: 128000,
- },
- "openai/gpt-3.5-turbo-instruct": {
- id: "openai/gpt-3.5-turbo-instruct",
- name: "OpenAI: GPT-3.5 Turbo Instruct",
- organization: "Openai",
- maxLength: 4095,
- },
- "google/palm-2-chat-bison": {
- id: "google/palm-2-chat-bison",
- name: "Google: PaLM 2 Chat",
- organization: "Google",
- maxLength: 25804,
- },
- "google/palm-2-codechat-bison": {
- id: "google/palm-2-codechat-bison",
- name: "Google: PaLM 2 Code Chat",
- organization: "Google",
- maxLength: 20070,
- },
- "google/palm-2-chat-bison-32k": {
- id: "google/palm-2-chat-bison-32k",
- name: "Google: PaLM 2 Chat 32k",
- organization: "Google",
- maxLength: 91750,
- },
- "google/palm-2-codechat-bison-32k": {
- id: "google/palm-2-codechat-bison-32k",
- name: "Google: PaLM 2 Code Chat 32k",
- organization: "Google",
- maxLength: 91750,
- },
- "google/gemini-pro": {
- id: "google/gemini-pro",
- name: "Google: Gemini Pro 1.0",
- organization: "Google",
- maxLength: 91728,
- },
- "google/gemini-pro-vision": {
- id: "google/gemini-pro-vision",
- name: "Google: Gemini Pro Vision 1.0",
- organization: "Google",
- maxLength: 45875,
- },
- "google/gemini-pro-1.5": {
- id: "google/gemini-pro-1.5",
- name: "Google: Gemini Pro 1.5 (preview)",
- organization: "Google",
- maxLength: 2800000,
- },
- "perplexity/pplx-70b-online": {
- id: "perplexity/pplx-70b-online",
- name: "Perplexity: PPLX 70B Online",
- organization: "Perplexity",
- maxLength: 4096,
- },
- "perplexity/pplx-7b-online": {
- id: "perplexity/pplx-7b-online",
- name: "Perplexity: PPLX 7B Online",
- organization: "Perplexity",
- maxLength: 4096,
- },
- "perplexity/pplx-7b-chat": {
- id: "perplexity/pplx-7b-chat",
- name: "Perplexity: PPLX 7B Chat",
- organization: "Perplexity",
- maxLength: 8192,
- },
- "perplexity/pplx-70b-chat": {
- id: "perplexity/pplx-70b-chat",
- name: "Perplexity: PPLX 70B Chat",
- organization: "Perplexity",
- maxLength: 4096,
- },
- "perplexity/sonar-small-chat": {
- id: "perplexity/sonar-small-chat",
- name: "Perplexity: Sonar 7B",
- organization: "Perplexity",
- maxLength: 16384,
- },
- "perplexity/sonar-medium-chat": {
- id: "perplexity/sonar-medium-chat",
- name: "Perplexity: Sonar 8x7B",
- organization: "Perplexity",
- maxLength: 16384,
- },
- "perplexity/sonar-small-online": {
- id: "perplexity/sonar-small-online",
- name: "Perplexity: Sonar 7B Online",
- organization: "Perplexity",
- maxLength: 12000,
- },
- "perplexity/sonar-medium-online": {
- id: "perplexity/sonar-medium-online",
- name: "Perplexity: Sonar 8x7B Online",
- organization: "Perplexity",
- maxLength: 12000,
- },
- "fireworks/mixtral-8x22b-instruct-preview": {
- id: "fireworks/mixtral-8x22b-instruct-preview",
- name: "Fireworks Mixtral 8x22B Instruct OH (preview)",
- organization: "Fireworks",
- maxLength: 8192,
- },
- "anthropic/claude-3-opus": {
- id: "anthropic/claude-3-opus",
- name: "Anthropic: Claude 3 Opus",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-3-sonnet": {
- id: "anthropic/claude-3-sonnet",
- name: "Anthropic: Claude 3 Sonnet",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-3-haiku": {
- id: "anthropic/claude-3-haiku",
- name: "Anthropic: Claude 3 Haiku",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-3-opus:beta": {
- id: "anthropic/claude-3-opus:beta",
- name: "Anthropic: Claude 3 Opus (self-moderated)",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-3-sonnet:beta": {
- id: "anthropic/claude-3-sonnet:beta",
- name: "Anthropic: Claude 3 Sonnet (self-moderated)",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-3-haiku:beta": {
- id: "anthropic/claude-3-haiku:beta",
- name: "Anthropic: Claude 3 Haiku (self-moderated)",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "meta-llama/llama-2-70b-chat": {
- id: "meta-llama/llama-2-70b-chat",
- name: "Meta: Llama v2 70B Chat",
- organization: "Meta-llama",
- maxLength: 4096,
- },
- "nousresearch/nous-capybara-34b": {
- id: "nousresearch/nous-capybara-34b",
- name: "Nous: Capybara 34B",
- organization: "Nousresearch",
- maxLength: 32768,
- },
- "jondurbin/airoboros-l2-70b": {
- id: "jondurbin/airoboros-l2-70b",
- name: "Airoboros 70B",
- organization: "Jondurbin",
- maxLength: 4096,
- },
- "jondurbin/bagel-34b": {
- id: "jondurbin/bagel-34b",
- name: "Bagel 34B v0.2",
- organization: "Jondurbin",
- maxLength: 8000,
- },
- "austism/chronos-hermes-13b": {
- id: "austism/chronos-hermes-13b",
- name: "Chronos Hermes 13B v2",
- organization: "Austism",
- maxLength: 4096,
- },
- "mistralai/mistral-7b-instruct": {
- id: "mistralai/mistral-7b-instruct",
- name: "Mistral 7B Instruct",
- organization: "Mistralai",
- maxLength: 32768,
- },
- "gryphe/mythomax-l2-13b": {
- id: "gryphe/mythomax-l2-13b",
- name: "MythoMax 13B",
- organization: "Gryphe",
- maxLength: 4096,
- },
- "openchat/openchat-7b": {
- id: "openchat/openchat-7b",
- name: "OpenChat 3.5",
- organization: "Openchat",
- maxLength: 8192,
- },
- "undi95/toppy-m-7b": {
- id: "undi95/toppy-m-7b",
- name: "Toppy M 7B",
- organization: "Undi95",
- maxLength: 4096,
- },
- "lizpreciatior/lzlv-70b-fp16-hf": {
- id: "lizpreciatior/lzlv-70b-fp16-hf",
- name: "lzlv 70B",
- organization: "Lizpreciatior",
- maxLength: 4096,
- },
- "mistralai/mixtral-8x7b-instruct": {
- id: "mistralai/mixtral-8x7b-instruct",
- name: "Mixtral 8x7B Instruct",
- organization: "Mistralai",
- maxLength: 32768,
- },
- "cognitivecomputations/dolphin-mixtral-8x7b": {
- id: "cognitivecomputations/dolphin-mixtral-8x7b",
- name: "Dolphin 2.6 Mixtral 8x7B 🐬",
- organization: "Cognitivecomputations",
- maxLength: 32000,
- },
- "neversleep/noromaid-mixtral-8x7b-instruct": {
- id: "neversleep/noromaid-mixtral-8x7b-instruct",
- name: "Noromaid Mixtral 8x7B Instruct",
- organization: "Neversleep",
- maxLength: 8000,
- },
- "nousresearch/nous-hermes-2-mixtral-8x7b-dpo": {
- id: "nousresearch/nous-hermes-2-mixtral-8x7b-dpo",
- name: "Nous: Hermes 2 Mixtral 8x7B DPO",
- organization: "Nousresearch",
- maxLength: 32000,
- },
- "rwkv/rwkv-5-world-3b": {
- id: "rwkv/rwkv-5-world-3b",
- name: "RWKV v5 World 3B",
- organization: "Rwkv",
- maxLength: 10000,
- },
- "recursal/rwkv-5-3b-ai-town": {
- id: "recursal/rwkv-5-3b-ai-town",
- name: "RWKV v5 3B AI Town",
- organization: "Recursal",
- maxLength: 10000,
- },
- "recursal/eagle-7b": {
- id: "recursal/eagle-7b",
- name: "RWKV v5: Eagle 7B",
- organization: "Recursal",
- maxLength: 10000,
- },
- "google/gemma-7b-it": {
- id: "google/gemma-7b-it",
- name: "Google: Gemma 7B",
- organization: "Google",
- maxLength: 8192,
- },
- "databricks/dbrx-instruct": {
- id: "databricks/dbrx-instruct",
- name: "Databricks: DBRX 132B Instruct",
- organization: "Databricks",
- maxLength: 32768,
- },
- "huggingfaceh4/zephyr-orpo-141b-a35b": {
- id: "huggingfaceh4/zephyr-orpo-141b-a35b",
- name: "Zephyr 141B-A35B",
- organization: "Huggingfaceh4",
- maxLength: 65536,
- },
- "anthropic/claude-2": {
- id: "anthropic/claude-2",
- name: "Anthropic: Claude v2",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-2.1": {
- id: "anthropic/claude-2.1",
- name: "Anthropic: Claude v2.1",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-2.0": {
- id: "anthropic/claude-2.0",
- name: "Anthropic: Claude v2.0",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "anthropic/claude-instant-1": {
- id: "anthropic/claude-instant-1",
- name: "Anthropic: Claude Instant v1",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "anthropic/claude-instant-1.2": {
- id: "anthropic/claude-instant-1.2",
- name: "Anthropic: Claude Instant v1.2",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "anthropic/claude-1": {
- id: "anthropic/claude-1",
- name: "Anthropic: Claude v1",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "anthropic/claude-1.2": {
- id: "anthropic/claude-1.2",
- name: "Anthropic: Claude (older v1)",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "anthropic/claude-instant-1.0": {
- id: "anthropic/claude-instant-1.0",
- name: "Anthropic: Claude Instant (older v1)",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "anthropic/claude-instant-1.1": {
- id: "anthropic/claude-instant-1.1",
- name: "Anthropic: Claude Instant (older v1.1)",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "anthropic/claude-2:beta": {
- id: "anthropic/claude-2:beta",
- name: "Anthropic: Claude v2 (self-moderated)",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-2.1:beta": {
- id: "anthropic/claude-2.1:beta",
- name: "Anthropic: Claude v2.1 (self-moderated)",
- organization: "Anthropic",
- maxLength: 200000,
- },
- "anthropic/claude-2.0:beta": {
- id: "anthropic/claude-2.0:beta",
- name: "Anthropic: Claude v2.0 (self-moderated)",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "anthropic/claude-instant-1:beta": {
- id: "anthropic/claude-instant-1:beta",
- name: "Anthropic: Claude Instant v1 (self-moderated)",
- organization: "Anthropic",
- maxLength: 100000,
- },
- "mistralai/mixtral-8x22b": {
- id: "mistralai/mixtral-8x22b",
- name: "Mistral: Mixtral 8x22B (base)",
- organization: "Mistralai",
- maxLength: 65536,
- },
- "huggingfaceh4/zephyr-7b-beta:free": {
- id: "huggingfaceh4/zephyr-7b-beta:free",
- name: "Hugging Face: Zephyr 7B (free)",
- organization: "Huggingfaceh4",
- maxLength: 4096,
- },
- "mistralai/mixtral-8x7b-instruct:nitro": {
- id: "mistralai/mixtral-8x7b-instruct:nitro",
- name: "Mixtral 8x7B Instruct (nitro)",
- organization: "Mistralai",
- maxLength: 32768,
- },
- "meta-llama/llama-2-70b-chat:nitro": {
- id: "meta-llama/llama-2-70b-chat:nitro",
- name: "Meta: Llama v2 70B Chat (nitro)",
- organization: "Meta-llama",
- maxLength: 4096,
- },
- "gryphe/mythomax-l2-13b:nitro": {
- id: "gryphe/mythomax-l2-13b:nitro",
- name: "MythoMax 13B (nitro)",
- organization: "Gryphe",
- maxLength: 4096,
- },
- "mistralai/mistral-7b-instruct:nitro": {
- id: "mistralai/mistral-7b-instruct:nitro",
- name: "Mistral 7B Instruct (nitro)",
- organization: "Mistralai",
- maxLength: 32768,
- },
- "google/gemma-7b-it:nitro": {
- id: "google/gemma-7b-it:nitro",
- name: "Google: Gemma 7B (nitro)",
- organization: "Google",
- maxLength: 8192,
- },
- "databricks/dbrx-instruct:nitro": {
- id: "databricks/dbrx-instruct:nitro",
- name: "Databricks: DBRX 132B Instruct (nitro)",
- organization: "Databricks",
- maxLength: 32768,
- },
- "gryphe/mythomax-l2-13b:extended": {
- id: "gryphe/mythomax-l2-13b:extended",
- name: "MythoMax 13B (extended)",
- organization: "Gryphe",
- maxLength: 8192,
- },
- "mistralai/mistral-tiny": {
- id: "mistralai/mistral-tiny",
- name: "Mistral Tiny",
- organization: "Mistralai",
- maxLength: 32000,
- },
- "mistralai/mistral-small": {
- id: "mistralai/mistral-small",
- name: "Mistral Small",
- organization: "Mistralai",
- maxLength: 32000,
- },
- "mistralai/mistral-medium": {
- id: "mistralai/mistral-medium",
- name: "Mistral Medium",
- organization: "Mistralai",
- maxLength: 32000,
- },
- "mistralai/mistral-large": {
- id: "mistralai/mistral-large",
- name: "Mistral Large",
- organization: "Mistralai",
- maxLength: 32000,
- },
- "cohere/command": {
- id: "cohere/command",
- name: "Cohere: Command",
- organization: "Cohere",
- maxLength: 4096,
- },
- "cohere/command-r": {
- id: "cohere/command-r",
- name: "Cohere: Command R",
- organization: "Cohere",
- maxLength: 128000,
- },
- "cohere/command-r-plus": {
- id: "cohere/command-r-plus",
- name: "Cohere: Command R+",
- organization: "Cohere",
- maxLength: 128000,
- },
-};
-
-module.exports.MODELS = MODELS;
diff --git a/server/utils/AiProviders/openRouter/scripts/.gitignore b/server/utils/AiProviders/openRouter/scripts/.gitignore
deleted file mode 100644
index 94a2dd146..000000000
--- a/server/utils/AiProviders/openRouter/scripts/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.json
\ No newline at end of file
diff --git a/server/utils/AiProviders/openRouter/scripts/parse.mjs b/server/utils/AiProviders/openRouter/scripts/parse.mjs
deleted file mode 100644
index 11c67b22c..000000000
--- a/server/utils/AiProviders/openRouter/scripts/parse.mjs
+++ /dev/null
@@ -1,37 +0,0 @@
-// OpenRouter has lots of models we can use so we use this script
-// to cache all the models. We can see the list of all the models
-// here: https://openrouter.ai/docs#models
-
-// To run, cd into this directory and run `node parse.mjs`
-// copy outputs into the export in ../models.js
-
-// Update the date below if you run this again because OpenRouter added new models.
-// Last Collected: Apr 14, 2024
-
-import fs from "fs";
-
-async function parseChatModels() {
- const models = {};
- const response = await fetch("https://openrouter.ai/api/v1/models");
- const data = await response.json();
- data.data.forEach((model) => {
- models[model.id] = {
- id: model.id,
- name: model.name,
- // capitalize first letter
- organization:
- model.id.split("/")[0].charAt(0).toUpperCase() +
- model.id.split("/")[0].slice(1),
- maxLength: model.context_length,
- };
- });
-
- fs.writeFileSync(
- "chat_models.json",
- JSON.stringify(models, null, 2),
- "utf-8"
- );
- return models;
-}
-
-parseChatModels();
diff --git a/server/utils/AiProviders/perplexity/models.js b/server/utils/AiProviders/perplexity/models.js
index 8bed2a5a0..1990c4e70 100644
--- a/server/utils/AiProviders/perplexity/models.js
+++ b/server/utils/AiProviders/perplexity/models.js
@@ -19,6 +19,16 @@ const MODELS = {
name: "sonar-medium-online",
maxLength: 12000,
},
+ "llama-3-8b-instruct": {
+ id: "llama-3-8b-instruct",
+ name: "llama-3-8b-instruct",
+ maxLength: 8192,
+ },
+ "llama-3-70b-instruct": {
+ id: "llama-3-70b-instruct",
+ name: "llama-3-70b-instruct",
+ maxLength: 8192,
+ },
"codellama-70b-instruct": {
id: "codellama-70b-instruct",
name: "codellama-70b-instruct",
@@ -34,6 +44,11 @@ const MODELS = {
name: "mixtral-8x7b-instruct",
maxLength: 16384,
},
+ "mixtral-8x22b-instruct": {
+ id: "mixtral-8x22b-instruct",
+ name: "mixtral-8x22b-instruct",
+ maxLength: 16384,
+ },
};
module.exports.MODELS = MODELS;
diff --git a/server/utils/AiProviders/perplexity/scripts/chat_models.txt b/server/utils/AiProviders/perplexity/scripts/chat_models.txt
index 41fce0f01..2a620525b 100644
--- a/server/utils/AiProviders/perplexity/scripts/chat_models.txt
+++ b/server/utils/AiProviders/perplexity/scripts/chat_models.txt
@@ -4,6 +4,9 @@
| `sonar-small-online` | 7B | 12000 | Chat Completion |
| `sonar-medium-chat` | 8x7B | 16384 | Chat Completion |
| `sonar-medium-online` | 8x7B | 12000 | Chat Completion |
+| `llama-3-8b-instruct` | 8B | 8192 | Chat Completion |
+| `llama-3-70b-instruct` | 70B | 8192 | Chat Completion |
| `codellama-70b-instruct` | 70B | 16384 | Chat Completion |
| `mistral-7b-instruct` [1] | 7B | 16384 | Chat Completion |
-| `mixtral-8x7b-instruct` | 8x7B | 16384 | Chat Completion |
\ No newline at end of file
+| `mixtral-8x7b-instruct` | 8x7B | 16384 | Chat Completion |
+| `mixtral-8x22b-instruct` | 8x22B | 16384 | Chat Completion |
\ No newline at end of file
diff --git a/server/utils/AiProviders/perplexity/scripts/parse.mjs b/server/utils/AiProviders/perplexity/scripts/parse.mjs
index 1858eafb8..fcbbaf731 100644
--- a/server/utils/AiProviders/perplexity/scripts/parse.mjs
+++ b/server/utils/AiProviders/perplexity/scripts/parse.mjs
@@ -8,7 +8,7 @@
// copy outputs into the export in ../models.js
// Update the date below if you run this again because Perplexity added new models.
-// Last Collected: Apr 14, 2024
+// Last Collected: Apr 25, 2024
import fs from "fs";
diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js
index d0d162c4a..3c6b5350b 100644
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@@ -1,4 +1,4 @@
-const { openRouterModels } = require("../AiProviders/openRouter");
+const { OpenRouterLLM } = require("../AiProviders/openRouter");
const { perplexityModels } = require("../AiProviders/perplexity");
const { togetherAiModels } = require("../AiProviders/togetherAi");
const SUPPORT_CUSTOM_MODELS = [
@@ -232,7 +232,8 @@ async function getPerplexityModels() {
}
async function getOpenRouterModels() {
- const knownModels = await openRouterModels();
+ const openrouter = await new OpenRouterLLM().init();
+ const knownModels = openrouter.models();
if (!Object.keys(knownModels).length === 0)
return { models: [], error: null };
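// Editorial note — the pre-existing guard above, `!Object.keys(knownModels).length === 0`,
// applies `!` before `===`, so it compares a boolean to 0 and never triggers. The likely
// intent (a hedged fix, outside the scope of this diff):
if (Object.keys(knownModels).length === 0) return { models: [], error: null };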
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 837a28f60..c8cdd870f 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -77,8 +77,13 @@ function getLLMProvider({ provider = null, model = null } = {}) {
case "groq":
const { GroqLLM } = require("../AiProviders/groq");
return new GroqLLM(embedder, model);
+ case "generic-openai":
+ const { GenericOpenAiLLM } = require("../AiProviders/genericOpenAi");
+ return new GenericOpenAiLLM(embedder, model);
default:
- throw new Error("ENV: No LLM_PROVIDER value found in environment!");
+ throw new Error(
+ `ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`
+ );
}
}
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index ed2318f84..ed6e5c13c 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -132,6 +132,24 @@ const KEY_MAPPING = {
checks: [nonZero],
},
+  // Generic OpenAI inference settings
+ GenericOpenAiBasePath: {
+ envKey: "GENERIC_OPEN_AI_BASE_PATH",
+ checks: [isValidURL],
+ },
+ GenericOpenAiModelPref: {
+ envKey: "GENERIC_OPEN_AI_MODEL_PREF",
+ checks: [isNotEmpty],
+ },
+ GenericOpenAiTokenLimit: {
+ envKey: "GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT",
+ checks: [nonZero],
+ },
+ GenericOpenAiKey: {
+ envKey: "GENERIC_OPEN_AI_API_KEY",
+ checks: [],
+ },
+
EmbeddingEngine: {
envKey: "EMBEDDING_ENGINE",
checks: [supportedEmbeddingModel],
@@ -375,6 +393,7 @@ function supportedLLM(input = "") {
"perplexity",
"openrouter",
"groq",
+ "generic-openai",
].includes(input);
return validSelection ? null : `${input} is not a valid LLM provider.`;
}
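// Editorial note — each KEY_MAPPING entry pairs a settings field with an env key and a
// list of check functions that return an error string or null (see supportedLLM above).
// A simplified sketch of how such an entry might be validated (the real updateENV logic
// may differ):
function validateSetting(mapping, field, value) {
  const { envKey, checks } = mapping[field]; // e.g. field = "GenericOpenAiBasePath"
  const error = checks.map((check) => check(value)).find(Boolean); // first failing message
  return error ? { envKey, error } : { envKey, value, error: null };
}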