mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-04 22:10:12 +01:00
LocalAI for embeddings (#361)
* feature: add localAi as embedding provider * chore: add LocalAI image * chore: add localai embedding examples to docker .env.example * update setting env pull models from localai API * update comments on embedder Dont show cost estimation on UI --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
f269124760
commit
a96a9d41a3
@ -35,6 +35,15 @@ CACHE_VECTORS="true"
|
||||
# EMBEDDING_ENGINE='openai'
|
||||
# OPEN_AI_KEY=sk-xxxx
|
||||
|
||||
# EMBEDDING_ENGINE='azure'
|
||||
# AZURE_OPENAI_ENDPOINT=
|
||||
# AZURE_OPENAI_KEY=
|
||||
# EMBEDDING_MODEL_PREF='my-embedder-model' # This is the "deployment" on Azure you want to use for embeddings. Not the base model. Valid base model is text-embedding-ada-002
|
||||
|
||||
# EMBEDDING_ENGINE='localai'
|
||||
# EMBEDDING_BASE_PATH='http://localhost:8080/v1'
|
||||
# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
|
||||
|
||||
###########################################
|
||||
######## Vector Database Selection ########
|
||||
###########################################
|
||||
|
@ -8,7 +8,11 @@ import WorkspaceDirectory from "./WorkspaceDirectory";
|
||||
|
||||
const COST_PER_TOKEN = 0.0004;
|
||||
|
||||
export default function DocumentSettings({ workspace, fileTypes }) {
|
||||
export default function DocumentSettings({
|
||||
workspace,
|
||||
fileTypes,
|
||||
systemSettings,
|
||||
}) {
|
||||
const [highlightWorkspace, setHighlightWorkspace] = useState(false);
|
||||
const [availableDocs, setAvailableDocs] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
@ -135,8 +139,15 @@ export default function DocumentSettings({ workspace, fileTypes }) {
|
||||
}
|
||||
});
|
||||
|
||||
const dollarAmount = (totalTokenCount / 1000) * COST_PER_TOKEN;
|
||||
setEmbeddingsCost(dollarAmount);
|
||||
// Do not do cost estimation unless the embedding engine is OpenAi.
|
||||
if (
|
||||
!systemSettings?.EmbeddingEngine ||
|
||||
systemSettings.EmbeddingEngine === "openai"
|
||||
) {
|
||||
const dollarAmount = (totalTokenCount / 1000) * COST_PER_TOKEN;
|
||||
setEmbeddingsCost(dollarAmount);
|
||||
}
|
||||
|
||||
setMovedItems([...movedItems, ...newMovedItems]);
|
||||
|
||||
let newAvailableDocs = JSON.parse(JSON.stringify(availableDocs));
|
||||
|
@ -15,11 +15,14 @@ const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => {
|
||||
const [selectedTab, setSelectedTab] = useState("documents");
|
||||
const [workspace, setWorkspace] = useState(null);
|
||||
const [fileTypes, setFileTypes] = useState(null);
|
||||
const [settings, setSettings] = useState({});
|
||||
|
||||
useEffect(() => {
|
||||
async function checkSupportedFiletypes() {
|
||||
const acceptedTypes = await System.acceptedDocumentTypes();
|
||||
const _settings = await System.keys();
|
||||
setFileTypes(acceptedTypes ?? {});
|
||||
setSettings(_settings ?? {});
|
||||
}
|
||||
checkSupportedFiletypes();
|
||||
}, []);
|
||||
@ -104,7 +107,11 @@ const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => {
|
||||
</div>
|
||||
<Suspense fallback={<div>Loading...</div>}>
|
||||
<div className={selectedTab === "documents" ? "" : "hidden"}>
|
||||
<DocumentSettings workspace={workspace} fileTypes={fileTypes} />
|
||||
<DocumentSettings
|
||||
workspace={workspace}
|
||||
fileTypes={fileTypes}
|
||||
systemSettings={settings}
|
||||
/>
|
||||
</div>
|
||||
<div className={selectedTab === "settings" ? "" : "hidden"}>
|
||||
<WorkspaceSettings workspace={workspace} fileTypes={fileTypes} />
|
||||
|
@ -7,6 +7,7 @@ import System from "../../../models/system";
|
||||
import showToast from "../../../utils/toast";
|
||||
import OpenAiLogo from "../../../media/llmprovider/openai.png";
|
||||
import AzureOpenAiLogo from "../../../media/llmprovider/azure.png";
|
||||
import LocalAiLogo from "../../../media/llmprovider/localai.png";
|
||||
import PreLoader from "../../../components/Preloader";
|
||||
import LLMProviderOption from "../../../components/LLMSelection/LLMProviderOption";
|
||||
|
||||
@ -16,6 +17,8 @@ export default function GeneralEmbeddingPreference() {
|
||||
const [embeddingChoice, setEmbeddingChoice] = useState("openai");
|
||||
const [settings, setSettings] = useState(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [basePathValue, setBasePathValue] = useState("");
|
||||
const [basePath, setBasePath] = useState("");
|
||||
|
||||
const handleSubmit = async (e) => {
|
||||
e.preventDefault();
|
||||
@ -38,11 +41,17 @@ export default function GeneralEmbeddingPreference() {
|
||||
setHasChanges(true);
|
||||
};
|
||||
|
||||
function updateBasePath() {
|
||||
setBasePath(basePathValue);
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
async function fetchKeys() {
|
||||
const _settings = await System.keys();
|
||||
setSettings(_settings);
|
||||
setEmbeddingChoice(_settings?.EmbeddingEngine || "openai");
|
||||
setBasePath(_settings?.EmbeddingBasePath || "");
|
||||
setBasePathValue(_settings?.EmbeddingBasePath || "");
|
||||
setLoading(false);
|
||||
}
|
||||
fetchKeys();
|
||||
@ -136,6 +145,15 @@ export default function GeneralEmbeddingPreference() {
|
||||
image={AzureOpenAiLogo}
|
||||
onClick={updateChoice}
|
||||
/>
|
||||
<LLMProviderOption
|
||||
name="LocalAI"
|
||||
value="localai"
|
||||
link="localai.io"
|
||||
description="Self hosted LocalAI embedding engine."
|
||||
checked={embeddingChoice === "localai"}
|
||||
image={LocalAiLogo}
|
||||
onClick={updateChoice}
|
||||
/>
|
||||
</div>
|
||||
<div className="mt-10 flex flex-wrap gap-4 max-w-[800px]">
|
||||
{embeddingChoice === "openai" && (
|
||||
@ -215,6 +233,32 @@ export default function GeneralEmbeddingPreference() {
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
{embeddingChoice === "localai" && (
|
||||
<>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-4">
|
||||
LocalAI Base URL
|
||||
</label>
|
||||
<input
|
||||
type="url"
|
||||
name="EmbeddingBasePath"
|
||||
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||
placeholder="http://localhost:8080/v1"
|
||||
defaultValue={settings?.EmbeddingBasePath}
|
||||
onChange={(e) => setBasePathValue(e.target.value)}
|
||||
onBlur={updateBasePath}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
<LocalAIModelSelection
|
||||
settings={settings}
|
||||
basePath={basePath}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
@ -225,3 +269,73 @@ export default function GeneralEmbeddingPreference() {
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
// Dropdown for picking which model a LocalAI instance should use for
// embeddings. Queries the instance at `basePath` for its loaded models
// and renders them; while loading (or before a usable URL is entered)
// it renders a disabled placeholder select instead.
function LocalAIModelSelection({ settings, basePath = null }) {
  const [customModels, setCustomModels] = useState([]);
  const [loading, setLoading] = useState(true);

  useEffect(() => {
    async function findCustomModels() {
      // Only query once the URL looks like an OpenAI-compatible endpoint
      // (must contain "/v1") — avoids firing requests on partial input.
      if (!basePath || !basePath.includes("/v1")) {
        setCustomModels([]);
        setLoading(false);
        return;
      }
      setLoading(true);
      const { models } = await System.customModels("localai", null, basePath);
      setCustomModels(models || []);
      setLoading(false);
    }
    findCustomModels();
  }, [basePath]); // re-fetch whenever the base URL changes

  if (loading || customModels.length == 0) {
    // Placeholder state: a disabled select showing why no models are listed.
    return (
      <div className="flex flex-col w-60">
        <label className="text-white text-sm font-semibold block mb-4">
          Embedding Model Name
        </label>
        <select
          name="EmbeddingModelPref"
          disabled={true}
          className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
        >
          <option disabled={true} selected={true}>
            {basePath?.includes("/v1")
              ? "-- loading available models --"
              : "-- waiting for URL --"}
          </option>
        </select>
      </div>
    );
  }

  // Models loaded: render them, pre-selecting the currently saved pref.
  return (
    <div className="flex flex-col w-60">
      <label className="text-white text-sm font-semibold block mb-4">
        Embedding Model Name
      </label>
      <select
        name="EmbeddingModelPref"
        required={true}
        className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
      >
        {customModels.length > 0 && (
          <optgroup label="Your loaded models">
            {customModels.map((model) => {
              return (
                <option
                  key={model.id}
                  value={model.id}
                  selected={settings?.EmbeddingModelPref === model.id}
                >
                  {model.id}
                </option>
              );
            })}
          </optgroup>
        )}
      </select>
    </div>
  );
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
import React, { memo, useEffect, useState } from "react";
|
||||
import OpenAiLogo from "../../../../../media/llmprovider/openai.png";
|
||||
import AzureOpenAiLogo from "../../../../../media/llmprovider/azure.png";
|
||||
import LocalAiLogo from "../../../../../media/llmprovider/localai.png";
|
||||
import System from "../../../../../models/system";
|
||||
import PreLoader from "../../../../../components/Preloader";
|
||||
import LLMProviderOption from "../../../../../components/LLMSelection/LLMProviderOption";
|
||||
@ -9,16 +10,23 @@ function EmbeddingSelection({ nextStep, prevStep, currentStep }) {
|
||||
const [embeddingChoice, setEmbeddingChoice] = useState("openai");
|
||||
const [settings, setSettings] = useState(null);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [basePathValue, setBasePathValue] = useState("");
|
||||
const [basePath, setBasePath] = useState("");
|
||||
|
||||
const updateChoice = (selection) => {
|
||||
setEmbeddingChoice(selection);
|
||||
};
|
||||
|
||||
function updateBasePath() {
|
||||
setBasePath(basePathValue);
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
async function fetchKeys() {
|
||||
const _settings = await System.keys();
|
||||
setSettings(_settings);
|
||||
setEmbeddingChoice(_settings?.EmbeddingEngine || "openai");
|
||||
setBasePathValue(_settings?.EmbeddingBasePath || "");
|
||||
setLoading(false);
|
||||
}
|
||||
fetchKeys();
|
||||
@ -77,6 +85,15 @@ function EmbeddingSelection({ nextStep, prevStep, currentStep }) {
|
||||
image={AzureOpenAiLogo}
|
||||
onClick={updateChoice}
|
||||
/>
|
||||
<LLMProviderOption
|
||||
name="LocalAI"
|
||||
value="localai"
|
||||
link="localai.io"
|
||||
description="Self hosted LocalAI embedding engine."
|
||||
checked={embeddingChoice === "localai"}
|
||||
image={LocalAiLogo}
|
||||
onClick={updateChoice}
|
||||
/>
|
||||
</div>
|
||||
<div className="mt-10 flex flex-wrap gap-4 max-w-[800px]">
|
||||
{embeddingChoice === "openai" && (
|
||||
@ -152,6 +169,32 @@ function EmbeddingSelection({ nextStep, prevStep, currentStep }) {
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
|
||||
{embeddingChoice === "localai" && (
|
||||
<>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-4">
|
||||
LocalAI Base URL
|
||||
</label>
|
||||
<input
|
||||
type="url"
|
||||
name="EmbeddingBasePath"
|
||||
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||
placeholder="http://localhost:8080/v1"
|
||||
defaultValue={settings?.EmbeddingBasePath}
|
||||
onChange={(e) => setBasePathValue(e.target.value)}
|
||||
onBlur={updateBasePath}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
<LocalAIModelSelection
|
||||
settings={settings}
|
||||
basePath={basePath}
|
||||
/>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex w-full justify-between items-center p-6 space-x-2 border-t rounded-b border-gray-500/50">
|
||||
@ -174,4 +217,74 @@ function EmbeddingSelection({ nextStep, prevStep, currentStep }) {
|
||||
);
|
||||
}
|
||||
|
||||
function LocalAIModelSelection({ settings, basePath = null }) {
|
||||
const [customModels, setCustomModels] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
||||
useEffect(() => {
|
||||
async function findCustomModels() {
|
||||
if (!basePath || !basePath.includes("/v1")) {
|
||||
setCustomModels([]);
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
const { models } = await System.customModels("localai", null, basePath);
|
||||
setCustomModels(models || []);
|
||||
setLoading(false);
|
||||
}
|
||||
findCustomModels();
|
||||
}, [basePath]);
|
||||
|
||||
if (loading || customModels.length == 0) {
|
||||
return (
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-4">
|
||||
Embedding Model Name
|
||||
</label>
|
||||
<select
|
||||
name="EmbeddingModelPref"
|
||||
disabled={true}
|
||||
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||
>
|
||||
<option disabled={true} selected={true}>
|
||||
{basePath?.includes("/v1")
|
||||
? "-- loading available models --"
|
||||
: "-- waiting for URL --"}
|
||||
</option>
|
||||
</select>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-4">
|
||||
Embedding Model Name
|
||||
</label>
|
||||
<select
|
||||
name="EmbeddingModelPref"
|
||||
required={true}
|
||||
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||
>
|
||||
{customModels.length > 0 && (
|
||||
<optgroup label="Your loaded models">
|
||||
{customModels.map((model) => {
|
||||
return (
|
||||
<option
|
||||
key={model.id}
|
||||
value={model.id}
|
||||
selected={settings.EmbeddingModelPref === model.id}
|
||||
>
|
||||
{model.id}
|
||||
</option>
|
||||
);
|
||||
})}
|
||||
</optgroup>
|
||||
)}
|
||||
</select>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
export default memo(EmbeddingSelection);
|
||||
|
@ -35,6 +35,15 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
|
||||
# EMBEDDING_ENGINE='openai'
|
||||
# OPEN_AI_KEY=sk-xxxx
|
||||
|
||||
# EMBEDDING_ENGINE='azure'
|
||||
# AZURE_OPENAI_ENDPOINT=
|
||||
# AZURE_OPENAI_KEY=
|
||||
# EMBEDDING_MODEL_PREF='my-embedder-model' # This is the "deployment" on Azure you want to use for embeddings. Not the base model. Valid base model is text-embedding-ada-002
|
||||
|
||||
# EMBEDDING_ENGINE='localai'
|
||||
# EMBEDDING_BASE_PATH='http://localhost:8080/v1'
|
||||
# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
|
||||
|
||||
###########################################
|
||||
######## Vector Database Selection ########
|
||||
###########################################
|
||||
|
@ -25,6 +25,8 @@ const SystemSettings = {
|
||||
MultiUserMode: await this.isMultiUserMode(),
|
||||
VectorDB: vectorDB,
|
||||
EmbeddingEngine: process.env.EMBEDDING_ENGINE,
|
||||
EmbeddingBasePath: process.env.EMBEDDING_BASE_PATH,
|
||||
EmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
|
||||
...(vectorDB === "pinecone"
|
||||
? {
|
||||
PineConeEnvironment: process.env.PINECONE_ENVIRONMENT,
|
||||
|
77
server/utils/EmbeddingEngines/localAi/index.js
Normal file
77
server/utils/EmbeddingEngines/localAi/index.js
Normal file
@ -0,0 +1,77 @@
|
||||
const { toChunks } = require("../../helpers");
|
||||
|
||||
// Embedder backed by a LocalAI instance exposing an OpenAI-compatible
// /v1/embeddings endpoint. Configuration comes entirely from the
// EMBEDDING_BASE_PATH and EMBEDDING_MODEL_PREF environment variables.
class LocalAiEmbedder {
  constructor() {
    const { Configuration, OpenAIApi } = require("openai");
    if (!process.env.EMBEDDING_BASE_PATH)
      throw new Error("No embedding base path was set.");
    if (!process.env.EMBEDDING_MODEL_PREF)
      throw new Error("No embedding model was set.");
    const config = new Configuration({
      basePath: process.env.EMBEDDING_BASE_PATH,
    });
    this.openai = new OpenAIApi(config);

    // Arbitrary limit to ensure we stay within reasonable POST request size.
    this.embeddingChunkLimit = 1_000;
  }

  // Embed a single text input. Returns the first embedding vector, or []
  // when no embedding came back.
  async embedTextInput(textInput) {
    const result = await this.embedChunks(textInput);
    return result?.[0] || [];
  }

  // Embed many text chunks. Returns an array of embedding vectors in input
  // order, or null when any result is missing an `embedding` key. Throws
  // when LocalAI reports one or more errors, since a partial set of
  // embeddings would be unusable.
  async embedChunks(textChunks = []) {
    // Batch the input and fire all requests in parallel. Each request
    // resolves (never rejects) to a { data, error } pair so Promise.all
    // cannot short-circuit and every failure can be aggregated below.
    // Note: createEmbedding already returns a promise, so we chain on it
    // directly instead of wrapping it in `new Promise(...)`.
    const embeddingRequests = toChunks(
      textChunks,
      this.embeddingChunkLimit
    ).map((chunk) =>
      this.openai
        .createEmbedding({
          model: process.env.EMBEDDING_MODEL_PREF,
          input: chunk,
        })
        .then((res) => ({ data: res.data?.data, error: null }))
        .catch((e) => ({ data: [], error: e?.error }))
    );

    const { data = [], error = null } = await Promise.all(
      embeddingRequests
    ).then((results) => {
      // If any errors were returned from LocalAI abort the entire sequence
      // because the embeddings will be incomplete.
      const errors = results
        .filter((res) => !!res.error)
        .map((res) => res.error)
        .flat();
      if (errors.length > 0) {
        return {
          data: [],
          error: `(${errors.length}) Embedding Errors! ${errors
            .map((error) => `[${error.type}]: ${error.message}`)
            .join(", ")}`,
        };
      }
      return {
        data: results.map((res) => res?.data || []).flat(),
        error: null,
      };
    });

    if (!!error) throw new Error(`LocalAI Failed to embed: ${error}`);
    return data.length > 0 &&
      data.every((embd) => embd.hasOwnProperty("embedding"))
      ? data.map((embd) => embd.embedding)
      : null;
  }
}
|
||||
|
||||
module.exports = {
|
||||
LocalAiEmbedder,
|
||||
};
|
@ -59,6 +59,9 @@ function getEmbeddingEngineSelection() {
|
||||
AzureOpenAiEmbedder,
|
||||
} = require("../EmbeddingEngines/azureOpenAi");
|
||||
return new AzureOpenAiEmbedder();
|
||||
case "localai":
|
||||
const { LocalAiEmbedder } = require("../EmbeddingEngines/localAi");
|
||||
return new LocalAiEmbedder();
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
|
@ -72,6 +72,14 @@ const KEY_MAPPING = {
|
||||
envKey: "EMBEDDING_ENGINE",
|
||||
checks: [supportedEmbeddingModel],
|
||||
},
|
||||
EmbeddingBasePath: {
|
||||
envKey: "EMBEDDING_BASE_PATH",
|
||||
checks: [isNotEmpty, validLLMExternalBasePath],
|
||||
},
|
||||
EmbeddingModelPref: {
|
||||
envKey: "EMBEDDING_MODEL_PREF",
|
||||
checks: [isNotEmpty],
|
||||
},
|
||||
|
||||
// Vector Database Selection Settings
|
||||
VectorDB: {
|
||||
@ -191,7 +199,7 @@ function validAnthropicModel(input = "") {
|
||||
}
|
||||
|
||||
function supportedEmbeddingModel(input = "") {
|
||||
const supported = ["openai", "azure"];
|
||||
const supported = ["openai", "azure", "localai"];
|
||||
return supported.includes(input)
|
||||
? null
|
||||
: `Invalid Embedding model type. Must be one of ${supported.join(", ")}.`;
|
||||
|
Loading…
Reference in New Issue
Block a user