mirror of https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-05 06:20:10 +01:00

Implement support for HuggingFace Inference Endpoints (#680)

This commit is contained in:
parent 1846a99b93
commit 2bc11d3f1a
@@ -48,6 +48,11 @@ GID='1000'
 # MISTRAL_API_KEY='example-mistral-ai-api-key'
 # MISTRAL_MODEL_PREF='mistral-tiny'
 
+# LLM_PROVIDER='huggingface'
+# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
+# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx
+# HUGGING_FACE_LLM_TOKEN_LIMIT=8000
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
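Editor's note: the token limit configured here is not used as a single budget. The new server-side provider (see server/utils/AiProviders/huggingface/index.js further down in this diff) splits it into per-section allowances in its constructor. A minimal sketch of that arithmetic with the 8000-token example above:

// Sketch: how HuggingFaceLLM (below) divides HUGGING_FACE_LLM_TOKEN_LIMIT=8000.
const limit = 8000;
const limits = {
  history: limit * 0.15, // 1200 tokens reserved for prior chat turns
  system: limit * 0.15, // 1200 tokens reserved for system prompt + retrieved context
  user: limit * 0.7, // 5600 tokens reserved for the latest user prompt
};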
@@ -0,0 +1,56 @@ (new file: HuggingFaceOptions component)
export default function HuggingFaceOptions({ settings }) {
  return (
    <div className="w-full flex flex-col">
      <div className="w-full flex items-center gap-4">
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-4">
            HuggingFace Inference Endpoint
          </label>
          <input
            type="url"
            name="HuggingFaceLLMEndpoint"
            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
            placeholder="https://example.endpoints.huggingface.cloud"
            defaultValue={settings?.HuggingFaceLLMEndpoint}
            required={true}
            autoComplete="off"
            spellCheck={false}
          />
        </div>
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-4">
            HuggingFace Access Token
          </label>
          <input
            type="password"
            name="HuggingFaceLLMAccessToken"
            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
            placeholder="HuggingFace Access Token"
            defaultValue={
              settings?.HuggingFaceLLMAccessToken ? "*".repeat(20) : ""
            }
            required={true}
            autoComplete="off"
            spellCheck={false}
          />
        </div>
        <div className="flex flex-col w-60">
          <label className="text-white text-sm font-semibold block mb-4">
            Model Token Limit
          </label>
          <input
            type="number"
            name="HuggingFaceLLMTokenLimit"
            className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
            placeholder="4096"
            min={1}
            onScroll={(e) => e.target.blur()}
            defaultValue={settings?.HuggingFaceLLMTokenLimit}
            required={true}
            autoComplete="off"
          />
        </div>
      </div>
    </div>
  );
}
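Editor's note: each input's `name` attribute matches a key in the server's KEY_MAPPING table (see the validation changes at the end of this diff), which is presumably how a submitted form value finds its environment variable. A rough sketch of that lookup, assuming KEY_MAPPING is in scope and using a hypothetical `form` object for the submitted settings:

// Sketch only - `form` is a hypothetical stand-in for the submitted settings body.
const form = {
  HuggingFaceLLMEndpoint: "https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud",
};
const { envKey, checks } = KEY_MAPPING["HuggingFaceLLMEndpoint"];
const error = checks.map((check) => check(form.HuggingFaceLLMEndpoint)).find(Boolean);
if (!error) process.env[envKey] = form.HuggingFaceLLMEndpoint; // -> HUGGING_FACE_LLM_ENDPOINT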
frontend/src/media/llmprovider/huggingface.png (new binary file, 17 KiB; not shown)
@@ -13,6 +13,7 @@ import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
+import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import PreLoader from "@/components/Preloader";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
@@ -24,6 +25,7 @@ import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
 import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
 import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";
 import MistralOptions from "@/components/LLMSelection/MistralOptions";
+import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import { MagnifyingGlass } from "@phosphor-icons/react";
 
@@ -107,6 +109,14 @@ export default function GeneralLLMPreference() {
       options: <GeminiLLMOptions settings={settings} />,
       description: "Google's largest and most capable AI model",
     },
+    {
+      name: "HuggingFace",
+      value: "huggingface",
+      logo: HuggingFaceLogo,
+      options: <HuggingFaceOptions settings={settings} />,
+      description:
+        "Access 150,000+ open-source LLMs and the world's AI community",
+    },
     {
       name: "Ollama",
       value: "ollama",
@@ -10,6 +10,7 @@ import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
+import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import ZillizLogo from "@/media/vectordbs/zilliz.png";
 import AstraDBLogo from "@/media/vectordbs/astraDB.png";
 import ChromaLogo from "@/media/vectordbs/chroma.png";
@@ -101,6 +102,13 @@ const LLM_SELECTION_PRIVACY = {
     ],
     logo: MistralLogo,
   },
+  huggingface: {
+    name: "HuggingFace",
+    description: [
+      "Your prompts and document text used in response are sent to your HuggingFace managed endpoint",
+    ],
+    logo: HuggingFaceLogo,
+  },
 };
 
 const VECTOR_DB_PRIVACY = {
@@ -10,6 +10,7 @@ import LocalAiLogo from "@/media/llmprovider/localai.png";
 import TogetherAILogo from "@/media/llmprovider/togetherai.png";
 import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
+import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
 import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
 import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
 import AnthropicAiOptions from "@/components/LLMSelection/AnthropicAiOptions";
@@ -19,6 +20,7 @@ import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions";
 import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
 import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
 import MistralOptions from "@/components/LLMSelection/MistralOptions";
+import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions";
 import LLMItem from "@/components/LLMSelection/LLMItem";
 import System from "@/models/system";
 import paths from "@/utils/paths";
@@ -82,6 +84,14 @@ export default function LLMPreference({
       options: <GeminiLLMOptions settings={settings} />,
       description: "Google's largest and most capable AI model",
     },
+    {
+      name: "HuggingFace",
+      value: "huggingface",
+      logo: HuggingFaceLogo,
+      options: <HuggingFaceOptions settings={settings} />,
+      description:
+        "Access 150,000+ open-source LLMs and the world's AI community",
+    },
     {
       name: "Ollama",
       value: "ollama",
@@ -45,6 +45,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
 # MISTRAL_API_KEY='example-mistral-ai-api-key'
 # MISTRAL_MODEL_PREF='mistral-tiny'
 
+# LLM_PROVIDER='huggingface'
+# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
+# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx
+# HUGGING_FACE_LLM_TOKEN_LIMIT=8000
+
 ###########################################
 ######## Embedding API SElECTION ##########
 ###########################################
@@ -194,6 +194,20 @@ const SystemSettings = {
           AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
         }
       : {}),
+
+    ...(llmProvider === "huggingface"
+      ? {
+          HuggingFaceLLMEndpoint: process.env.HUGGING_FACE_LLM_ENDPOINT,
+          HuggingFaceLLMAccessToken: !!process.env.HUGGING_FACE_LLM_API_KEY,
+          HuggingFaceLLMTokenLimit: process.env.HUGGING_FACE_LLM_TOKEN_LIMIT,
+
+          // For embedding credentials when HuggingFace is selected.
+          OpenAiKey: !!process.env.OPEN_AI_KEY,
+          AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
+          AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
+          AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
+        }
+      : {}),
   };
 },
 
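Editor's note: only the endpoint and token limit are echoed back verbatim; the access token is coerced to a boolean with `!!`, so the raw hf_... key never reaches the browser. That boolean is what the HuggingFaceOptions component above keys on when it masks the field:

// Sketch: the client receives a boolean, never the key itself.
const HUGGING_FACE_LLM_API_KEY = "hf_xxxxxx"; // server-side secret
const settings = { HuggingFaceLLMAccessToken: !!HUGGING_FACE_LLM_API_KEY }; // { HuggingFaceLLMAccessToken: true }
// The frontend then renders a placeholder instead of the key:
const displayed = settings?.HuggingFaceLLMAccessToken ? "*".repeat(20) : ""; // "********************"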
server/utils/AiProviders/huggingface/index.js (new file, 185 lines)
@@ -0,0 +1,185 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");

class HuggingFaceLLM {
  constructor(embedder = null, _modelPreference = null) {
    const { Configuration, OpenAIApi } = require("openai");
    if (!process.env.HUGGING_FACE_LLM_ENDPOINT)
      throw new Error("No HuggingFace Inference Endpoint was set.");
    if (!process.env.HUGGING_FACE_LLM_API_KEY)
      throw new Error("No HuggingFace Access Token was set.");

    const config = new Configuration({
      basePath: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
      apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
    });
    this.openai = new OpenAIApi(config);
    // When using the HF inference server the model param is not required, so
    // we can stub it here. HF Endpoints can only run one model at a time.
    // We set it to 'tgi' so that the HF endpoint accepts the message format.
    this.model = "tgi";
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    if (!embedder)
      console.warn(
        "No embedding provider defined for HuggingFaceLLM - falling back to Native for embedding!"
      );
    this.embedder = !embedder ? new NativeEmbedder() : embedder;
    this.defaultTemp = 0.2;
  }

  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }

  streamingEnabled() {
    return "streamChat" in this && "streamGetChatCompletion" in this;
  }

  promptWindowLimit() {
    const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
    if (!limit || isNaN(Number(limit)))
      throw new Error("No HuggingFace token context limit was set.");
    return Number(limit);
  }

  async isValidChatCompletionModel(_ = "") {
    return true;
  }

  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
  }) {
    // System prompt is not enabled for HF model chats
    const prompt = {
      role: "user",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    const assistantResponse = {
      role: "assistant",
      content: "Okay, I will follow those instructions",
    };
    return [
      prompt,
      assistantResponse,
      ...chatHistory,
      { role: "user", content: userPrompt },
    ];
  }

  async isSafe(_input = "") {
    // Not implemented so must be stubbed
    return { safe: true, reasons: [] };
  }

  async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
    const textResponse = await this.openai
      .createChatCompletion({
        model: this.model,
        temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
        n: 1,
        messages: await this.compressMessages(
          {
            systemPrompt: chatPrompt(workspace),
            userPrompt: prompt,
            chatHistory,
          },
          rawHistory
        ),
      })
      .then((json) => {
        const res = json.data;
        if (!res.hasOwnProperty("choices"))
          throw new Error("HuggingFace chat: No results!");
        if (res.choices.length === 0)
          throw new Error("HuggingFace chat: No results length!");
        return res.choices[0].message.content;
      })
      .catch((error) => {
        throw new Error(
          `HuggingFace::createChatCompletion failed with: ${error.message}`
        );
      });

    return textResponse;
  }

  async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
    const streamRequest = await this.openai.createChatCompletion(
      {
        model: this.model,
        stream: true,
        temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
        n: 1,
        messages: await this.compressMessages(
          {
            systemPrompt: chatPrompt(workspace),
            userPrompt: prompt,
            chatHistory,
          },
          rawHistory
        ),
      },
      { responseType: "stream" }
    );
    return { type: "huggingFaceStream", stream: streamRequest };
  }

  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    const { data } = await this.openai.createChatCompletion({
      model: this.model,
      messages,
      temperature,
    });

    if (!data.hasOwnProperty("choices")) return null;
    return data.choices[0].message.content;
  }

  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    const streamRequest = await this.openai.createChatCompletion(
      {
        model: this.model,
        stream: true,
        messages,
        temperature,
      },
      { responseType: "stream" }
    );
    return { type: "huggingFaceStream", stream: streamRequest };
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  async compressMessages(promptArgs = {}, rawHistory = []) {
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}

module.exports = {
  HuggingFaceLLM,
};
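Editor's note: since system prompts are not enabled for HF model chats (per the comment in constructPrompt), the system prompt and any retrieved context are folded into an opening user turn, followed by a canned assistant acknowledgement. An example of the array this produces, assuming the two HUGGING_FACE_LLM_* env vars are set so the constructor does not throw:

// Example of the message array constructPrompt() returns.
const llm = new HuggingFaceLLM();
const messages = llm.constructPrompt({
  systemPrompt: "You are a helpful assistant.",
  contextTexts: ["AnythingLLM supports HuggingFace endpoints."],
  chatHistory: [],
  userPrompt: "Which endpoints are supported?",
});
// [
//   { role: "user", content: "You are a helpful assistant.\nContext:\n[CONTEXT 0]:\n..." },
//   { role: "assistant", content: "Okay, I will follow those instructions" },
//   { role: "user", content: "Which endpoints are supported?" },
// ]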
@@ -383,6 +383,112 @@ function handleStreamResponses(response, stream, responseProps) {
     });
   }
 
+  if (stream.type === "huggingFaceStream") {
+    return new Promise((resolve) => {
+      let fullText = "";
+      let chunk = "";
+      stream.stream.data.on("data", (data) => {
+        const lines = data
+          ?.toString()
+          ?.split("\n")
+          .filter((line) => line.trim() !== "");
+
+        for (const line of lines) {
+          let validJSON = false;
+          const message = chunk + line.replace(/^data:/, "");
+          if (message !== "[DONE]") {
+            // JSON chunk is incomplete and has not ended yet
+            // so we need to stitch it together. You would think JSON
+            // chunks would only come complete - but they don't!
+            try {
+              JSON.parse(message);
+              validJSON = true;
+            } catch {
+              console.log("Failed to parse message", message);
+            }
+
+            if (!validJSON) {
+              // It can be possible that the chunk decoding is running away
+              // and the message chunk fails to append due to string length.
+              // In this case abort the chunk and reset so we can continue.
+              // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
+              try {
+                // `message` already includes the buffered `chunk`, so assign
+                // rather than append to avoid duplicating the buffer.
+                chunk = message;
+              } catch (e) {
+                console.error(`Chunk appending error`, e);
+                chunk = "";
+              }
+              continue;
+            } else {
+              chunk = "";
+            }
+          }
+
+          if (message == "[DONE]") {
+            writeResponseChunk(response, {
+              uuid,
+              sources,
+              type: "textResponseChunk",
+              textResponse: "",
+              close: true,
+              error: false,
+            });
+            resolve(fullText);
+          } else {
+            let error = null;
+            let finishReason = null;
+            let token = "";
+            try {
+              const json = JSON.parse(message);
+              error = json?.error || null;
+              token = json?.choices?.[0]?.delta?.content;
+              finishReason = json?.choices?.[0]?.finish_reason || null;
+            } catch {
+              continue;
+            }
+
+            if (!!error) {
+              writeResponseChunk(response, {
+                uuid,
+                sources: [],
+                type: "textResponseChunk",
+                textResponse: null,
+                close: true,
+                error,
+              });
+              resolve("");
+              return;
+            }
+
+            if (token) {
+              fullText += token;
+              writeResponseChunk(response, {
+                uuid,
+                sources: [],
+                type: "textResponseChunk",
+                textResponse: token,
+                close: false,
+                error: false,
+              });
+            }
+
+            if (finishReason !== null) {
+              writeResponseChunk(response, {
+                uuid,
+                sources,
+                type: "textResponseChunk",
+                textResponse: "",
+                close: true,
+                error: false,
+              });
+              resolve(fullText);
+            }
+          }
+        }
+      });
+    });
+  }
+
   // If stream is not a regular OpenAI Stream (like if using native model, Ollama, or most LangChain interfaces)
   // we can just iterate the stream content instead.
   if (!stream.hasOwnProperty("data")) {
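Editor's note: the stitching above exists because a streamed JSON payload can arrive split across network reads. A standalone sketch of the same technique, using two hypothetical SSE fragments:

// Sketch: two hypothetical reads that together form one "data:" event.
const reads = [
  'data: {"choices":[{"delta":{"content":"Hel',
  'lo"},"finish_reason":null}]}',
];
let chunk = "";
for (const line of reads) {
  const message = chunk + line.replace(/^data:/, "");
  try {
    console.log(JSON.parse(message).choices[0].delta.content); // "Hello"
    chunk = "";
  } catch {
    chunk = message; // incomplete JSON - buffer it and wait for the next read
  }
}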
@@ -64,6 +64,9 @@ function getLLMProvider(modelPreference = null) {
     case "native":
       const { NativeLLM } = require("../AiProviders/native");
       return new NativeLLM(embedder, modelPreference);
+    case "huggingface":
+      const { HuggingFaceLLM } = require("../AiProviders/huggingface");
+      return new HuggingFaceLLM(embedder, modelPreference);
     default:
       throw new Error("ENV: No LLM_PROVIDER value found in environment!");
   }
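Editor's note: with the new case in place, selecting the provider is just an environment switch. A usage sketch, assuming the rest of the helper (embedder selection) resolves as in the repo:

// Sketch: resolving the provider after setting LLM_PROVIDER in the env.
process.env.LLM_PROVIDER = "huggingface";
process.env.HUGGING_FACE_LLM_ENDPOINT =
  "https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud";
process.env.HUGGING_FACE_LLM_API_KEY = "hf_xxxxxx";
const llm = getLLMProvider(); // -> HuggingFaceLLM instance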
@@ -95,6 +95,7 @@ const KEY_MAPPING = {
     checks: [nonZero],
   },
 
+  // Mistral AI API Settings
   MistralApiKey: {
     envKey: "MISTRAL_API_KEY",
     checks: [isNotEmpty],
@@ -109,12 +110,25 @@ const KEY_MAPPING = {
     envKey: "NATIVE_LLM_MODEL_PREF",
     checks: [isDownloadedModel],
   },
 
   NativeLLMTokenLimit: {
     envKey: "NATIVE_LLM_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
 
+  // Hugging Face LLM Inference Settings
+  HuggingFaceLLMEndpoint: {
+    envKey: "HUGGING_FACE_LLM_ENDPOINT",
+    checks: [isNotEmpty, isValidURL, validHuggingFaceEndpoint],
+  },
+  HuggingFaceLLMAccessToken: {
+    envKey: "HUGGING_FACE_LLM_API_KEY",
+    checks: [isNotEmpty],
+  },
+  HuggingFaceLLMTokenLimit: {
+    envKey: "HUGGING_FACE_LLM_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
 
   EmbeddingEngine: {
     envKey: "EMBEDDING_ENGINE",
     checks: [supportedEmbeddingModel],
@@ -299,6 +313,7 @@ function supportedLLM(input = "") {
     "native",
     "togetherai",
     "mistral",
+    "huggingface",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }
@@ -396,6 +411,12 @@ function validDockerizedUrl(input = "") {
   return null;
 }
 
+function validHuggingFaceEndpoint(input = "") {
+  return input.slice(-6) !== ".cloud"
+    ? `Your HF Endpoint should end in ".cloud"`
+    : null;
+}
+
 // If the LLMProvider has changed we need to reset all workspace model preferences to
 // null since the provider<>model name combination will be invalid for whatever the new
 // provider is.
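Editor's note: for reference, how the new endpoint check behaves (a suffix test, not a full hostname validation):

// Per the check functions' convention, null means the value passed.
validHuggingFaceEndpoint("https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud"); // null
validHuggingFaceEndpoint("https://example.com"); // 'Your HF Endpoint should end in ".cloud"'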