Implement support for HuggingFace Inference Endpoints (#680)

Timothy Carambat 2024-02-06 09:17:51 -08:00 committed by GitHub
parent 1846a99b93
commit 2bc11d3f1a
12 changed files with 424 additions and 1 deletion

View File

@@ -48,6 +48,11 @@ GID='1000'
# MISTRAL_API_KEY='example-mistral-ai-api-key'
# MISTRAL_MODEL_PREF='mistral-tiny'
# LLM_PROVIDER='huggingface'
# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx
# HUGGING_FACE_LLM_TOKEN_LIMIT=8000
###########################################
######## Embedding API SELECTION ##########
###########################################

View File

@@ -0,0 +1,56 @@
export default function HuggingFaceOptions({ settings }) {
return (
<div className="w-full flex flex-col">
<div className="w-full flex items-center gap-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
HuggingFace Inference Endpoint
</label>
<input
type="url"
name="HuggingFaceLLMEndpoint"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="https://example.endpoints.huggingface.cloud"
defaultValue={settings?.HuggingFaceLLMEndpoint}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
HuggingFace Access Token
</label>
<input
type="password"
name="HuggingFaceLLMAccessToken"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="HuggingFace Access Token"
defaultValue={
settings?.HuggingFaceLLMAccessToken ? "*".repeat(20) : ""
}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Model Token Limit
</label>
<input
type="number"
name="HuggingFaceLLMTokenLimit"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="4096"
min={1}
onScroll={(e) => e.target.blur()}
defaultValue={settings?.HuggingFaceLLMTokenLimit}
required={true}
autoComplete="off"
/>
</div>
</div>
</div>
);
}

Binary file added: HuggingFace provider logo (PNG, 17 KiB; not shown).

View File

@@ -13,6 +13,7 @@ import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
import LocalAiLogo from "@/media/llmprovider/localai.png";
import TogetherAILogo from "@/media/llmprovider/togetherai.png";
import MistralLogo from "@/media/llmprovider/mistral.jpeg";
import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import PreLoader from "@/components/Preloader";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
@@ -24,6 +25,7 @@ import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";
import MistralOptions from "@/components/LLMSelection/MistralOptions";
import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import { MagnifyingGlass } from "@phosphor-icons/react";
@@ -107,6 +109,14 @@ export default function GeneralLLMPreference() {
options: <GeminiLLMOptions settings={settings} />,
description: "Google's largest and most capable AI model",
},
{
name: "HuggingFace",
value: "huggingface",
logo: HuggingFaceLogo,
options: <HuggingFaceOptions settings={settings} />,
description:
"Access 150,000+ open-source LLMs and the world's AI community",
},
{
name: "Ollama",
value: "ollama",

View File

@@ -10,6 +10,7 @@ import TogetherAILogo from "@/media/llmprovider/togetherai.png";
import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
import LocalAiLogo from "@/media/llmprovider/localai.png";
import MistralLogo from "@/media/llmprovider/mistral.jpeg";
import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import ZillizLogo from "@/media/vectordbs/zilliz.png";
import AstraDBLogo from "@/media/vectordbs/astraDB.png";
import ChromaLogo from "@/media/vectordbs/chroma.png";
@@ -101,6 +102,13 @@ const LLM_SELECTION_PRIVACY = {
],
logo: MistralLogo,
},
huggingface: {
name: "HuggingFace",
description: [
"Your prompts and document text used in response are sent to your HuggingFace managed endpoint",
],
logo: HuggingFaceLogo,
},
};
const VECTOR_DB_PRIVACY = {

View File

@@ -10,6 +10,7 @@ import LocalAiLogo from "@/media/llmprovider/localai.png";
import TogetherAILogo from "@/media/llmprovider/togetherai.png";
import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
import MistralLogo from "@/media/llmprovider/mistral.jpeg";
import HuggingFaceLogo from "@/media/llmprovider/huggingface.png";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions";
import AnthropicAiOptions from "@/components/LLMSelection/AnthropicAiOptions";
@@ -19,6 +20,7 @@ import NativeLLMOptions from "@/components/LLMSelection/NativeLLMOptions";
import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions";
import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions";
import MistralOptions from "@/components/LLMSelection/MistralOptions";
import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import System from "@/models/system";
import paths from "@/utils/paths";
@@ -82,6 +84,14 @@ export default function LLMPreference({
options: <GeminiLLMOptions settings={settings} />,
description: "Google's largest and most capable AI model",
},
{
name: "HuggingFace",
value: "huggingface",
logo: HuggingFaceLogo,
options: <HuggingFaceOptions settings={settings} />,
description:
"Access 150,000+ open-source LLMs and the world's AI community",
},
{
name: "Ollama",
value: "ollama",

View File

@@ -45,6 +45,11 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# MISTRAL_API_KEY='example-mistral-ai-api-key'
# MISTRAL_MODEL_PREF='mistral-tiny'
# LLM_PROVIDER='huggingface'
# HUGGING_FACE_LLM_ENDPOINT=https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud
# HUGGING_FACE_LLM_API_KEY=hf_xxxxxx
# HUGGING_FACE_LLM_TOKEN_LIMIT=8000
###########################################
######## Embedding API SELECTION ##########
###########################################

View File

@@ -194,6 +194,20 @@ const SystemSettings = {
AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
}
: {}),
...(llmProvider === "huggingface"
? {
HuggingFaceLLMEndpoint: process.env.HUGGING_FACE_LLM_ENDPOINT,
HuggingFaceLLMAccessToken: !!process.env.HUGGING_FACE_LLM_API_KEY,
HuggingFaceLLMTokenLimit: process.env.HUGGING_FACE_LLM_TOKEN_LIMIT,
// For embedding credentials when HuggingFace is selected.
OpenAiKey: !!process.env.OPEN_AI_KEY,
AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT,
AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
}
: {}),
};
},
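
Note: the huggingface branch above reports the access token only as a boolean, so the raw secret never reaches the frontend (which is why the options form masks it with a row of asterisks). A minimal sketch of the resulting settings fragment, assuming the placeholder values from the .env examples, illustrative only:

// Illustrative shape; values assume the .env placeholders, not real output.
{
  HuggingFaceLLMEndpoint: "https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud",
  HuggingFaceLLMTokenLimit: "8000", // env values arrive as strings
  HuggingFaceLLMAccessToken: true, // !!process.env.HUGGING_FACE_LLM_API_KEY
}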

View File

@@ -0,0 +1,185 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
class HuggingFaceLLM {
constructor(embedder = null, _modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.HUGGING_FACE_LLM_ENDPOINT)
throw new Error("No HuggingFace Inference Endpoint was set.");
if (!process.env.HUGGING_FACE_LLM_API_KEY)
throw new Error("No HuggingFace Access Token was set.");
const config = new Configuration({
basePath: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
});
this.openai = new OpenAIApi(config);
// When using the HF inference server, the model param is not required, so
// we can stub it here. HF Endpoints can only run one model at a time.
// We set it to "tgi" so the HF endpoint accepts the OpenAI message format.
this.model = "tgi";
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
if (!embedder)
console.warn(
"No embedding provider defined for HuggingFaceLLM - falling back to Native for embedding!"
);
this.embedder = !embedder ? new NativeEmbedder() : embedder;
this.defaultTemp = 0.2;
}
#appendContext(contextTexts = []) {
if (!contextTexts || !contextTexts.length) return "";
return (
"\nContext:\n" +
contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")
);
}
streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this;
}
promptWindowLimit() {
const limit = process.env.HUGGING_FACE_LLM_TOKEN_LIMIT || 4096;
if (!limit || isNaN(Number(limit)))
throw new Error("No HuggingFace token context limit was set.");
return Number(limit);
}
async isValidChatCompletionModel(_ = "") {
return true;
}
constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
}) {
// System prompts are not supported for HF model chats, so we emulate one below
const prompt = {
role: "user",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
};
const assistantResponse = {
role: "assistant",
content: "Okay, I will follow those instructions",
};
return [
prompt,
assistantResponse,
...chatHistory,
{ role: "user", content: userPrompt },
];
}
async isSafe(_input = "") {
// Not implemented so must be stubbed
return { safe: true, reasons: [] };
}
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const textResponse = await this.openai
.createChatCompletion({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
throw new Error("HuggingFace chat: No results!");
if (res.choices.length === 0)
throw new Error("HuggingFace chat: No results length!");
return res.choices[0].message.content;
})
.catch((error) => {
throw new Error(
`HuggingFace::createChatCompletion failed with: ${error.message}`
);
});
return textResponse;
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
return { type: "huggingFaceStream", stream: streamRequest };
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
const { data } = await this.openai.createChatCompletion({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
return { type: "huggingFaceStream", stream: streamRequest };
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);
return await messageArrayCompressor(this, messageArray, rawHistory);
}
}
module.exports = {
HuggingFaceLLM,
};
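
The class above is a thin wrapper that points the openai v3 SDK at the endpoint's OpenAI-compatible /v1 route. For reference, a minimal standalone sketch of the same call, assuming the placeholder endpoint and token from the .env examples (not real credentials):

// Minimal sketch of the request HuggingFaceLLM wraps (openai v3 SDK).
const { Configuration, OpenAIApi } = require("openai");

const client = new OpenAIApi(
  new Configuration({
    basePath: "https://uuid-here.us-east-1.aws.endpoints.huggingface.cloud/v1", // assumed placeholder
    apiKey: "hf_xxxxxx", // assumed placeholder token
  })
);

client
  .createChatCompletion({
    model: "tgi", // stub name; the endpoint runs a single model regardless
    messages: [{ role: "user", content: "Hello!" }],
    temperature: 0.2,
  })
  .then(({ data }) => console.log(data.choices[0].message.content))
  .catch((e) => console.error(e.message));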

View File

@@ -383,6 +383,112 @@ function handleStreamResponses(response, stream, responseProps) {
});
}
if (stream.type === "huggingFaceStream") {
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
stream.stream.data.on("data", (data) => {
const lines = data
?.toString()
?.split("\n")
.filter((line) => line.trim() !== "");
for (const line of lines) {
let validJSON = false;
const message = chunk + line.replace(/^data:/, "");
if (message !== "[DONE]") {
// JSON chunk is incomplete and has not ended yet
// so we need to stitch it together. You would think JSON
// chunks would only come complete - but they don't!
try {
JSON.parse(message);
validJSON = true;
} catch {
console.log("Failed to parse message", message);
}
if (!validJSON) {
// It can be possible that the chunk decoding is running away
// and the message chunk fails to append due to string length.
// In this case abort the chunk and reset so we can continue.
// ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
try {
chunk += message;
} catch (e) {
console.error(`Chunk appending error`, e);
chunk = "";
}
continue;
} else {
chunk = "";
}
}
if (message == "[DONE]") {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
resolve(fullText);
} else {
let error = null;
let finishReason = null;
let token = "";
try {
const json = JSON.parse(message);
error = json?.error || null;
token = json?.choices?.[0]?.delta?.content;
finishReason = json?.choices?.[0]?.finish_reason || null;
} catch {
continue;
}
if (!!error) {
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: null,
close: true,
error,
});
resolve("");
return;
}
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
if (finishReason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
resolve(fullText);
}
}
}
});
});
}
// If stream is not a regular OpenAI Stream (like if using native model, Ollama, or most LangChain interfaces)
// we can just iterate the stream content instead.
if (!stream.hasOwnProperty("data")) {
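
The huggingFaceStream branch above stitches partial chunks because the endpoint streams OpenAI-style server-sent events whose data: payloads can be split across network reads. Judging from the fields the parser consumes, each reassembled line is shaped roughly like the following (illustrative, not captured output):

data: {"choices":[{"delta":{"content":"Hel"},"finish_reason":null}]}
data: {"choices":[{"delta":{"content":"lo!"},"finish_reason":"stop"}]}
data: [DONE]

The chunk buffer is only reset once a line parses as valid JSON, which is why partial payloads are appended rather than dropped.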

View File

@@ -64,6 +64,9 @@ function getLLMProvider(modelPreference = null) {
case "native":
const { NativeLLM } = require("../AiProviders/native");
return new NativeLLM(embedder, modelPreference);
case "huggingface":
const { HuggingFaceLLM } = require("../AiProviders/huggingface");
return new HuggingFaceLLM(embedder, modelPreference);
default:
throw new Error("ENV: No LLM_PROVIDER value found in environment!");
}

View File

@@ -95,6 +95,7 @@ const KEY_MAPPING = {
checks: [nonZero],
},
// Mistral AI API Settings
MistralApiKey: {
envKey: "MISTRAL_API_KEY",
checks: [isNotEmpty],
@@ -109,12 +110,25 @@
envKey: "NATIVE_LLM_MODEL_PREF",
checks: [isDownloadedModel],
}, },
NativeLLMTokenLimit: {
envKey: "NATIVE_LLM_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
// Hugging Face LLM Inference Settings
HuggingFaceLLMEndpoint: {
envKey: "HUGGING_FACE_LLM_ENDPOINT",
checks: [isNotEmpty, isValidURL, validHuggingFaceEndpoint],
},
HuggingFaceLLMAccessToken: {
envKey: "HUGGING_FACE_LLM_API_KEY",
checks: [isNotEmpty],
},
HuggingFaceLLMTokenLimit: {
envKey: "HUGGING_FACE_LLM_TOKEN_LIMIT",
checks: [nonZero],
},
EmbeddingEngine: {
envKey: "EMBEDDING_ENGINE",
checks: [supportedEmbeddingModel],
@@ -299,6 +313,7 @@ function supportedLLM(input = "") {
"native",
"togetherai",
"mistral",
"huggingface",
].includes(input);
return validSelection ? null : `${input} is not a valid LLM provider.`;
}
@@ -396,6 +411,12 @@ function validDockerizedUrl(input = "") {
return null;
}
function validHuggingFaceEndpoint(input = "") {
return input.slice(-6) !== ".cloud"
? `Your HF Endpoint should end in ".cloud"`
: null;
}
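
A quick illustration of the new validator with hypothetical inputs:

// Hypothetical inputs; only the ".cloud" suffix is checked.
validHuggingFaceEndpoint("https://uuid.us-east-1.aws.endpoints.huggingface.cloud"); // null (valid)
validHuggingFaceEndpoint("https://api.example.com"); // 'Your HF Endpoint should end in ".cloud"'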
// If the LLMProvider has changed we need to reset all workspace model preferences to
// null since the provider<>model name combination will be invalid for whatever the new
// provider is.