Patch LMStudio Inference server bug integration (#957)

Timothy Carambat 2024-03-22 14:39:30 -07:00 committed by GitHub
parent 35a155d3ac
commit 1135853740
7 changed files with 120 additions and 3 deletions

View File

@@ -27,6 +27,7 @@ GID='1000'
# LLM_PROVIDER='lmstudio'
# LMSTUDIO_BASE_PATH='http://your-server:1234/v1'
# LMSTUDIO_MODEL_PREF='Loaded from Chat UI' # this is a bug in LMStudio 0.2.17
# LMSTUDIO_MODEL_TOKEN_LIMIT=4096
# LLM_PROVIDER='localai'
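
For reference, a filled-in version of these keys for a local LMStudio server; the base path and token limit values below are illustrative examples, not part of this commit:

LLM_PROVIDER='lmstudio'
LMSTUDIO_BASE_PATH='http://localhost:1234/v1'
LMSTUDIO_MODEL_PREF='Loaded from Chat UI'
LMSTUDIO_MODEL_TOKEN_LIMIT=4096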

View File

@@ -1,7 +1,14 @@
import { useEffect, useState } from "react";
import { Info } from "@phosphor-icons/react";
import paths from "@/utils/paths";
import System from "@/models/system";
export default function LMStudioOptions({ settings, showAlert = false }) {
const [basePathValue, setBasePathValue] = useState(
settings?.LMStudioBasePath
);
const [basePath, setBasePath] = useState(settings?.LMStudioBasePath);
return (
<div className="w-full flex flex-col">
{showAlert && (
@@ -35,8 +42,11 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
required={true}
autoComplete="off"
spellCheck={false}
onChange={(e) => setBasePathValue(e.target.value)}
onBlur={() => setBasePath(basePathValue)}
/>
</div>
<LMStudioModelSelection settings={settings} basePath={basePath} />
<div className="flex flex-col w-60"> <div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4"> <label className="text-white text-sm font-semibold block mb-4">
Token context window Token context window
@@ -57,3 +67,73 @@ export default function LMStudioOptions({ settings, showAlert = false }) {
</div>
);
}
function LMStudioModelSelection({ settings, basePath = null }) {
const [customModels, setCustomModels] = useState([]);
const [loading, setLoading] = useState(true);
useEffect(() => {
async function findCustomModels() {
if (!basePath || !basePath.includes("/v1")) {
setCustomModels([]);
setLoading(false);
return;
}
setLoading(true);
const { models } = await System.customModels("lmstudio", null, basePath);
setCustomModels(models || []);
setLoading(false);
}
findCustomModels();
}, [basePath]);
if (loading || customModels.length == 0) {
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Chat Model Selection
</label>
<select
name="LMStudioModelPref"
disabled={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
{basePath?.includes("/v1")
? "-- loading available models --"
: "-- waiting for URL --"}
</option>
</select>
</div>
);
}
return (
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Chat Model Selection
</label>
<select
name="LMStudioModelPref"
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{customModels.length > 0 && (
<optgroup label="Your loaded models">
{customModels.map((model) => {
return (
<option
key={model.id}
value={model.id}
selected={settings.LMStudioModelPref === model.id}
>
{model.id}
</option>
);
})}
</optgroup>
)}
</select>
</div>
);
}
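
The dropdown above relies on the frontend System.customModels helper resolving to an object with a models array of { id } entries. A minimal sketch of that contract, assuming a local server; the base path and model id below are placeholders, not values from this commit:

// Hedged sketch of the shape LMStudioModelSelection consumes; the ids come from whatever
// models the LMStudio server reports, and the literals here are placeholders.
const { models } = await System.customModels("lmstudio", null, "http://localhost:1234/v1");
// e.g. models -> [{ id: "some-loaded-model" }], and each id is rendered as an <option>.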

View File

@@ -24,6 +24,7 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# LLM_PROVIDER='lmstudio'
# LMSTUDIO_BASE_PATH='http://your-server:1234/v1'
# LMSTUDIO_MODEL_PREF='Loaded from Chat UI' # this is a bug in LMStudio 0.2.17
# LMSTUDIO_MODEL_TOKEN_LIMIT=4096
# LLM_PROVIDER='localai'

View File

@@ -137,6 +137,7 @@ const SystemSettings = {
? {
LMStudioBasePath: process.env.LMSTUDIO_BASE_PATH,
LMStudioTokenLimit: process.env.LMSTUDIO_MODEL_TOKEN_LIMIT,
LMStudioModelPref: process.env.LMSTUDIO_MODEL_PREF,
// For embedding credentials when lmstudio is selected.
OpenAiKey: !!process.env.OPEN_AI_KEY,

View File

@@ -12,9 +12,14 @@ class LMStudioLLM {
basePath: process.env.LMSTUDIO_BASE_PATH?.replace(/\/+$/, ""), // here is the URL to your LMStudio instance
});
this.lmstudio = new OpenAIApi(config);
- // When using LMStudios inference server - the model param is not required so
- // we can stub it here. LMStudio can only run one model at a time.
- this.model = "model-placeholder";
+ // Prior to LMStudio 0.2.17 the `model` param was not required and you could pass anything
+ // into that field and it would work. On 0.2.17 LMStudio introduced multi-model chat
+ // which now has a bug that reports the server model id as "Loaded from Chat UI"
+ // and any other value will crash inferencing. So until this is patched we will
+ // try to fetch the `/models` and have the user set it, or just fallback to "Loaded from Chat UI"
+ // which will not impact users with <v0.2.17 and should work as well once the bug is fixed.
+ this.model = process.env.LMSTUDIO_MODEL_PREF || "Loaded from Chat UI";
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
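
A hedged illustration of how the new model fallback behaves at construction time; the model name below is an example only:

// LMSTUDIO_MODEL_PREF unset            -> this.model === "Loaded from Chat UI"
//                                          (pre-0.2.17 servers accept any value here, per the comment above)
// LMSTUDIO_MODEL_PREF='my-local-model' -> this.model === "my-local-model"
//                                          (a model id fetched from /v1/models and saved via the settings UI)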

View File

@@ -10,6 +10,7 @@ const SUPPORT_CUSTOM_MODELS = [
"mistral",
"perplexity",
"openrouter",
"lmstudio",
];
async function getCustomModels(provider = "", apiKey = null, basePath = null) {
@@ -33,6 +34,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
return await getPerplexityModels();
case "openrouter":
return await getOpenRouterModels();
case "lmstudio":
return await getLMStudioModels(basePath);
default:
return { models: [], error: "Invalid provider for custom models" };
}
@@ -81,6 +84,28 @@ async function localAIModels(basePath = null, apiKey = null) {
return { models, error: null };
}
async function getLMStudioModels(basePath = null) {
try {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: basePath || process.env.LMSTUDIO_BASE_PATH,
});
const openai = new OpenAIApi(config);
const models = await openai
.listModels()
.then((res) => res.data.data)
.catch((e) => {
console.error(`LMStudio:listModels`, e.message);
return [];
});
return { models, error: null };
} catch (e) {
console.error(`LMStudio:getLMStudioModels`, e.message);
return { models: [], error: "Could not fetch LMStudio Models" };
}
}
async function ollamaAIModels(basePath = null) {
let url;
try {
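
A hedged sketch of how the new lmstudio branch might be exercised through getCustomModels; the base path is an example value:

// Hypothetical call into the branch added above; the URL is an example local server.
const { models, error } = await getCustomModels("lmstudio", null, "http://localhost:1234/v1");
// On success, models is whatever LMStudio's OpenAI-compatible /v1/models endpoint returns
// (an array of { id, ... } objects); if the request fails, models falls back to an empty array.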

View File

@@ -59,6 +59,10 @@ const KEY_MAPPING = {
envKey: "LMSTUDIO_BASE_PATH",
checks: [isNotEmpty, validLLMExternalBasePath, validDockerizedUrl],
},
LMStudioModelPref: {
envKey: "LMSTUDIO_MODEL_PREF",
checks: [],
},
LMStudioTokenLimit: {
envKey: "LMSTUDIO_MODEL_TOKEN_LIMIT",
checks: [nonZero],
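
For context, a hedged example of the frontend-to-env mapping this new entry enables, assuming updateENV applies KEY_MAPPING the same way as the neighboring entries; the value shown is illustrative:

// { LMStudioModelPref: "Loaded from Chat UI" } -> process.env.LMSTUDIO_MODEL_PREF = "Loaded from Chat UI"
// checks: [] means the value is written without any validation.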