Bump openai package to latest (#1234)

* Bump `openai` package to latest
Tested all providers except LocalAI

* Bump LocalAI support with latest image

* Add deprecation notice

* Linting
Timothy Carambat 2024-04-30 12:33:42 -07:00 committed by GitHub
parent 94017e2b51
commit 547d4859ef
26 changed files with 2315 additions and 2375 deletions
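
Every provider file below repeats the same migration: the v3 client built from `Configuration` + `OpenAIApi` (axios-based) is replaced by the single v4 `OpenAI` constructor. A minimal sketch of that change, using a placeholder env var and base URL rather than any provider-specific values from the diffs:

// Before (openai v3): a Configuration object wrapped by OpenAIApi.
//   const { Configuration, OpenAIApi } = require("openai");
//   const client = new OpenAIApi(new Configuration({ apiKey, basePath }));
// After (openai v4): one constructor call; note `basePath` becomes `baseURL`.
const { OpenAI } = require("openai");

const client = new OpenAI({
  apiKey: process.env.MY_PROVIDER_API_KEY, // placeholder env var for this sketch
  baseURL: "https://api.example.com/v1", // optional; defaults to api.openai.com
});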

View File

@ -1,8 +1,8 @@
{
"cSpell.words": [
"AIbitat",
"adoc",
"aibitat",
"AIbitat",
"anythingllm",
"Astra",
"Chartable",
@ -20,6 +20,7 @@
"mbox",
"Milvus",
"Mintplex",
"moderations",
"Ollama",
"openai",
"opendocument",

View File

@ -36,7 +36,7 @@
"multer": "^1.4.5-lts.1",
"node-html-parser": "^6.1.13",
"officeparser": "^4.0.5",
"openai": "^3.2.1",
"openai": "4.38.5",
"pdf-parse": "^1.1.1",
"puppeteer": "~21.5.2",
"slugify": "^1.6.6",

View File

@ -2,13 +2,12 @@ const fs = require("fs");
class OpenAiWhisper {
constructor({ options }) {
const { Configuration, OpenAIApi } = require("openai");
const { OpenAI: OpenAIApi } = require("openai");
if (!options.openAiKey) throw new Error("No OpenAI API key was set.");
const config = new Configuration({
this.openai = new OpenAIApi({
apiKey: options.openAiKey,
});
this.openai = new OpenAIApi(config);
this.model = "whisper-1";
this.temperature = 0;
this.#log("Initialized.");
@ -19,22 +18,30 @@ class OpenAiWhisper {
}
async processFile(fullFilePath) {
return await this.openai
.createTranscription(
fs.createReadStream(fullFilePath),
this.model,
undefined,
"text",
this.temperature
)
.then((res) => {
if (res.hasOwnProperty("data"))
return { content: res.data, error: null };
return { content: "", error: "No content was able to be transcribed." };
return await this.openai.audio.transcriptions
.create({
file: fs.createReadStream(fullFilePath),
model: this.model,
model: "whisper-1",
response_format: "text",
temperature: this.temperature,
})
.catch((e) => {
this.#log(`Could not get any response from openai whisper`, e.message);
return { content: "", error: e.message };
.then((response) => {
if (!response) {
return {
content: "",
error: "No content was able to be transcribed.",
};
}
return { content: response, error: null };
})
.catch((error) => {
this.#log(
`Could not get any response from openai whisper`,
error.message
);
return { content: "", error: error.message };
});
}
}
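
The v3 positional call `createTranscription(file, model, prompt, responseFormat, temperature)` becomes a single options object on `audio.transcriptions.create`, and because the request asks for `response_format: "text"` the new code treats the resolved value as the transcript itself rather than an axios envelope, which is why it checks `response` directly instead of `res.data`. A self-contained sketch of that call (the helper name and file path are illustrative only):

const fs = require("fs");
const { OpenAI } = require("openai");

async function transcribe(filePath) {
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  // With response_format "text" the v4 SDK resolves to the transcript text,
  // so it can be returned as-is.
  const text = await client.audio.transcriptions.create({
    file: fs.createReadStream(filePath),
    model: "whisper-1",
    response_format: "text",
    temperature: 0,
  });
  return text;
}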

File diff suppressed because it is too large.

View File

@ -64,7 +64,7 @@ GID='1000'
# LLM_PROVIDER='groq'
# GROQ_API_KEY=gsk_abcxyz
# GROQ_MODEL_PREF=llama2-70b-4096
# GROQ_MODEL_PREF=llama3-8b-8192
# LLM_PROVIDER='generic-openai'
# GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'

View File

@ -24,12 +24,11 @@ export default function GroqAiOptions({ settings }) {
</label>
<select
name="GroqModelPref"
defaultValue={settings?.GroqModelPref || "llama2-70b-4096"}
defaultValue={settings?.GroqModelPref || "llama3-8b-8192"}
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{[
"llama2-70b-4096",
"mixtral-8x7b-32768",
"llama3-8b-8192",
"llama3-70b-8192",

View File

@ -86,7 +86,7 @@ function TogetherAiModelSelection({ settings }) {
<option
key={model.id}
value={model.id}
selected={settings?.OpenRouterModelPref === model.id}
selected={settings?.TogetherAiModelPref === model.id}
>
{model.name}
</option>

View File

@ -20,7 +20,6 @@ const PROVIDER_DEFAULT_MODELS = {
ollama: [],
togetherai: [],
groq: [
"llama2-70b-4096",
"mixtral-8x7b-32768",
"llama3-8b-8192",
"llama3-70b-8192",

View File

@ -61,7 +61,7 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# LLM_PROVIDER='groq'
# GROQ_API_KEY=gsk_abcxyz
# GROQ_MODEL_PREF=llama2-70b-4096
# GROQ_MODEL_PREF=llama3-8b-8192
# LLM_PROVIDER='generic-openai'
# GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'

View File

@ -57,8 +57,7 @@
"multer": "^1.4.5-lts.1",
"node-html-markdown": "^1.3.0",
"node-llama-cpp": "^2.8.0",
"openai": "^3.2.1",
"openai-latest": "npm:openai@latest",
"openai": "4.38.5",
"pinecone-client": "^1.1.0",
"pluralize": "^8.0.0",
"posthog-node": "^3.1.1",

View File

@ -1,21 +1,22 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class GenericOpenAiLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
const { OpenAI: OpenAIApi } = require("openai");
if (!process.env.GENERIC_OPEN_AI_BASE_PATH)
throw new Error(
"GenericOpenAI must have a valid base path to use for the api."
);
this.basePath = process.env.GENERIC_OPEN_AI_BASE_PATH;
const config = new Configuration({
basePath: this.basePath,
this.openai = new OpenAIApi({
baseURL: this.basePath,
apiKey: process.env.GENERIC_OPEN_AI_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference ?? process.env.GENERIC_OPEN_AI_MODEL_PREF ?? null;
if (!this.model)
@ -89,8 +90,8 @@ class GenericOpenAiLLM {
}
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -103,13 +104,12 @@ class GenericOpenAiLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("GenericOpenAI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("GenericOpenAI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -121,29 +121,26 @@ class GenericOpenAiLLM {
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -152,25 +149,23 @@ class GenericOpenAiLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
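
The change in response shape here applies to every chat call in this commit: v3's axios response nested the payload under `.data` (hence the old `json.data` indirection), while v4's `chat.completions.create` resolves directly to the parsed completion, so `result.choices` is read off the return value. A minimal sketch under assumed placeholder env vars and model id, not the project's real configuration:

const { OpenAI } = require("openai");

async function completeOnce(messages) {
  const client = new OpenAI({
    baseURL: process.env.EXAMPLE_BASE_URL, // placeholder for a custom endpoint
    apiKey: process.env.EXAMPLE_API_KEY ?? null, // mirrors the `?? null` used above
  });
  const result = await client.chat.completions.create({
    model: "my-model", // placeholder model id
    temperature: 0.7,
    messages, // [{ role: "user", content: "..." }, ...]
  });
  // v4 returns the parsed body directly; there is no `.data` wrapper as in v3.
  return result.choices?.[0]?.message?.content ?? null;
}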

View File

@ -1,20 +1,20 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class GroqLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
const { OpenAI: OpenAIApi } = require("openai");
if (!process.env.GROQ_API_KEY) throw new Error("No Groq API key was set.");
const config = new Configuration({
basePath: "https://api.groq.com/openai/v1",
this.openai = new OpenAIApi({
baseURL: "https://api.groq.com/openai/v1",
apiKey: process.env.GROQ_API_KEY,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.GROQ_MODEL_PREF || "llama2-70b-4096";
modelPreference || process.env.GROQ_MODEL_PREF || "llama3-8b-8192";
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
@ -40,10 +40,9 @@ class GroqLLM {
streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this;
}
promptWindowLimit() {
switch (this.model) {
case "llama2-70b-4096":
return 4096;
case "mixtral-8x7b-32768":
return 32_768;
case "llama3-8b-8192":
@ -53,13 +52,12 @@ class GroqLLM {
case "gemma-7b-it":
return 8192;
default:
return 4096;
return 8192;
}
}
async isValidChatCompletionModel(modelName = "") {
const validModels = [
"llama2-70b-4096",
"mixtral-8x7b-32768",
"llama3-8b-8192",
"llama3-70b-8192",
@ -68,9 +66,9 @@ class GroqLLM {
const isPreset = validModels.some((model) => modelName === model);
if (isPreset) return true;
const model = await this.openai
.retrieveModel(modelName)
.then((res) => res.data)
const model = await this.openai.models
.retrieve(modelName)
.then((modelObj) => modelObj)
.catch(() => null);
return !!model;
}
@ -99,8 +97,8 @@ class GroqLLM {
`Groq chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -113,13 +111,12 @@ class GroqLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("GroqAI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("GroqAI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -136,23 +133,20 @@ class GroqLLM {
`GroqAI:streamChat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -162,8 +156,8 @@ class GroqLLM {
`GroqAI:chatCompletion: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -172,8 +166,9 @@ class GroqLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -182,20 +177,17 @@ class GroqLLM {
`GroqAI:streamChatCompletion: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
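
Besides the chat calls, the model lookup changes from v3's `retrieveModel(name)` (result under `res.data`) to v4's `models.retrieve(name)`, which resolves to the model object itself, so the `.then((modelObj) => modelObj)` step above is effectively a pass-through. A small sketch of that existence check against the Groq base URL used in the diff (the function name is illustrative):

const { OpenAI } = require("openai");

async function modelExists(modelName) {
  const client = new OpenAI({
    baseURL: "https://api.groq.com/openai/v1",
    apiKey: process.env.GROQ_API_KEY,
  });
  const model = await client.models
    .retrieve(modelName) // v4: resolves to the Model object directly
    .catch(() => null); // treat an unknown-model error as "not valid"
  return !!model;
}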

View File

@ -2,23 +2,21 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
const {
writeResponseChunk,
clientAbortedHandler,
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class HuggingFaceLLM {
constructor(embedder = null, _modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.HUGGING_FACE_LLM_ENDPOINT)
throw new Error("No HuggingFace Inference Endpoint was set.");
if (!process.env.HUGGING_FACE_LLM_API_KEY)
throw new Error("No HuggingFace Access Token was set.");
const { OpenAI: OpenAIApi } = require("openai");
const config = new Configuration({
basePath: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
this.openai = new OpenAIApi({
baseURL: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
});
this.openai = new OpenAIApi(config);
// When using HF inference server - the model param is not required so
// we can stub it here. HF Endpoints can only run one model at a time.
// We set to 'tgi' so that endpoint for HF can accept message format
@ -93,8 +91,8 @@ class HuggingFaceLLM {
}
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -107,13 +105,12 @@ class HuggingFaceLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("HuggingFace chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("HuggingFace chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -125,167 +122,47 @@ class HuggingFaceLLM {
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
const { data } = await this.openai.createChatCompletion({
const result = await this.openai.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish listener to early-abort a streaming response
// in case things go sideways or the user does not like the response.
// We preserve the generated text but continue as if chat was completed
// to preserve previously generated content.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
?.split("\n")
.filter((line) => line.trim() !== "");
for (const line of lines) {
let validJSON = false;
const message = chunk + line.replace(/^data:/, "");
if (message !== "[DONE]") {
// JSON chunk is incomplete and has not ended yet
// so we need to stitch it together. You would think JSON
// chunks would only come complete - but they don't!
try {
JSON.parse(message);
validJSON = true;
} catch {
console.log("Failed to parse message", message);
}
if (!validJSON) {
// It can be possible that the chunk decoding is running away
// and the message chunk fails to append due to string length.
// In this case abort the chunk and reset so we can continue.
// ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
try {
chunk += message;
} catch (e) {
console.error(`Chunk appending error`, e);
chunk = "";
}
continue;
} else {
chunk = "";
}
}
if (message == "[DONE]") {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let error = null;
let finishReason = null;
let token = "";
try {
const json = JSON.parse(message);
error = json?.error || null;
token = json?.choices?.[0]?.delta?.content;
finishReason = json?.choices?.[0]?.finish_reason || null;
} catch {
continue;
}
if (!!error) {
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: null,
close: true,
error,
});
response.removeListener("close", handleAbort);
resolve("");
return;
}
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
if (finishReason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
}
});
});
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
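
The large block deleted here is the hand-rolled SSE handling (splitting `data:` lines and stitching partial JSON chunks); `handleDefaultStreamResponseV2` can replace it because a v4 `chat.completions.create({ stream: true })` call resolves to an async iterable of already-parsed chunks. A sketch of that consumption pattern, with a generic model and prompt as placeholders:

const { OpenAI } = require("openai");

async function streamToStdout() {
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  const stream = await client.chat.completions.create({
    model: "gpt-3.5-turbo",
    stream: true,
    messages: [{ role: "user", content: "Say hello." }],
  });
  let fullText = "";
  for await (const chunk of stream) {
    // Each chunk arrives parsed; no "data:" line splitting or JSON stitching.
    const token = chunk.choices?.[0]?.delta?.content;
    if (token) {
      fullText += token;
      process.stdout.write(token);
    }
    if (chunk.choices?.[0]?.finish_reason) break;
  }
  return fullText;
}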

View File

@ -1,5 +1,7 @@
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
// hybrid of openAi LLM chat completion for LMStudio
class LMStudioLLM {
@ -7,11 +9,11 @@ class LMStudioLLM {
if (!process.env.LMSTUDIO_BASE_PATH)
throw new Error("No LMStudio API Base Path was set.");
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: process.env.LMSTUDIO_BASE_PATH?.replace(/\/+$/, ""), // here is the URL to your LMStudio instance
const { OpenAI: OpenAIApi } = require("openai");
this.lmstudio = new OpenAIApi({
baseURL: process.env.LMSTUDIO_BASE_PATH?.replace(/\/+$/, ""), // here is the URL to your LMStudio instance
apiKey: null,
});
this.lmstudio = new OpenAIApi(config);
// Prior to LMStudio 0.2.17 the `model` param was not required and you could pass anything
// into that field and it would work. On 0.2.17 LMStudio introduced multi-model chat
@ -89,8 +91,8 @@ class LMStudioLLM {
`LMStudio chat: ${this.model} is not valid or defined for chat completion!`
);
const textResponse = await this.lmstudio
.createChatCompletion({
const textResponse = await this.lmstudio.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -103,13 +105,12 @@ class LMStudioLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("LMStudio chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("LMStudio chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -126,23 +127,20 @@ class LMStudioLLM {
`LMStudio chat: ${this.model} is not valid or defined for chat completion!`
);
const streamRequest = await this.lmstudio.createChatCompletion(
{
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
stream: true,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.lmstudio.chat.completions.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
stream: true,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -152,14 +150,15 @@ class LMStudioLLM {
`LMStudio chat: ${this.model} is not valid or defined model for chat completion!`
);
const { data } = await this.lmstudio.createChatCompletion({
const result = await this.lmstudio.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -168,20 +167,17 @@ class LMStudioLLM {
`LMStudio chat: ${this.model} is not valid or defined model for chat completion!`
);
const streamRequest = await this.lmstudio.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.lmstudio.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations

View File

@ -1,21 +1,18 @@
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class LocalAiLLM {
constructor(embedder = null, modelPreference = null) {
if (!process.env.LOCAL_AI_BASE_PATH)
throw new Error("No LocalAI Base Path was set.");
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: process.env.LOCAL_AI_BASE_PATH,
...(!!process.env.LOCAL_AI_API_KEY
? {
apiKey: process.env.LOCAL_AI_API_KEY,
}
: {}),
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: process.env.LOCAL_AI_BASE_PATH,
apiKey: process.env.LOCAL_AI_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model = modelPreference || process.env.LOCAL_AI_MODEL_PREF;
this.limits = {
history: this.promptWindowLimit() * 0.15,
@ -84,8 +81,8 @@ class LocalAiLLM {
`LocalAI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -98,13 +95,12 @@ class LocalAiLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("LocalAI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("LocalAI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -121,23 +117,20 @@ class LocalAiLLM {
`LocalAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -147,14 +140,15 @@ class LocalAiLLM {
`LocalAI chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai.createChatCompletion({
const result = await this.openai.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -163,20 +157,17 @@ class LocalAiLLM {
`LocalAi chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations

View File

@ -1,17 +1,18 @@
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class MistralLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.MISTRAL_API_KEY)
throw new Error("No Mistral API key was set.");
const config = new Configuration({
basePath: "https://api.mistral.ai/v1",
apiKey: process.env.MISTRAL_API_KEY,
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: "https://api.mistral.ai/v1",
apiKey: process.env.MISTRAL_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.MISTRAL_MODEL_PREF || "mistral-tiny";
this.limits = {
@ -75,8 +76,8 @@ class MistralLLM {
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
messages: await this.compressMessages(
@ -88,13 +89,12 @@ class MistralLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Mistral chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("Mistral chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -111,22 +111,19 @@ class MistralLLM {
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -137,14 +134,15 @@ class MistralLLM {
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai.createChatCompletion({
const result = await this.openai.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -153,20 +151,17 @@ class MistralLLM {
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
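
One thing the commit leaves untouched is the catch blocks that read `e.response.data.error.message`, which is the v3/axios error shape; the v4 SDK instead throws `OpenAI.APIError` instances that carry `status` and `message` on the error itself. A hedged sketch of what v4-style error handling could look like (this is not code from the commit):

const { OpenAI } = require("openai");

async function safeCompletion(client, payload) {
  try {
    return await client.chat.completions.create(payload);
  } catch (e) {
    if (e instanceof OpenAI.APIError) {
      // v4 API failures expose .status and .message directly.
      console.error(`API error ${e.status}: ${e.message}`);
    } else {
      console.error(`Unexpected error: ${e.message}`);
    }
    return null;
  }
}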

View File

@ -1,16 +1,17 @@
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class OpenAiLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
const config = new Configuration({
this.openai = new OpenAIApi({
apiKey: process.env.OPEN_AI_KEY,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
this.limits = {
@ -70,9 +71,9 @@ class OpenAiLLM {
const isPreset = modelName.toLowerCase().includes("gpt");
if (isPreset) return true;
const model = await this.openai
.retrieveModel(modelName)
.then((res) => res.data)
const model = await this.openai.models
.retrieve(modelName)
.then((modelObj) => modelObj)
.catch(() => null);
return !!model;
}
@ -91,10 +92,9 @@ class OpenAiLLM {
}
async isSafe(input = "") {
const { flagged = false, categories = {} } = await this.openai
.createModeration({ input })
.then((json) => {
const res = json.data;
const { flagged = false, categories = {} } = await this.openai.moderations
.create({ input })
.then((res) => {
if (!res.hasOwnProperty("results"))
throw new Error("OpenAI moderation: No results!");
if (res.results.length === 0)
@ -128,8 +128,8 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -142,13 +142,12 @@ class OpenAiLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("OpenAI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("OpenAI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -165,23 +164,20 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -191,8 +187,8 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -201,8 +197,9 @@ class OpenAiLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -211,20 +208,17 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
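
The moderation path changes the same way as chat: v3's `createModeration({ input })` nested results under `res.data.results`, while v4's `moderations.create({ input })` resolves to an object with `results` at the top level, which is what the reworked `isSafe` reads. A minimal standalone sketch (the function name is illustrative):

const { OpenAI } = require("openai");

async function isFlagged(input) {
  const client = new OpenAI({ apiKey: process.env.OPEN_AI_KEY });
  const res = await client.moderations.create({ input });
  // v4 resolves to { id, model, results: [...] } directly, no `.data` wrapper.
  const { flagged = false } = res.results?.[0] ?? {};
  return flagged;
}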

View File

@ -4,6 +4,7 @@ const { v4: uuidv4 } = require("uuid");
const {
writeResponseChunk,
clientAbortedHandler,
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
const fs = require("fs");
const path = require("path");
@ -16,22 +17,19 @@ const cacheFolder = path.resolve(
class OpenRouterLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.OPENROUTER_API_KEY)
throw new Error("No OpenRouter API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
this.basePath = "https://openrouter.ai/api/v1";
const config = new Configuration({
basePath: this.basePath,
apiKey: process.env.OPENROUTER_API_KEY,
baseOptions: {
headers: {
"HTTP-Referer": "https://useanything.com",
"X-Title": "AnythingLLM",
},
this.openai = new OpenAIApi({
baseURL: this.basePath,
apiKey: process.env.OPENROUTER_API_KEY ?? null,
defaultHeaders: {
"HTTP-Referer": "https://useanything.com",
"X-Title": "AnythingLLM",
},
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.OPENROUTER_MODEL_PREF || "openrouter/auto";
this.limits = {
@ -139,8 +137,8 @@ class OpenRouterLLM {
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -153,13 +151,12 @@ class OpenRouterLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("OpenRouter chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("OpenRouter chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -176,23 +173,20 @@ class OpenRouterLLM {
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -202,8 +196,8 @@ class OpenRouterLLM {
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -212,8 +206,9 @@ class OpenRouterLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -222,15 +217,12 @@ class OpenRouterLLM {
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
@ -238,9 +230,8 @@ class OpenRouterLLM {
const timeoutThresholdMs = 500;
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise((resolve) => {
return new Promise(async (resolve) => {
let fullText = "";
let chunk = "";
let lastChunkTime = null; // null when first token is still not received.
// Establish listener to early-abort a streaming response
@ -280,97 +271,176 @@ class OpenRouterLLM {
}
}, 500);
stream.data.on("data", (data) => {
const lines = data
?.toString()
?.split("\n")
.filter((line) => line.trim() !== "");
for await (const chunk of stream) {
const message = chunk?.choices?.[0];
const token = message?.delta?.content;
lastChunkTime = Number(new Date());
for (const line of lines) {
let validJSON = false;
const message = chunk + line.replace(/^data: /, "");
// JSON chunk is incomplete and has not ended yet
// so we need to stitch it together. You would think JSON
// chunks would only come complete - but they don't!
try {
JSON.parse(message);
validJSON = true;
} catch {}
if (!validJSON) {
// It can be possible that the chunk decoding is running away
// and the message chunk fails to append due to string length.
// In this case abort the chunk and reset so we can continue.
// ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
try {
chunk += message;
} catch (e) {
console.error(`Chunk appending error`, e);
chunk = "";
}
continue;
} else {
chunk = "";
}
if (message == "[DONE]") {
lastChunkTime = Number(new Date());
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
let token = "";
try {
const json = JSON.parse(message);
token = json?.choices?.[0]?.delta?.content;
finishReason = json?.choices?.[0]?.finish_reason || null;
} catch {
continue;
}
if (token) {
fullText += token;
lastChunkTime = Number(new Date());
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
if (finishReason !== null) {
lastChunkTime = Number(new Date());
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
});
if (message.finish_reason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
});
}
// handleStream(response, stream, responseProps) {
// const timeoutThresholdMs = 500;
// const { uuid = uuidv4(), sources = [] } = responseProps;
// return new Promise((resolve) => {
// let fullText = "";
// let chunk = "";
// let lastChunkTime = null; // null when first token is still not received.
// // Establish listener to early-abort a streaming response
// // in case things go sideways or the user does not like the response.
// // We preserve the generated text but continue as if chat was completed
// // to preserve previously generated content.
// const handleAbort = () => clientAbortedHandler(resolve, fullText);
// response.on("close", handleAbort);
// // NOTICE: Not all OpenRouter models will return a stop reason
// // which keeps the connection open and so the model never finalizes the stream
// // like the traditional OpenAI response schema does. So in the case the response stream
// // never reaches a formal close state we maintain an interval timer that if we go >=timeoutThresholdMs with
// // no new chunks then we kill the stream and assume it to be complete. OpenRouter is quite fast
// // so this threshold should permit most responses, but we can adjust `timeoutThresholdMs` if
// // we find it is too aggressive.
// const timeoutCheck = setInterval(() => {
// if (lastChunkTime === null) return;
// const now = Number(new Date());
// const diffMs = now - lastChunkTime;
// if (diffMs >= timeoutThresholdMs) {
// console.log(
// `OpenRouter stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.`
// );
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// }
// }, 500);
// stream.data.on("data", (data) => {
// const lines = data
// ?.toString()
// ?.split("\n")
// .filter((line) => line.trim() !== "");
// for (const line of lines) {
// let validJSON = false;
// const message = chunk + line.replace(/^data: /, "");
// // JSON chunk is incomplete and has not ended yet
// // so we need to stitch it together. You would think JSON
// // chunks would only come complete - but they don't!
// try {
// JSON.parse(message);
// validJSON = true;
// } catch { }
// if (!validJSON) {
// // It can be possible that the chunk decoding is running away
// // and the message chunk fails to append due to string length.
// // In this case abort the chunk and reset so we can continue.
// // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
// try {
// chunk += message;
// } catch (e) {
// console.error(`Chunk appending error`, e);
// chunk = "";
// }
// continue;
// } else {
// chunk = "";
// }
// if (message == "[DONE]") {
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// } else {
// let finishReason = null;
// let token = "";
// try {
// const json = JSON.parse(message);
// token = json?.choices?.[0]?.delta?.content;
// finishReason = json?.choices?.[0]?.finish_reason || null;
// } catch {
// continue;
// }
// if (token) {
// fullText += token;
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources: [],
// type: "textResponseChunk",
// textResponse: token,
// close: false,
// error: false,
// });
// }
// if (finishReason !== null) {
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// }
// }
// }
// });
// });
// }
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
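
For OpenRouter the extra routing headers move from v3's `baseOptions.headers` (an axios request config) to the v4 client's `defaultHeaders` option, as the constructor hunk above shows. A sketch of just that constructor, reusing the URL and header values from the diff:

const { OpenAI } = require("openai");

const openrouter = new OpenAI({
  baseURL: "https://openrouter.ai/api/v1",
  apiKey: process.env.OPENROUTER_API_KEY ?? null,
  defaultHeaders: {
    "HTTP-Referer": "https://useanything.com", // identifies the calling app to OpenRouter
    "X-Title": "AnythingLLM",
  },
});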

View File

@ -1,6 +1,8 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
function perplexityModels() {
const { MODELS } = require("./models.js");
@ -9,17 +11,18 @@ function perplexityModels() {
class PerplexityLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.PERPLEXITY_API_KEY)
throw new Error("No Perplexity API key was set.");
const config = new Configuration({
basePath: "https://api.perplexity.ai",
apiKey: process.env.PERPLEXITY_API_KEY,
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: "https://api.perplexity.ai",
apiKey: process.env.PERPLEXITY_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.PERPLEXITY_MODEL_PREF || "pplx-7b-online"; // Give at least a unique model to the provider as last fallback.
modelPreference ||
process.env.PERPLEXITY_MODEL_PREF ||
"sonar-small-online"; // Give at least a unique model to the provider as last fallback.
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
@ -84,8 +87,8 @@ class PerplexityLLM {
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -98,13 +101,12 @@ class PerplexityLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Perplexity chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("Perplexity chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -121,23 +123,20 @@ class PerplexityLLM {
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -147,8 +146,8 @@ class PerplexityLLM {
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -157,8 +156,9 @@ class PerplexityLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -167,20 +167,17 @@ class PerplexityLLM {
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations

View File

@ -1,7 +1,6 @@
const { chatPrompt } = require("../../chats");
const {
writeResponseChunk,
clientAbortedHandler,
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
function togetherAiModels() {
@ -11,15 +10,13 @@ function togetherAiModels() {
class TogetherAiLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.TOGETHER_AI_API_KEY)
throw new Error("No TogetherAI API key was set.");
const config = new Configuration({
basePath: "https://api.together.xyz/v1",
apiKey: process.env.TOGETHER_AI_API_KEY,
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: "https://api.together.xyz/v1",
apiKey: process.env.TOGETHER_AI_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model = modelPreference || process.env.TOGETHER_AI_MODEL_PREF;
this.limits = {
history: this.promptWindowLimit() * 0.15,
@ -91,8 +88,8 @@ class TogetherAiLLM {
`Together AI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -105,13 +102,12 @@ class TogetherAiLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Together AI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("Together AI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -128,23 +124,20 @@ class TogetherAiLLM {
`TogetherAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -154,14 +147,15 @@ class TogetherAiLLM {
`TogetherAI chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai.createChatCompletion({
const result = await this.openai.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -170,118 +164,17 @@ class TogetherAiLLM {
`TogetherAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish listener to early-abort a streaming response
// in case things go sideways or the user does not like the response.
// We preserve the generated text but continue as if chat was completed
// to preserve previously generated content.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
?.split("\n")
.filter((line) => line.trim() !== "");
for (const line of lines) {
let validJSON = false;
const message = chunk + line.replace(/^data: /, "");
if (message !== "[DONE]") {
// JSON chunk is incomplete and has not ended yet
// so we need to stitch it together. You would think JSON
// chunks would only come complete - but they don't!
try {
JSON.parse(message);
validJSON = true;
} catch {}
if (!validJSON) {
// It can be possible that the chunk decoding is running away
// and the message chunk fails to append due to string length.
// In this case abort the chunk and reset so we can continue.
// ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
try {
chunk += message;
} catch (e) {
console.error(`Chunk appending error`, e);
chunk = "";
}
continue;
} else {
chunk = "";
}
}
if (message == "[DONE]") {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
let token = "";
try {
const json = JSON.parse(message);
token = json?.choices?.[0]?.delta?.content;
finishReason = json?.choices?.[0]?.finish_reason || null;
} catch {
continue;
}
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
if (finishReason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
}
});
});
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations

View File

@ -2,20 +2,16 @@ const { toChunks, maximumChunkLength } = require("../../helpers");
class LocalAiEmbedder {
constructor() {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.EMBEDDING_BASE_PATH)
throw new Error("No embedding base path was set.");
if (!process.env.EMBEDDING_MODEL_PREF)
throw new Error("No embedding model was set.");
const config = new Configuration({
basePath: process.env.EMBEDDING_BASE_PATH,
...(!!process.env.LOCAL_AI_API_KEY
? {
apiKey: process.env.LOCAL_AI_API_KEY,
}
: {}),
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: process.env.EMBEDDING_BASE_PATH,
apiKey: process.env.LOCAL_AI_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
// Limit of how many strings we can process in a single pass to stay with resource or network limits
this.maxConcurrentChunks = 50;
@ -34,13 +30,13 @@ class LocalAiEmbedder {
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
embeddingRequests.push(
new Promise((resolve) => {
this.openai
.createEmbedding({
this.openai.embeddings
.create({
model: process.env.EMBEDDING_MODEL_PREF,
input: chunk,
})
.then((res) => {
resolve({ data: res.data?.data, error: null });
.then((result) => {
resolve({ data: result?.data, error: null });
})
.catch((e) => {
e.type =

View File

@ -2,13 +2,11 @@ const { toChunks } = require("../../helpers");
class OpenAiEmbedder {
constructor() {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set.");
const config = new Configuration({
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
apiKey: process.env.OPEN_AI_KEY,
});
const openai = new OpenAIApi(config);
this.openai = openai;
this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002";
// Limit of how many strings we can process in a single pass to stay with resource or network limits
@ -33,13 +31,13 @@ class OpenAiEmbedder {
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
embeddingRequests.push(
new Promise((resolve) => {
this.openai
.createEmbedding({
this.openai.embeddings
.create({
model: this.model,
input: chunk,
})
.then((res) => {
resolve({ data: res.data?.data, error: null });
.then((result) => {
resolve({ data: result?.data, error: null });
})
.catch((e) => {
e.type =
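
The embedders follow the same pattern as the LLM classes: v3's `createEmbedding` put the vectors at `res.data.data`, while v4's `embeddings.create` resolves to an object whose `data` array holds them, so the new code passes `result.data` straight through. A compact sketch using the default model id from the diff:

const { OpenAI } = require("openai");

async function embedChunks(textChunks) {
  const client = new OpenAI({ apiKey: process.env.OPEN_AI_KEY });
  const result = await client.embeddings.create({
    model: "text-embedding-ada-002",
    input: textChunks, // a string or an array of strings
  });
  // v4: the embeddings live at result.data (v3 nested them at res.data.data).
  return result.data.map((d) => d.embedding);
}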

View File

@ -1,4 +1,4 @@
const OpenAI = require("openai-latest");
const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const { RetryError } = require("../error.js");

View File

@ -9,6 +9,53 @@ function clientAbortedHandler(resolve, fullText) {
return;
}
function handleDefaultStreamResponseV2(response, stream, responseProps) {
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise(async (resolve) => {
let fullText = "";
// Establish listener to early-abort a streaming response
// in case things go sideways or the user does not like the response.
// We preserve the generated text but continue as if chat was completed
// to preserve previously generated content.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const chunk of stream) {
const message = chunk?.choices?.[0];
const token = message?.delta?.content;
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
// LocalAi returns '' and others return null.
if (message.finish_reason !== "" && message.finish_reason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
});
}
// TODO: Fully remove - deprecated.
// The default way to handle a stream response. Functions best with OpenAI.
// Currently used for LMStudio, LocalAI, Mistral API, and OpenAI
function handleDefaultStreamResponse(response, stream, responseProps) {
@ -156,6 +203,7 @@ function writeResponseChunk(response, data) {
}
module.exports = {
handleDefaultStreamResponseV2,
handleDefaultStreamResponse,
convertToChatHistory,
convertToPromptHistory,

View File

@ -45,14 +45,13 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
}
async function openAiModels(apiKey = null) {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
apiKey: apiKey || process.env.OPEN_AI_KEY,
});
const openai = new OpenAIApi(config);
const allModels = await openai
.listModels()
.then((res) => res.data.data)
const allModels = await openai.models
.list()
.then((results) => results.data)
.catch((e) => {
console.error(`OpenAI:listModels`, e.message);
return [
@ -132,15 +131,14 @@ async function openAiModels(apiKey = null) {
}
async function localAIModels(basePath = null, apiKey = null) {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: basePath || process.env.LOCAL_AI_BASE_PATH,
apiKey: apiKey || process.env.LOCAL_AI_API_KEY,
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
baseURL: basePath || process.env.LOCAL_AI_BASE_PATH,
apiKey: apiKey || process.env.LOCAL_AI_API_KEY || null,
});
const openai = new OpenAIApi(config);
const models = await openai
.listModels()
.then((res) => res.data.data)
const models = await openai.models
.list()
.then((results) => results.data)
.catch((e) => {
console.error(`LocalAI:listModels`, e.message);
return [];
@ -153,14 +151,14 @@ async function localAIModels(basePath = null, apiKey = null) {
async function getLMStudioModels(basePath = null) {
try {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: basePath || process.env.LMSTUDIO_BASE_PATH,
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
baseURL: basePath || process.env.LMSTUDIO_BASE_PATH,
apiKey: null,
});
const openai = new OpenAIApi(config);
const models = await openai
.listModels()
.then((res) => res.data.data)
const models = await openai.models
.list()
.then((results) => results.data)
.catch((e) => {
console.error(`LMStudio:listModels`, e.message);
return [];
@ -250,15 +248,16 @@ async function getOpenRouterModels() {
}
async function getMistralModels(apiKey = null) {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
apiKey: apiKey || process.env.MISTRAL_API_KEY,
basePath: "https://api.mistral.ai/v1",
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
apiKey: apiKey || process.env.MISTRAL_API_KEY || null,
baseURL: "https://api.mistral.ai/v1",
});
const openai = new OpenAIApi(config);
const models = await openai
.listModels()
.then((res) => res.data.data.filter((model) => !model.id.includes("embed")))
const models = await openai.models
.list()
.then((results) =>
results.data.filter((model) => !model.id.includes("embed"))
)
.catch((e) => {
console.error(`Mistral:listModels`, e.message);
return [];
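
The model-listing helpers change from v3's `listModels()` (array at `res.data.data`) to v4's `models.list()`, which resolves to a page object whose `data` property holds the models, matching the `results.data` access above. A sketch against the Mistral base URL used in this helper:

const { OpenAI } = require("openai");

async function listMistralChatModels() {
  const client = new OpenAI({
    baseURL: "https://api.mistral.ai/v1",
    apiKey: process.env.MISTRAL_API_KEY ?? null,
  });
  const page = await client.models.list();
  // v4 resolves to a page object; the array of models is page.data.
  return page.data.filter((model) => !model.id.includes("embed"));
}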

File diff suppressed because it is too large.