Bump openai package to latest (#1234)

* Bump `openai` package to latest
Tested all providers except LocalAI

* Bump LocalAI support with latest image

* Add deprecation notice

* Linting
Timothy Carambat 2024-04-30 12:33:42 -07:00 committed by GitHub
parent 94017e2b51
commit 547d4859ef
26 changed files with 2315 additions and 2375 deletions
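
Every provider file below repeats the same migration: the v3 client built from `Configuration` + `OpenAIApi` (axios-based) is replaced by the single v4 `OpenAI` constructor. A minimal sketch of that change, using a placeholder env var and base URL rather than any provider-specific values from the diffs:

// Before (openai v3): a Configuration object wrapped by OpenAIApi.
//   const { Configuration, OpenAIApi } = require("openai");
//   const client = new OpenAIApi(new Configuration({ apiKey, basePath }));
// After (openai v4): one constructor call; note `basePath` becomes `baseURL`.
const { OpenAI } = require("openai");

const client = new OpenAI({
  apiKey: process.env.MY_PROVIDER_API_KEY, // placeholder env var for this sketch
  baseURL: "https://api.example.com/v1", // optional; defaults to api.openai.com
});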

View File

@ -1,8 +1,8 @@
{
"cSpell.words": [
"AIbitat",
"adoc",
"aibitat",
"AIbitat",
"anythingllm",
"Astra",
"Chartable",
@ -20,6 +20,7 @@
"mbox",
"Milvus",
"Mintplex",
"moderations",
"Ollama",
"openai",
"opendocument",

View File

@ -36,7 +36,7 @@
"multer": "^1.4.5-lts.1",
"node-html-parser": "^6.1.13",
"officeparser": "^4.0.5",
"openai": "^3.2.1",
"openai": "4.38.5",
"pdf-parse": "^1.1.1",
"puppeteer": "~21.5.2",
"slugify": "^1.6.6",

View File

@ -2,13 +2,12 @@ const fs = require("fs");
class OpenAiWhisper {
constructor({ options }) {
const { Configuration, OpenAIApi } = require("openai");
const { OpenAI: OpenAIApi } = require("openai");
if (!options.openAiKey) throw new Error("No OpenAI API key was set.");
const config = new Configuration({
this.openai = new OpenAIApi({
apiKey: options.openAiKey,
});
this.openai = new OpenAIApi(config);
this.model = "whisper-1";
this.temperature = 0;
this.#log("Initialized.");
@ -19,22 +18,30 @@ class OpenAiWhisper {
}
async processFile(fullFilePath) {
return await this.openai
.createTranscription(
fs.createReadStream(fullFilePath),
this.model,
undefined,
"text",
this.temperature
)
.then((res) => {
if (res.hasOwnProperty("data"))
return { content: res.data, error: null };
return { content: "", error: "No content was able to be transcribed." };
return await this.openai.audio.transcriptions
.create({
file: fs.createReadStream(fullFilePath),
model: this.model,
model: "whisper-1",
response_format: "text",
temperature: this.temperature,
})
.catch((e) => {
this.#log(`Could not get any response from openai whisper`, e.message);
return { content: "", error: e.message };
.then((response) => {
if (!response) {
return {
content: "",
error: "No content was able to be transcribed.",
};
}
return { content: response, error: null };
})
.catch((error) => {
this.#log(
`Could not get any response from openai whisper`,
error.message
);
return { content: "", error: error.message };
});
}
}
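
The v3 positional call `createTranscription(file, model, prompt, responseFormat, temperature)` becomes a single options object on `audio.transcriptions.create`, and because the request asks for `response_format: "text"` the new code treats the resolved value as the transcript itself rather than an axios envelope, which is why it checks `response` directly instead of `res.data`. A self-contained sketch of that call (the helper name and file path are illustrative only):

const fs = require("fs");
const { OpenAI } = require("openai");

async function transcribe(filePath) {
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  // With response_format "text" the v4 SDK resolves to the transcript text,
  // so it can be returned as-is.
  const text = await client.audio.transcriptions.create({
    file: fs.createReadStream(filePath),
    model: "whisper-1",
    response_format: "text",
    temperature: 0,
  });
  return text;
}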

File diff suppressed because it is too large.

View File

@ -64,7 +64,7 @@ GID='1000'
# LLM_PROVIDER='groq'
# GROQ_API_KEY=gsk_abcxyz
# GROQ_MODEL_PREF=llama2-70b-4096
# GROQ_MODEL_PREF=llama3-8b-8192
# LLM_PROVIDER='generic-openai'
# GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'

View File

@ -24,12 +24,11 @@ export default function GroqAiOptions({ settings }) {
</label>
<select
name="GroqModelPref"
defaultValue={settings?.GroqModelPref || "llama2-70b-4096"}
defaultValue={settings?.GroqModelPref || "llama3-8b-8192"}
required={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{[
"llama2-70b-4096",
"mixtral-8x7b-32768",
"llama3-8b-8192",
"llama3-70b-8192",

View File

@ -86,7 +86,7 @@ function TogetherAiModelSelection({ settings }) {
<option
key={model.id}
value={model.id}
selected={settings?.OpenRouterModelPref === model.id}
selected={settings?.TogetherAiModelPref === model.id}
>
{model.name}
</option>

View File

@ -20,7 +20,6 @@ const PROVIDER_DEFAULT_MODELS = {
ollama: [],
togetherai: [],
groq: [
"llama2-70b-4096",
"mixtral-8x7b-32768",
"llama3-8b-8192",
"llama3-70b-8192",

View File

@ -61,7 +61,7 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# LLM_PROVIDER='groq'
# GROQ_API_KEY=gsk_abcxyz
# GROQ_MODEL_PREF=llama2-70b-4096
# GROQ_MODEL_PREF=llama3-8b-8192
# LLM_PROVIDER='generic-openai'
# GENERIC_OPEN_AI_BASE_PATH='http://proxy.url.openai.com/v1'

View File

@ -57,8 +57,7 @@
"multer": "^1.4.5-lts.1",
"node-html-markdown": "^1.3.0",
"node-llama-cpp": "^2.8.0",
"openai": "^3.2.1",
"openai-latest": "npm:openai@latest",
"openai": "4.38.5",
"pinecone-client": "^1.1.0",
"pluralize": "^8.0.0",
"posthog-node": "^3.1.1",

View File

@ -1,21 +1,22 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class GenericOpenAiLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
const { OpenAI: OpenAIApi } = require("openai");
if (!process.env.GENERIC_OPEN_AI_BASE_PATH)
throw new Error(
"GenericOpenAI must have a valid base path to use for the api."
);
this.basePath = process.env.GENERIC_OPEN_AI_BASE_PATH;
const config = new Configuration({
basePath: this.basePath,
this.openai = new OpenAIApi({
baseURL: this.basePath,
apiKey: process.env.GENERIC_OPEN_AI_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference ?? process.env.GENERIC_OPEN_AI_MODEL_PREF ?? null;
if (!this.model)
@ -89,8 +90,8 @@ class GenericOpenAiLLM {
}
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -103,13 +104,12 @@ class GenericOpenAiLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("GenericOpenAI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("GenericOpenAI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -121,29 +121,26 @@ class GenericOpenAiLLM {
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -152,25 +149,23 @@ class GenericOpenAiLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
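
The change in response shape here applies to every chat call in this commit: v3's axios response nested the payload under `.data` (hence the old `json.data` indirection), while v4's `chat.completions.create` resolves directly to the parsed completion, so `result.choices` is read off the return value. A minimal sketch under assumed placeholder env vars and model id, not the project's real configuration:

const { OpenAI } = require("openai");

async function completeOnce(messages) {
  const client = new OpenAI({
    baseURL: process.env.EXAMPLE_BASE_URL, // placeholder for a custom endpoint
    apiKey: process.env.EXAMPLE_API_KEY ?? null, // mirrors the `?? null` used above
  });
  const result = await client.chat.completions.create({
    model: "my-model", // placeholder model id
    temperature: 0.7,
    messages, // [{ role: "user", content: "..." }, ...]
  });
  // v4 returns the parsed body directly; there is no `.data` wrapper as in v3.
  return result.choices?.[0]?.message?.content ?? null;
}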

View File

@ -1,20 +1,20 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class GroqLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
const { OpenAI: OpenAIApi } = require("openai");
if (!process.env.GROQ_API_KEY) throw new Error("No Groq API key was set.");
const config = new Configuration({
basePath: "https://api.groq.com/openai/v1",
this.openai = new OpenAIApi({
baseURL: "https://api.groq.com/openai/v1",
apiKey: process.env.GROQ_API_KEY,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.GROQ_MODEL_PREF || "llama2-70b-4096";
modelPreference || process.env.GROQ_MODEL_PREF || "llama3-8b-8192";
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
@ -40,10 +40,9 @@ class GroqLLM {
streamingEnabled() {
return "streamChat" in this && "streamGetChatCompletion" in this;
}
promptWindowLimit() {
switch (this.model) {
case "llama2-70b-4096":
return 4096;
case "mixtral-8x7b-32768":
return 32_768;
case "llama3-8b-8192":
@ -53,13 +52,12 @@ class GroqLLM {
case "gemma-7b-it":
return 8192;
default:
return 4096;
return 8192;
}
}
async isValidChatCompletionModel(modelName = "") {
const validModels = [
"llama2-70b-4096",
"mixtral-8x7b-32768",
"llama3-8b-8192",
"llama3-70b-8192",
@ -68,9 +66,9 @@ class GroqLLM {
const isPreset = validModels.some((model) => modelName === model);
if (isPreset) return true;
const model = await this.openai
.retrieveModel(modelName)
.then((res) => res.data)
const model = await this.openai.models
.retrieve(modelName)
.then((modelObj) => modelObj)
.catch(() => null);
return !!model;
}
@ -99,8 +97,8 @@ class GroqLLM {
`Groq chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -113,13 +111,12 @@ class GroqLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("GroqAI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("GroqAI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -136,23 +133,20 @@ class GroqLLM {
`GroqAI:streamChat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -162,8 +156,8 @@ class GroqLLM {
`GroqAI:chatCompletion: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -172,8 +166,9 @@ class GroqLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -182,20 +177,17 @@ class GroqLLM {
`GroqAI:streamChatCompletion: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
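
Besides the chat calls, the model lookup changes from v3's `retrieveModel(name)` (result under `res.data`) to v4's `models.retrieve(name)`, which resolves to the model object itself, so the `.then((modelObj) => modelObj)` step above is effectively a pass-through. A small sketch of that existence check against the Groq base URL used in the diff (the function name is illustrative):

const { OpenAI } = require("openai");

async function modelExists(modelName) {
  const client = new OpenAI({
    baseURL: "https://api.groq.com/openai/v1",
    apiKey: process.env.GROQ_API_KEY,
  });
  const model = await client.models
    .retrieve(modelName) // v4: resolves to the Model object directly
    .catch(() => null); // treat an unknown-model error as "not valid"
  return !!model;
}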

View File

@ -2,23 +2,21 @@ const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
const {
writeResponseChunk,
clientAbortedHandler,
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class HuggingFaceLLM {
constructor(embedder = null, _modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.HUGGING_FACE_LLM_ENDPOINT)
throw new Error("No HuggingFace Inference Endpoint was set.");
if (!process.env.HUGGING_FACE_LLM_API_KEY)
throw new Error("No HuggingFace Access Token was set.");
const { OpenAI: OpenAIApi } = require("openai");
const config = new Configuration({
basePath: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
this.openai = new OpenAIApi({
baseURL: `${process.env.HUGGING_FACE_LLM_ENDPOINT}/v1`,
apiKey: process.env.HUGGING_FACE_LLM_API_KEY,
});
this.openai = new OpenAIApi(config);
// When using HF inference server - the model param is not required so
// we can stub it here. HF Endpoints can only run one model at a time.
// We set to 'tgi' so that endpoint for HF can accept message format
@ -93,8 +91,8 @@ class HuggingFaceLLM {
}
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -107,13 +105,12 @@ class HuggingFaceLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("HuggingFace chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("HuggingFace chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -125,167 +122,47 @@ class HuggingFaceLLM {
}
async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {
const { data } = await this.openai.createChatCompletion({
const result = await this.openai.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish listener to early-abort a streaming response
// in case things go sideways or the user does not like the response.
// We preserve the generated text but continue as if chat was completed
// to preserve previously generated content.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
?.split("\n")
.filter((line) => line.trim() !== "");
for (const line of lines) {
let validJSON = false;
const message = chunk + line.replace(/^data:/, "");
if (message !== "[DONE]") {
// JSON chunk is incomplete and has not ended yet
// so we need to stitch it together. You would think JSON
// chunks would only come complete - but they don't!
try {
JSON.parse(message);
validJSON = true;
} catch {
console.log("Failed to parse message", message);
}
if (!validJSON) {
// It can be possible that the chunk decoding is running away
// and the message chunk fails to append due to string length.
// In this case abort the chunk and reset so we can continue.
// ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
try {
chunk += message;
} catch (e) {
console.error(`Chunk appending error`, e);
chunk = "";
}
continue;
} else {
chunk = "";
}
}
if (message == "[DONE]") {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let error = null;
let finishReason = null;
let token = "";
try {
const json = JSON.parse(message);
error = json?.error || null;
token = json?.choices?.[0]?.delta?.content;
finishReason = json?.choices?.[0]?.finish_reason || null;
} catch {
continue;
}
if (!!error) {
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: null,
close: true,
error,
});
response.removeListener("close", handleAbort);
resolve("");
return;
}
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
if (finishReason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
}
});
});
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
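
The large block deleted here is the hand-rolled SSE handling (splitting `data:` lines and stitching partial JSON chunks); `handleDefaultStreamResponseV2` can replace it because a v4 `chat.completions.create({ stream: true })` call resolves to an async iterable of already-parsed chunks. A sketch of that consumption pattern, with a generic model and prompt as placeholders:

const { OpenAI } = require("openai");

async function streamToStdout() {
  const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
  const stream = await client.chat.completions.create({
    model: "gpt-3.5-turbo",
    stream: true,
    messages: [{ role: "user", content: "Say hello." }],
  });
  let fullText = "";
  for await (const chunk of stream) {
    // Each chunk arrives parsed; no "data:" line splitting or JSON stitching.
    const token = chunk.choices?.[0]?.delta?.content;
    if (token) {
      fullText += token;
      process.stdout.write(token);
    }
    if (chunk.choices?.[0]?.finish_reason) break;
  }
  return fullText;
}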

View File

@ -1,5 +1,7 @@
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
// hybrid of openAi LLM chat completion for LMStudio
class LMStudioLLM {
@ -7,11 +9,11 @@ class LMStudioLLM {
if (!process.env.LMSTUDIO_BASE_PATH)
throw new Error("No LMStudio API Base Path was set.");
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: process.env.LMSTUDIO_BASE_PATH?.replace(/\/+$/, ""), // here is the URL to your LMStudio instance
const { OpenAI: OpenAIApi } = require("openai");
this.lmstudio = new OpenAIApi({
baseURL: process.env.LMSTUDIO_BASE_PATH?.replace(/\/+$/, ""), // here is the URL to your LMStudio instance
apiKey: null,
});
this.lmstudio = new OpenAIApi(config);
// Prior to LMStudio 0.2.17 the `model` param was not required and you could pass anything
// into that field and it would work. On 0.2.17 LMStudio introduced multi-model chat
@ -89,8 +91,8 @@ class LMStudioLLM {
`LMStudio chat: ${this.model} is not valid or defined for chat completion!`
);
const textResponse = await this.lmstudio
.createChatCompletion({
const textResponse = await this.lmstudio.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -103,13 +105,12 @@ class LMStudioLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("LMStudio chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("LMStudio chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -126,23 +127,20 @@ class LMStudioLLM {
`LMStudio chat: ${this.model} is not valid or defined for chat completion!`
);
const streamRequest = await this.lmstudio.createChatCompletion(
{
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
stream: true,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.lmstudio.chat.completions.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
stream: true,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -152,14 +150,15 @@ class LMStudioLLM {
`LMStudio chat: ${this.model} is not valid or defined model for chat completion!`
);
const { data } = await this.lmstudio.createChatCompletion({
const result = await this.lmstudio.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -168,20 +167,17 @@ class LMStudioLLM {
`LMStudio chat: ${this.model} is not valid or defined model for chat completion!`
);
const streamRequest = await this.lmstudio.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.lmstudio.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations

View File

@ -1,21 +1,18 @@
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class LocalAiLLM {
constructor(embedder = null, modelPreference = null) {
if (!process.env.LOCAL_AI_BASE_PATH)
throw new Error("No LocalAI Base Path was set.");
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: process.env.LOCAL_AI_BASE_PATH,
...(!!process.env.LOCAL_AI_API_KEY
? {
apiKey: process.env.LOCAL_AI_API_KEY,
}
: {}),
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: process.env.LOCAL_AI_BASE_PATH,
apiKey: process.env.LOCAL_AI_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model = modelPreference || process.env.LOCAL_AI_MODEL_PREF;
this.limits = {
history: this.promptWindowLimit() * 0.15,
@ -84,8 +81,8 @@ class LocalAiLLM {
`LocalAI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -98,13 +95,12 @@ class LocalAiLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("LocalAI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("LocalAI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -121,23 +117,20 @@ class LocalAiLLM {
`LocalAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -147,14 +140,15 @@ class LocalAiLLM {
`LocalAI chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai.createChatCompletion({
const result = await this.openai.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -163,20 +157,17 @@ class LocalAiLLM {
`LocalAi chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations

View File

@ -1,17 +1,18 @@
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class MistralLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.MISTRAL_API_KEY)
throw new Error("No Mistral API key was set.");
const config = new Configuration({
basePath: "https://api.mistral.ai/v1",
apiKey: process.env.MISTRAL_API_KEY,
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: "https://api.mistral.ai/v1",
apiKey: process.env.MISTRAL_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.MISTRAL_MODEL_PREF || "mistral-tiny";
this.limits = {
@ -75,8 +76,8 @@ class MistralLLM {
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
messages: await this.compressMessages(
@ -88,13 +89,12 @@ class MistralLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Mistral chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("Mistral chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -111,22 +111,19 @@ class MistralLLM {
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -137,14 +134,15 @@ class MistralLLM {
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai.createChatCompletion({
const result = await this.openai.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -153,20 +151,17 @@ class MistralLLM {
`Mistral chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
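
One thing the commit leaves untouched is the catch blocks that read `e.response.data.error.message`, which is the v3/axios error shape; the v4 SDK instead throws `OpenAI.APIError` instances that carry `status` and `message` on the error itself. A hedged sketch of what v4-style error handling could look like (this is not code from the commit):

const { OpenAI } = require("openai");

async function safeCompletion(client, payload) {
  try {
    return await client.chat.completions.create(payload);
  } catch (e) {
    if (e instanceof OpenAI.APIError) {
      // v4 API failures expose .status and .message directly.
      console.error(`API error ${e.status}: ${e.message}`);
    } else {
      console.error(`Unexpected error: ${e.message}`);
    }
    return null;
  }
}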

View File

@ -1,16 +1,17 @@
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
class OpenAiLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
const config = new Configuration({
this.openai = new OpenAIApi({
apiKey: process.env.OPEN_AI_KEY,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
this.limits = {
@ -70,9 +71,9 @@ class OpenAiLLM {
const isPreset = modelName.toLowerCase().includes("gpt");
if (isPreset) return true;
const model = await this.openai
.retrieveModel(modelName)
.then((res) => res.data)
const model = await this.openai.models
.retrieve(modelName)
.then((modelObj) => modelObj)
.catch(() => null);
return !!model;
}
@ -91,10 +92,9 @@ class OpenAiLLM {
}
async isSafe(input = "") {
const { flagged = false, categories = {} } = await this.openai
.createModeration({ input })
.then((json) => {
const res = json.data;
const { flagged = false, categories = {} } = await this.openai.moderations
.create({ input })
.then((res) => {
if (!res.hasOwnProperty("results"))
throw new Error("OpenAI moderation: No results!");
if (res.results.length === 0)
@ -128,8 +128,8 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -142,13 +142,12 @@ class OpenAiLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("OpenAI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("OpenAI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -165,23 +164,20 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -191,8 +187,8 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -201,8 +197,9 @@ class OpenAiLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -211,20 +208,17 @@ class OpenAiLLM {
`OpenAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
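
The moderation path changes the same way as chat: v3's `createModeration({ input })` nested results under `res.data.results`, while v4's `moderations.create({ input })` resolves to an object with `results` at the top level, which is what the reworked `isSafe` reads. A minimal standalone sketch (the function name is illustrative):

const { OpenAI } = require("openai");

async function isFlagged(input) {
  const client = new OpenAI({ apiKey: process.env.OPEN_AI_KEY });
  const res = await client.moderations.create({ input });
  // v4 resolves to { id, model, results: [...] } directly, no `.data` wrapper.
  const { flagged = false } = res.results?.[0] ?? {};
  return flagged;
}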

View File

@ -4,6 +4,7 @@ const { v4: uuidv4 } = require("uuid");
const {
writeResponseChunk,
clientAbortedHandler,
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
const fs = require("fs");
const path = require("path");
@ -16,22 +17,19 @@ const cacheFolder = path.resolve(
class OpenRouterLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.OPENROUTER_API_KEY)
throw new Error("No OpenRouter API key was set.");
const { OpenAI: OpenAIApi } = require("openai");
this.basePath = "https://openrouter.ai/api/v1";
const config = new Configuration({
basePath: this.basePath,
apiKey: process.env.OPENROUTER_API_KEY,
baseOptions: {
headers: {
"HTTP-Referer": "https://useanything.com",
"X-Title": "AnythingLLM",
},
this.openai = new OpenAIApi({
baseURL: this.basePath,
apiKey: process.env.OPENROUTER_API_KEY ?? null,
defaultHeaders: {
"HTTP-Referer": "https://useanything.com",
"X-Title": "AnythingLLM",
},
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.OPENROUTER_MODEL_PREF || "openrouter/auto";
this.limits = {
@ -139,8 +137,8 @@ class OpenRouterLLM {
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -153,13 +151,12 @@ class OpenRouterLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("OpenRouter chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("OpenRouter chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -176,23 +173,20 @@ class OpenRouterLLM {
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -202,8 +196,8 @@ class OpenRouterLLM {
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -212,8 +206,9 @@ class OpenRouterLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -222,15 +217,12 @@ class OpenRouterLLM {
`OpenRouter chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
@ -238,9 +230,8 @@ class OpenRouterLLM {
const timeoutThresholdMs = 500;
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise((resolve) => {
return new Promise(async (resolve) => {
let fullText = "";
let chunk = "";
let lastChunkTime = null; // null when first token is still not received.
// Establish listener to early-abort a streaming response
@ -280,97 +271,176 @@ class OpenRouterLLM {
}
}, 500);
stream.data.on("data", (data) => {
const lines = data
?.toString()
?.split("\n")
.filter((line) => line.trim() !== "");
for await (const chunk of stream) {
const message = chunk?.choices?.[0];
const token = message?.delta?.content;
lastChunkTime = Number(new Date());
for (const line of lines) {
let validJSON = false;
const message = chunk + line.replace(/^data: /, "");
// JSON chunk is incomplete and has not ended yet
// so we need to stitch it together. You would think JSON
// chunks would only come complete - but they don't!
try {
JSON.parse(message);
validJSON = true;
} catch {}
if (!validJSON) {
// It can be possible that the chunk decoding is running away
// and the message chunk fails to append due to string length.
// In this case abort the chunk and reset so we can continue.
// ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
try {
chunk += message;
} catch (e) {
console.error(`Chunk appending error`, e);
chunk = "";
}
continue;
} else {
chunk = "";
}
if (message == "[DONE]") {
lastChunkTime = Number(new Date());
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
let token = "";
try {
const json = JSON.parse(message);
token = json?.choices?.[0]?.delta?.content;
finishReason = json?.choices?.[0]?.finish_reason || null;
} catch {
continue;
}
if (token) {
fullText += token;
lastChunkTime = Number(new Date());
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
if (finishReason !== null) {
lastChunkTime = Number(new Date());
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
clearInterval(timeoutCheck);
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
});
if (message.finish_reason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
});
}
// handleStream(response, stream, responseProps) {
// const timeoutThresholdMs = 500;
// const { uuid = uuidv4(), sources = [] } = responseProps;
// return new Promise((resolve) => {
// let fullText = "";
// let chunk = "";
// let lastChunkTime = null; // null when first token is still not received.
// // Establish listener to early-abort a streaming response
// // in case things go sideways or the user does not like the response.
// // We preserve the generated text but continue as if chat was completed
// // to preserve previously generated content.
// const handleAbort = () => clientAbortedHandler(resolve, fullText);
// response.on("close", handleAbort);
// // NOTICE: Not all OpenRouter models will return a stop reason
// // which keeps the connection open and so the model never finalizes the stream
// // like the traditional OpenAI response schema does. So in the case the response stream
// // never reaches a formal close state we maintain an interval timer that if we go >=timeoutThresholdMs with
// // no new chunks then we kill the stream and assume it to be complete. OpenRouter is quite fast
// // so this threshold should permit most responses, but we can adjust `timeoutThresholdMs` if
// // we find it is too aggressive.
// const timeoutCheck = setInterval(() => {
// if (lastChunkTime === null) return;
// const now = Number(new Date());
// const diffMs = now - lastChunkTime;
// if (diffMs >= timeoutThresholdMs) {
// console.log(
// `OpenRouter stream did not self-close and has been stale for >${timeoutThresholdMs}ms. Closing response stream.`
// );
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// }
// }, 500);
// stream.data.on("data", (data) => {
// const lines = data
// ?.toString()
// ?.split("\n")
// .filter((line) => line.trim() !== "");
// for (const line of lines) {
// let validJSON = false;
// const message = chunk + line.replace(/^data: /, "");
// // JSON chunk is incomplete and has not ended yet
// // so we need to stitch it together. You would think JSON
// // chunks would only come complete - but they don't!
// try {
// JSON.parse(message);
// validJSON = true;
// } catch { }
// if (!validJSON) {
// // It can be possible that the chunk decoding is running away
// // and the message chunk fails to append due to string length.
// // In this case abort the chunk and reset so we can continue.
// // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
// try {
// chunk += message;
// } catch (e) {
// console.error(`Chunk appending error`, e);
// chunk = "";
// }
// continue;
// } else {
// chunk = "";
// }
// if (message == "[DONE]") {
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// } else {
// let finishReason = null;
// let token = "";
// try {
// const json = JSON.parse(message);
// token = json?.choices?.[0]?.delta?.content;
// finishReason = json?.choices?.[0]?.finish_reason || null;
// } catch {
// continue;
// }
// if (token) {
// fullText += token;
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources: [],
// type: "textResponseChunk",
// textResponse: token,
// close: false,
// error: false,
// });
// }
// if (finishReason !== null) {
// lastChunkTime = Number(new Date());
// writeResponseChunk(response, {
// uuid,
// sources,
// type: "textResponseChunk",
// textResponse: "",
// close: true,
// error: false,
// });
// clearInterval(timeoutCheck);
// response.removeListener("close", handleAbort);
// resolve(fullText);
// }
// }
// }
// });
// });
// }
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
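
For OpenRouter the extra routing headers move from v3's `baseOptions.headers` (an axios request config) to the v4 client's `defaultHeaders` option, as the constructor hunk above shows. A sketch of just that constructor, reusing the URL and header values from the diff:

const { OpenAI } = require("openai");

const openrouter = new OpenAI({
  baseURL: "https://openrouter.ai/api/v1",
  apiKey: process.env.OPENROUTER_API_KEY ?? null,
  defaultHeaders: {
    "HTTP-Referer": "https://useanything.com", // identifies the calling app to OpenRouter
    "X-Title": "AnythingLLM",
  },
});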

View File

@ -1,6 +1,8 @@
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
const { chatPrompt } = require("../../chats");
const { handleDefaultStreamResponse } = require("../../helpers/chat/responses");
const {
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
function perplexityModels() {
const { MODELS } = require("./models.js");
@ -9,17 +11,18 @@ function perplexityModels() {
class PerplexityLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.PERPLEXITY_API_KEY)
throw new Error("No Perplexity API key was set.");
const config = new Configuration({
basePath: "https://api.perplexity.ai",
apiKey: process.env.PERPLEXITY_API_KEY,
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: "https://api.perplexity.ai",
apiKey: process.env.PERPLEXITY_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model =
modelPreference || process.env.PERPLEXITY_MODEL_PREF || "pplx-7b-online"; // Give at least a unique model to the provider as last fallback.
modelPreference ||
process.env.PERPLEXITY_MODEL_PREF ||
"sonar-small-online"; // Give at least a unique model to the provider as last fallback.
this.limits = {
history: this.promptWindowLimit() * 0.15,
system: this.promptWindowLimit() * 0.15,
@ -84,8 +87,8 @@ class PerplexityLLM {
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -98,13 +101,12 @@ class PerplexityLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Perplexity chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("Perplexity chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -121,23 +123,20 @@ class PerplexityLLM {
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -147,8 +146,8 @@ class PerplexityLLM {
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai
.createChatCompletion({
const result = await this.openai.chat.completions
.create({
model: this.model,
messages,
temperature,
@ -157,8 +156,9 @@ class PerplexityLLM {
throw new Error(e.response.data.error.message);
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -167,20 +167,17 @@ class PerplexityLLM {
`Perplexity chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
return handleDefaultStreamResponse(response, stream, responseProps);
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations

View File

@ -1,7 +1,6 @@
const { chatPrompt } = require("../../chats");
const {
writeResponseChunk,
clientAbortedHandler,
handleDefaultStreamResponseV2,
} = require("../../helpers/chat/responses");
function togetherAiModels() {
@ -11,15 +10,13 @@ function togetherAiModels() {
class TogetherAiLLM {
constructor(embedder = null, modelPreference = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.TOGETHER_AI_API_KEY)
throw new Error("No TogetherAI API key was set.");
const config = new Configuration({
basePath: "https://api.together.xyz/v1",
apiKey: process.env.TOGETHER_AI_API_KEY,
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: "https://api.together.xyz/v1",
apiKey: process.env.TOGETHER_AI_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
this.model = modelPreference || process.env.TOGETHER_AI_MODEL_PREF;
this.limits = {
history: this.promptWindowLimit() * 0.15,
@ -91,8 +88,8 @@ class TogetherAiLLM {
`Together AI chat: ${this.model} is not valid for chat completion!`
);
const textResponse = await this.openai
.createChatCompletion({
const textResponse = await this.openai.chat.completions
.create({
model: this.model,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
@ -105,13 +102,12 @@ class TogetherAiLLM {
rawHistory
),
})
.then((json) => {
const res = json.data;
if (!res.hasOwnProperty("choices"))
.then((result) => {
if (!result.hasOwnProperty("choices"))
throw new Error("Together AI chat: No results!");
if (res.choices.length === 0)
if (result.choices.length === 0)
throw new Error("Together AI chat: No results length!");
return res.choices[0].message.content;
return result.choices[0].message.content;
})
.catch((error) => {
throw new Error(
@ -128,23 +124,20 @@ class TogetherAiLLM {
`TogetherAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
temperature: Number(workspace?.openAiTemp ?? this.defaultTemp),
n: 1,
messages: await this.compressMessages(
{
systemPrompt: chatPrompt(workspace),
userPrompt: prompt,
chatHistory,
},
rawHistory
),
});
return streamRequest;
}
@ -154,14 +147,15 @@ class TogetherAiLLM {
`TogetherAI chat: ${this.model} is not valid for chat completion!`
);
const { data } = await this.openai.createChatCompletion({
const result = await this.openai.chat.completions.create({
model: this.model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
if (!result.hasOwnProperty("choices") || result.choices.length === 0)
return null;
return result.choices[0].message.content;
}
async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
@ -170,118 +164,17 @@ class TogetherAiLLM {
`TogetherAI chat: ${this.model} is not valid for chat completion!`
);
const streamRequest = await this.openai.createChatCompletion(
{
model: this.model,
stream: true,
messages,
temperature,
},
{ responseType: "stream" }
);
const streamRequest = await this.openai.chat.completions.create({
model: this.model,
stream: true,
messages,
temperature,
});
return streamRequest;
}
handleStream(response, stream, responseProps) {
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise((resolve) => {
let fullText = "";
let chunk = "";
// Establish listener to early-abort a streaming response
// in case things go sideways or the user does not like the response.
// We preserve the generated text but continue as if chat was completed
// to preserve previously generated content.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
stream.data.on("data", (data) => {
const lines = data
?.toString()
?.split("\n")
.filter((line) => line.trim() !== "");
for (const line of lines) {
let validJSON = false;
const message = chunk + line.replace(/^data: /, "");
if (message !== "[DONE]") {
// JSON chunk is incomplete and has not ended yet
// so we need to stitch it together. You would think JSON
// chunks would only come complete - but they don't!
try {
JSON.parse(message);
validJSON = true;
} catch {}
if (!validJSON) {
// It can be possible that the chunk decoding is running away
// and the message chunk fails to append due to string length.
// In this case abort the chunk and reset so we can continue.
// ref: https://github.com/Mintplex-Labs/anything-llm/issues/416
try {
chunk += message;
} catch (e) {
console.error(`Chunk appending error`, e);
chunk = "";
}
continue;
} else {
chunk = "";
}
}
if (message == "[DONE]") {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
} else {
let finishReason = null;
let token = "";
try {
const json = JSON.parse(message);
token = json?.choices?.[0]?.delta?.content;
finishReason = json?.choices?.[0]?.finish_reason || null;
} catch {
continue;
}
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
if (finishReason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
}
});
});
return handleDefaultStreamResponseV2(response, stream, responseProps);
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations

View File

@ -2,20 +2,16 @@ const { toChunks, maximumChunkLength } = require("../../helpers");
class LocalAiEmbedder {
constructor() {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.EMBEDDING_BASE_PATH)
throw new Error("No embedding base path was set.");
if (!process.env.EMBEDDING_MODEL_PREF)
throw new Error("No embedding model was set.");
const config = new Configuration({
basePath: process.env.EMBEDDING_BASE_PATH,
...(!!process.env.LOCAL_AI_API_KEY
? {
apiKey: process.env.LOCAL_AI_API_KEY,
}
: {}),
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
baseURL: process.env.EMBEDDING_BASE_PATH,
apiKey: process.env.LOCAL_AI_API_KEY ?? null,
});
this.openai = new OpenAIApi(config);
// Limit of how many strings we can process in a single pass to stay with resource or network limits
this.maxConcurrentChunks = 50;
@ -34,13 +30,13 @@ class LocalAiEmbedder {
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
embeddingRequests.push(
new Promise((resolve) => {
this.openai
.createEmbedding({
this.openai.embeddings
.create({
model: process.env.EMBEDDING_MODEL_PREF,
input: chunk,
})
.then((res) => {
resolve({ data: res.data?.data, error: null });
.then((result) => {
resolve({ data: result?.data, error: null });
})
.catch((e) => {
e.type =

View File

@ -2,13 +2,11 @@ const { toChunks } = require("../../helpers");
class OpenAiEmbedder {
constructor() {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set.");
const config = new Configuration({
const { OpenAI: OpenAIApi } = require("openai");
this.openai = new OpenAIApi({
apiKey: process.env.OPEN_AI_KEY,
});
const openai = new OpenAIApi(config);
this.openai = openai;
this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002";
// Limit of how many strings we can process in a single pass to stay with resource or network limits
@ -33,13 +31,13 @@ class OpenAiEmbedder {
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
embeddingRequests.push(
new Promise((resolve) => {
this.openai
.createEmbedding({
this.openai.embeddings
.create({
model: this.model,
input: chunk,
})
.then((res) => {
resolve({ data: res.data?.data, error: null });
.then((result) => {
resolve({ data: result?.data, error: null });
})
.catch((e) => {
e.type =
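
The embedders follow the same pattern as the LLM classes: v3's `createEmbedding` put the vectors at `res.data.data`, while v4's `embeddings.create` resolves to an object whose `data` array holds them, so the new code passes `result.data` straight through. A compact sketch using the default model id from the diff:

const { OpenAI } = require("openai");

async function embedChunks(textChunks) {
  const client = new OpenAI({ apiKey: process.env.OPEN_AI_KEY });
  const result = await client.embeddings.create({
    model: "text-embedding-ada-002",
    input: textChunks, // a string or an array of strings
  });
  // v4: the embeddings live at result.data (v3 nested them at res.data.data).
  return result.data.map((d) => d.embedding);
}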

View File

@ -1,4 +1,4 @@
const OpenAI = require("openai-latest");
const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const { RetryError } = require("../error.js");

View File

@ -9,6 +9,53 @@ function clientAbortedHandler(resolve, fullText) {
return;
}
function handleDefaultStreamResponseV2(response, stream, responseProps) {
const { uuid = uuidv4(), sources = [] } = responseProps;
return new Promise(async (resolve) => {
let fullText = "";
// Establish listener to early-abort a streaming response
// in case things go sideways or the user does not like the response.
// We preserve the generated text but continue as if chat was completed
// to preserve previously generated content.
const handleAbort = () => clientAbortedHandler(resolve, fullText);
response.on("close", handleAbort);
for await (const chunk of stream) {
const message = chunk?.choices?.[0];
const token = message?.delta?.content;
if (token) {
fullText += token;
writeResponseChunk(response, {
uuid,
sources: [],
type: "textResponseChunk",
textResponse: token,
close: false,
error: false,
});
}
// LocalAi returns '' and others return null.
if (message.finish_reason !== "" && message.finish_reason !== null) {
writeResponseChunk(response, {
uuid,
sources,
type: "textResponseChunk",
textResponse: "",
close: true,
error: false,
});
response.removeListener("close", handleAbort);
resolve(fullText);
}
}
});
}
// TODO: Fully remove - deprecated.
// The default way to handle a stream response. Functions best with OpenAI.
// Currently used for LMStudio, LocalAI, Mistral API, and OpenAI
function handleDefaultStreamResponse(response, stream, responseProps) {
@ -156,6 +203,7 @@ function writeResponseChunk(response, data) {
}
module.exports = {
handleDefaultStreamResponseV2,
handleDefaultStreamResponse,
convertToChatHistory,
convertToPromptHistory,

View File

@ -45,14 +45,13 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
}
async function openAiModels(apiKey = null) {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
apiKey: apiKey || process.env.OPEN_AI_KEY,
});
const openai = new OpenAIApi(config);
const allModels = await openai
.listModels()
.then((res) => res.data.data)
const allModels = await openai.models
.list()
.then((results) => results.data)
.catch((e) => {
console.error(`OpenAI:listModels`, e.message);
return [
@ -132,15 +131,14 @@ async function openAiModels(apiKey = null) {
}
async function localAIModels(basePath = null, apiKey = null) {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: basePath || process.env.LOCAL_AI_BASE_PATH,
apiKey: apiKey || process.env.LOCAL_AI_API_KEY,
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
baseURL: basePath || process.env.LOCAL_AI_BASE_PATH,
apiKey: apiKey || process.env.LOCAL_AI_API_KEY || null,
});
const openai = new OpenAIApi(config);
const models = await openai
.listModels()
.then((res) => res.data.data)
const models = await openai.models
.list()
.then((results) => results.data)
.catch((e) => {
console.error(`LocalAI:listModels`, e.message);
return [];
@ -153,14 +151,14 @@ async function localAIModels(basePath = null, apiKey = null) {
async function getLMStudioModels(basePath = null) {
try {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
basePath: basePath || process.env.LMSTUDIO_BASE_PATH,
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
baseURL: basePath || process.env.LMSTUDIO_BASE_PATH,
apiKey: null,
});
const openai = new OpenAIApi(config);
const models = await openai
.listModels()
.then((res) => res.data.data)
const models = await openai.models
.list()
.then((results) => results.data)
.catch((e) => {
console.error(`LMStudio:listModels`, e.message);
return [];
@ -250,15 +248,16 @@ async function getOpenRouterModels() {
}
async function getMistralModels(apiKey = null) {
const { Configuration, OpenAIApi } = require("openai");
const config = new Configuration({
apiKey: apiKey || process.env.MISTRAL_API_KEY,
basePath: "https://api.mistral.ai/v1",
const { OpenAI: OpenAIApi } = require("openai");
const openai = new OpenAIApi({
apiKey: apiKey || process.env.MISTRAL_API_KEY || null,
baseURL: "https://api.mistral.ai/v1",
});
const openai = new OpenAIApi(config);
const models = await openai
.listModels()
.then((res) => res.data.data.filter((model) => !model.id.includes("embed")))
const models = await openai.models
.list()
.then((results) =>
results.data.filter((model) => !model.id.includes("embed"))
)
.catch((e) => {
console.error(`Mistral:listModels`, e.message);
return [];
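
The model-listing helpers change from v3's `listModels()` (array at `res.data.data`) to v4's `models.list()`, which resolves to a page object whose `data` property holds the models, matching the `results.data` access above. A sketch against the Mistral base URL used in this helper:

const { OpenAI } = require("openai");

async function listMistralChatModels() {
  const client = new OpenAI({
    baseURL: "https://api.mistral.ai/v1",
    apiKey: process.env.MISTRAL_API_KEY ?? null,
  });
  const page = await client.models.list();
  // v4 resolves to a page object; the array of models is page.data.
  return page.data.filter((model) => !model.id.includes("embed"));
}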

File diff suppressed because it is too large.