[Feat] Query mode to return no-result when no context found (#601)

* Query mode to return no-result when no context found

* update default error for sync chat

* remove unnecessary type conversion
Authored by Timothy Carambat on 2024-01-16 09:32:51 -08:00; committed by GitHub
parent 7aaa4b38e7
commit bd158ce7b1
7 changed files with 61 additions and 117 deletions
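
In effect, a workspace chat run in "query" mode now resolves with a terminal no-result payload when nothing relevant exists in the vector DB, instead of letting the LLM answer from general knowledge. A minimal sketch of the shape clients can expect, mirroring the handlers changed below (the uuid usage matches the server's existing pattern):

// Sketch of the query-mode "no context" result the changed handlers produce.
const { v4: uuidv4 } = require("uuid");

const noContextResult = {
  id: uuidv4(),
  type: "textResponse",
  sources: [],   // nothing was retrieved from the vector DB
  close: true,   // signals the client that this reply is complete
  error: null,
  textResponse:
    "There is no relevant information in this workspace to answer your query.",
};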

View File

@@ -71,21 +71,6 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
       return false;
     }
-    // TODO: Delete this snippet once we have streaming stable.
-    // const chatResult = await Workspace.sendChat(
-    //   workspace,
-    //   promptMessage.userMessage,
-    //   window.localStorage.getItem(`workspace_chat_mode_${workspace.slug}`) ??
-    //     "chat",
-    // )
-    // handleChat(
-    //   chatResult,
-    //   setLoadingResponse,
-    //   setChatHistory,
-    //   remHistory,
-    //   _chatHistory
-    // )
     await Workspace.streamChat(
       workspace,
       promptMessage.userMessage,

View File

@@ -168,22 +168,6 @@ const Workspace = {
     const data = await response.json();
     return { response, data };
   },
-  // TODO: Deprecated and should be removed from frontend.
-  sendChat: async function ({ slug }, message, mode = "query") {
-    const chatResult = await fetch(`${API_BASE}/workspace/${slug}/chat`, {
-      method: "POST",
-      body: JSON.stringify({ message, mode }),
-      headers: baseHeaders(),
-    })
-      .then((res) => res.json())
-      .catch((e) => {
-        console.error(e);
-        return null;
-      });
-    return chatResult;
-  },
 };
 export default Workspace;

View File

@@ -196,10 +196,11 @@ function apiWorkspaceEndpoints(app) {
         return;
       }
-      await WorkspaceChats.delete({ workspaceId: Number(workspace.id) });
-      await DocumentVectors.deleteForWorkspace(Number(workspace.id));
-      await Document.delete({ workspaceId: Number(workspace.id) });
-      await Workspace.delete({ id: Number(workspace.id) });
+      const workspaceId = Number(workspace.id);
+      await WorkspaceChats.delete({ workspaceId: workspaceId });
+      await DocumentVectors.deleteForWorkspace(workspaceId);
+      await Document.delete({ workspaceId: workspaceId });
+      await Workspace.delete({ id: workspaceId });
       try {
         await VectorDb["delete-namespace"]({ namespace: slug });
       } catch (e) {
@@ -441,7 +442,7 @@ function apiWorkspaceEndpoints(app) {
       #swagger.tags = ['Workspaces']
       #swagger.description = 'Execute a chat with a workspace'
       #swagger.requestBody = {
-        description: 'prompt to send to the workspace and the type of conversation (query or chat).',
+        description: 'Send a prompt to the workspace and the type of conversation (query or chat).<br/><b>Query:</b> Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.<br/><b>Chat:</b> Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.',
         required: true,
         type: 'object',
         content: {
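
The updated description spells out the two modes: "query" answers only from retrieved workspace context, while "chat" may also draw on the LLM's general knowledge and rolling history. For illustration, request bodies for the documented endpoint would look like the following (the example message strings are invented; only the message and mode fields come from this change):

// Illustrative request bodies for the documented workspace chat endpoint.
const queryBody = JSON.stringify({
  message: "What is our refund policy?", // invented example prompt
  mode: "query", // answer only from workspace context; no chat history recall
});
const chatBody = JSON.stringify({
  message: "Summarize that in one sentence.", // invented example prompt
  mode: "chat", // may use general LLM knowledge plus rolling chat history
});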

View File

@@ -1,7 +1,6 @@
 const { v4: uuidv4 } = require("uuid");
 const { reqBody, userFromSession, multiUserMode } = require("../utils/http");
 const { Workspace } = require("../models/workspace");
-const { chatWithWorkspace } = require("../utils/chats");
 const { validatedRequest } = require("../utils/middleware/validatedRequest");
 const { WorkspaceChats } = require("../models/workspaceChats");
 const { SystemSettings } = require("../models/systemSettings");
@@ -95,85 +94,6 @@ function chatEndpoints(app) {
       }
     }
   );
-  app.post(
-    "/workspace/:slug/chat",
-    [validatedRequest],
-    async (request, response) => {
-      try {
-        const user = await userFromSession(request, response);
-        const { slug } = request.params;
-        const { message, mode = "query" } = reqBody(request);
-        const workspace = multiUserMode(response)
-          ? await Workspace.getWithUser(user, { slug })
-          : await Workspace.get({ slug });
-        if (!workspace) {
-          response.sendStatus(400).end();
-          return;
-        }
-        if (multiUserMode(response) && user.role !== "admin") {
-          const limitMessagesSetting = await SystemSettings.get({
-            label: "limit_user_messages",
-          });
-          const limitMessages = limitMessagesSetting?.value === "true";
-          if (limitMessages) {
-            const messageLimitSetting = await SystemSettings.get({
-              label: "message_limit",
-            });
-            const systemLimit = Number(messageLimitSetting?.value);
-            if (!!systemLimit) {
-              const currentChatCount = await WorkspaceChats.count({
-                user_id: user.id,
-                createdAt: {
-                  gte: new Date(new Date() - 24 * 60 * 60 * 1000),
-                },
-              });
-              if (currentChatCount >= systemLimit) {
-                response.status(500).json({
-                  id: uuidv4(),
-                  type: "abort",
-                  textResponse: null,
-                  sources: [],
-                  close: true,
-                  error: `You have met your maximum 24 hour chat quota of ${systemLimit} chats set by the instance administrators. Try again later.`,
-                });
-                return;
-              }
-            }
-          }
-        }
-        const result = await chatWithWorkspace(workspace, message, mode, user);
-        await Telemetry.sendTelemetry(
-          "sent_chat",
-          {
-            multiUserMode: multiUserMode(response),
-            LLMSelection: process.env.LLM_PROVIDER || "openai",
-            Embedder: process.env.EMBEDDING_ENGINE || "inherit",
-            VectorDbSelection: process.env.VECTOR_DB || "pinecone",
-          },
-          user?.id
-        );
-        response.status(200).json({ ...result });
-      } catch (e) {
-        console.error(e);
-        response.status(500).json({
-          id: uuidv4(),
-          type: "abort",
-          textResponse: null,
-          sources: [],
-          close: true,
-          error: e.message,
-        });
-      }
-    }
-  );
 }
 module.exports = { chatEndpoints };

View File

@@ -1598,7 +1598,7 @@
           }
         },
         "requestBody": {
-          "description": "prompt to send to the workspace and the type of conversation (query or chat).",
+          "description": "Send a prompt to the workspace and the type of conversation (query or chat).<br/><b>Query:</b> Will not use LLM unless there are relevant sources from vectorDB & does not recall chat history.<br/><b>Chat:</b> Uses LLM general knowledge w/custom embeddings to produce output, uses rolling chat history.",
          "required": true,
          "type": "object",
          "content": {

View File

@@ -91,6 +91,18 @@ async function chatWithWorkspace(
   const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
   if (!hasVectorizedSpace || embeddingsCount === 0) {
+    if (chatMode === "query") {
+      return {
+        id: uuid,
+        type: "textResponse",
+        sources: [],
+        close: true,
+        error: null,
+        textResponse:
+          "There is no relevant information in this workspace to answer your query.",
+      };
+    }
     // If there are no embeddings - chat like a normal LLM chat interface.
     return await emptyEmbeddingChat({
       uuid,
@@ -131,6 +143,20 @@ async function chatWithWorkspace(
     };
   }
+  // If in query mode and no sources are found, do not
+  // let the LLM try to hallucinate a response or use general knowledge
+  if (chatMode === "query" && sources.length === 0) {
+    return {
+      id: uuid,
+      type: "textResponse",
+      sources: [],
+      close: true,
+      error: null,
+      textResponse:
+        "There is no relevant information in this workspace to answer your query.",
+    };
+  }
   // Compress message to ensure prompt passes token limit with room for response
   // and build system messages based on inputs and history.
   const messages = await LLMConnector.compressMessages(
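
Because both guards return before any LLM call is made, a caller of chatWithWorkspace can distinguish the no-result case from a normal answer by inspecting the payload. A hypothetical caller sketch (the wrapper name is illustrative and not part of this change; the require path matches the server's own import shown above):

// Hypothetical caller: forward the result as-is and report whether query mode
// short-circuited with the no-result payload (empty sources, no error).
const { chatWithWorkspace } = require("../utils/chats");

async function respondToChat(workspace, message, mode, user, response) {
  const result = await chatWithWorkspace(workspace, message, mode, user);
  const noContext =
    mode === "query" && result.sources.length === 0 && result.error === null;
  response.status(200).json({ ...result });
  return noContext;
}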

View File

@@ -50,6 +50,19 @@ async function streamChatWithWorkspace(
   const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
   const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
   if (!hasVectorizedSpace || embeddingsCount === 0) {
+    if (chatMode === "query") {
+      writeResponseChunk(response, {
+        id: uuid,
+        type: "textResponse",
+        textResponse:
+          "There is no relevant information in this workspace to answer your query.",
+        sources: [],
+        close: true,
+        error: null,
+      });
+      return;
+    }
     // If there are no embeddings - chat like a normal LLM chat interface.
     return await streamEmptyEmbeddingChat({
       response,
@@ -93,6 +106,21 @@ async function streamChatWithWorkspace(
     return;
   }
+  // If in query mode and no sources are found, do not
+  // let the LLM try to hallucinate a response or use general knowledge
+  if (chatMode === "query" && sources.length === 0) {
+    writeResponseChunk(response, {
+      id: uuid,
+      type: "textResponse",
+      textResponse:
+        "There is no relevant information in this workspace to answer your query.",
+      sources: [],
+      close: true,
+      error: null,
+    });
+    return;
+  }
   // Compress message to ensure prompt passes token limit with room for response
   // and build system messages based on inputs and history.
   const messages = await LLMConnector.compressMessages(
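
On the streaming path the same fields arrive as a single chunk with close: true, so existing client handling (e.g. the handleChat/setLoadingResponse flow in the frontend) can render it like any other assistant reply. A hedged client-side sketch; the function and history shape are illustrative, not part of this diff:

// Illustrative chunk handler: append the text as an assistant message and
// clear the loading state once the chunk signals close: true.
function handleStreamedChunk(chunk, chatHistory, setLoadingResponse) {
  if (chunk.type === "textResponse") {
    chatHistory.push({
      role: "assistant",
      content: chunk.textResponse,
      sources: chunk.sources,
      error: chunk.error,
    });
  }
  if (chunk.close) setLoadingResponse(false);
  return chatHistory;
}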