diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx
index bd656a7f..0ecf3238 100644
--- a/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx
+++ b/frontend/src/components/WorkspaceChat/ChatContainer/PromptInput/index.jsx
@@ -1,6 +1,6 @@
-import React, { useState, useRef } from "react";
+import React, { useState, useRef, memo, useEffect } from "react";
import { isMobile } from "react-device-detect";
-import { Loader, Menu, Send, X } from "react-feather";
+import { Loader, Menu, X } from "react-feather";
export default function PromptInput({
workspace,
@@ -35,6 +35,17 @@ export default function PromptInput({
};
const setTextCommand = (command = "") => {
+ const storageKey = `workspace_chat_mode_${workspace.slug}`;
+ if (command === "/query") {
+ window.localStorage.setItem(storageKey, "query");
+ window.dispatchEvent(new Event("workspace_chat_mode_update"));
+ return;
+ } else if (command === "/conversation") {
+ window.localStorage.setItem(storageKey, "chat");
+ window.dispatchEvent(new Event("workspace_chat_mode_update"));
+ return;
+ }
+
onChange({ target: { value: `${command} ${message}` } });
};
@@ -45,13 +56,19 @@ export default function PromptInput({
className="flex flex-col gap-y-1 bg-white dark:bg-black-900 md:bg-transparent rounded-t-lg md:w-3/4 w-full mx-auto"
>
-
+
+
+ Chat mode: {chatMode}
+
+
Responses from system may produce inaccurate or invalid responses - use
with caution.
);
-};
+});
+
+function CommandMenu({ workspace, show, handleClick, hide }) {
+ if (!show) return null;
+ const COMMANDS = [
+ {
+ cmd: "/conversation",
+ description: "- switch to chat mode (remembers recent chat history).",
+ },
+ {
+ cmd: "/query",
+ description: "- switch to query mode (does not remember previous chats).",
+ },
+ { cmd: "/reset", description: "- clear current chat history." },
+ ];
+
+ return (
+
+
+
Available Commands
+
+
+
+
+ {COMMANDS.map((item, i) => {
+ const { cmd, description } = item;
+ return (
+
+
+
+ );
+ })}
+
+
+ );
+}
diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
index e26f0f09..f8b1fc9b 100644
--- a/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
+++ b/frontend/src/components/WorkspaceChat/ChatContainer/index.jsx
@@ -50,7 +50,9 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
const chatResult = await Workspace.sendChat(
workspace,
- promptMessage.userMessage
+ promptMessage.userMessage,
+ window.localStorage.getItem(`workspace_chat_mode_${workspace.slug}`) ??
+ "chat"
);
if (!chatResult) {
alert("Could not send chat.");
diff --git a/server/models/workspaceChats.js b/server/models/workspaceChats.js
index 3b90cc61..7a2aafb8 100644
--- a/server/models/workspaceChats.js
+++ b/server/models/workspaceChats.js
@@ -64,11 +64,11 @@ const WorkspaceChats = {
return { chat, message: null };
},
- forWorkspace: async function (workspaceId = null) {
+ forWorkspace: async function (workspaceId = null, limit = null) {
if (!workspaceId) return [];
return await this.where(
`workspaceId = ${workspaceId} AND include = true`,
- null,
+ limit,
"ORDER BY id ASC"
);
},
@@ -104,8 +104,8 @@ const WorkspaceChats = {
const db = await this.db();
const results = await db.all(
`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ""} ${
- !!limit ? `LIMIT ${limit}` : ""
- } ${!!order ? order : ""}`
+ !!order ? order : ""
+ } ${!!limit ? `LIMIT ${limit}` : ""} `
);
db.close();
diff --git a/server/utils/chats/index.js b/server/utils/chats/index.js
index 9be40b69..7b2cc484 100644
--- a/server/utils/chats/index.js
+++ b/server/utils/chats/index.js
@@ -59,7 +59,7 @@ function grepCommand(message) {
return null;
}
-async function chatWithWorkspace(workspace, message, chatMode = "query") {
+async function chatWithWorkspace(workspace, message, chatMode = "chat") {
const uuid = uuidv4();
const openai = new OpenAi();
const VectorDb = getVectorDbClass();
@@ -104,6 +104,8 @@ async function chatWithWorkspace(workspace, message, chatMode = "query") {
error: null,
};
} else {
+ const rawHistory = await WorkspaceChats.forWorkspace(workspace.id, 20);
+ const chatHistory = convertToPromptHistory(rawHistory);
const {
response,
sources,
@@ -112,6 +114,7 @@ async function chatWithWorkspace(workspace, message, chatMode = "query") {
namespace: workspace.slug,
input: message,
workspace,
+ chatHistory,
});
if (!response) {
return {
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 6c9ea2cb..43cc6376 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -25,6 +25,7 @@ function toChunks(arr, size) {
function curateSources(sources = []) {
const knownDocs = [];
const documents = [];
+
for (const source of sources) {
const { metadata = {} } = source;
if (
diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index bd1c6058..532f629c 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -56,6 +56,21 @@ const Chroma = {
const openai = new OpenAIApi(config);
return openai;
},
+ getChatCompletion: async function (
+ openai,
+ messages = [],
+ { temperature = 0.7 }
+ ) {
+ const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
+ const { data } = await openai.createChatCompletion({
+ model,
+ messages,
+ temperature,
+ });
+
+ if (!data.hasOwnProperty("choices")) return null;
+ return data.choices[0].message.content;
+ },
llm: function ({ temperature = 0.7 }) {
const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
return new OpenAI({
@@ -75,6 +90,24 @@ const Chroma = {
? data[0].embedding
: null;
},
+ similarityResponse: async function (client, namespace, queryVector) {
+ const collection = await client.getCollection({ name: namespace });
+ const result = {
+ contextTexts: [],
+ sourceDocuments: [],
+ };
+
+ const response = await collection.query({
+ queryEmbeddings: queryVector,
+ nResults: 4,
+ });
+ response.ids[0].forEach((_, i) => {
+ result.contextTexts.push(response.documents[0][i]);
+ result.sourceDocuments.push(response.metadatas[0][i]);
+ });
+
+ return result;
+ },
namespace: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const collection = await client
@@ -284,6 +317,55 @@ const Chroma = {
message: false,
};
},
+ // This chat implementation injects the chat history and retrieved context into the system prompt at execution time.
+ // This improves on the regular langchain implementation, in which chats directly modify the embeddings;
+ // with that approach, multi-user support would have every conversation mutating the shared base vector collection,
+ // and the only remedy would be replicating an entire vector database per user - which would very quickly consume space on VectorDbs.
+ chat: async function (reqBody = {}) {
+ const {
+ namespace = null,
+ input,
+ workspace = {},
+ chatHistory = [],
+ } = reqBody;
+ if (!namespace || !input) throw new Error("Invalid request body");
+
+ const { client } = await this.connect();
+ if (!(await this.namespaceExists(client, namespace))) {
+ return {
+ response: null,
+ sources: [],
+ message: "Invalid query - no documents found for workspace!",
+ };
+ }
+
+ const queryVector = await this.embedChunk(this.openai(), input);
+ const { contextTexts, sourceDocuments } = await this.similarityResponse(
+ client,
+ namespace,
+ queryVector
+ );
+ const prompt = {
+ role: "system",
+ content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.
+ Context:
+ ${contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")}`,
+ };
+ const memory = [prompt, ...chatHistory, { role: "user", content: input }];
+ const responseText = await this.getChatCompletion(this.openai(), memory, {
+ temperature: workspace?.openAiTemp ?? 0.7,
+ });
+
+ return {
+ response: responseText,
+ sources: curateSources(sourceDocuments),
+ message: false,
+ };
+ },
"namespace-stats": async function (reqBody = {}) {
const { namespace = null } = reqBody;
if (!namespace) throw new Error("namespace required");
diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js
index d6aced15..293e835a 100644
--- a/server/utils/vectorDbProviders/lance/index.js
+++ b/server/utils/vectorDbProviders/lance/index.js
@@ -84,6 +84,27 @@ const LanceDb = {
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
},
+ similarityResponse: async function (client, namespace, queryVector) {
+ const collection = await client.openTable(namespace);
+ const result = {
+ contextTexts: [],
+ sourceDocuments: [],
+ };
+
+ const response = await collection
+ .search(queryVector)
+ .metricType("cosine")
+ .limit(5)
+ .execute();
+
+ response.forEach((item) => {
+ const { vector: _, ...rest } = item;
+ result.contextTexts.push(rest.text);
+ result.sourceDocuments.push(rest);
+ });
+
+ return result;
+ },
namespace: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const collection = await client.openTable(namespace).catch(() => false);
@@ -232,28 +253,78 @@ const LanceDb = {
// LanceDB does not have langchainJS support so we roll our own here.
const queryVector = await this.embedChunk(this.openai(), input);
- const collection = await client.openTable(namespace);
- const relevantResults = await collection
- .search(queryVector)
- .metricType("cosine")
- .limit(2)
- .execute();
- const messages = [
- {
- role: "system",
- content: `The following is a friendly conversation between a human and an AI. The AI is very casual and talkative and responds with a friendly tone. If the AI does not know the answer to a question, it truthfully says it does not know.
- Relevant pieces of information for context of the current query:
- ${relevantResults.map((result) => result.text).join("\n\n")}`,
- },
- { role: "user", content: input },
- ];
- const responseText = await this.getChatCompletion(this.openai(), messages, {
- temperature: workspace?.openAiTemp,
+ const { contextTexts, sourceDocuments } = await this.similarityResponse(
+ client,
+ namespace,
+ queryVector
+ );
+ const prompt = {
+ role: "system",
+ content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.
+ Context:
+ ${contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")}`,
+ };
+ const memory = [prompt, { role: "user", content: input }];
+ const responseText = await this.getChatCompletion(this.openai(), memory, {
+ temperature: workspace?.openAiTemp ?? 0.7,
});
return {
response: responseText,
- sources: curateLanceSources(relevantResults),
+ sources: curateLanceSources(sourceDocuments),
+ message: false,
+ };
+ },
+ // This chat implementation injects the chat history and retrieved context into the system prompt at execution time.
+ // This improves on the regular langchain implementation, in which chats directly modify the embeddings;
+ // with that approach, multi-user support would have every conversation mutating the shared base vector collection,
+ // and the only remedy would be replicating an entire vector database per user - which would very quickly consume space on VectorDbs.
+ chat: async function (reqBody = {}) {
+ const {
+ namespace = null,
+ input,
+ workspace = {},
+ chatHistory = [],
+ } = reqBody;
+ if (!namespace || !input) throw new Error("Invalid request body");
+
+ const { client } = await this.connect();
+ if (!(await this.namespaceExists(client, namespace))) {
+ return {
+ response: null,
+ sources: [],
+ message: "Invalid query - no documents found for workspace!",
+ };
+ }
+
+ const queryVector = await this.embedChunk(this.openai(), input);
+ const { contextTexts, sourceDocuments } = await this.similarityResponse(
+ client,
+ namespace,
+ queryVector
+ );
+ const prompt = {
+ role: "system",
+ content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.
+ Context:
+ ${contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")}`,
+ };
+ const memory = [prompt, ...chatHistory, { role: "user", content: input }];
+ const responseText = await this.getChatCompletion(this.openai(), memory, {
+ temperature: workspace?.openAiTemp ?? 0.7,
+ });
+
+ return {
+ response: responseText,
+ sources: curateLanceSources(sourceDocuments),
message: false,
};
},
diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js
index 2dcf2b52..37198ca4 100644
--- a/server/utils/vectorDbProviders/pinecone/index.js
+++ b/server/utils/vectorDbProviders/pinecone/index.js
@@ -38,6 +38,21 @@ const Pinecone = {
const openai = new OpenAIApi(config);
return openai;
},
+ getChatCompletion: async function (
+ openai,
+ messages = [],
+ { temperature = 0.7 }
+ ) {
+ const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
+ const { data } = await openai.createChatCompletion({
+ model,
+ messages,
+ temperature,
+ });
+
+ if (!data.hasOwnProperty("choices")) return null;
+ return data.choices[0].message.content;
+ },
embedChunk: async function (openai, textChunk) {
const {
data: { data },
@@ -65,6 +80,27 @@ const Pinecone = {
0
);
},
+ similarityResponse: async function (index, namespace, queryVector) {
+ const result = {
+ contextTexts: [],
+ sourceDocuments: [],
+ };
+ const response = await index.query({
+ queryRequest: {
+ namespace,
+ vector: queryVector,
+ topK: 4,
+ includeMetadata: true,
+ },
+ });
+
+ response.matches.forEach((match) => {
+ result.contextTexts.push(match.metadata.text);
+ result.sourceDocuments.push(match);
+ });
+
+ return result;
+ },
namespace: async function (index, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const { namespaces } = await index.describeIndexStats1();
@@ -255,10 +291,17 @@ const Pinecone = {
message: false,
};
},
- // This implementation of chat also expands the memory of the chat itself
- // and adds more tokens to the PineconeDB instance namespace
+ // This chat implementation injects the chat history and retrieved context into the system prompt at execution time.
+ // This improves on the regular langchain implementation, in which chats directly modify the embeddings;
+ // with that approach, multi-user support would have every conversation mutating the shared base vector collection,
+ // and the only remedy would be replicating an entire vector database per user - which would very quickly consume space on VectorDbs.
chat: async function (reqBody = {}) {
- const { namespace = null, input, workspace = {} } = reqBody;
+ const {
+ namespace = null,
+ input,
+ workspace = {},
+ chatHistory = [],
+ } = reqBody;
if (!namespace || !input) throw new Error("Invalid request body");
const { pineconeIndex } = await this.connect();
@@ -267,31 +310,33 @@ const Pinecone = {
"Invalid namespace - has it been collected and seeded yet?"
);
- const vectorStore = await PineconeStore.fromExistingIndex(this.embedder(), {
+ const queryVector = await this.embedChunk(this.openai(), input);
+ const { contextTexts, sourceDocuments } = await this.similarityResponse(
pineconeIndex,
namespace,
+ queryVector
+ );
+ const prompt = {
+ role: "system",
+ content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.
+Context:
+${contextTexts
+ .map((text, i) => {
+ return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+ })
+ .join("")}`,
+ };
+ const memory = [prompt, ...chatHistory, { role: "user", content: input }];
+
+ const responseText = await this.getChatCompletion(this.openai(), memory, {
+ temperature: workspace?.openAiTemp ?? 0.7,
});
- const memory = new VectorStoreRetrieverMemory({
- vectorStoreRetriever: vectorStore.asRetriever(1),
- memoryKey: "history",
- });
-
- const model = this.llm({
- temperature: workspace?.openAiTemp,
- });
- const prompt =
- PromptTemplate.fromTemplate(`The following is a friendly conversation between a human and an AI. The AI is very casual and talkative and responds with a friendly tone. If the AI does not know the answer to a question, it truthfully says it does not know.
- Relevant pieces of previous conversation:
- {history}
-
- Current conversation:
- Human: {input}
- AI:`);
-
- const chain = new LLMChain({ llm: model, prompt, memory });
- const response = await chain.call({ input });
- return { response: response.text, sources: [], message: false };
+ return {
+ response: responseText,
+ sources: curateSources(sourceDocuments),
+ message: false,
+ };
},
};