Add chat/conversation mode as the default chat mode for all Vector Databases (#112)

* Add chat/conversation mode as the default chat mode
Show menu for toggling options for the chat/query/reset commands
Show chat status below input
resolves #61

* remove console logs
Timothy Carambat 2023-06-26 15:08:47 -07:00 committed by GitHub
parent a3f5a936e2
commit 9d0becb2ee
8 changed files with 353 additions and 59 deletions
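
Before the per-file diffs, a minimal sketch of how the pieces fit together, assuming the frontend Workspace.sendChat model shown in the ChatContainer diff below; the helper names here are illustrative only, not part of the commit:

// Minimal sketch of the new flow: the chat mode is stored per workspace in
// localStorage and sent with every chat request (helper names are illustrative).
const storageKey = (slug) => `workspace_chat_mode_${slug}`;

function setChatMode(slug, mode) {
  // mode is "chat" (conversation) or "query"
  window.localStorage.setItem(storageKey(slug), mode);
  // Let listeners (e.g. the Tracking badge below the prompt input) know the mode changed.
  window.dispatchEvent(new Event("workspace_chat_mode_update"));
}

async function send(workspace, message) {
  const mode = window.localStorage.getItem(storageKey(workspace.slug)) ?? "chat";
  // Workspace.sendChat now accepts the mode as a third argument (see the ChatContainer diff).
  return await Workspace.sendChat(workspace, message, mode);
}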

View File

@@ -1,6 +1,6 @@
import React, { useState, useRef } from "react";
import React, { useState, useRef, memo, useEffect } from "react";
import { isMobile } from "react-device-detect";
import { Loader, Menu, Send, X } from "react-feather";
import { Loader, Menu, X } from "react-feather";
export default function PromptInput({
workspace,
@@ -35,6 +35,17 @@ export default function PromptInput({
};
const setTextCommand = (command = "") => {
const storageKey = `workspace_chat_mode_${workspace.slug}`;
if (command === "/query") {
window.localStorage.setItem(storageKey, "query");
window.dispatchEvent(new Event("workspace_chat_mode_update"));
return;
} else if (command === "/conversation") {
window.localStorage.setItem(storageKey, "chat");
window.dispatchEvent(new Event("workspace_chat_mode_update"));
return;
}
onChange({ target: { value: `${command} ${message}` } });
};
@@ -45,13 +56,19 @@
className="flex flex-col gap-y-1 bg-white dark:bg-black-900 md:bg-transparent rounded-t-lg md:w-3/4 w-full mx-auto"
>
<div className="flex items-center py-2 px-4 rounded-lg">
{/* Toggle selector? */}
{/* <button
<CommandMenu
workspace={workspace}
show={showMenu}
handleClick={setTextCommand}
hide={() => setShowMenu(false)}
/>
<button
onClick={() => setShowMenu(!showMenu)}
type="button"
className="p-2 text-slate-200 bg-transparent rounded-md hover:bg-gray-50 dark:hover:bg-stone-500">
className="p-2 text-slate-200 bg-transparent rounded-md hover:bg-gray-50 dark:hover:bg-stone-500"
>
<Menu className="w-4 h-4 md:h-6 md:w-6" />
</button> */}
</button>
<textarea
onKeyUp={adjustTextArea}
onKeyDown={captureEnter}
@@ -94,19 +111,92 @@
<span className="sr-only">Send message</span>
</button>
</div>
<Tracking />
<Tracking workspaceSlug={workspace.slug} />
</form>
</div>
);
}
const Tracking = () => {
const Tracking = memo(({ workspaceSlug }) => {
const storageKey = `workspace_chat_mode_${workspaceSlug}`;
const [chatMode, setChatMode] = useState(
window.localStorage.getItem(storageKey) ?? "chat"
);
useEffect(() => {
function watchForChatModeChange() {
if (!workspaceSlug) return;
window.addEventListener(`workspace_chat_mode_update`, () => {
try {
const chatMode = window.localStorage.getItem(storageKey);
setChatMode(chatMode);
} catch {}
});
}
watchForChatModeChange();
}, [workspaceSlug]);
return (
<div className="flex flex-col w-full justify-center items-center gap-y-2 mb-2 px-4 mx:px-0">
<p className="text-slate-400 text-xs">
<div className="flex flex-col md:flex-row w-full justify-center items-center gap-2 mb-2 px-4 mx:px-0">
<p className="bg-stone-600 text-slate-400 text-xs px-2 rounded-lg font-mono text-center">
Chat mode: {chatMode}
</p>
<p className="text-slate-400 text-xs text-center">
Responses from the system may be inaccurate or invalid - use with caution.
</p>
</div>
);
};
});
function CommandMenu({ workspace, show, handleClick, hide }) {
if (!show) return null;
const COMMANDS = [
{
cmd: "/conversation",
description: "- switch to chat mode (remembers recent chat history) .",
},
{
cmd: "/query",
description: "- switch to query mode (does not remember previous chats).",
},
{ cmd: "/reset", description: "- clear current chat history." },
];
return (
<div className="absolute top-[-25vh] md:top-[-23vh] min-h-[200px] flex flex-col rounded-lg border border-slate-400 p-2 pt-4 bg-stone-600">
<div className="flex justify-between items-center border-b border-slate-400 px-2 py-1 ">
<p className="text-slate-200">Available Commands</p>
<button
type="button"
onClick={hide}
className="p-2 rounded-lg hover:bg-slate-500 rounded-full text-slate-400"
>
<X className="h-4 w-4" />
</button>
</div>
<div className="flex flex-col">
{COMMANDS.map((item, i) => {
const { cmd, description } = item;
return (
<div className="border-b border-slate-400 p-1">
<button
key={i}
type="button"
onClick={() => {
handleClick(cmd);
hide();
}}
className="w-full px-4 py-2 flex items-center rounded-lg hover:bg-slate-500 gap-x-1 disabled:cursor-not-allowed"
>
<p className="text-slate-200 font-semibold">{cmd}</p>
<p className="text-slate-400 text-sm">{description}</p>
</button>
</div>
);
})}
</div>
</div>
);
}
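
The Tracking component in this file registers a window listener on every mount and never removes it; below is a hedged sketch of an equivalent hook with cleanup. The useChatMode name is hypothetical and not part of this commit.

import { useEffect, useState } from "react";

// Hypothetical hook: same localStorage key and custom event as Tracking, plus cleanup.
function useChatMode(workspaceSlug) {
  const storageKey = `workspace_chat_mode_${workspaceSlug}`;
  const [chatMode, setChatMode] = useState(
    () => window.localStorage.getItem(storageKey) ?? "chat"
  );

  useEffect(() => {
    if (!workspaceSlug) return;
    const onUpdate = () =>
      setChatMode(window.localStorage.getItem(storageKey) ?? "chat");
    window.addEventListener("workspace_chat_mode_update", onUpdate);
    // Remove the listener when the component unmounts or the slug changes.
    return () =>
      window.removeEventListener("workspace_chat_mode_update", onUpdate);
  }, [workspaceSlug, storageKey]);

  return chatMode;
}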

View File

@@ -50,7 +50,9 @@ export default function ChatContainer({ workspace, knownHistory = [] }) {
const chatResult = await Workspace.sendChat(
workspace,
promptMessage.userMessage
promptMessage.userMessage,
window.localStorage.getItem(`workspace_chat_mode_${workspace.slug}`) ??
"chat"
);
if (!chatResult) {
alert("Could not send chat.");

View File

@@ -64,11 +64,11 @@ const WorkspaceChats = {
return { chat, message: null };
},
forWorkspace: async function (workspaceId = null) {
forWorkspace: async function (workspaceId = null, limit = null) {
if (!workspaceId) return [];
return await this.where(
`workspaceId = ${workspaceId} AND include = true`,
null,
limit,
"ORDER BY id ASC"
);
},
@@ -104,8 +104,8 @@ const WorkspaceChats = {
const db = await this.db();
const results = await db.all(
`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ""} ${
!!limit ? `LIMIT ${limit}` : ""
} ${!!order ? order : ""}`
!!order ? order : ""
} ${!!limit ? `LIMIT ${limit}` : ""} `
);
db.close();
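
Context for the WorkspaceChats change above: the swap matters because LIMIT must follow ORDER BY in the generated SQL, and the new optional limit lets the chat endpoint cap how much history it loads. A rough sketch of what the call now produces; the table name and id are illustrative:

// Roughly what WorkspaceChats.forWorkspace(workspaceId, 20) now executes
// (table name and workspaceId are illustrative):
//   SELECT * FROM workspace_chats
//   WHERE workspaceId = 1 AND include = true
//   ORDER BY id ASC LIMIT 20
const rawHistory = await WorkspaceChats.forWorkspace(1, 20);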

View File

@@ -59,7 +59,7 @@ function grepCommand(message) {
return null;
}
async function chatWithWorkspace(workspace, message, chatMode = "query") {
async function chatWithWorkspace(workspace, message, chatMode = "chat") {
const uuid = uuidv4();
const openai = new OpenAi();
const VectorDb = getVectorDbClass();
@@ -104,6 +104,8 @@ async function chatWithWorkspace(workspace, message, chatMode = "query") {
error: null,
};
} else {
const rawHistory = await WorkspaceChats.forWorkspace(workspace.id, 20);
const chatHistory = convertToPromptHistory(rawHistory);
const {
response,
sources,
@@ -112,6 +114,7 @@ async function chatWithWorkspace(workspace, message, chatMode = "query") {
namespace: workspace.slug,
input: message,
workspace,
chatHistory,
});
if (!response) {
return {
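
The new else-branch above loads the last 20 stored chats and converts them to prompt history before the vector search runs. A hedged sketch of the shape convertToPromptHistory presumably produces and how it travels to the vector DB chat() implementations further down; the exact field mapping and the VectorDb.chat call target are assumptions, not shown in this hunk:

// Assumed output of convertToPromptHistory(rawHistory): alternating turns.
const chatHistory = [
  { role: "user", content: "What is in the quarterly report?" },
  { role: "assistant", content: "It covers revenue and headcount changes." },
  // ...up to the 20 most recent stored chats
];

// The history rides along in the request body consumed by the Chroma/LanceDb/Pinecone
// chat() functions (VectorDb here is the client from getVectorDbClass()).
const { response, sources } = await VectorDb.chat({
  namespace: workspace.slug,
  input: message,
  workspace,
  chatHistory,
});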

View File

@@ -25,6 +25,7 @@ function toChunks(arr, size) {
function curateSources(sources = []) {
const knownDocs = [];
const documents = [];
for (const source of sources) {
const { metadata = {} } = source;
if (

View File

@@ -56,6 +56,21 @@ const Chroma = {
const openai = new OpenAIApi(config);
return openai;
},
getChatCompletion: async function (
openai,
messages = [],
{ temperature = 0.7 }
) {
const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
const { data } = await openai.createChatCompletion({
model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
},
llm: function ({ temperature = 0.7 }) {
const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
return new OpenAI({
@@ -75,6 +90,24 @@ const Chroma = {
? data[0].embedding
: null;
},
similarityResponse: async function (client, namespace, queryVector) {
const collection = await client.getCollection({ name: namespace });
const result = {
contextTexts: [],
sourceDocuments: [],
};
const response = await collection.query({
queryEmbeddings: queryVector,
nResults: 4,
});
response.ids[0].forEach((_, i) => {
result.contextTexts.push(response.documents[0][i]);
result.sourceDocuments.push(response.metadatas[0][i]);
});
return result;
},
namespace: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const collection = await client
@@ -284,6 +317,55 @@ const Chroma = {
message: false,
};
},
// This implementation of chat uses the chat history and modifies the system prompt at execution time.
// It improves on the regular LangChain implementation so that chats do not directly modify embeddings;
// otherwise, multi-user support would have every conversation mutating the base vector collection, and the
// only fix would be replicating an entire vector database per user - which would very quickly consume space on VectorDBs.
chat: async function (reqBody = {}) {
const {
namespace = null,
input,
workspace = {},
chatHistory = [],
} = reqBody;
if (!namespace || !input) throw new Error("Invalid request body");
const { client } = await this.connect();
if (!(await this.namespaceExists(client, namespace))) {
return {
response: null,
sources: [],
message: "Invalid query - no documents found for workspace!",
};
}
const queryVector = await this.embedChunk(this.openai(), input);
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
);
const prompt = {
role: "system",
content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.
Context:
${contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")}`,
};
const memory = [prompt, ...chatHistory, { role: "user", content: input }];
const responseText = await this.getChatCompletion(this.openai(), memory, {
temperature: workspace?.openAiTemp ?? 0.7,
});
return {
response: responseText,
sources: curateSources(sourceDocuments),
message: false,
};
},
"namespace-stats": async function (reqBody = {}) {
const { namespace = null } = reqBody;
if (!namespace) throw new Error("namespace required");
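
Putting the Chroma pieces together: the memory array handed to getChatCompletion is the context-stuffed system prompt, then the prior turns, then the current question. A rough illustration with placeholder values:

// Illustrative layout of the `memory` array built in Chroma.chat() above.
const chatHistory = [
  { role: "user", content: "Earlier question..." },
  { role: "assistant", content: "Earlier answer..." },
];
const input = "Current follow-up question";
const memory = [
  {
    role: "system",
    content:
      "Given the following conversation, relevant context, and a follow up question, ...\n" +
      "Context:\n" +
      "[CONTEXT 0]:\n...text of the first retrieved chunk...\n[END CONTEXT 0]\n",
  },
  ...chatHistory,                    // remembered turns (conversation mode only)
  { role: "user", content: input },  // the question being asked now
];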

View File

@@ -84,6 +84,27 @@ const LanceDb = {
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
},
similarityResponse: async function (client, namespace, queryVector) {
const collection = await client.openTable(namespace);
const result = {
contextTexts: [],
sourceDocuments: [],
};
const response = await collection
.search(queryVector)
.metricType("cosine")
.limit(5)
.execute();
response.forEach((item) => {
const { vector: _, ...rest } = item;
result.contextTexts.push(rest.text);
result.sourceDocuments.push(rest);
});
return result;
},
namespace: async function (client, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const collection = await client.openTable(namespace).catch(() => false);
@@ -232,28 +253,78 @@ const LanceDb = {
// LanceDB does not have langchainJS support so we roll our own here.
const queryVector = await this.embedChunk(this.openai(), input);
const collection = await client.openTable(namespace);
const relevantResults = await collection
.search(queryVector)
.metricType("cosine")
.limit(2)
.execute();
const messages = [
{
role: "system",
content: `The following is a friendly conversation between a human and an AI. The AI is very casual and talkative and responds with a friendly tone. If the AI does not know the answer to a question, it truthfully says it does not know.
Relevant pieces of information for context of the current query:
${relevantResults.map((result) => result.text).join("\n\n")}`,
},
{ role: "user", content: input },
];
const responseText = await this.getChatCompletion(this.openai(), messages, {
temperature: workspace?.openAiTemp,
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
);
const prompt = {
role: "system",
content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.
Context:
${contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")}`,
};
const memory = [prompt, { role: "user", content: input }];
const responseText = await this.getChatCompletion(this.openai(), memory, {
temperature: workspace?.openAiTemp ?? 0.7,
});
return {
response: responseText,
sources: curateLanceSources(relevantResults),
sources: curateLanceSources(sourceDocuments),
message: false,
};
},
// This implementation of chat uses the chat history and modifies the system prompt at execution time.
// It improves on the regular LangChain implementation so that chats do not directly modify embeddings;
// otherwise, multi-user support would have every conversation mutating the base vector collection, and the
// only fix would be replicating an entire vector database per user - which would very quickly consume space on VectorDBs.
chat: async function (reqBody = {}) {
const {
namespace = null,
input,
workspace = {},
chatHistory = [],
} = reqBody;
if (!namespace || !input) throw new Error("Invalid request body");
const { client } = await this.connect();
if (!(await this.namespaceExists(client, namespace))) {
return {
response: null,
sources: [],
message: "Invalid query - no documents found for workspace!",
};
}
const queryVector = await this.embedChunk(this.openai(), input);
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
);
const prompt = {
role: "system",
content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.
Context:
${contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")}`,
};
const memory = [prompt, ...chatHistory, { role: "user", content: input }];
const responseText = await this.getChatCompletion(this.openai(), memory, {
temperature: workspace?.openAiTemp ?? 0.7,
});
return {
response: responseText,
sources: curateLanceSources(sourceDocuments),
message: false,
};
},
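
For orientation, the LanceDb.similarityResponse helper added in this file returns the same two-field shape as the Chroma and Pinecone versions. A hedged usage sketch; the table name and question are illustrative, and LanceDb is assumed to be in scope:

// Illustrative standalone use of the new helper (ES module, top-level await).
const { client } = await LanceDb.connect();
const queryVector = await LanceDb.embedChunk(LanceDb.openai(), "What changed in Q2?");
const { contextTexts, sourceDocuments } = await LanceDb.similarityResponse(
  client,
  "my-workspace-slug", // namespace == the LanceDB table for the workspace
  queryVector
);
// contextTexts    -> ["chunk text...", ...]            (stuffed into the system prompt)
// sourceDocuments -> [{ text: "chunk text...", ... }]  (vector field stripped; returned as sources)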

View File

@@ -38,6 +38,21 @@ const Pinecone = {
const openai = new OpenAIApi(config);
return openai;
},
getChatCompletion: async function (
openai,
messages = [],
{ temperature = 0.7 }
) {
const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
const { data } = await openai.createChatCompletion({
model,
messages,
temperature,
});
if (!data.hasOwnProperty("choices")) return null;
return data.choices[0].message.content;
},
embedChunk: async function (openai, textChunk) {
const {
data: { data },
@@ -65,6 +80,27 @@ const Pinecone = {
0
);
},
similarityResponse: async function (index, namespace, queryVector) {
const result = {
contextTexts: [],
sourceDocuments: [],
};
const response = await index.query({
queryRequest: {
namespace,
vector: queryVector,
topK: 4,
includeMetadata: true,
},
});
response.matches.forEach((match) => {
result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
});
return result;
},
namespace: async function (index, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const { namespaces } = await index.describeIndexStats1();
@@ -255,10 +291,17 @@ const Pinecone = {
message: false,
};
},
// This implementation of chat also expands the memory of the chat itself
// and adds more tokens to the PineconeDB instance namespace
// This implementation of chat uses the chat history and modifies the system prompt at execution time.
// It improves on the regular LangChain implementation so that chats do not directly modify embeddings;
// otherwise, multi-user support would have every conversation mutating the base vector collection, and the
// only fix would be replicating an entire vector database per user - which would very quickly consume space on VectorDBs.
chat: async function (reqBody = {}) {
const { namespace = null, input, workspace = {} } = reqBody;
const {
namespace = null,
input,
workspace = {},
chatHistory = [],
} = reqBody;
if (!namespace || !input) throw new Error("Invalid request body");
const { pineconeIndex } = await this.connect();
@@ -267,31 +310,33 @@ const Pinecone = {
"Invalid namespace - has it been collected and seeded yet?"
);
const vectorStore = await PineconeStore.fromExistingIndex(this.embedder(), {
const queryVector = await this.embedChunk(this.openai(), input);
const { contextTexts, sourceDocuments } = await this.similarityResponse(
pineconeIndex,
namespace,
queryVector
);
const prompt = {
role: "system",
content: `Given the following conversation, relevant context, and a follow up question, reply with an answer to the current question the user is asking. Return only your response to the question given the above information following the users instructions as needed.
Context:
${contextTexts
.map((text, i) => {
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
})
.join("")}`,
};
const memory = [prompt, ...chatHistory, { role: "user", content: input }];
const responseText = await this.getChatCompletion(this.openai(), memory, {
temperature: workspace?.openAiTemp ?? 0.7,
});
const memory = new VectorStoreRetrieverMemory({
vectorStoreRetriever: vectorStore.asRetriever(1),
memoryKey: "history",
});
const model = this.llm({
temperature: workspace?.openAiTemp,
});
const prompt =
PromptTemplate.fromTemplate(`The following is a friendly conversation between a human and an AI. The AI is very casual and talkative and responds with a friendly tone. If the AI does not know the answer to a question, it truthfully says it does not know.
Relevant pieces of previous conversation:
{history}
Current conversation:
Human: {input}
AI:`);
const chain = new LLMChain({ llm: model, prompt, memory });
const response = await chain.call({ input });
return { response: response.text, sources: [], message: false };
return {
response: responseText,
sources: curateSources(sourceDocuments),
message: false,
};
},
};
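
Net effect for Pinecone: chat() is now read-only against the index - it embeds the question, runs a topK metadata query, and assembles the completion messages itself, instead of letting the old VectorStoreRetrieverMemory/LLMChain path write conversation turns back into the namespace. A minimal calling sketch with illustrative values:

const result = await Pinecone.chat({
  namespace: "my-workspace-slug",          // illustrative workspace slug
  input: "Summarize the onboarding doc.",  // the current user question
  workspace: { openAiTemp: 0.7 },          // temperature falls back to 0.7 if unset
  chatHistory: [                           // prior turns loaded from WorkspaceChats
    { role: "user", content: "What documents are embedded here?" },
    { role: "assistant", content: "The onboarding handbook and two policies." },
  ],
});
// result -> { response: "<model answer>", sources: [/* curated source metadata */], message: false }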