Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2024-11-05 06:20:10 +01:00)
Infinite prompt input and compression implementation (#332)

* WIP on continuous prompt window summary
* wip
* Move chat out of VDB, simplify chat interface, normalize LLM model interface, add compression abstraction, clean up compressor (TODO: Anthropic stuff)
* Implement compression for Anthropic; fix LanceDB sources
* Clean up vector DBs and check that LanceDB, Chroma, and Pinecone are returning valid metadata sources
* Resolve Weaviate citation sources not working with schema
* Comment cleanup
This commit is contained in:
parent
0751fb1fdd
commit
be9d8b0397
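Before the file-by-file diff, it helps to see the shape this commit normalizes every LLM provider to: a model, a prompt-window limit, fixed per-section token budgets, and a compressMessages step that runs before any completion. A minimal sketch of that surface, reconstructed from the provider classes changed below (the class name and method bodies are illustrative placeholders, not code from the commit):

// Illustrative only: mirrors the interface of OpenAiLLM / AzureOpenAiLLM / AnthropicLLM below.
class ExampleLLMConnector {
  constructor(model = "gpt-3.5-turbo") {
    this.model = model;
    // Each prompt section gets a fixed share of the context window (see utils/helpers/chat).
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };
  }

  // Provider-specific: e.g. 100_000 for claude-2, 4_096 for gpt-3.5-turbo.
  promptWindowLimit() {
    return 4096;
  }

  // Builds the [system, ...history, user] message array sent to the provider API.
  constructPrompt({ systemPrompt = "", chatHistory = [], userPrompt = "" } = {}) {
    return [
      { role: "system", content: systemPrompt },
      ...chatHistory,
      { role: "user", content: userPrompt },
    ];
  }

  // Shrinks system prompt, history, and user prompt to fit promptWindowLimit()
  // before the request is sent; the real providers delegate to utils/helpers/chat.
  async compressMessages(promptArgs = {}, _rawHistory = []) {
    return this.constructPrompt(promptArgs);
  }
}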
@@ -24,7 +24,7 @@ export default function AnthropicAiOptions({ settings, showAlert = false }) {
<div className="w-full flex items-center gap-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Anthropic Claude-2 API Key
Anthropic API Key
</label>
<input
type="password"
@@ -48,7 +48,7 @@ export default function AnthropicAiOptions({ settings, showAlert = false }) {
required={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{["claude-2"].map((model) => {
{["claude-2", "claude-instant-1"].map((model) => {
return (
<option key={model} value={model}>
{model}
@@ -49,6 +49,23 @@ export default function AzureAiOptions({ settings }) {
/>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Chat Model Token Limit
</label>
<select
name="AzureOpenAiTokenLimit"
defaultValue={settings?.AzureOpenAiTokenLimit || 4096}
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
required={true}
>
<option value={4096}>4,096 (gpt-3.5-turbo)</option>
<option value={16384}>16,384 (gpt-3.5-16k)</option>
<option value={8192}>8,192 (gpt-4)</option>
<option value={32768}>32,768 (gpt-4-32k)</option>
</select>
</div>

<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Embedding Deployment Name
@@ -224,7 +224,6 @@ export default function WorkspaceSettings({ workspace }) {
</div>
<textarea
name="openAiPrompt"
maxLength={500}
rows={5}
defaultValue={chatPrompt(workspace)}
className="bg-zinc-900 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5"
@@ -55,7 +55,6 @@ export default function PromptInput({
onKeyDown={captureEnter}
onChange={onChange}
required={true}
maxLength={240}
disabled={inputDisabled}
onFocus={() => setFocused(true)}
onBlur={(e) => {
@@ -71,6 +71,7 @@ function chatEndpoints(app) {
});
response.status(200).json({ ...result });
} catch (e) {
console.error(e);
response.status(500).json({
id: uuidv4(),
type: "abort",
69 server/models/cacheData.js Normal file
@@ -0,0 +1,69 @@
const prisma = require("../utils/prisma");

const CacheData = {
  new: async function (inputs = {}) {
    try {
      const cache = await prisma.cache_data.create({
        data: inputs,
      });
      return { cache, message: null };
    } catch (error) {
      console.error(error.message);
      return { cache: null, message: error.message };
    }
  },

  get: async function (clause = {}, limit = null, orderBy = null) {
    try {
      const cache = await prisma.cache_data.findFirst({
        where: clause,
        ...(limit !== null ? { take: limit } : {}),
        ...(orderBy !== null ? { orderBy } : {}),
      });
      return cache || null;
    } catch (error) {
      console.error(error.message);
      return null;
    }
  },

  delete: async function (clause = {}) {
    try {
      await prisma.cache_data.deleteMany({
        where: clause,
      });
      return true;
    } catch (error) {
      console.error(error.message);
      return false;
    }
  },

  where: async function (clause = {}, limit = null, orderBy = null) {
    try {
      const caches = await prisma.cache_data.findMany({
        where: clause,
        ...(limit !== null ? { take: limit } : {}),
        ...(orderBy !== null ? { orderBy } : {}),
      });
      return caches;
    } catch (error) {
      console.error(error.message);
      return [];
    }
  },

  count: async function (clause = {}) {
    try {
      const count = await prisma.cache_data.count({
        where: clause,
      });
      return count;
    } catch (error) {
      console.error(error.message);
      return 0;
    }
  },
};

module.exports = { CacheData };
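The CacheData model above is a thin Prisma wrapper and is not exercised elsewhere in this excerpt. A hypothetical usage sketch based only on the methods it exports (the require path and field values are assumptions):

const { CacheData } = require("../models/cacheData"); // path assumed

async function cacheExample() {
  // Store an arbitrary JSON blob scoped to a workspace, with a one-hour expiry.
  await CacheData.new({
    name: "compressed-history",
    data: JSON.stringify({ summary: "..." }),
    belongsTo: "workspace",
    byId: 1,
    expiresAt: new Date(Date.now() + 60 * 60 * 1000),
  });

  // Fetch the most recent entry with that name.
  const cache = await CacheData.get({ name: "compressed-history" }, 1, { id: "desc" });
  return cache?.data ?? null;
}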
@@ -65,6 +65,7 @@ const SystemSettings = {
AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY,
AzureOpenAiModelPref: process.env.OPEN_MODEL_PREF,
AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
AzureOpenAiTokenLimit: process.env.AZURE_OPENAI_TOKEN_LIMIT || 4096,
}
: {}),
@@ -36,6 +36,7 @@
"express": "^4.18.2",
"extract-zip": "^2.0.1",
"graphql": "^16.7.1",
"js-tiktoken": "^1.0.7",
"jsonwebtoken": "^8.5.1",
"langchain": "^0.0.90",
"mime": "^3.0.0",
11 server/prisma/migrations/20231101195421_init/migration.sql Normal file
@@ -0,0 +1,11 @@
-- CreateTable
CREATE TABLE "cache_data" (
    "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
    "name" TEXT NOT NULL,
    "data" TEXT NOT NULL,
    "belongsTo" TEXT,
    "byId" INTEGER,
    "expiresAt" DATETIME,
    "createdAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "lastUpdatedAt" DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
);
@@ -116,3 +116,14 @@ model workspace_users {
  workspaces workspaces @relation(fields: [workspace_id], references: [id], onDelete: Cascade, onUpdate: Cascade)
  users      users      @relation(fields: [user_id], references: [id], onDelete: Cascade, onUpdate: Cascade)
}

model cache_data {
  id            Int       @id @default(autoincrement())
  name          String
  data          String
  belongsTo     String?
  byId          Int?
  expiresAt     DateTime?
  createdAt     DateTime  @default(now())
  lastUpdatedAt DateTime  @default(now())
}
@ -12,6 +12,12 @@ class AnthropicLLM {
|
||||
apiKey: process.env.ANTHROPIC_API_KEY,
|
||||
});
|
||||
this.anthropic = anthropic;
|
||||
this.model = process.env.ANTHROPIC_MODEL_PREF;
|
||||
this.limits = {
|
||||
history: this.promptWindowLimit() * 0.15,
|
||||
system: this.promptWindowLimit() * 0.15,
|
||||
user: this.promptWindowLimit() * 0.7,
|
||||
};
|
||||
|
||||
if (!embedder)
|
||||
throw new Error(
|
||||
@ -21,8 +27,19 @@ class AnthropicLLM {
|
||||
this.answerKey = v4().split("-")[0];
|
||||
}
|
||||
|
||||
isValidChatModel(modelName = "") {
|
||||
const validModels = ["claude-2"];
|
||||
promptWindowLimit() {
|
||||
switch (this.model) {
|
||||
case "claude-instant-1":
|
||||
return 72_000;
|
||||
case "claude-2":
|
||||
return 100_000;
|
||||
default:
|
||||
return 72_000; // assume a claude-instant-1 model
|
||||
}
|
||||
}
|
||||
|
||||
isValidChatCompletionModel(modelName = "") {
|
||||
const validModels = ["claude-2", "claude-instant-1"];
|
||||
return validModels.includes(modelName);
|
||||
}
|
||||
|
||||
@ -62,24 +79,25 @@ class AnthropicLLM {
|
||||
\n\nAssistant:`;
|
||||
}
|
||||
|
||||
// This is the interface used when no embeddings are present in the workspace
|
||||
// This is just having a conversation with the LLM as one would normally.
|
||||
async sendChat(chatHistory = [], prompt, workspace = {}) {
|
||||
const model = process.env.ANTHROPIC_MODEL_PREF || "claude-2";
|
||||
if (!this.isValidChatModel(model))
|
||||
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
|
||||
if (!this.isValidChatCompletionModel(this.model))
|
||||
throw new Error(
|
||||
`Anthropic chat: ${model} is not valid for chat completion!`
|
||||
`Anthropic chat: ${this.model} is not valid for chat completion!`
|
||||
);
|
||||
|
||||
const compressedPrompt = await this.compressMessages(
|
||||
{
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
userPrompt: prompt,
|
||||
chatHistory,
|
||||
},
|
||||
rawHistory
|
||||
);
|
||||
const { content, error } = await this.anthropic.completions
|
||||
.create({
|
||||
model: "claude-2",
|
||||
model: this.model,
|
||||
max_tokens_to_sample: 300,
|
||||
prompt: this.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
userPrompt: prompt,
|
||||
chatHistory,
|
||||
}),
|
||||
prompt: compressedPrompt,
|
||||
})
|
||||
.then((res) => {
|
||||
const { completion } = res;
|
||||
@ -100,15 +118,14 @@ class AnthropicLLM {
|
||||
}
|
||||
|
||||
async getChatCompletion(prompt = "", _opts = {}) {
|
||||
const model = process.env.ANTHROPIC_MODEL_PREF || "claude-2";
|
||||
if (!this.isValidChatModel(model))
|
||||
if (!this.isValidChatCompletionModel(this.model))
|
||||
throw new Error(
|
||||
`Anthropic chat: ${model} is not valid for chat completion!`
|
||||
`Anthropic chat: ${this.model} is not valid for chat completion!`
|
||||
);
|
||||
|
||||
const { content, error } = await this.anthropic.completions
|
||||
.create({
|
||||
model: "claude-2",
|
||||
model: this.model,
|
||||
max_tokens_to_sample: 300,
|
||||
prompt,
|
||||
})
|
||||
@ -130,6 +147,16 @@ class AnthropicLLM {
|
||||
return content;
|
||||
}
|
||||
|
||||
async compressMessages(promptArgs = {}, rawHistory = []) {
|
||||
const { messageStringCompressor } = require("../../helpers/chat");
|
||||
const compressedPrompt = await messageStringCompressor(
|
||||
this,
|
||||
promptArgs,
|
||||
rawHistory
|
||||
);
|
||||
return compressedPrompt;
|
||||
}
|
||||
|
||||
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
|
||||
async embedTextInput(textInput) {
|
||||
return await this.embedder.embedTextInput(textInput);
|
||||
|
@ -1,4 +1,5 @@
|
||||
const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
|
||||
const { chatPrompt } = require("../../chats");
|
||||
|
||||
class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
||||
constructor() {
|
||||
@ -13,9 +14,24 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
||||
process.env.AZURE_OPENAI_ENDPOINT,
|
||||
new AzureKeyCredential(process.env.AZURE_OPENAI_KEY)
|
||||
);
|
||||
this.model = process.env.OPEN_MODEL_PREF;
|
||||
this.limits = {
|
||||
history: this.promptWindowLimit() * 0.15,
|
||||
system: this.promptWindowLimit() * 0.15,
|
||||
user: this.promptWindowLimit() * 0.7,
|
||||
};
|
||||
}
|
||||
|
||||
isValidChatModel(_modelName = "") {
|
||||
// Ensure the user selected a proper value for the token limit
|
||||
// could be any of these https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#gpt-4-models
|
||||
// and if undefined - assume it is the lowest end.
|
||||
promptWindowLimit() {
|
||||
return !!process.env.AZURE_OPENAI_TOKEN_LIMIT
|
||||
? Number(process.env.AZURE_OPENAI_TOKEN_LIMIT)
|
||||
: 4096;
|
||||
}
|
||||
|
||||
isValidChatCompletionModel(_modelName = "") {
|
||||
// The Azure user names their "models" as deployments and they can be any name
|
||||
// so we rely on the user to put in the correct deployment as only they would
|
||||
// know it.
|
||||
@ -31,7 +47,7 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
||||
const prompt = {
|
||||
role: "system",
|
||||
content: `${systemPrompt}
|
||||
Context:
|
||||
Context:
|
||||
${contextTexts
|
||||
.map((text, i) => {
|
||||
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
|
||||
@ -46,26 +62,25 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
||||
return { safe: true, reasons: [] };
|
||||
}
|
||||
|
||||
async sendChat(chatHistory = [], prompt, workspace = {}) {
|
||||
const model = process.env.OPEN_MODEL_PREF;
|
||||
if (!model)
|
||||
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
|
||||
if (!this.model)
|
||||
throw new Error(
|
||||
"No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
|
||||
);
|
||||
|
||||
const messages = await this.compressMessages(
|
||||
{
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
userPrompt: prompt,
|
||||
chatHistory,
|
||||
},
|
||||
rawHistory
|
||||
);
|
||||
const textResponse = await this.openai
|
||||
.getChatCompletions(
|
||||
model,
|
||||
[
|
||||
{ role: "system", content: "" },
|
||||
...chatHistory,
|
||||
{ role: "user", content: prompt },
|
||||
],
|
||||
{
|
||||
temperature: Number(workspace?.openAiTemp ?? 0.7),
|
||||
n: 1,
|
||||
}
|
||||
)
|
||||
.getChatCompletions(this.model, messages, {
|
||||
temperature: Number(workspace?.openAiTemp ?? 0.7),
|
||||
n: 1,
|
||||
})
|
||||
.then((res) => {
|
||||
if (!res.hasOwnProperty("choices"))
|
||||
throw new Error("OpenAI chat: No results!");
|
||||
@ -83,18 +98,23 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
|
||||
}
|
||||
|
||||
async getChatCompletion(messages = [], { temperature = 0.7 }) {
|
||||
const model = process.env.OPEN_MODEL_PREF;
|
||||
if (!model)
|
||||
if (!this.model)
|
||||
throw new Error(
|
||||
"No OPEN_MODEL_PREF ENV defined. This must the name of a deployment on your Azure account for an LLM chat model like GPT-3.5."
|
||||
);
|
||||
|
||||
const data = await this.openai.getChatCompletions(model, messages, {
|
||||
const data = await this.openai.getChatCompletions(this.model, messages, {
|
||||
temperature,
|
||||
});
|
||||
if (!data.hasOwnProperty("choices")) return null;
|
||||
return data.choices[0].message.content;
|
||||
}
|
||||
|
||||
async compressMessages(promptArgs = {}, rawHistory = []) {
|
||||
const { messageArrayCompressor } = require("../../helpers/chat");
|
||||
const messageArray = this.constructPrompt(promptArgs);
|
||||
return await messageArrayCompressor(this, messageArray, rawHistory);
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
@ -1,4 +1,5 @@
|
||||
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
|
||||
const { chatPrompt } = require("../../chats");
|
||||
|
||||
class OpenAiLLM extends OpenAiEmbedder {
|
||||
constructor() {
|
||||
@ -10,6 +11,23 @@ class OpenAiLLM extends OpenAiEmbedder {
|
||||
apiKey: process.env.OPEN_AI_KEY,
|
||||
});
|
||||
this.openai = new OpenAIApi(config);
|
||||
this.model = process.env.OPEN_MODEL_PREF;
|
||||
this.limits = {
|
||||
history: this.promptWindowLimit() * 0.15,
|
||||
system: this.promptWindowLimit() * 0.15,
|
||||
user: this.promptWindowLimit() * 0.7,
|
||||
};
|
||||
}
|
||||
|
||||
promptWindowLimit() {
|
||||
switch (this.model) {
|
||||
case "gpt-3.5-turbo":
|
||||
return 4096;
|
||||
case "gpt-4":
|
||||
return 8192;
|
||||
default:
|
||||
return 4096; // assume a fine-tune 3.5
|
||||
}
|
||||
}
|
||||
|
||||
async isValidChatCompletionModel(modelName = "") {
|
||||
@ -33,7 +51,7 @@ class OpenAiLLM extends OpenAiEmbedder {
|
||||
const prompt = {
|
||||
role: "system",
|
||||
content: `${systemPrompt}
|
||||
Context:
|
||||
Context:
|
||||
${contextTexts
|
||||
.map((text, i) => {
|
||||
return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
|
||||
@ -75,7 +93,7 @@ class OpenAiLLM extends OpenAiEmbedder {
|
||||
return { safe: false, reasons };
|
||||
}
|
||||
|
||||
async sendChat(chatHistory = [], prompt, workspace = {}) {
|
||||
async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) {
|
||||
const model = process.env.OPEN_MODEL_PREF;
|
||||
if (!(await this.isValidChatCompletionModel(model)))
|
||||
throw new Error(
|
||||
@ -87,11 +105,14 @@ class OpenAiLLM extends OpenAiEmbedder {
|
||||
model,
|
||||
temperature: Number(workspace?.openAiTemp ?? 0.7),
|
||||
n: 1,
|
||||
messages: [
|
||||
{ role: "system", content: "" },
|
||||
...chatHistory,
|
||||
{ role: "user", content: prompt },
|
||||
],
|
||||
messages: await this.compressMessages(
|
||||
{
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
userPrompt: prompt,
|
||||
chatHistory,
|
||||
},
|
||||
rawHistory
|
||||
),
|
||||
})
|
||||
.then((json) => {
|
||||
const res = json.data;
|
||||
@ -111,14 +132,13 @@ class OpenAiLLM extends OpenAiEmbedder {
|
||||
}
|
||||
|
||||
async getChatCompletion(messages = null, { temperature = 0.7 }) {
|
||||
const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
|
||||
if (!(await this.isValidChatCompletionModel(model)))
|
||||
if (!(await this.isValidChatCompletionModel(this.model)))
|
||||
throw new Error(
|
||||
`OpenAI chat: ${model} is not valid for chat completion!`
|
||||
`OpenAI chat: ${this.model} is not valid for chat completion!`
|
||||
);
|
||||
|
||||
const { data } = await this.openai.createChatCompletion({
|
||||
model,
|
||||
model: this.model,
|
||||
messages,
|
||||
temperature,
|
||||
});
|
||||
@ -126,6 +146,12 @@ class OpenAiLLM extends OpenAiEmbedder {
|
||||
if (!data.hasOwnProperty("choices")) return null;
|
||||
return data.choices[0].message.content;
|
||||
}
|
||||
|
||||
async compressMessages(promptArgs = {}, rawHistory = []) {
|
||||
const { messageArrayCompressor } = require("../../helpers/chat");
|
||||
const messageArray = this.constructPrompt(promptArgs);
|
||||
return await messageArrayCompressor(this, messageArray, rawHistory);
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
|
@ -91,91 +91,146 @@ async function chatWithWorkspace(
|
||||
const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
|
||||
const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
|
||||
if (!hasVectorizedSpace || embeddingsCount === 0) {
|
||||
const rawHistory = (
|
||||
user
|
||||
? await WorkspaceChats.forWorkspaceByUser(
|
||||
workspace.id,
|
||||
user.id,
|
||||
messageLimit,
|
||||
{ id: "desc" }
|
||||
)
|
||||
: await WorkspaceChats.forWorkspace(workspace.id, messageLimit, {
|
||||
id: "desc",
|
||||
})
|
||||
).reverse();
|
||||
const chatHistory = convertToPromptHistory(rawHistory);
|
||||
const response = await LLMConnector.sendChat(
|
||||
chatHistory,
|
||||
// If there are no embeddings - chat like a normal LLM chat interface.
|
||||
return await emptyEmbeddingChat({
|
||||
uuid,
|
||||
user,
|
||||
message,
|
||||
workspace
|
||||
);
|
||||
const data = { text: response, sources: [], type: "chat" };
|
||||
|
||||
await WorkspaceChats.new({
|
||||
workspaceId: workspace.id,
|
||||
prompt: message,
|
||||
response: data,
|
||||
user,
|
||||
});
|
||||
return {
|
||||
id: uuid,
|
||||
type: "textResponse",
|
||||
textResponse: response,
|
||||
sources: [],
|
||||
close: true,
|
||||
error: null,
|
||||
};
|
||||
} else {
|
||||
const rawHistory = (
|
||||
user
|
||||
? await WorkspaceChats.forWorkspaceByUser(
|
||||
workspace.id,
|
||||
user.id,
|
||||
messageLimit,
|
||||
{ id: "desc" }
|
||||
)
|
||||
: await WorkspaceChats.forWorkspace(workspace.id, messageLimit, {
|
||||
id: "desc",
|
||||
})
|
||||
).reverse();
|
||||
const chatHistory = convertToPromptHistory(rawHistory);
|
||||
const {
|
||||
response,
|
||||
sources,
|
||||
message: error,
|
||||
} = await VectorDb[chatMode]({
|
||||
namespace: workspace.slug,
|
||||
input: message,
|
||||
workspace,
|
||||
chatHistory,
|
||||
messageLimit,
|
||||
LLMConnector,
|
||||
});
|
||||
if (!response) {
|
||||
return {
|
||||
id: uuid,
|
||||
type: "abort",
|
||||
textResponse: null,
|
||||
sources: [],
|
||||
close: true,
|
||||
error,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
const data = { text: response, sources, type: chatMode };
|
||||
await WorkspaceChats.new({
|
||||
workspaceId: workspace.id,
|
||||
prompt: message,
|
||||
response: data,
|
||||
user,
|
||||
});
|
||||
const { rawHistory, chatHistory } = await recentChatHistory(
|
||||
user,
|
||||
workspace,
|
||||
messageLimit,
|
||||
chatMode
|
||||
);
|
||||
const {
|
||||
contextTexts = [],
|
||||
sources = [],
|
||||
message: error,
|
||||
} = await VectorDb.performSimilaritySearch({
|
||||
namespace: workspace.slug,
|
||||
input: message,
|
||||
LLMConnector,
|
||||
});
|
||||
|
||||
// Failed similarity search.
|
||||
if (!!error) {
|
||||
return {
|
||||
id: uuid,
|
||||
type: "textResponse",
|
||||
textResponse: response,
|
||||
sources,
|
||||
type: "abort",
|
||||
textResponse: null,
|
||||
sources: [],
|
||||
close: true,
|
||||
error,
|
||||
};
|
||||
}
|
||||
|
||||
// Compress message to ensure prompt passes token limit with room for response
|
||||
// and build system messages based on inputs and history.
|
||||
const messages = await LLMConnector.compressMessages(
|
||||
{
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
userPrompt: message,
|
||||
contextTexts,
|
||||
chatHistory,
|
||||
},
|
||||
rawHistory
|
||||
);
|
||||
|
||||
// Send the text completion.
|
||||
const textResponse = await LLMConnector.getChatCompletion(messages, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
if (!textResponse) {
|
||||
return {
|
||||
id: uuid,
|
||||
type: "abort",
|
||||
textResponse: null,
|
||||
sources: [],
|
||||
close: true,
|
||||
error: "No text completion could be completed with this input.",
|
||||
};
|
||||
}
|
||||
|
||||
await WorkspaceChats.new({
|
||||
workspaceId: workspace.id,
|
||||
prompt: message,
|
||||
response: { text: textResponse, sources, type: chatMode },
|
||||
user,
|
||||
});
|
||||
return {
|
||||
id: uuid,
|
||||
type: "textResponse",
|
||||
close: true,
|
||||
textResponse,
|
||||
sources,
|
||||
error,
|
||||
};
|
||||
}
|
||||
|
||||
// On query we don't return message history. All other chat modes and when chatting
|
||||
// with no embeddings we return history.
|
||||
async function recentChatHistory(
|
||||
user = null,
|
||||
workspace,
|
||||
messageLimit = 20,
|
||||
chatMode = null
|
||||
) {
|
||||
if (chatMode === "query") return [];
|
||||
const rawHistory = (
|
||||
user
|
||||
? await WorkspaceChats.forWorkspaceByUser(
|
||||
workspace.id,
|
||||
user.id,
|
||||
messageLimit,
|
||||
{ id: "desc" }
|
||||
)
|
||||
: await WorkspaceChats.forWorkspace(workspace.id, messageLimit, {
|
||||
id: "desc",
|
||||
})
|
||||
).reverse();
|
||||
return { rawHistory, chatHistory: convertToPromptHistory(rawHistory) };
|
||||
}
|
||||
|
||||
async function emptyEmbeddingChat({
|
||||
uuid,
|
||||
user,
|
||||
message,
|
||||
workspace,
|
||||
messageLimit,
|
||||
LLMConnector,
|
||||
}) {
|
||||
const { rawHistory, chatHistory } = await recentChatHistory(
|
||||
user,
|
||||
workspace,
|
||||
messageLimit
|
||||
);
|
||||
const textResponse = await LLMConnector.sendChat(
|
||||
chatHistory,
|
||||
message,
|
||||
workspace,
|
||||
rawHistory
|
||||
);
|
||||
await WorkspaceChats.new({
|
||||
workspaceId: workspace.id,
|
||||
prompt: message,
|
||||
response: { text: textResponse, sources: [], type: "chat" },
|
||||
user,
|
||||
});
|
||||
return {
|
||||
id: uuid,
|
||||
type: "textResponse",
|
||||
sources: [],
|
||||
close: true,
|
||||
error: null,
|
||||
textResponse,
|
||||
};
|
||||
}
|
||||
|
||||
function chatPrompt(workspace) {
|
||||
@ -186,6 +241,7 @@ function chatPrompt(workspace) {
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
convertToPromptHistory,
|
||||
convertToChatHistory,
|
||||
chatWithWorkspace,
|
||||
chatPrompt,
|
||||
|
325 server/utils/helpers/chat/index.js Normal file
@@ -0,0 +1,325 @@
const { convertToPromptHistory } = require("../../chats");
const { TokenManager } = require("../tiktoken");

/*
What is the message array compressor?
TLDR: So anyway, I started blasting (your prompts & stuff).

messageArrayCompressor arose out of a need for users to be able to insert unlimited-token prompts
and also maintain coherent history, system instructions, and context, if applicable.

We took an opinionated approach that, after much back-testing, we found retained a highly coherent answer
under most conditions a user would encounter while using this specific system. While other systems may
use a more advanced model for compressing message history or simplify text through a recursive approach, ours is much simpler.

We "cannonball" the input.
Cannonball (verb): to ensure a prompt fits through a model window, we blast a hole in the center of any inputs blocking our path to doing so.
This starts by dissecting the input as tokens and deleting from the middle out, bi-directionally, until the prompt window is satisfied.
You may think: "Doesn't this result in massive data loss?" - yes & no.
Under the use cases we expect for this tool, which is mostly chatting with documents, we are able to use this approach with minimal blowback
on the quality of responses.

We accomplish this by taking a rate-limit approach that is proportional to the model capacity. Since we support more than OpenAI models, this needs to
be generic, and reliance on a "better summary" model is simply not a luxury we can afford. The added latency overhead during prompting is also unacceptable.
In general:
system: at best 15% of token capacity
history: at best 15% of token capacity
prompt: at best 70% of token capacity.

We handle overflows by taking an aggressive path for two main cases.

1. Very large user prompt
- Likely uninterested in context, history, or even the system prompt. This is a "standalone" prompt that hijacks the whole thread.
- We run this prompt on its own, since a prompt that is over 70% of the context window is certainly standalone.

2. Context window is exceeded in regular use.
- We do not touch the prompt since it is very likely to be <70% of the window.
- We check that the system prompt is not outrageous - if it is, we cannonball it and keep context if present.
- We check a sliding window of history, only allowing up to 15% of the history to pass through if it fits, with a
preference for recent history if we can cannonball it to fit; otherwise it is omitted.

We end up with a rather large prompt that fits through a given window with a lot of room for response in most use cases.
We also take the approach that history is the least important and most flexible of the items in this array of responses.

There is a supplemental version of this function that also returns a formatted string for models like Claude-2.
*/

async function messageArrayCompressor(llm, messages = [], rawHistory = []) {
|
||||
// assume the response will be at least 600 tokens. If the total prompt + reply is over we need to proactively
|
||||
// run the compressor to ensure the prompt has enough space to reply.
|
||||
// realistically - most users will not be impacted by this.
|
||||
const tokenBuffer = 600;
|
||||
const tokenManager = new TokenManager(llm.model);
|
||||
// If no work needs to be done, just pass through.
|
||||
if (tokenManager.statsFrom(messages) + tokenBuffer < llm.promptWindowLimit())
|
||||
return messages;
|
||||
|
||||
const system = messages.shift();
|
||||
const user = messages.pop();
|
||||
const userPromptSize = tokenManager.countFromString(user.content);
|
||||
|
||||
// User prompt is the main focus here - we prioritize it and allow
// it to hijack the entire conversation thread. We are going to
|
||||
// cannonball the prompt through to ensure the reply has at least 20% of
|
||||
// the token supply to reply with.
|
||||
if (userPromptSize > llm.limits.user) {
|
||||
return [
|
||||
{
|
||||
role: "user",
|
||||
content: cannonball({
|
||||
input: user.content,
|
||||
targetTokenSize: llm.promptWindowLimit() * 0.8,
|
||||
tiktokenInstance: tokenManager,
|
||||
}),
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
const compressedSystem = new Promise(async (resolve) => {
|
||||
const count = tokenManager.countFromString(system.content);
|
||||
if (count < llm.limits.system) {
|
||||
resolve(system);
|
||||
return;
|
||||
}
|
||||
|
||||
// Split context from system prompt - cannonball it since it's over the window.
// We assume the context + user prompt is few enough tokens to fit.
|
||||
const [prompt, context = ""] = system.content.split("Context:");
|
||||
system.content = `${cannonball({
|
||||
input: prompt,
|
||||
targetTokenSize: llm.limits.system,
|
||||
tiktokenInstance: tokenManager,
|
||||
})}${context ? `\nContext: ${context}` : ""}`;
|
||||
resolve(system);
|
||||
});
|
||||
|
||||
// Prompt is allowed to take up to 70% of the window - we know it's under
// that if we are here, so pass through.
|
||||
const compressedPrompt = new Promise(async (resolve) => resolve(user));
|
||||
|
||||
// We always aggressively compress history because it is the least
|
||||
// important data to retain in full-fidelity.
|
||||
const compressedHistory = new Promise((resolve) => {
|
||||
const eligibleHistoryItems = [];
|
||||
var historyTokenCount = 0;
|
||||
|
||||
for (const [i, history] of rawHistory.reverse().entries()) {
|
||||
const [user, assistant] = convertToPromptHistory([history]);
|
||||
const [userTokens, assistantTokens] = [
|
||||
tokenManager.countFromString(user.content),
|
||||
tokenManager.countFromString(assistant.content),
|
||||
];
|
||||
const total = userTokens + assistantTokens;
|
||||
|
||||
// If during the loop the token cost of adding this history
|
||||
// is small, we can add it to history and move onto next.
|
||||
if (historyTokenCount + total < llm.limits.history) {
|
||||
eligibleHistoryItems.unshift(user, assistant);
|
||||
historyTokenCount += total;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we reach here the overhead of adding this history item will
|
||||
// be too much of the limit. So now, we are prioritizing
|
||||
// the most recent 3 message pairs - if we are already past those - exit loop and stop
|
||||
// trying to make history work.
|
||||
if (i > 2) break;
|
||||
|
||||
// We are over the limit and we are within the first 3 most recent chats.
|
||||
// so now we cannonball them to make them fit into the window.
|
||||
// max size = llm.limit.history; Each component of the message, can at most
|
||||
// be 50% of the history. We cannonball whichever is the problem.
|
||||
// The math isn't perfect for tokens, so we have to add a fudge factor for safety.
|
||||
const maxTargetSize = Math.floor(llm.limits.history / 2.2);
|
||||
if (userTokens > maxTargetSize) {
|
||||
user.content = cannonball({
|
||||
input: user.content,
|
||||
targetTokenSize: maxTargetSize,
|
||||
tiktokenInstance: tokenManager,
|
||||
});
|
||||
}
|
||||
|
||||
if (assistantTokens > maxTargetSize) {
|
||||
assistant.content = cannonball({
|
||||
input: assistant.content,
|
||||
targetTokenSize: maxTargetSize,
|
||||
tiktokenInstance: tokenManager,
|
||||
});
|
||||
}
|
||||
|
||||
const newTotal = tokenManager.statsFrom([user, assistant]);
|
||||
if (historyTokenCount + newTotal > llm.limits.history) continue;
|
||||
eligibleHistoryItems.unshift(user, assistant);
|
||||
historyTokenCount += newTotal;
|
||||
}
|
||||
resolve(eligibleHistoryItems);
|
||||
});
|
||||
|
||||
const [cSystem, cHistory, cPrompt] = await Promise.all([
|
||||
compressedSystem,
|
||||
compressedHistory,
|
||||
compressedPrompt,
|
||||
]);
|
||||
return [cSystem, ...cHistory, cPrompt];
|
||||
}
|
||||
|
||||
// Implementation of messageArrayCompressor, but for string only completion models
|
||||
async function messageStringCompressor(llm, promptArgs = {}, rawHistory = []) {
|
||||
const tokenBuffer = 600;
|
||||
const tokenManager = new TokenManager(llm.model);
|
||||
const initialPrompt = llm.constructPrompt(promptArgs);
|
||||
if (
|
||||
tokenManager.statsFrom(initialPrompt) + tokenBuffer <
|
||||
llm.promptWindowLimit()
|
||||
)
|
||||
return initialPrompt;
|
||||
|
||||
const system = promptArgs.systemPrompt;
|
||||
const user = promptArgs.userPrompt;
|
||||
const userPromptSize = tokenManager.countFromString(user);
|
||||
|
||||
// User prompt is the main focus here - we prioritize it and allow
// it to hijack the entire conversation thread. We are going to
|
||||
// cannonball the prompt through to ensure the reply has at least 20% of
|
||||
// the token supply to reply with.
|
||||
if (userPromptSize > llm.limits.user) {
|
||||
return llm.constructPrompt({
|
||||
userPrompt: cannonball({
|
||||
input: user,
|
||||
targetTokenSize: llm.promptWindowLimit() * 0.8,
|
||||
tiktokenInstance: tokenManager,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
const compressedSystem = new Promise(async (resolve) => {
|
||||
const count = tokenManager.countFromString(system);
|
||||
if (count < llm.limits.system) {
|
||||
resolve(system);
|
||||
return;
|
||||
}
|
||||
resolve(
|
||||
cannonball({
|
||||
input: system,
|
||||
targetTokenSize: llm.limits.system,
|
||||
tiktokenInstance: tokenManager,
|
||||
})
|
||||
);
|
||||
});
|
||||
|
||||
// Prompt is allowed to take up to 70% of the window - we know it's under
// that if we are here, so pass through.
|
||||
const compressedPrompt = new Promise(async (resolve) => resolve(user));
|
||||
|
||||
// We always aggressively compress history because it is the least
|
||||
// important data to retain in full-fidelity.
|
||||
const compressedHistory = new Promise((resolve) => {
|
||||
const eligibleHistoryItems = [];
|
||||
var historyTokenCount = 0;
|
||||
|
||||
for (const [i, history] of rawHistory.reverse().entries()) {
|
||||
const [user, assistant] = convertToPromptHistory([history]);
|
||||
const [userTokens, assistantTokens] = [
|
||||
tokenManager.countFromString(user.content),
|
||||
tokenManager.countFromString(assistant.content),
|
||||
];
|
||||
const total = userTokens + assistantTokens;
|
||||
|
||||
// If during the loop the token cost of adding this history
|
||||
// is small, we can add it to history and move onto next.
|
||||
if (historyTokenCount + total < llm.limits.history) {
|
||||
eligibleHistoryItems.unshift(user, assistant);
|
||||
historyTokenCount += total;
|
||||
continue;
|
||||
}
|
||||
|
||||
// If we reach here the overhead of adding this history item will
|
||||
// be too much of the limit. So now, we are prioritizing
|
||||
// the most recent 3 message pairs - if we are already past those - exit loop and stop
|
||||
// trying to make history work.
|
||||
if (i > 2) break;
|
||||
|
||||
// We are over the limit and we are within the first 3 most recent chats.
|
||||
// so now we cannonball them to make them fit into the window.
|
||||
// max size = llm.limit.history; Each component of the message, can at most
|
||||
// be 50% of the history. We cannonball whichever is the problem.
|
||||
// The math isn't perfect for tokens, so we have to add a fudge factor for safety.
|
||||
const maxTargetSize = Math.floor(llm.limits.history / 2.2);
|
||||
if (userTokens > maxTargetSize) {
|
||||
user.content = cannonball({
|
||||
input: user.content,
|
||||
targetTokenSize: maxTargetSize,
|
||||
tiktokenInstance: tokenManager,
|
||||
});
|
||||
}
|
||||
|
||||
if (assistantTokens > maxTargetSize) {
|
||||
assistant.content = cannonball({
|
||||
input: assistant.content,
|
||||
targetTokenSize: maxTargetSize,
|
||||
tiktokenInstance: tokenManager,
|
||||
});
|
||||
}
|
||||
|
||||
const newTotal = tokenManager.statsFrom([user, assistant]);
|
||||
if (historyTokenCount + newTotal > llm.limits.history) continue;
|
||||
eligibleHistoryItems.unshift(user, assistant);
|
||||
historyTokenCount += newTotal;
|
||||
}
|
||||
resolve(eligibleHistoryItems);
|
||||
});
|
||||
|
||||
const [cSystem, cHistory, cPrompt] = await Promise.all([
|
||||
compressedSystem,
|
||||
compressedHistory,
|
||||
compressedPrompt,
|
||||
]);
|
||||
|
||||
return llm.constructPrompt({
|
||||
systemPrompt: cSystem,
|
||||
contextTexts: promptArgs?.contextTexts || [],
|
||||
chatHistory: cHistory,
|
||||
userPrompt: cPrompt,
|
||||
});
|
||||
}
|
||||
|
||||
// Cannonball prompting: aka where we shoot a proportionally big cannonball through a proportionally large prompt.
// Nobody should be sending prompts this big, but there is no reason we shouldn't allow it if the results are still good.
|
||||
function cannonball({
|
||||
input = "",
|
||||
targetTokenSize = 0,
|
||||
tiktokenInstance = null,
|
||||
ellipsesStr = null,
|
||||
}) {
|
||||
if (!input || !targetTokenSize) return input;
|
||||
const tokenManager = tiktokenInstance || new TokenManager();
|
||||
const truncText = ellipsesStr || "\n\n--prompt truncated for brevity--\n\n";
|
||||
const initialInputSize = tokenManager.countFromString(input);
|
||||
if (initialInputSize < targetTokenSize) return input;
|
||||
|
||||
// The delta is the token difference between where our prompt currently sits in size
// and where we ideally need to land.
|
||||
const delta = initialInputSize - targetTokenSize;
|
||||
const tokenChunks = tokenManager.tokensFromString(input);
|
||||
const middleIdx = Math.floor(tokenChunks.length / 2);
|
||||
|
||||
// middle truncate the text going left and right of midpoint
|
||||
const leftChunks = tokenChunks.slice(0, middleIdx - Math.round(delta / 2));
|
||||
const rightChunks = tokenChunks.slice(middleIdx + Math.round(delta / 2));
|
||||
const truncatedText =
|
||||
tokenManager.bytesFromTokens(leftChunks) +
|
||||
truncText +
|
||||
tokenManager.bytesFromTokens(rightChunks);
|
||||
|
||||
console.log(
|
||||
`Cannonball results ${initialInputSize} -> ${tokenManager.countFromString(
|
||||
truncatedText
|
||||
)} tokens.`
|
||||
);
|
||||
return truncatedText;
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
messageArrayCompressor,
|
||||
messageStringCompressor,
|
||||
};
|
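To make the compression budget described at the top of this helper concrete, a small worked sketch. The percentages come from the 15/15/70 split and the 80% cannonball target used in this file; the window size and prompt length are assumed example values:

// Worked example of the budget for a claude-2 sized window.
const promptWindowLimit = 100_000; // from AnthropicLLM.promptWindowLimit()
const limits = {
  system: promptWindowLimit * 0.15, // 15,000 tokens for the system prompt (+ context)
  history: promptWindowLimit * 0.15, // 15,000 tokens for the rolling chat history
  user: promptWindowLimit * 0.7, // 70,000 tokens for the user prompt itself
};

// If the user prompt alone exceeds its 70% share, the thread is dropped and the prompt
// is cannonballed down to 80% of the window, leaving roughly 20% of tokens for the reply.
const hypotheticalPromptTokens = 90_000;
if (hypotheticalPromptTokens > limits.user) {
  const targetTokenSize = promptWindowLimit * 0.8; // 80,000 tokens
  console.log(`middle-truncate ${hypotheticalPromptTokens} -> ~${targetTokenSize} tokens`);
}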
57 server/utils/helpers/tiktoken.js Normal file
@@ -0,0 +1,57 @@
const { getEncodingNameForModel, getEncoding } = require("js-tiktoken");

class TokenManager {
  constructor(model = "gpt-3.5-turbo") {
    this.model = model;
    this.encoderName = this.getEncodingFromModel(model);
    this.encoder = getEncoding(this.encoderName);
    this.buffer = 50;
  }

  getEncodingFromModel(model) {
    try {
      return getEncodingNameForModel(model);
    } catch {
      return "cl100k_base";
    }
  }

  tokensFromString(input = "") {
    const tokens = this.encoder.encode(input);
    return tokens;
  }

  bytesFromTokens(tokens = []) {
    const bytes = this.encoder.decode(tokens);
    return bytes;
  }

  countFromString(input = "") {
    const tokens = this.encoder.encode(input);
    return tokens.length;
  }

  statsFrom(input) {
    if (typeof input === "string") return this.countFromString(input);

    // What is going on here?
    // https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb Item 6.
    // The only option is to estimate. From repeated testing using the static values in the code we are always 2 off,
    // which means as of Nov 1, 2023 the additional factor on ln: 476 changed from 3 to 5.
    if (Array.isArray(input)) {
      const perMessageFactorTokens = input.length * 3;
      const tokensFromContent = input.reduce(
        (a, b) => a + this.countFromString(b.content),
        0
      );
      const diffCoefficient = 5;
      return perMessageFactorTokens + tokensFromContent + diffCoefficient;
    }

    throw new Error("Not a supported tokenized format.");
  }
}

module.exports = {
  TokenManager,
};
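A short usage sketch for the token counter above. The message-array figure follows the per-message factor + content + coefficient estimate in statsFrom; the require path and sample strings are assumptions:

const { TokenManager } = require("./tiktoken"); // path assumed

const tokenManager = new TokenManager("gpt-3.5-turbo");

// Exact count for a plain string.
const promptTokens = tokenManager.countFromString("Summarize the attached document.");

// Estimated count for a chat-style message array:
// messages.length * 3 + tokens in every `content` field + a fudge coefficient of 5.
const messageTokens = tokenManager.statsFrom([
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "Summarize the attached document." },
]);

console.log({ promptTokens, messageTokens });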
@@ -17,6 +17,10 @@ const KEY_MAPPING = {
    envKey: "AZURE_OPENAI_ENDPOINT",
    checks: [isNotEmpty, validAzureURL],
  },
  AzureOpenAiTokenLimit: {
    envKey: "AZURE_OPENAI_TOKEN_LIMIT",
    checks: [validOpenAiTokenLimit],
  },
  AzureOpenAiKey: {
    envKey: "AZURE_OPENAI_KEY",
    checks: [isNotEmpty],
@@ -137,7 +141,7 @@ function supportedLLM(input = "") {
}

function validAnthropicModel(input = "") {
  const validModels = ["claude-2"];
  const validModels = ["claude-2", "claude-instant-1"];
  return validModels.includes(input)
    ? null
    : `Invalid Model type. Must be one of ${validModels.join(", ")}.`;
@@ -174,6 +178,14 @@ function validAzureURL(input = "") {
  }
}

function validOpenAiTokenLimit(input = "") {
  const tokenLimit = Number(input);
  if (isNaN(tokenLimit)) return "Token limit is not a number";
  if (![4_096, 16_384, 8_192, 32_768].includes(tokenLimit))
    return "Invalid OpenAI token limit.";
  return null;
}

function requiresForceMode(_, forceModeEnabled = false) {
  return forceModeEnabled === true ? null : "Cannot set this setting.";
}
@ -3,7 +3,6 @@ const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||
const { v4: uuidv4 } = require("uuid");
|
||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||
const { chatPrompt } = require("../../chats");
|
||||
|
||||
const Chroma = {
|
||||
name: "Chroma",
|
||||
@ -253,92 +252,35 @@ const Chroma = {
|
||||
await DocumentVectors.deleteIds(indexes);
|
||||
return true;
|
||||
},
|
||||
query: async function (reqBody = {}) {
|
||||
const { namespace = null, input, workspace = {} } = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
performSimilaritySearch: async function ({
|
||||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
||||
const { client } = await this.connect();
|
||||
if (!(await this.namespaceExists(client, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
contextTexts: [],
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
// When we roll our own response we have separate metadata and texts,
|
||||
// so for source collection we need to combine them.
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
return { metadata: { ...metadata, text: contextTexts[i] } };
|
||||
});
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sources),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
||||
chat: async function (reqBody = {}) {
|
||||
const {
|
||||
namespace = null,
|
||||
input,
|
||||
workspace = {},
|
||||
chatHistory = [],
|
||||
} = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
|
||||
const { client } = await this.connect();
|
||||
if (!(await this.namespaceExists(client, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
chatHistory,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
// When we roll our own response we have separate metadata and texts,
|
||||
// so for source collection we need to combine them.
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
return { metadata: { ...metadata, text: contextTexts[i] } };
|
||||
});
|
||||
return {
|
||||
response: responseText,
|
||||
contextTexts,
|
||||
sources: this.curateSources(sources),
|
||||
message: false,
|
||||
};
|
||||
|
@ -4,7 +4,6 @@ const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
|
||||
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||
const { v4: uuidv4 } = require("uuid");
|
||||
const { chatPrompt } = require("../../chats");
|
||||
|
||||
const LanceDb = {
|
||||
uri: `${
|
||||
@ -226,83 +225,36 @@ const LanceDb = {
|
||||
return false;
|
||||
}
|
||||
},
|
||||
query: async function (reqBody = {}) {
|
||||
const { namespace = null, input, workspace = {} } = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
performSimilaritySearch: async function ({
|
||||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
||||
const { client } = await this.connect();
|
||||
if (!(await this.namespaceExists(client, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
contextTexts: [],
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
return { metadata: { ...metadata, text: contextTexts[i] } };
|
||||
});
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sourceDocuments),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
||||
chat: async function (reqBody = {}) {
|
||||
const {
|
||||
namespace = null,
|
||||
input,
|
||||
workspace = {},
|
||||
chatHistory = [],
|
||||
} = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
|
||||
const { client } = await this.connect();
|
||||
if (!(await this.namespaceExists(client, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
chatHistory,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sourceDocuments),
|
||||
contextTexts,
|
||||
sources: this.curateSources(sources),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
@ -337,9 +289,13 @@ const LanceDb = {
|
||||
curateSources: function (sources = []) {
|
||||
const documents = [];
|
||||
for (const source of sources) {
|
||||
const { text, vector: _v, score: _s, ...metadata } = source;
|
||||
const { text, vector: _v, score: _s, ...rest } = source;
|
||||
const metadata = rest.hasOwnProperty("metadata") ? rest.metadata : rest;
|
||||
if (Object.keys(metadata).length > 0) {
|
||||
documents.push({ ...metadata, text });
|
||||
documents.push({
|
||||
...metadata,
|
||||
...(text ? { text } : {}),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,7 +3,6 @@ const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||
const { v4: uuidv4 } = require("uuid");
|
||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||
const { chatPrompt } = require("../../chats");
|
||||
|
||||
const Pinecone = {
|
||||
name: "Pinecone",
|
||||
@ -222,80 +221,33 @@ const Pinecone = {
|
||||
message: `Namespace ${namespace} was deleted along with ${details.vectorCount} vectors.`,
|
||||
};
|
||||
},
|
||||
query: async function (reqBody = {}) {
|
||||
const { namespace = null, input, workspace = {} } = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
|
||||
const { pineconeIndex } = await this.connect();
|
||||
if (!(await this.namespaceExists(pineconeIndex, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
pineconeIndex,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sourceDocuments),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
||||
chat: async function (reqBody = {}) {
|
||||
const {
|
||||
namespace = null,
|
||||
input,
|
||||
workspace = {},
|
||||
chatHistory = [],
|
||||
} = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
performSimilaritySearch: async function ({
|
||||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
||||
const { pineconeIndex } = await this.connect();
|
||||
if (!(await this.namespaceExists(pineconeIndex, namespace)))
|
||||
throw new Error(
|
||||
"Invalid namespace - has it been collected and seeded yet?"
|
||||
"Invalid namespace - has it been collected and populated yet?"
|
||||
);
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
pineconeIndex,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
chatHistory,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
return { ...metadata, text: contextTexts[i] };
|
||||
});
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sourceDocuments),
|
||||
contextTexts,
|
||||
sources: this.curateSources(sources),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
|
@ -3,7 +3,6 @@ const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||
const { v4: uuidv4 } = require("uuid");
|
||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||
const { chatPrompt } = require("../../chats");
|
||||
|
||||
const QDrant = {
|
||||
name: "QDrant",
|
||||
@ -262,83 +261,36 @@ const QDrant = {
|
||||
await DocumentVectors.deleteIds(indexes);
|
||||
return true;
|
||||
},
|
||||
query: async function (reqBody = {}) {
|
||||
const { namespace = null, input, workspace = {} } = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
performSimilaritySearch: async function ({
|
||||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
||||
const { client } = await this.connect();
|
||||
if (!(await this.namespaceExists(client, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
contextTexts: [],
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
return { ...metadata, text: contextTexts[i] };
|
||||
});
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sourceDocuments),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
||||
chat: async function (reqBody = {}) {
|
||||
const {
|
||||
namespace = null,
|
||||
input,
|
||||
workspace = {},
|
||||
chatHistory = [],
|
||||
} = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
|
||||
const { client } = await this.connect();
|
||||
if (!(await this.namespaceExists(client, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
chatHistory,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sourceDocuments),
|
||||
contextTexts,
|
||||
sources: this.curateSources(sources),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
@ -377,8 +329,11 @@ const QDrant = {
|
||||
const documents = [];
|
||||
for (const source of sources) {
|
||||
if (Object.keys(source).length > 0) {
|
||||
const metadata = source.hasOwnProperty("metadata")
|
||||
? source.metadata
|
||||
: source;
|
||||
documents.push({
|
||||
...source,
|
||||
...metadata,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -3,7 +3,6 @@ const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
|
||||
const { storeVectorResult, cachedVectorInformation } = require("../../files");
|
||||
const { v4: uuidv4 } = require("uuid");
|
||||
const { toChunks, getLLMProvider } = require("../../helpers");
|
||||
const { chatPrompt } = require("../../chats");
|
||||
const { camelCase } = require("../../helpers/camelcase");
|
||||
|
||||
const Weaviate = {
|
||||
@ -333,83 +332,36 @@ const Weaviate = {
|
||||
await DocumentVectors.deleteIds(indexes);
|
||||
return true;
|
||||
},
|
||||
query: async function (reqBody = {}) {
|
||||
const { namespace = null, input, workspace = {} } = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
performSimilaritySearch: async function ({
|
||||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
||||
const { client } = await this.connect();
|
||||
if (!(await this.namespaceExists(client, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
contextTexts: [],
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
return { ...metadata, text: contextTexts[i] };
|
||||
});
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sourceDocuments),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
// This implementation of chat uses the chat history and modifies the system prompt at execution
|
||||
// this is improved over the regular langchain implementation so that chats do not directly modify embeddings
|
||||
// because then multi-user support will have all conversations mutating the base vector collection to which then
|
||||
// the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs
|
||||
chat: async function (reqBody = {}) {
|
||||
const {
|
||||
namespace = null,
|
||||
input,
|
||||
workspace = {},
|
||||
chatHistory = [],
|
||||
} = reqBody;
|
||||
if (!namespace || !input) throw new Error("Invalid request body");
|
||||
|
||||
const { client } = await this.connect();
|
||||
if (!(await this.namespaceExists(client, namespace))) {
|
||||
return {
|
||||
response: null,
|
||||
sources: [],
|
||||
message: "Invalid query - no documents found for workspace!",
|
||||
};
|
||||
}
|
||||
|
||||
const LLMConnector = getLLMProvider();
|
||||
const queryVector = await LLMConnector.embedTextInput(input);
|
||||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
);
|
||||
const memory = LLMConnector.constructPrompt({
|
||||
systemPrompt: chatPrompt(workspace),
|
||||
contextTexts: contextTexts,
|
||||
userPrompt: input,
|
||||
chatHistory,
|
||||
});
|
||||
const responseText = await LLMConnector.getChatCompletion(memory, {
|
||||
temperature: workspace?.openAiTemp ?? 0.7,
|
||||
});
|
||||
|
||||
return {
|
||||
response: responseText,
|
||||
sources: this.curateSources(sourceDocuments),
|
||||
contextTexts,
|
||||
sources: this.curateSources(sources),
|
||||
message: false,
|
||||
};
|
||||
},
|
||||
@ -445,7 +397,10 @@ const Weaviate = {
|
||||
const documents = [];
|
||||
for (const source of sources) {
|
||||
if (Object.keys(source).length > 0) {
|
||||
documents.push(source);
|
||||
const metadata = source.hasOwnProperty("metadata")
|
||||
? source.metadata
|
||||
: source;
|
||||
documents.push({ ...metadata });
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1556,7 +1556,7 @@ isomorphic-fetch@^3.0.0:
    node-fetch "^2.6.1"
    whatwg-fetch "^3.4.1"

js-tiktoken@^1.0.6:
js-tiktoken@^1.0.6, js-tiktoken@^1.0.7:
  version "1.0.7"
  resolved "https://registry.yarnpkg.com/js-tiktoken/-/js-tiktoken-1.0.7.tgz#56933fcd2093e8304060dfde3071bda91812e6f5"
  integrity sha512-biba8u/clw7iesNEWLOLwrNGoBP2lA+hTaBLs/D45pJdUPFXyxD6nhcDVtADChghv4GgyAiMKYMiRx7x6h7Biw==