mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2024-11-05 06:20:10 +01:00)
Implement Chroma Support (#1)
parent 6b48e812c5
commit 6d01970df2
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/tim.svg?style=social&label=Follow%20%40Timothy%20Carambat)](https://twitter.com/tcarambat) [![](https://dcbadge.vercel.app/api/server/6UyHPeGZAC?compact=true&style=flat)](https://discord.gg/6UyHPeGZAC)
|
[![Twitter](https://img.shields.io/twitter/url/https/twitter.com/tim.svg?style=social&label=Follow%20%40Timothy%20Carambat)](https://twitter.com/tcarambat) [![](https://dcbadge.vercel.app/api/server/6UyHPeGZAC?compact=true&style=flat)](https://discord.gg/6UyHPeGZAC)
|
||||||
|
|
||||||
A full-stack application and tool suite that enables you to turn any document, resource, or piece of content into a piece of data that any LLM can use as reference during chatting. This application runs with very minimal overhead as by default the LLM and vectorDB are hosted remotely, but can be swapped for local instances. Currently this project supports Pinecone and OpenAI.
|
A full-stack application and tool suite that enables you to turn any document, resource, or piece of content into a piece of data that any LLM can use as a reference during chatting. This application runs with very minimal overhead since, by default, the LLM and vectorDB are hosted remotely, but both can be swapped for local instances. Currently this project supports Pinecone & ChromaDB for vector storage and OpenAI for chatting.
|
||||||
|
|
||||||
![Chatting](/images/screenshots/chat.png)
|
![Chatting](/images/screenshots/chat.png)
|
||||||
[view more screenshots](/images/screenshots/SCREENSHOTS.md)
|
[view more screenshots](/images/screenshots/SCREENSHOTS.md)
|
||||||
@ -38,7 +38,7 @@ This monorepo consists of three main sections:
|
|||||||
- `yarn` and `node` on your machine
|
- `yarn` and `node` on your machine
|
||||||
- `python` 3.8+ for running scripts in `collector/`.
|
- `python` 3.8+ for running scripts in `collector/`.
|
||||||
- access to an LLM like `GPT-3.5`, `GPT-4`*.
|
- access to an LLM like `GPT-3.5`, `GPT-4`*.
|
||||||
- a [Pinecone.io](https://pinecone.io) free account*.
|
- a [Pinecone.io](https://pinecone.io) free account* **or** a local Chroma instance running.
|
||||||
*you can use drop-in replacements for these. These are just the easiest options to get up and running fast.
|
*you can use drop-in replacements for these. These are just the easiest options to get up and running fast.
|
||||||
|
|
||||||
### How to get started
|
### How to get started
|
||||||
|
@ -16,8 +16,7 @@ export default function DefaultChatContainer() {
|
|||||||
const MESSAGES = [
|
const MESSAGES = [
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-start ${
|
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
||||||
@ -34,8 +33,7 @@ export default function DefaultChatContainer() {
|
|||||||
|
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-start ${
|
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
||||||
@ -51,17 +49,16 @@ export default function DefaultChatContainer() {
|
|||||||
|
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-start ${
|
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
||||||
<p className="text-slate-800 dark:text-slate-200 font-semibold">
|
<p className="text-slate-800 dark:text-slate-200 font-semibold">
|
||||||
AnythingLLM can run totally locally on your machine with little
|
AnythingLLM can run totally locally on your machine with little
|
||||||
overhead you won't even notice it's there! No GPU needed. Cloud and
|
overhead you won't even notice it's there! No GPU needed. Cloud and
|
||||||
on-premises installtion is available as well.
|
on-premises installation is available as well.
|
||||||
<br />
|
<br />
|
||||||
The AI tooling ecosytem gets more powerful everyday. AnythingLLM
|
The AI tooling ecosystem gets more powerful every day. AnythingLLM
|
||||||
makes it easy to use.
|
makes it easy to use.
|
||||||
</p>
|
</p>
|
||||||
<a
|
<a
|
||||||
@ -79,8 +76,7 @@ export default function DefaultChatContainer() {
|
|||||||
|
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-end ${
|
className={`flex w-full mt-2 justify-end ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
|
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
|
||||||
@ -93,8 +89,7 @@ export default function DefaultChatContainer() {
|
|||||||
|
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-start ${
|
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
||||||
@ -122,14 +117,13 @@ export default function DefaultChatContainer() {
|
|||||||
|
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-end ${
|
className={`flex w-full mt-2 justify-end ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
|
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
|
||||||
<p className="text-slate-800 dark:text-slate-200 font-semibold">
|
<p className="text-slate-800 dark:text-slate-200 font-semibold">
|
||||||
Is this like an AI dropbox or something? What about chatting? It is
|
Is this like an AI dropbox or something? What about chatting? It is
|
||||||
a chatbot isnt it?
|
a chatbot, isn't it?
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -137,8 +131,7 @@ export default function DefaultChatContainer() {
|
|||||||
|
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-start ${
|
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
||||||
@ -168,8 +161,7 @@ export default function DefaultChatContainer() {
|
|||||||
|
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-end ${
|
className={`flex w-full mt-2 justify-end ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
|
<div className="p-4 max-w-[75%] bg-slate-200 dark:bg-amber-800 rounded-b-2xl rounded-tl-2xl rounded-tr-sm">
|
||||||
@ -182,8 +174,7 @@ export default function DefaultChatContainer() {
|
|||||||
|
|
||||||
<React.Fragment>
|
<React.Fragment>
|
||||||
<div
|
<div
|
||||||
className={`flex w-full mt-2 justify-start ${
|
className={`flex w-full mt-2 justify-start ${popMsg ? "chat__message" : ""
|
||||||
popMsg ? "chat__message" : ""
|
|
||||||
}`}
|
}`}
|
||||||
>
|
>
|
||||||
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
<div className="p-4 max-w-[75%] bg-orange-100 dark:bg-stone-700 rounded-b-2xl rounded-tr-2xl rounded-tl-sm">
|
||||||
|
@ -73,6 +73,13 @@ export default function KeysModal({ hideModal = noop }) {
|
|||||||
valid={!!settings?.OpenAiModelPref}
|
valid={!!settings?.OpenAiModelPref}
|
||||||
/>
|
/>
|
||||||
<div className="h-[2px] w-full bg-gray-200 dark:bg-stone-600" />
|
<div className="h-[2px] w-full bg-gray-200 dark:bg-stone-600" />
|
||||||
|
<ShowKey
|
||||||
|
name="Vector DB Choice"
|
||||||
|
value={settings?.VectorDB}
|
||||||
|
valid={!!settings?.VectorDB}
|
||||||
|
/>
|
||||||
|
{settings?.VectorDB === "pinecone" && (
|
||||||
|
<>
|
||||||
<ShowKey
|
<ShowKey
|
||||||
name="Pinecone DB API Key"
|
name="Pinecone DB API Key"
|
||||||
value={settings?.PineConeKey ? "*".repeat(20) : ""}
|
value={settings?.PineConeKey ? "*".repeat(20) : ""}
|
||||||
@ -85,9 +92,20 @@ export default function KeysModal({ hideModal = noop }) {
|
|||||||
/>
|
/>
|
||||||
<ShowKey
|
<ShowKey
|
||||||
name="Pinecone DB Index"
|
name="Pinecone DB Index"
|
||||||
value={settings?.PinceConeIndex}
|
value={settings?.PineConeIndex}
|
||||||
valid={!!settings?.PinceConeIndex}
|
valid={!!settings?.PineConeIndex}
|
||||||
/>
|
/>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
{settings?.VectorDB === "chroma" && (
|
||||||
|
<>
|
||||||
|
<ShowKey
|
||||||
|
name="Chroma Endpoint"
|
||||||
|
value={settings?.ChromaEndpoint}
|
||||||
|
valid={!!settings?.ChromaEndpoint}
|
||||||
|
/>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
</div>
|
</div>
|
||||||
|
@ -41,7 +41,7 @@ export default function ManageWorkspace({ hideModal = noop, workspace }) {
|
|||||||
const deleteWorkspace = async () => {
|
const deleteWorkspace = async () => {
|
||||||
if (
|
if (
|
||||||
!window.confirm(
|
!window.confirm(
|
||||||
`You are about to delete your entire ${workspace.name} workspace. This will remove all vector embeddings on your vector database.\n\nThe original source files will remiain untouched. This action is irreversible.`
|
`You are about to delete your entire ${workspace.name} workspace. This will remove all vector embeddings on your vector database.\n\nThe original source files will remain untouched. This action is irreversible.`
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return false;
|
return false;
|
||||||
|
@ -2,6 +2,7 @@ import { memo, useEffect, useRef, useState } from "react";
|
|||||||
import { AlertTriangle } from "react-feather";
|
import { AlertTriangle } from "react-feather";
|
||||||
import Jazzicon from "../../../../UserIcon";
|
import Jazzicon from "../../../../UserIcon";
|
||||||
import { decode as HTMLDecode } from "he";
|
import { decode as HTMLDecode } from "he";
|
||||||
|
import { v4 } from "uuid";
|
||||||
|
|
||||||
function PromptReply({
|
function PromptReply({
|
||||||
uuid,
|
uuid,
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
"author": "Timothy Carambat (Mintplex Labs)",
|
"author": "Timothy Carambat (Mintplex Labs)",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
|
"lint": "cd server && yarn lint && cd .. && cd frontend && yarn lint",
|
||||||
"setup": "cd server && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"",
|
"setup": "cd server && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"",
|
||||||
"setup:envs": "cd server && cp -n .env.example .env.development && cd ../collector && cp -n .env.example .env && cd ..",
|
"setup:envs": "cd server && cp -n .env.example .env.development && cd ../collector && cp -n .env.example .env && cd ..",
|
||||||
"dev:server": "cd server && yarn dev",
|
"dev:server": "cd server && yarn dev",
|
||||||
|
@ -1,8 +1,15 @@
|
|||||||
SERVER_PORT=5000
|
SERVER_PORT=5000
|
||||||
OPEN_AI_KEY=
|
OPEN_AI_KEY=
|
||||||
OPEN_MODEL_PREF='gpt-3.5-turbo'
|
OPEN_MODEL_PREF='gpt-3.5-turbo'
|
||||||
|
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
||||||
|
CACHE_VECTORS="true"
|
||||||
|
|
||||||
|
# Enable all below if you are using vector database: Chroma.
|
||||||
|
# VECTOR_DB="chroma"
|
||||||
|
# CHROMA_ENDPOINT='http://localhost:8000'
|
||||||
|
|
||||||
|
# Enable all below if you are using vector database: Pinecone.
|
||||||
|
VECTOR_DB="pinecone"
|
||||||
PINECONE_ENVIRONMENT=
|
PINECONE_ENVIRONMENT=
|
||||||
PINECONE_API_KEY=
|
PINECONE_API_KEY=
|
||||||
PINECONE_INDEX=
|
PINECONE_INDEX=
|
||||||
AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
|
||||||
CACHE_VECTORS="true"
|
|
@ -1 +1 @@
|
|||||||
v18.12.1
|
v18.13.0
|
@ -1,13 +1,13 @@
|
|||||||
const { reqBody } = require('../utils/http');
|
const { reqBody } = require("../utils/http");
|
||||||
const { Workspace } = require('../models/workspace');
|
const { Workspace } = require("../models/workspace");
|
||||||
const { chatWithWorkspace } = require('../utils/chats');
|
const { chatWithWorkspace } = require("../utils/chats");
|
||||||
|
|
||||||
function chatEndpoints(app) {
|
function chatEndpoints(app) {
|
||||||
if (!app) return;
|
if (!app) return;
|
||||||
|
|
||||||
app.post('/workspace/:slug/chat', async (request, response) => {
|
app.post("/workspace/:slug/chat", async (request, response) => {
|
||||||
const { slug } = request.params
|
const { slug } = request.params;
|
||||||
const { message, mode = 'query' } = reqBody(request)
|
const { message, mode = "query" } = reqBody(request);
|
||||||
const workspace = await Workspace.get(`slug = '${slug}'`);
|
const workspace = await Workspace.get(`slug = '${slug}'`);
|
||||||
if (!workspace) {
|
if (!workspace) {
|
||||||
response.sendStatus(400).end();
|
response.sendStatus(400).end();
|
||||||
@ -16,8 +16,7 @@ function chatEndpoints(app) {
|
|||||||
|
|
||||||
const result = await chatWithWorkspace(workspace, message, mode);
|
const result = await chatWithWorkspace(workspace, message, mode);
|
||||||
response.status(200).json({ ...result });
|
response.status(200).json({ ...result });
|
||||||
})
|
});
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { chatEndpoints }
|
module.exports = { chatEndpoints };
|
||||||
|
@ -1,34 +1,46 @@
|
|||||||
require('dotenv').config({ path: `.env.${process.env.NODE_ENV}` })
|
require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
|
||||||
const { Pinecone } = require('../utils/pinecone');
|
const { viewLocalFiles } = require("../utils/files");
|
||||||
const { viewLocalFiles } = require('../utils/files');
|
const { getVectorDbClass } = require("../utils/helpers");
|
||||||
|
|
||||||
function systemEndpoints(app) {
|
function systemEndpoints(app) {
|
||||||
if (!app) return;
|
if (!app) return;
|
||||||
|
|
||||||
app.get('/ping', (_, response) => {
|
app.get("/ping", (_, response) => {
|
||||||
response.sendStatus(200);
|
response.sendStatus(200);
|
||||||
})
|
});
|
||||||
|
|
||||||
app.get('/setup-complete', (_, response) => {
|
app.get("/setup-complete", (_, response) => {
|
||||||
|
const vectorDB = process.env.VECTOR_DB || "pinecone";
|
||||||
const results = {
|
const results = {
|
||||||
|
VectorDB: vectorDB,
|
||||||
OpenAiKey: !!process.env.OPEN_AI_KEY,
|
OpenAiKey: !!process.env.OPEN_AI_KEY,
|
||||||
OpenAiModelPref: process.env.OPEN_MODEL_PREF || 'gpt-3.5-turbo',
|
OpenAiModelPref: process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo",
|
||||||
|
...(vectorDB === "pinecone"
|
||||||
|
? {
|
||||||
PineConeEnvironment: process.env.PINECONE_ENVIRONMENT,
|
PineConeEnvironment: process.env.PINECONE_ENVIRONMENT,
|
||||||
PineConeKey: !!process.env.PINECONE_API_KEY,
|
PineConeKey: !!process.env.PINECONE_API_KEY,
|
||||||
PinceConeIndex: process.env.PINECONE_INDEX,
|
PineConeIndex: process.env.PINECONE_INDEX,
|
||||||
}
|
}
|
||||||
response.status(200).json({ results })
|
: {}),
|
||||||
})
|
...(vectorDB === "chroma"
|
||||||
|
? {
|
||||||
|
ChromaEndpoint: process.env.CHROMA_ENDPOINT,
|
||||||
|
}
|
||||||
|
: {}),
|
||||||
|
};
|
||||||
|
response.status(200).json({ results });
|
||||||
|
});
|
||||||
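With this change `/setup-complete` reports which vector database is configured and spreads in provider-specific fields. A hypothetical payload when `VECTOR_DB="chroma"` is set — field names come from the handler above, concrete values are invented for illustration:

```js
// Assumed shape of the /setup-complete response for a Chroma configuration.
const exampleResponse = {
  results: {
    VectorDB: "chroma",
    OpenAiKey: true,
    OpenAiModelPref: "gpt-3.5-turbo",
    ChromaEndpoint: "http://localhost:8000",
  },
};
```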
|
|
||||||
app.get('/system-vectors', async (_, response) => {
|
app.get("/system-vectors", async (_, response) => {
|
||||||
const vectorCount = await Pinecone.totalIndicies();
|
const VectorDb = getVectorDbClass();
|
||||||
response.status(200).json({ vectorCount })
|
const vectorCount = await VectorDb.totalIndicies();
|
||||||
})
|
response.status(200).json({ vectorCount });
|
||||||
|
});
|
||||||
|
|
||||||
app.get('/local-files', async (_, response) => {
|
app.get("/local-files", async (_, response) => {
|
||||||
const localFiles = await viewLocalFiles()
|
const localFiles = await viewLocalFiles();
|
||||||
response.status(200).json({ localFiles })
|
response.status(200).json({ localFiles });
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { systemEndpoints }
|
module.exports = { systemEndpoints };
|
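`getVectorDbClass` from `server/utils/helpers` is imported throughout this diff, but its implementation is not included in these hunks. Inferred from usage — it must return a module exposing the same interface as the old `Pinecone` utility, selected by `VECTOR_DB` with a `pinecone` default (see `/setup-complete` above) — a minimal sketch might be:

```js
// Sketch only — server/utils/helpers is not shown in this diff.
// Export names and require paths are assumptions based on how the
// helper is consumed elsewhere in this commit.
function getVectorDbClass() {
  const vectorSelection = process.env.VECTOR_DB || "pinecone";
  switch (vectorSelection) {
    case "chroma": {
      const { Chroma } = require("./chroma"); // new module added by this PR
      return Chroma;
    }
    default: {
      const { Pinecone } = require("./pinecone");
      return Pinecone;
    }
  }
}

module.exports = { getVectorDbClass };
```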
@ -1,21 +1,21 @@
|
|||||||
const { Pinecone } = require('../utils/pinecone');
|
const { reqBody } = require("../utils/http");
|
||||||
const { reqBody } = require('../utils/http');
|
const { Workspace } = require("../models/workspace");
|
||||||
const { Workspace } = require('../models/workspace');
|
const { Document } = require("../models/documents");
|
||||||
const { Document } = require('../models/documents');
|
const { DocumentVectors } = require("../models/vectors");
|
||||||
const { DocumentVectors } = require('../models/vectors');
|
const { WorkspaceChats } = require("../models/workspaceChats");
|
||||||
const { WorkspaceChats } = require('../models/workspaceChats');
|
const { convertToChatHistory } = require("../utils/chats");
|
||||||
const { convertToChatHistory } = require('../utils/chats');
|
const { getVectorDbClass } = require("../utils/helpers");
|
||||||
|
|
||||||
function workspaceEndpoints(app) {
|
function workspaceEndpoints(app) {
|
||||||
if (!app) return;
|
if (!app) return;
|
||||||
|
|
||||||
app.post('/workspace/new', async (request, response) => {
|
app.post("/workspace/new", async (request, response) => {
|
||||||
const { name = null } = reqBody(request);
|
const { name = null } = reqBody(request);
|
||||||
const { workspace, message } = await Workspace.new(name);
|
const { workspace, message } = await Workspace.new(name);
|
||||||
response.status(200).json({ workspace, message })
|
response.status(200).json({ workspace, message });
|
||||||
})
|
});
|
||||||
|
|
||||||
app.post('/workspace/:slug/update-embeddings', async (request, response) => {
|
app.post("/workspace/:slug/update-embeddings", async (request, response) => {
|
||||||
const { slug = null } = request.params;
|
const { slug = null } = request.params;
|
||||||
const { adds = [], deletes = [] } = reqBody(request);
|
const { adds = [], deletes = [] } = reqBody(request);
|
||||||
const currWorkspace = await Workspace.get(`slug = '${slug}'`);
|
const currWorkspace = await Workspace.get(`slug = '${slug}'`);
|
||||||
@ -28,11 +28,12 @@ function workspaceEndpoints(app) {
|
|||||||
await Document.removeDocuments(currWorkspace, deletes);
|
await Document.removeDocuments(currWorkspace, deletes);
|
||||||
await Document.addDocuments(currWorkspace, adds);
|
await Document.addDocuments(currWorkspace, adds);
|
||||||
const updatedWorkspace = await Workspace.get(`slug = '${slug}'`);
|
const updatedWorkspace = await Workspace.get(`slug = '${slug}'`);
|
||||||
response.status(200).json({ workspace: updatedWorkspace })
|
response.status(200).json({ workspace: updatedWorkspace });
|
||||||
})
|
});
|
||||||
|
|
||||||
app.delete('/workspace/:slug', async (request, response) => {
|
app.delete("/workspace/:slug", async (request, response) => {
|
||||||
const { slug = '' } = request.params
|
const VectorDb = getVectorDbClass();
|
||||||
|
const { slug = "" } = request.params;
|
||||||
const workspace = await Workspace.get(`slug = '${slug}'`);
|
const workspace = await Workspace.get(`slug = '${slug}'`);
|
||||||
|
|
||||||
if (!workspace) {
|
if (!workspace) {
|
||||||
@ -42,34 +43,38 @@ function workspaceEndpoints(app) {
|
|||||||
|
|
||||||
await Workspace.delete(`slug = '${slug.toLowerCase()}'`);
|
await Workspace.delete(`slug = '${slug.toLowerCase()}'`);
|
||||||
await DocumentVectors.deleteForWorkspace(workspace.id);
|
await DocumentVectors.deleteForWorkspace(workspace.id);
|
||||||
await Document.delete(`workspaceId = ${Number(workspace.id)}`)
|
await Document.delete(`workspaceId = ${Number(workspace.id)}`);
|
||||||
await WorkspaceChats.delete(`workspaceId = ${Number(workspace.id)}`)
|
await WorkspaceChats.delete(`workspaceId = ${Number(workspace.id)}`);
|
||||||
try { await Pinecone['delete-namespace']({ namespace: slug }) } catch (e) { console.error(e.message) }
|
try {
|
||||||
response.sendStatus(200).end()
|
await VectorDb["delete-namespace"]({ namespace: slug });
|
||||||
})
|
} catch (e) {
|
||||||
|
console.error(e.message);
|
||||||
|
}
|
||||||
|
response.sendStatus(200).end();
|
||||||
|
});
|
||||||
|
|
||||||
app.get('/workspaces', async (_, response) => {
|
app.get("/workspaces", async (_, response) => {
|
||||||
const workspaces = await Workspace.where();
|
const workspaces = await Workspace.where();
|
||||||
response.status(200).json({ workspaces })
|
response.status(200).json({ workspaces });
|
||||||
})
|
});
|
||||||
|
|
||||||
app.get('/workspace/:slug', async (request, response) => {
|
app.get("/workspace/:slug", async (request, response) => {
|
||||||
const { slug } = request.params
|
const { slug } = request.params;
|
||||||
const workspace = await Workspace.get(`slug = '${slug}'`);
|
const workspace = await Workspace.get(`slug = '${slug}'`);
|
||||||
response.status(200).json({ workspace })
|
response.status(200).json({ workspace });
|
||||||
})
|
});
|
||||||
|
|
||||||
app.get('/workspace/:slug/chats', async (request, response) => {
|
app.get("/workspace/:slug/chats", async (request, response) => {
|
||||||
const { slug } = request.params
|
const { slug } = request.params;
|
||||||
const workspace = await Workspace.get(`slug = '${slug}'`);
|
const workspace = await Workspace.get(`slug = '${slug}'`);
|
||||||
if (!workspace) {
|
if (!workspace) {
|
||||||
response.sendStatus(400).end()
|
response.sendStatus(400).end();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const history = await WorkspaceChats.forWorkspace(workspace.id)
|
const history = await WorkspaceChats.forWorkspace(workspace.id);
|
||||||
response.status(200).json({ history: convertToChatHistory(history) })
|
response.status(200).json({ history: convertToChatHistory(history) });
|
||||||
})
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = { workspaceEndpoints }
|
module.exports = { workspaceEndpoints };
|
||||||
|
@ -1,54 +1,62 @@
|
|||||||
require('dotenv').config({ path: `.env.${process.env.NODE_ENV}` })
|
require("dotenv").config({ path: `.env.${process.env.NODE_ENV}` });
|
||||||
const express = require('express')
|
const express = require("express");
|
||||||
const bodyParser = require('body-parser')
|
const bodyParser = require("body-parser");
|
||||||
const cors = require('cors');
|
const cors = require("cors");
|
||||||
const { validatedRequest } = require('./utils/middleware/validatedRequest');
|
const { validatedRequest } = require("./utils/middleware/validatedRequest");
|
||||||
const { Pinecone } = require('./utils/pinecone');
|
const { reqBody } = require("./utils/http");
|
||||||
const { reqBody } = require('./utils/http');
|
const { systemEndpoints } = require("./endpoints/system");
|
||||||
const { systemEndpoints } = require('./endpoints/system');
|
const { workspaceEndpoints } = require("./endpoints/workspaces");
|
||||||
const { workspaceEndpoints } = require('./endpoints/workspaces');
|
const { chatEndpoints } = require("./endpoints/chat");
|
||||||
const { chatEndpoints } = require('./endpoints/chat');
|
const { getVectorDbClass } = require("./utils/helpers");
|
||||||
const app = express();
|
const app = express();
|
||||||
|
|
||||||
app.use(cors({ origin: true }));
|
app.use(cors({ origin: true }));
|
||||||
app.use(validatedRequest);
|
app.use(validatedRequest);
|
||||||
app.use(bodyParser.text());
|
app.use(bodyParser.text());
|
||||||
app.use(bodyParser.json());
|
app.use(bodyParser.json());
|
||||||
app.use(bodyParser.urlencoded({
|
app.use(
|
||||||
extended: true
|
bodyParser.urlencoded({
|
||||||
}));
|
extended: true,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
systemEndpoints(app);
|
systemEndpoints(app);
|
||||||
workspaceEndpoints(app);
|
workspaceEndpoints(app);
|
||||||
chatEndpoints(app);
|
chatEndpoints(app);
|
||||||
|
|
||||||
app.post('/v/:command', async (request, response) => {
|
app.post("/v/:command", async (request, response) => {
|
||||||
const { command } = request.params
|
const VectorDb = getVectorDbClass();
|
||||||
if (!Object.getOwnPropertyNames(Pinecone).includes(command)) {
|
const { command } = request.params;
|
||||||
response.status(500).json({ message: 'invalid interface command', commands: Object.getOwnPropertyNames(Pinecone.prototype) });
|
if (!Object.getOwnPropertyNames(VectorDb).includes(command)) {
|
||||||
return
|
response.status(500).json({
|
||||||
|
message: "invalid interface command",
|
||||||
|
commands: Object.getOwnPropertyNames(VectorDb),
|
||||||
|
});
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const body = reqBody(request);
|
const body = reqBody(request);
|
||||||
const resBody = await Pinecone[command](body)
|
const resBody = await VectorDb[command](body);
|
||||||
response.status(200).json({ ...resBody });
|
response.status(200).json({ ...resBody });
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
// console.error(e)
|
// console.error(e)
|
||||||
console.error(JSON.stringify(e))
|
console.error(JSON.stringify(e));
|
||||||
response.status(500).json({ error: e.message });
|
response.status(500).json({ error: e.message });
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
})
|
});
|
||||||
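The reworked `/v/:command` route dispatches any own property of the active vector-DB module by name, so every provider method is reachable over HTTP. An illustrative call — `delete-namespace` is a method name that appears in the workspace-deletion code elsewhere in this diff, and the namespace value here is invented:

```js
// Illustrative passthrough call; returns { ...resBody } on success or
// { error } with a 500 on failure, per the handler above.
(async () => {
  const res = await fetch("http://localhost:5000/v/delete-namespace", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ namespace: "my-workspace" }),
  });
  console.log(await res.json());
})();
```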
|
|
||||||
|
app.all("*", function (_, response) {
|
||||||
app.all('*', function (_, response) {
|
|
||||||
response.sendStatus(404);
|
response.sendStatus(404);
|
||||||
});
|
});
|
||||||
|
|
||||||
app.listen(process.env.SERVER_PORT || 5000, () => {
|
app
|
||||||
console.log(`Example app listening on port ${process.env.SERVER_PORT || 5000}`)
|
.listen(process.env.SERVER_PORT || 5000, () => {
|
||||||
})
|
console.log(
|
||||||
|
`Example app listening on port ${process.env.SERVER_PORT || 5000}`
|
||||||
|
);
|
||||||
|
})
|
||||||
.on("error", function (err) {
|
.on("error", function (err) {
|
||||||
process.once("SIGUSR2", function () {
|
process.once("SIGUSR2", function () {
|
||||||
process.kill(process.pid, "SIGUSR2");
|
process.kill(process.pid, "SIGUSR2");
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
const { fileData } = require('../utils/files');
|
const { fileData } = require("../utils/files");
|
||||||
const { v4: uuidv4 } = require('uuid');
|
const { v4: uuidv4 } = require("uuid");
|
||||||
|
const { getVectorDbClass } = require("../utils/helpers");
|
||||||
|
|
||||||
const Document = {
|
const Document = {
|
||||||
tablename: 'workspace_documents',
|
tablename: "workspace_documents",
|
||||||
colsInit: `
|
colsInit: `
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
docId TEXT NOT NULL UNIQUE,
|
docId TEXT NOT NULL UNIQUE,
|
||||||
@ -14,64 +15,82 @@ const Document = {
|
|||||||
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
|
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
|
||||||
`,
|
`,
|
||||||
db: async function () {
|
db: async function () {
|
||||||
const sqlite3 = require('sqlite3').verbose();
|
const sqlite3 = require("sqlite3").verbose();
|
||||||
const { open } = require('sqlite');
|
const { open } = require("sqlite");
|
||||||
|
|
||||||
const db = await open({
|
const db = await open({
|
||||||
filename: 'anythingllm.db',
|
filename: "anythingllm.db",
|
||||||
driver: sqlite3.Database
|
driver: sqlite3.Database,
|
||||||
})
|
});
|
||||||
|
|
||||||
await db.exec(`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`);
|
await db.exec(
|
||||||
db.on('trace', (sql) => console.log(sql))
|
`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`
|
||||||
return db
|
);
|
||||||
|
db.on("trace", (sql) => console.log(sql));
|
||||||
|
return db;
|
||||||
},
|
},
|
||||||
forWorkspace: async function (workspaceId = null) {
|
forWorkspace: async function (workspaceId = null) {
|
||||||
if (!workspaceId) return [];
|
if (!workspaceId) return [];
|
||||||
return await this.where(`workspaceId = ${workspaceId}`);
|
return await this.where(`workspaceId = ${workspaceId}`);
|
||||||
},
|
},
|
||||||
delete: async function (clause = '') {
|
delete: async function (clause = "") {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
await db.get(`DELETE FROM ${this.tablename} WHERE ${clause}`)
|
await db.get(`DELETE FROM ${this.tablename} WHERE ${clause}`);
|
||||||
db.close()
|
db.close();
|
||||||
return true
|
return true;
|
||||||
},
|
},
|
||||||
where: async function (clause = '', limit = null) {
|
where: async function (clause = "", limit = null) {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const results = await db.all(`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ''} ${!!limit ? `LIMIT ${limit}` : ''}`)
|
const results = await db.all(
|
||||||
|
`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ""} ${
|
||||||
|
!!limit ? `LIMIT ${limit}` : ""
|
||||||
|
}`
|
||||||
|
);
|
||||||
|
|
||||||
db.close()
|
db.close();
|
||||||
return results
|
return results;
|
||||||
},
|
},
|
||||||
firstWhere: async function (clause = '') {
|
firstWhere: async function (clause = "") {
|
||||||
const results = await this.where(clause);
|
const results = await this.where(clause);
|
||||||
return results.length > 0 ? results[0] : null
|
return results.length > 0 ? results[0] : null;
|
||||||
},
|
},
|
||||||
addDocuments: async function (workspace, additions = []) {
|
addDocuments: async function (workspace, additions = []) {
|
||||||
const { Pinecone } = require('../utils/pinecone');
|
const VectorDb = getVectorDbClass();
|
||||||
if (additions.length === 0) return;
|
if (additions.length === 0) return;
|
||||||
|
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const stmt = await db.prepare(`INSERT INTO ${this.tablename} (docId, filename, docpath, workspaceId, metadata) VALUES (?,?,?,?,?)`)
|
const stmt = await db.prepare(
|
||||||
|
`INSERT INTO ${this.tablename} (docId, filename, docpath, workspaceId, metadata) VALUES (?,?,?,?,?)`
|
||||||
|
);
|
||||||
for (const path of additions) {
|
for (const path of additions) {
|
||||||
const data = await fileData(path);
|
const data = await fileData(path);
|
||||||
if (!data) continue;
|
if (!data) continue;
|
||||||
|
|
||||||
const docId = uuidv4();
|
const docId = uuidv4();
|
||||||
const { pageContent, ...metadata } = data
|
const { pageContent, ...metadata } = data;
|
||||||
const newDoc = {
|
const newDoc = {
|
||||||
docId,
|
docId,
|
||||||
filename: path.split('/')[1],
|
filename: path.split("/")[1],
|
||||||
docpath: path,
|
docpath: path,
|
||||||
workspaceId: Number(workspace.id),
|
workspaceId: Number(workspace.id),
|
||||||
metadata: JSON.stringify(metadata)
|
metadata: JSON.stringify(metadata),
|
||||||
}
|
};
|
||||||
const vectorized = await Pinecone.addDocumentToNamespace(workspace.slug, { ...data, docId }, path);
|
const vectorized = await VectorDb.addDocumentToNamespace(
|
||||||
|
workspace.slug,
|
||||||
|
{ ...data, docId },
|
||||||
|
path
|
||||||
|
);
|
||||||
if (!vectorized) {
|
if (!vectorized) {
|
||||||
console.error('Failed to vectorize', path)
|
console.error("Failed to vectorize", path);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
stmt.run([docId, newDoc.filename, newDoc.docpath, newDoc.workspaceId, newDoc.metadata])
|
stmt.run([
|
||||||
|
docId,
|
||||||
|
newDoc.filename,
|
||||||
|
newDoc.docpath,
|
||||||
|
newDoc.workspaceId,
|
||||||
|
newDoc.metadata,
|
||||||
|
]);
|
||||||
}
|
}
|
||||||
stmt.finalize();
|
stmt.finalize();
|
||||||
db.close();
|
db.close();
|
||||||
@ -79,21 +98,28 @@ const Document = {
|
|||||||
return;
|
return;
|
||||||
},
|
},
|
||||||
removeDocuments: async function (workspace, removals = []) {
|
removeDocuments: async function (workspace, removals = []) {
|
||||||
const { Pinecone } = require('../utils/pinecone');
|
const VectorDb = getVectorDbClass();
|
||||||
|
|
||||||
if (removals.length === 0) return;
|
if (removals.length === 0) return;
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const stmt = await db.prepare(`DELETE FROM ${this.tablename} WHERE docpath = ? AND workspaceId = ?`);
|
const stmt = await db.prepare(
|
||||||
|
`DELETE FROM ${this.tablename} WHERE docpath = ? AND workspaceId = ?`
|
||||||
|
);
|
||||||
for (const path of removals) {
|
for (const path of removals) {
|
||||||
const document = await this.firstWhere(`docPath = '${path}' AND workspaceId = ${workspace.id}`)
|
const document = await this.firstWhere(
|
||||||
|
`docPath = '${path}' AND workspaceId = ${workspace.id}`
|
||||||
|
);
|
||||||
if (!document) continue;
|
if (!document) continue;
|
||||||
await Pinecone.deleteDocumentFromNamespace(workspace.slug, document.docId);
|
await VectorDb.deleteDocumentFromNamespace(
|
||||||
stmt.run([path, workspace.id])
|
workspace.slug,
|
||||||
|
document.docId
|
||||||
|
);
|
||||||
|
stmt.run([path, workspace.id]);
|
||||||
}
|
}
|
||||||
stmt.finalize();
|
stmt.finalize();
|
||||||
db.close();
|
db.close();
|
||||||
return true;
|
return true;
|
||||||
}
|
},
|
||||||
}
|
};
|
||||||
|
|
||||||
module.exports = { Document }
|
module.exports = { Document };
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
const { Document } = require('./documents');
|
const { Document } = require("./documents");
|
||||||
|
|
||||||
// TODO: Do we want to store entire vectorized chunks in here
|
// TODO: Do we want to store entire vectorized chunks in here
|
||||||
// so that we can easily spin up temp-namespace clones for threading
|
// so that we can easily spin up temp-namespace clones for threading
|
||||||
//
|
//
|
||||||
const DocumentVectors = {
|
const DocumentVectors = {
|
||||||
tablename: 'document_vectors',
|
tablename: "document_vectors",
|
||||||
colsInit: `
|
colsInit: `
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
docId TEXT NOT NULL,
|
docId TEXT NOT NULL,
|
||||||
@ -13,51 +13,63 @@ const DocumentVectors = {
|
|||||||
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
|
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
|
||||||
`,
|
`,
|
||||||
db: async function () {
|
db: async function () {
|
||||||
const sqlite3 = require('sqlite3').verbose();
|
const sqlite3 = require("sqlite3").verbose();
|
||||||
const { open } = require('sqlite');
|
const { open } = require("sqlite");
|
||||||
|
|
||||||
const db = await open({
|
const db = await open({
|
||||||
filename: 'anythingllm.db',
|
filename: "anythingllm.db",
|
||||||
driver: sqlite3.Database
|
driver: sqlite3.Database,
|
||||||
})
|
});
|
||||||
|
|
||||||
await db.exec(`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`);
|
await db.exec(
|
||||||
db.on('trace', (sql) => console.log(sql))
|
`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`
|
||||||
return db
|
);
|
||||||
|
db.on("trace", (sql) => console.log(sql));
|
||||||
|
return db;
|
||||||
},
|
},
|
||||||
bulkInsert: async function (vectorRecords = []) {
|
bulkInsert: async function (vectorRecords = []) {
|
||||||
if (vectorRecords.length === 0) return;
|
if (vectorRecords.length === 0) return;
|
||||||
const db = await this.db();
|
const db = await this.db();
|
||||||
const stmt = await db.prepare(`INSERT INTO ${this.tablename} (docId, vectorId) VALUES (?, ?)`);
|
const stmt = await db.prepare(
|
||||||
|
`INSERT INTO ${this.tablename} (docId, vectorId) VALUES (?, ?)`
|
||||||
|
);
|
||||||
for (const record of vectorRecords) {
|
for (const record of vectorRecords) {
|
||||||
const { docId, vectorId } = record
|
const { docId, vectorId } = record;
|
||||||
stmt.run([docId, vectorId])
|
stmt.run([docId, vectorId]);
|
||||||
}
|
}
|
||||||
|
|
||||||
stmt.finalize()
|
stmt.finalize();
|
||||||
db.close()
|
db.close();
|
||||||
return { documentsInserted: vectorRecords.length };
|
return { documentsInserted: vectorRecords.length };
|
||||||
},
|
},
|
||||||
deleteForWorkspace: async function (workspaceId) {
|
deleteForWorkspace: async function (workspaceId) {
|
||||||
const documents = await Document.forWorkspace(workspaceId);
|
const documents = await Document.forWorkspace(workspaceId);
|
||||||
const docIds = [...(new Set(documents.map((doc) => doc.docId)))];
|
const docIds = [...new Set(documents.map((doc) => doc.docId))];
|
||||||
const ids = (await this.where(`docId IN (${docIds.map((id) => `'${id}'`).join(',')})`)).map((doc) => doc.id)
|
const ids = (
|
||||||
await this.deleteIds(ids)
|
await this.where(`docId IN (${docIds.map((id) => `'${id}'`).join(",")})`)
|
||||||
|
).map((doc) => doc.id);
|
||||||
|
await this.deleteIds(ids);
|
||||||
return true;
|
return true;
|
||||||
},
|
},
|
||||||
where: async function (clause = '', limit = null) {
|
where: async function (clause = "", limit = null) {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const results = await db.all(`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ''} ${!!limit ? `LIMIT ${limit}` : ''}`)
|
const results = await db.all(
|
||||||
|
`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ""} ${
|
||||||
|
!!limit ? `LIMIT ${limit}` : ""
|
||||||
|
}`
|
||||||
|
);
|
||||||
|
|
||||||
db.close()
|
db.close();
|
||||||
return results
|
return results;
|
||||||
},
|
},
|
||||||
deleteIds: async function (ids = []) {
|
deleteIds: async function (ids = []) {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
await db.get(`DELETE FROM ${this.tablename} WHERE id IN (${ids.join(', ')}) `)
|
await db.get(
|
||||||
db.close()
|
`DELETE FROM ${this.tablename} WHERE id IN (${ids.join(", ")}) `
|
||||||
return true
|
);
|
||||||
}
|
db.close();
|
||||||
}
|
return true;
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
module.exports = { DocumentVectors }
|
module.exports = { DocumentVectors };
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
const slugify = require('slugify');
|
const slugify = require("slugify");
|
||||||
const { Document } = require('./documents');
|
const { Document } = require("./documents");
|
||||||
|
|
||||||
const Workspace = {
|
const Workspace = {
|
||||||
tablename: 'workspaces',
|
tablename: "workspaces",
|
||||||
colsInit: `
|
colsInit: `
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
name TEXT NOT NULL UNIQUE,
|
name TEXT NOT NULL UNIQUE,
|
||||||
@ -12,52 +12,66 @@ const Workspace = {
|
|||||||
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
|
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
|
||||||
`,
|
`,
|
||||||
db: async function () {
|
db: async function () {
|
||||||
const sqlite3 = require('sqlite3').verbose();
|
const sqlite3 = require("sqlite3").verbose();
|
||||||
const { open } = require('sqlite');
|
const { open } = require("sqlite");
|
||||||
|
|
||||||
const db = await open({
|
const db = await open({
|
||||||
filename: 'anythingllm.db',
|
filename: "anythingllm.db",
|
||||||
driver: sqlite3.Database
|
driver: sqlite3.Database,
|
||||||
})
|
});
|
||||||
|
|
||||||
await db.exec(`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`);
|
await db.exec(
|
||||||
db.on('trace', (sql) => console.log(sql))
|
`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`
|
||||||
return db
|
);
|
||||||
|
db.on("trace", (sql) => console.log(sql));
|
||||||
|
return db;
|
||||||
},
|
},
|
||||||
new: async function (name = null) {
|
new: async function (name = null) {
|
||||||
if (!name) return { result: null, message: 'name cannot be null' };
|
if (!name) return { result: null, message: "name cannot be null" };
|
||||||
|
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const { id, success, message } = await db.run(`INSERT INTO ${this.tablename} (name, slug) VALUES (?, ?)`, [name, slugify(name, { lower: true })])
|
const { id, success, message } = await db
|
||||||
|
.run(`INSERT INTO ${this.tablename} (name, slug) VALUES (?, ?)`, [
|
||||||
|
name,
|
||||||
|
slugify(name, { lower: true }),
|
||||||
|
])
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
return { id: res.lastID, success: true, message: null }
|
return { id: res.lastID, success: true, message: null };
|
||||||
})
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
return { id: null, success: false, message: error.message }
|
return { id: null, success: false, message: error.message };
|
||||||
})
|
});
|
||||||
if (!success) return { workspace: null, message }
|
if (!success) return { workspace: null, message };
|
||||||
|
|
||||||
const workspace = await db.get(`SELECT * FROM ${this.tablename} WHERE id = ${id}`)
|
const workspace = await db.get(
|
||||||
return { workspace, message: null }
|
`SELECT * FROM ${this.tablename} WHERE id = ${id}`
|
||||||
|
);
|
||||||
|
return { workspace, message: null };
|
||||||
},
|
},
|
||||||
get: async function (clause = '') {
|
get: async function (clause = "") {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const result = await db.get(`SELECT * FROM ${this.tablename} WHERE ${clause}`).then((res) => res || null)
|
const result = await db
|
||||||
|
.get(`SELECT * FROM ${this.tablename} WHERE ${clause}`)
|
||||||
|
.then((res) => res || null);
|
||||||
if (!result) return null;
|
if (!result) return null;
|
||||||
|
|
||||||
const documents = await Document.forWorkspace(result.id);
|
const documents = await Document.forWorkspace(result.id);
|
||||||
return { ...result, documents }
|
return { ...result, documents };
|
||||||
},
|
},
|
||||||
delete: async function (clause = '') {
|
delete: async function (clause = "") {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
await db.get(`DELETE FROM ${this.tablename} WHERE ${clause}`)
|
await db.get(`DELETE FROM ${this.tablename} WHERE ${clause}`);
|
||||||
return true
|
return true;
|
||||||
},
|
},
|
||||||
where: async function (clause = '', limit = null) {
|
where: async function (clause = "", limit = null) {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const results = await db.all(`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ''} ${!!limit ? `LIMIT ${limit}` : ''}`)
|
const results = await db.all(
|
||||||
return results
|
`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ""} ${
|
||||||
|
!!limit ? `LIMIT ${limit}` : ""
|
||||||
|
}`
|
||||||
|
);
|
||||||
|
return results;
|
||||||
},
|
},
|
||||||
}
|
};
|
||||||
|
|
||||||
module.exports = { Workspace }
|
module.exports = { Workspace };
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
|
|
||||||
const WorkspaceChats = {
|
const WorkspaceChats = {
|
||||||
tablename: 'workspace_chats',
|
tablename: "workspace_chats",
|
||||||
colsInit: `
|
colsInit: `
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
workspaceId INTEGER NOT NULL,
|
workspaceId INTEGER NOT NULL,
|
||||||
@ -11,58 +10,79 @@ const WorkspaceChats = {
|
|||||||
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
|
lastUpdatedAt TEXT DEFAULT CURRENT_TIMESTAMP
|
||||||
`,
|
`,
|
||||||
db: async function () {
|
db: async function () {
|
||||||
const sqlite3 = require('sqlite3').verbose();
|
const sqlite3 = require("sqlite3").verbose();
|
||||||
const { open } = require('sqlite');
|
const { open } = require("sqlite");
|
||||||
|
|
||||||
const db = await open({
|
const db = await open({
|
||||||
filename: 'anythingllm.db',
|
filename: "anythingllm.db",
|
||||||
driver: sqlite3.Database
|
driver: sqlite3.Database,
|
||||||
})
|
});
|
||||||
|
|
||||||
await db.exec(`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`);
|
await db.exec(
|
||||||
db.on('trace', (sql) => console.log(sql))
|
`CREATE TABLE IF NOT EXISTS ${this.tablename} (${this.colsInit})`
|
||||||
return db
|
);
|
||||||
|
db.on("trace", (sql) => console.log(sql));
|
||||||
|
return db;
|
||||||
},
|
},
|
||||||
new: async function ({ workspaceId, prompt, response = {} }) {
|
new: async function ({ workspaceId, prompt, response = {} }) {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const { id, success, message } = await db.run(`INSERT INTO ${this.tablename} (workspaceId, prompt, response) VALUES (?, ?, ?)`, [workspaceId, prompt, JSON.stringify(response)])
|
const { id, success, message } = await db
|
||||||
|
.run(
|
||||||
|
`INSERT INTO ${this.tablename} (workspaceId, prompt, response) VALUES (?, ?, ?)`,
|
||||||
|
[workspaceId, prompt, JSON.stringify(response)]
|
||||||
|
)
|
||||||
.then((res) => {
|
.then((res) => {
|
||||||
return { id: res.lastID, success: true, message: null }
|
return { id: res.lastID, success: true, message: null };
|
||||||
})
|
})
|
||||||
.catch((error) => {
|
.catch((error) => {
|
||||||
return { id: null, success: false, message: error.message }
|
return { id: null, success: false, message: error.message };
|
||||||
})
|
});
|
||||||
if (!success) return { chat: null, message }
|
if (!success) return { chat: null, message };
|
||||||
|
|
||||||
const chat = await db.get(`SELECT * FROM ${this.tablename} WHERE id = ${id}`)
|
const chat = await db.get(
|
||||||
return { chat, message: null }
|
`SELECT * FROM ${this.tablename} WHERE id = ${id}`
|
||||||
|
);
|
||||||
|
return { chat, message: null };
|
||||||
},
|
},
|
||||||
forWorkspace: async function (workspaceId = null) {
|
forWorkspace: async function (workspaceId = null) {
|
||||||
if (!workspaceId) return [];
|
if (!workspaceId) return [];
|
||||||
return await this.where(`workspaceId = ${workspaceId} AND include = true`, null, 'ORDER BY id ASC')
|
return await this.where(
|
||||||
|
`workspaceId = ${workspaceId} AND include = true`,
|
||||||
|
null,
|
||||||
|
"ORDER BY id ASC"
|
||||||
|
);
|
||||||
},
|
},
|
||||||
markHistoryInvalid: async function (workspaceId = null) {
|
markHistoryInvalid: async function (workspaceId = null) {
|
||||||
if (!workspaceId) return;
|
if (!workspaceId) return;
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
await db.run(`UPDATE ${this.tablename} SET include = false WHERE workspaceId = ?`, [workspaceId]);
|
await db.run(
|
||||||
|
`UPDATE ${this.tablename} SET include = false WHERE workspaceId = ?`,
|
||||||
|
[workspaceId]
|
||||||
|
);
|
||||||
return;
|
return;
|
||||||
},
|
},
|
||||||
get: async function (clause = '') {
|
get: async function (clause = "") {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const result = await db.get(`SELECT * FROM ${this.tablename} WHERE ${clause}`).then((res) => res || null)
|
const result = await db
|
||||||
|
.get(`SELECT * FROM ${this.tablename} WHERE ${clause}`)
|
||||||
|
.then((res) => res || null);
|
||||||
if (!result) return null;
|
if (!result) return null;
|
||||||
return result
|
return result;
|
||||||
},
|
},
|
||||||
delete: async function (clause = '') {
|
delete: async function (clause = "") {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
await db.get(`DELETE FROM ${this.tablename} WHERE ${clause}`)
|
await db.get(`DELETE FROM ${this.tablename} WHERE ${clause}`);
|
||||||
return true
|
return true;
|
||||||
},
|
},
|
||||||
where: async function (clause = '', limit = null, order = null) {
|
where: async function (clause = "", limit = null, order = null) {
|
||||||
const db = await this.db()
|
const db = await this.db();
|
||||||
const results = await db.all(`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ''} ${!!limit ? `LIMIT ${limit}` : ''} ${!!order ? order : ''}`)
|
const results = await db.all(
|
||||||
return results
|
`SELECT * FROM ${this.tablename} ${clause ? `WHERE ${clause}` : ""} ${
|
||||||
|
!!limit ? `LIMIT ${limit}` : ""
|
||||||
|
} ${!!order ? order : ""}`
|
||||||
|
);
|
||||||
|
return results;
|
||||||
},
|
},
|
||||||
}
|
};
|
||||||
|
|
||||||
module.exports = { WorkspaceChats }
|
module.exports = { WorkspaceChats };
|
||||||
|
@ -10,17 +10,19 @@
|
|||||||
"node": ">=18.12.1"
|
"node": ">=18.12.1"
|
||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"dev": "NODE_ENV=development nodemon --ignore documents index.js",
|
"dev": "NODE_ENV=development nodemon --ignore documents --ignore vector-cache --trace-warnings index.js",
|
||||||
"start": "NODE_ENV=production node index.js"
|
"start": "NODE_ENV=production node index.js",
|
||||||
|
"lint": "yarn prettier --write ./endpoints ./models ./utils index.js"
|
||||||
},
|
},
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@googleapis/youtube": "^9.0.0",
|
"@googleapis/youtube": "^9.0.0",
|
||||||
"@pinecone-database/pinecone": "^0.1.6",
|
"@pinecone-database/pinecone": "^0.1.6",
|
||||||
"body-parser": "^1.20.2",
|
"body-parser": "^1.20.2",
|
||||||
|
"chromadb": "^1.5.2",
|
||||||
"cors": "^2.8.5",
|
"cors": "^2.8.5",
|
||||||
"dotenv": "^16.0.3",
|
"dotenv": "^16.0.3",
|
||||||
"express": "^4.18.2",
|
"express": "^4.18.2",
|
||||||
"langchain": "^0.0.81",
|
"langchain": "^0.0.90",
|
||||||
"moment": "^2.29.4",
|
"moment": "^2.29.4",
|
||||||
"openai": "^3.2.1",
|
"openai": "^3.2.1",
|
||||||
"pinecone-client": "^1.1.0",
|
"pinecone-client": "^1.1.0",
|
||||||
@ -30,6 +32,7 @@
|
|||||||
"uuid": "^9.0.0"
|
"uuid": "^9.0.0"
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"nodemon": "^2.0.22"
|
"nodemon": "^2.0.22",
|
||||||
|
"prettier": "^2.4.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -4,8 +4,8 @@ async function resetMemory(workspace, _message, msgUUID) {
|
|||||||
await WorkspaceChats.markHistoryInvalid(workspace.id);
|
await WorkspaceChats.markHistoryInvalid(workspace.id);
|
||||||
return {
|
return {
|
||||||
uuid: msgUUID,
|
uuid: msgUUID,
|
||||||
type: 'textResponse',
|
type: "textResponse",
|
||||||
textResponse: 'Workspace chat memory was reset!',
|
textResponse: "Workspace chat memory was reset!",
|
||||||
sources: [],
|
sources: [],
|
||||||
close: true,
|
close: true,
|
||||||
error: false,
|
error: false,
|
||||||
@ -13,5 +13,5 @@ async function resetMemory(workspace, _message, msgUUID) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
resetMemory
|
resetMemory,
|
||||||
}
|
};
|
||||||
|
@ -1,50 +1,49 @@
|
|||||||
const { v4: uuidv4 } = require('uuid');
|
const { v4: uuidv4 } = require("uuid");
|
||||||
const { OpenAi } = require('../openAi');
|
const { OpenAi } = require("../openAi");
|
||||||
const { Pinecone } = require('../pinecone');
|
const { WorkspaceChats } = require("../../models/workspaceChats");
|
||||||
const { WorkspaceChats } = require('../../models/workspaceChats');
|
|
||||||
const { resetMemory } = require("./commands/reset");
|
const { resetMemory } = require("./commands/reset");
|
||||||
const moment = require('moment')
|
const moment = require("moment");
|
||||||
|
const { getVectorDbClass } = require("../helpers");
|
||||||
|
|
||||||
function convertToChatHistory(history = []) {
|
function convertToChatHistory(history = []) {
|
||||||
const formattedHistory = []
|
const formattedHistory = [];
|
||||||
history.forEach((history) => {
|
history.forEach((history) => {
|
||||||
const { prompt, response, createdAt } = history
|
const { prompt, response, createdAt } = history;
|
||||||
const data = JSON.parse(response);
|
const data = JSON.parse(response);
|
||||||
formattedHistory.push([
|
formattedHistory.push([
|
||||||
{
|
{
|
||||||
role: 'user',
|
role: "user",
|
||||||
content: prompt,
|
content: prompt,
|
||||||
sentAt: moment(createdAt).unix(),
|
sentAt: moment(createdAt).unix(),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
role: 'assistant',
|
role: "assistant",
|
||||||
content: data.text,
|
content: data.text,
|
||||||
sources: data.sources || [],
|
        sources: data.sources || [],
        sentAt: moment(createdAt).unix(),
      },
    ]);
  });

  return formattedHistory.flat();
}

function convertToPromptHistory(history = []) {
  const formattedHistory = [];
  history.forEach((history) => {
    const { prompt, response } = history;
    const data = JSON.parse(response);
    formattedHistory.push([
      { role: "user", content: prompt },
      { role: "assistant", content: data.text },
    ]);
  });
  return formattedHistory.flat();
}

const VALID_COMMANDS = {
  "/reset": resetMemory,
};

function grepCommand(message) {
  const availableCommands = Object.keys(VALID_COMMANDS);
@ -57,52 +56,63 @@ function grepCommand(message) {
    }
  }

  return null;
}

async function chatWithWorkspace(workspace, message, chatMode = "query") {
  const uuid = uuidv4();
  const openai = new OpenAi();
  const VectorDb = getVectorDbClass();
  const command = grepCommand(message);

  if (!!command && Object.keys(VALID_COMMANDS).includes(command)) {
    return await VALID_COMMANDS[command](workspace, message, uuid);
  }

  const { safe, reasons = [] } = await openai.isSafe(message);
  if (!safe) {
    return {
      id: uuid,
      type: "abort",
      textResponse: null,
      sources: [],
      close: true,
      error: `This message was moderated and will not be allowed. Violations for ${reasons.join(
        ", "
      )} found.`,
    };
  }

  const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
  if (!hasVectorizedSpace) {
    const rawHistory = await WorkspaceChats.forWorkspace(workspace.id);
    const chatHistory = convertToPromptHistory(rawHistory);
    const response = await openai.sendChat(chatHistory, message);
    const data = { text: response, sources: [], type: "chat" };

    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: message,
      response: data,
    });
    return {
      id: uuid,
      type: "textResponse",
      textResponse: response,
      sources: [],
      close: true,
      error: null,
    };
  } else {
    const {
      response,
      sources,
      message: error,
    } = await VectorDb[chatMode]({ namespace: workspace.slug, input: message });
    if (!response) {
      return {
        id: uuid,
        type: "abort",
        textResponse: null,
        sources: [],
        close: true,
@ -110,11 +120,15 @@ async function chatWithWorkspace(workspace, message, chatMode = "query") {
      };
    }

    const data = { text: response, sources, type: chatMode };
    await WorkspaceChats.new({
      workspaceId: workspace.id,
      prompt: message,
      response: data,
    });
    return {
      id: uuid,
      type: "textResponse",
      textResponse: response,
      sources,
      close: true,
@ -124,5 +138,5 @@ async function chatWithWorkspace(workspace, message, chatMode = "query") {
  }
}
module.exports = {
  convertToChatHistory,
  chatWithWorkspace,
};
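Both branches of `chatWithWorkspace` resolve to the same response envelope (`id`, `type`, `textResponse`, `sources`, `close`, `error`), so callers never care whether the answer came from raw chat history or the vector store. A minimal invocation sketch; the require path and workspace stub are illustrative, and the stub carries only the two fields the function actually reads:

```
const { chatWithWorkspace } = require("./server/utils/chats");

// Stub workspace; in the server this row comes from the workspace model.
const workspace = { id: 1, slug: "my-workspace" };

chatWithWorkspace(workspace, "Summarize the uploaded documents", "query").then(
  (result) => console.log(result.type, result.textResponse)
);
```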
24 server/utils/chroma/CHROMA_SETUP.md (Normal file)
@ -0,0 +1,24 @@
# How to setup a local (or remote) Chroma Vector Database

[Official Chroma Docs](https://docs.trychroma.com/usage-guide#running-chroma-in-clientserver-mode) for reference.

### How to get started

**Requirements**

- Docker
- `git` available in your CLI/terminal

**Instructions**

- `git clone git@github.com:chroma-core/chroma.git` to somewhere on your computer.
- `cd chroma`
- `docker-compose up -d --build`
- Set the `CHROMA_ENDPOINT=` .env variable in `server` and also set `VECTOR_DB=` to `chroma`.

eg: `server/.env.development`

```
VECTOR_DB="chroma"
CHROMA_ENDPOINT='http://localhost:8000'
```
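Before pointing the server at the instance, it helps to confirm the container actually responds. A quick sanity check using the same `chromadb` client the server depends on; the port assumes the stock docker-compose config above:

```
// Connectivity check against the local Chroma container.
const { ChromaClient } = require("chromadb");

(async () => {
  const client = new ChromaClient({ path: "http://localhost:8000" });
  const heartbeat = await client.heartbeat(); // resolves truthy when the instance is alive
  console.log("Chroma heartbeat:", heartbeat);
})();
```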
361 server/utils/chroma/index.js (Normal file)
@ -0,0 +1,361 @@
const { ChromaClient, OpenAIEmbeddingFunction } = require("chromadb");
const { Chroma: ChromaStore } = require("langchain/vectorstores/chroma");
const { OpenAI } = require("langchain/llms/openai");
const { ChatOpenAI } = require("langchain/chat_models/openai");
const {
  VectorDBQAChain,
  LLMChain,
  RetrievalQAChain,
  ConversationalRetrievalQAChain,
} = require("langchain/chains");
const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
// const { VectorStoreRetrieverMemory, BufferMemory } = require("langchain/memory");
// const { PromptTemplate } = require("langchain/prompts");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../files");
const { Configuration, OpenAIApi } = require("openai");
const { v4: uuidv4 } = require("uuid");

const toChunks = (arr, size) => {
  return Array.from({ length: Math.ceil(arr.length / size) }, (_v, i) =>
    arr.slice(i * size, i * size + size)
  );
};

function curateSources(sources = []) {
  const knownDocs = [];
  const documents = [];
  for (const source of sources) {
    const { metadata = {} } = source;
    if (
      Object.keys(metadata).length > 0 &&
      !knownDocs.includes(metadata.title)
    ) {
      documents.push({ ...metadata });
      knownDocs.push(metadata.title);
    }
  }

  return documents;
}

const Chroma = {
  name: 'Chroma',
  connect: async function () {
    const client = new ChromaClient({
      path: process.env.CHROMA_ENDPOINT, // if not set will fallback to localhost:8000
    });

    const isAlive = await client.heartbeat();
    if (!isAlive)
      throw new Error(
        "ChromaDB::Invalid Heartbeat received - is the instance online?"
      );
    return { client };
  },
  heartbeat: async function () {
    const { client } = await this.connect();
    return { heartbeat: await client.heartbeat() };
  },
  totalIndicies: async function () {
    const { client } = await this.connect();
    const collections = await client.listCollections();
    var totalVectors = 0;
    for (const collectionObj of collections) {
      const collection = await client
        .getCollection({ name: collectionObj.name })
        .catch(() => null);
      if (!collection) continue;
      totalVectors += await collection.count();
    }
    return totalVectors;
  },
  embeddingFunc: function () {
    return new OpenAIEmbeddingFunction({
      openai_api_key: process.env.OPEN_AI_KEY,
    });
  },
  embedder: function () {
    return new OpenAIEmbeddings({ openAIApiKey: process.env.OPEN_AI_KEY });
  },
  openai: function () {
    const config = new Configuration({ apiKey: process.env.OPEN_AI_KEY });
    const openai = new OpenAIApi(config);
    return openai;
  },
  llm: function () {
    const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
    return new OpenAI({
      openAIApiKey: process.env.OPEN_AI_KEY,
      temperature: 0.7,
      modelName: model,
    });
  },
  chatLLM: function () {
    const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
    return new ChatOpenAI({
      openAIApiKey: process.env.OPEN_AI_KEY,
      temperature: 0.7,
      modelName: model,
    });
  },
  embedChunk: async function (openai, textChunk) {
    const {
      data: { data },
    } = await openai.createEmbedding({
      model: "text-embedding-ada-002",
      input: textChunk,
    });
    return data.length > 0 && data[0].hasOwnProperty("embedding")
      ? data[0].embedding
      : null;
  },
  namespace: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const collection = await client
      .getCollection({ name: namespace })
      .catch(() => null);
    if (!collection) return null;

    return {
      ...collection,
      vectorCount: await collection.count(),
    };
  },
  hasNamespace: async function (namespace = null) {
    if (!namespace) return false;
    const { client } = await this.connect();
    return await this.namespaceExists(client, namespace);
  },
  namespaceExists: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const collection = await client
      .getCollection({ name: namespace })
      .catch((e) => {
        console.error("ChromaDB::namespaceExists", e.message);
        return null;
      });
    return !!collection;
  },
  deleteVectorsInNamespace: async function (client, namespace = null) {
    await client.deleteCollection({ name: namespace });
    return true;
  },
  addDocumentToNamespace: async function (
    namespace,
    documentData = {},
    fullFilePath = null
  ) {
    const { DocumentVectors } = require("../../models/vectors");
    try {
      const { pageContent, docId, ...metadata } = documentData;
      if (!pageContent || pageContent.length == 0) return false;

      console.log("Adding new vectorized document into namespace", namespace);
      const cacheResult = await cachedVectorInformation(fullFilePath);
      if (cacheResult.exists) {
        const { client } = await this.connect();
        const collection = await client.getOrCreateCollection({
          name: namespace,
          metadata: { "hnsw:space": "cosine" },
          embeddingFunction: this.embeddingFunc(),
        });
        const { chunks } = cacheResult;
        const documentVectors = [];

        for (const chunk of chunks) {
          const submission = {
            ids: [],
            embeddings: [],
            metadatas: [],
            documents: [],
          };

          // Before sending to Chroma and saving the records to our db
          // we need to assign the id of each chunk that is stored in the cached file.
          chunk.forEach((chunk) => {
            const id = uuidv4();
            const { id: _id, ...metadata } = chunk.metadata;
            documentVectors.push({ docId, vectorId: id });
            submission.ids.push(id);
            submission.embeddings.push(chunk.values);
            submission.metadatas.push(metadata);
            submission.documents.push(metadata.text);
          });

          const additionResult = await collection.add(submission);
          if (!additionResult)
            throw new Error("Error embedding into ChromaDB", additionResult);
        }

        await DocumentVectors.bulkInsert(documentVectors);
        return true;
      }

      // If we are here then we are going to embed and store a novel document.
      // We have to do this manually as opposed to using LangChains `Chroma.fromDocuments`
      // because we then cannot atomically control our namespace to granularly find/remove documents
      // from vectordb.
      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize: 1000,
        chunkOverlap: 20,
      });
      const textChunks = await textSplitter.splitText(pageContent);

      console.log("Chunks created from document:", textChunks.length);
      const documentVectors = [];
      const vectors = [];
      const openai = this.openai();

      const submission = {
        ids: [],
        embeddings: [],
        metadatas: [],
        documents: [],
      };

      for (const textChunk of textChunks) {
        const vectorValues = await this.embedChunk(openai, textChunk);

        if (!!vectorValues) {
          const vectorRecord = {
            id: uuidv4(),
            values: vectorValues,
            // [DO NOT REMOVE]
            // LangChain will be unable to find your text if you embed manually and dont include the `text` key.
            // https://github.com/hwchase17/langchainjs/blob/2def486af734c0ca87285a48f1a04c057ab74bdf/langchain/src/vectorstores/pinecone.ts#L64
            metadata: { ...metadata, text: textChunk },
          };

          submission.ids.push(vectorRecord.id);
          submission.embeddings.push(vectorRecord.values);
          submission.metadatas.push(metadata);
          submission.documents.push(textChunk);

          vectors.push(vectorRecord);
          documentVectors.push({ docId, vectorId: vectorRecord.id });
        } else {
          console.error(
            "Could not use OpenAI to embed document chunk! This document will not be recorded."
          );
        }
      }

      const { client } = await this.connect();
      const collection = await client.getOrCreateCollection({
        name: namespace,
        metadata: { "hnsw:space": "cosine" },
        embeddingFunction: this.embeddingFunc(),
      });

      if (vectors.length > 0) {
        const chunks = [];

        console.log("Inserting vectorized chunks into Chroma collection.");
        for (const chunk of toChunks(vectors, 500)) chunks.push(chunk);

        const additionResult = await collection.add(submission);
        if (!additionResult)
          throw new Error("Error embedding into ChromaDB", additionResult);

        await storeVectorResult(chunks, fullFilePath);
      }

      await DocumentVectors.bulkInsert(documentVectors);
      return true;
    } catch (e) {
      console.error("addDocumentToNamespace", e.message);
      return false;
    }
  },
  deleteDocumentFromNamespace: async function (namespace, docId) {
    const { DocumentVectors } = require("../../models/vectors");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) return;
    const collection = await client.getCollection({
      name: namespace,
      embeddingFunction: this.embeddingFunc(),
    });

    const knownDocuments = await DocumentVectors.where(`docId = '${docId}'`);
    if (knownDocuments.length === 0) return;

    const vectorIds = knownDocuments.map((doc) => doc.vectorId);
    await collection.delete({ ids: vectorIds });

    const indexes = knownDocuments.map((doc) => doc.id);
    await DocumentVectors.deleteIds(indexes);
    return true;
  },
  query: async function (reqBody = {}) {
    const { namespace = null, input } = reqBody;
    if (!namespace || !input) throw new Error("Invalid request body");

    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) {
      return {
        response: null,
        sources: [],
        message: "Invalid query - no documents found for workspace!",
      };
    }

    // const collection = await client.getCollection({ name: namespace, embeddingFunction: this.embeddingFunc() })
    // const results = await collection.get({
    //   where: {
    //     description: 'a custom file uploaded by the user.'
    //   },
    //   includes: ['ids']
    // })
    // console.log(results)
    // return { response: null, sources: [], }

    const vectorStore = await ChromaStore.fromExistingCollection(
      this.embedder(),
      { collectionName: namespace, url: process.env.CHROMA_ENDPOINT }
    );
    const model = this.llm();
    const chain = VectorDBQAChain.fromLLM(model, vectorStore, {
      k: 5,
      returnSourceDocuments: true,
    });
    const response = await chain.call({ query: input });
    return {
      response: response.text,
      sources: curateSources(response.sourceDocuments),
      message: false,
    };
  },
  "namespace-stats": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    if (!namespace) throw new Error("namespace required");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error("Namespace by that name does not exist.");
    const stats = await this.namespace(client, namespace);
    return stats
      ? stats
      : { message: "No stats were able to be fetched from DB for namespace" };
  },
  "delete-namespace": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error("Namespace by that name does not exist.");

    const details = await this.namespace(client, namespace);
    await this.deleteVectorsInNamespace(client, namespace);
    return {
      message: `Namespace ${namespace} was deleted along with ${details?.vectorCount} vectors.`,
    };
  },
  reset: async function () {
    const { client } = await this.connect();
    await client.reset();
    return { reset: true };
  },
};

module.exports = {
  Chroma,
};
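The Chroma module deliberately mirrors the Pinecone adapter's surface (`hasNamespace`, `query`, `namespace-stats`, `delete-namespace`, and so on), which is what lets the rest of the server swap stores without branching. A hypothetical direct call against a seeded workspace; the slug and require path are placeholders:

```
const { Chroma } = require("./server/utils/chroma");

// "my-workspace" is a placeholder slug for an already-vectorized workspace.
Chroma.query({
  namespace: "my-workspace",
  input: "What is this document about?",
}).then(({ response, sources, message }) => {
  if (message) console.error(message); // e.g. no documents found for workspace
  else console.log(response, sources);
});
```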
@ -1,21 +1,24 @@
const fs = require("fs");
const path = require("path");
const { v5: uuidv5 } = require("uuid");

async function collectDocumentData(folderName = null) {
  if (!folderName) throw new Error("No docPath provided in request");
  const folder = path.resolve(__dirname, `../../documents/${folderName}`);
  const dirExists = fs.existsSync(folder);
  if (!dirExists)
    throw new Error(
      `No documents folder for ${folderName} - did you run collector/main.py for this element?`
    );

  const files = fs.readdirSync(folder);
  const fileData = [];
  files.forEach((file) => {
    if (path.extname(file) === ".json") {
      const filePath = path.join(folder, file);
      const data = fs.readFileSync(filePath, "utf8");
      console.log(`Parsing document: ${file}`);
      fileData.push(JSON.parse(data));
    }
  });
  return fileData;
@ -24,75 +27,78 @@ async function collectDocumentData(folderName = null) {
// Should take in a folder that is a subfolder of documents
// eg: youtube-subject/video-123.json
async function fileData(filePath = null) {
  if (!filePath) throw new Error("No docPath provided in request");
  const fullPath = path.resolve(__dirname, `../../documents/${filePath}`);
  const fileExists = fs.existsSync(fullPath);
  if (!fileExists) return null;

  const data = fs.readFileSync(fullPath, "utf8");
  return JSON.parse(data);
}

async function viewLocalFiles() {
  const folder = path.resolve(__dirname, `../../documents`);
  const dirExists = fs.existsSync(folder);
  if (!dirExists) return {};

  const directory = {
    name: "documents",
    type: "folder",
    items: [],
  };

  for (const file of fs.readdirSync(folder)) {
    if (path.extname(file) === ".md") continue;
    const folderPath = path.resolve(__dirname, `../../documents/${file}`);
    const isFolder = fs.lstatSync(folderPath).isDirectory();
    if (isFolder) {
      const subdocs = {
        name: file,
        type: "folder",
        items: [],
      };
      const subfiles = fs.readdirSync(folderPath);

      for (const subfile of subfiles) {
        if (path.extname(subfile) !== ".json") continue;
        const filePath = path.join(folderPath, subfile);
        const rawData = fs.readFileSync(filePath, "utf8");
        const cachefilename = `${file}/${subfile}`;
        const { pageContent, ...metadata } = JSON.parse(rawData);

        subdocs.items.push({
          name: subfile,
          type: "file",
          ...metadata,
          cached: await cachedVectorInformation(cachefilename, true),
        });
      }
      directory.items.push(subdocs);
    }
  }

  return directory;
}

// Searches the vector-cache folder for existing information so we dont have to re-embed a
// document and can instead push directly to vector db.
async function cachedVectorInformation(filename = null, checkOnly = false) {
  if (!process.env.CACHE_VECTORS)
    return checkOnly ? false : { exists: false, chunks: [] };
  if (!filename) return checkOnly ? false : { exists: false, chunks: [] };

  const digest = uuidv5(filename, uuidv5.URL);
  const file = path.resolve(__dirname, `../../vector-cache/${digest}.json`);
  const exists = fs.existsSync(file);

  if (checkOnly) return exists;
  if (!exists) return { exists, chunks: [] };

  console.log(
    `Cached vectorized results of ${filename} found! Using cached data to save on embed costs.`
  );
  const rawData = fs.readFileSync(file, "utf8");
  return { exists: true, chunks: JSON.parse(rawData) };
}

// vectorData: pre-chunked vectorized data for a given file that includes the proper metadata and chunk-size limit so it can be iterated and dumped into Pinecone, etc
@ -100,14 +106,16 @@ async function cachedVectorInformation(filename = null, checkOnly = false) {
async function storeVectorResult(vectorData = [], filename = null) {
  if (!process.env.CACHE_VECTORS) return;
  if (!filename) return;
  console.log(
    `Caching vectorized results of ${filename} to prevent duplicated embedding.`
  );
  const folder = path.resolve(__dirname, `../../vector-cache`);

  if (!fs.existsSync(folder)) fs.mkdirSync(folder);

  const digest = uuidv5(filename, uuidv5.URL);
  const writeTo = path.resolve(folder, `${digest}.json`);
  fs.writeFileSync(writeTo, JSON.stringify(vectorData), "utf8");
  return;
}

@ -116,5 +124,5 @@ module.exports = {
  collectDocumentData,
  viewLocalFiles,
  storeVectorResult,
  fileData,
};
18 server/utils/helpers/index.js (Normal file)
@ -0,0 +1,18 @@
const { Pinecone } = require("../pinecone");
const { Chroma } = require("../chroma");

function getVectorDbClass() {
  const vectorSelection = process.env.VECTOR_DB || "pinecone";
  switch (vectorSelection) {
    case "pinecone":
      return Pinecone;
    case "chroma":
      return Chroma;
    default:
      return Pinecone;
  }
}

module.exports = {
  getVectorDbClass,
};
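This small factory is the seam the whole commit hangs on: `chatWithWorkspace` resolves its adapter through it rather than importing Pinecone directly. A sketch of the call pattern, mirroring the usage in `server/utils/chats/index.js` (the slug is a placeholder):

```
const { getVectorDbClass } = require("./server/utils/helpers");

// Resolves to the Pinecone or Chroma module based on process.env.VECTOR_DB;
// both expose identical method names, so callers never branch on the store.
const VectorDb = getVectorDbClass();
VectorDb.hasNamespace("my-workspace").then((exists) =>
  console.log(`workspace vectorized: ${exists}`)
);
```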
@ -1,5 +1,5 @@
function reqBody(request) {
  return typeof request.body === "string"
    ? JSON.parse(request.body)
    : request.body;
}
@ -1,30 +1,30 @@
function validatedRequest(request, response, next) {
  // When in development passthrough auth token for ease of development.
  if (process.env.NODE_ENV === "development" || !process.env.AUTH_TOKEN) {
    next();
    return;
  }

  if (!process.env.AUTH_TOKEN) {
    response.status(403).json({
      error: "You need to set an AUTH_TOKEN environment variable.",
    });
    return;
  }

  const auth = request.header("Authorization");
  const token = auth ? auth.split(" ")[1] : null;

  if (!token) {
    response.status(403).json({
      error: "No auth token found.",
    });
    return;
  }

  if (token !== process.env.AUTH_TOKEN) {
    response.status(403).json({
      error: "Invalid auth token found.",
    });
    return;
  }
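The middleware compares the second whitespace-delimited token of the `Authorization` header against `AUTH_TOKEN`, i.e. the conventional `Bearer <token>` shape. A hypothetical client call; the URL, route, and token are placeholders:

```
// Only the part after "Bearer " is checked against process.env.AUTH_TOKEN.
fetch("http://localhost:3001/api/workspaces", {
  headers: { Authorization: "Bearer my-auth-token" },
}).then((res) => res.json());
```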
@ -1,64 +1,76 @@
const { Configuration, OpenAIApi } = require("openai");
class OpenAi {
  constructor() {
    const config = new Configuration({
      apiKey: process.env.OPEN_AI_KEY,
      organization: "org-amIuvAIIcdUmN5YCiwRayVfb",
    });
    const openai = new OpenAIApi(config);
    this.openai = openai;
  }
  isValidChatModel(modelName = "") {
    const validModels = ["gpt-4", "gpt-3.5-turbo"];
    return validModels.includes(modelName);
  }

  async isSafe(input = "") {
    const { flagged = false, categories = {} } = await this.openai
      .createModeration({ input })
      .then((json) => {
        const res = json.data;
        if (!res.hasOwnProperty("results"))
          throw new Error("OpenAI moderation: No results!");
        if (res.results.length === 0)
          throw new Error("OpenAI moderation: No results length!");
        return res.results[0];
      });

    if (!flagged) return { safe: true, reasons: [] };
    const reasons = Object.keys(categories)
      .map((category) => {
        const value = categories[category];
        if (value === true) {
          return category.replace("/", " or ");
        } else {
          return null;
        }
      })
      .filter((reason) => !!reason);

    return { safe: false, reasons };
  }

  async sendChat(chatHistory = [], prompt) {
    const model = process.env.OPEN_MODEL_PREF;
    if (!this.isValidChatModel(model))
      throw new Error(
        `OpenAI chat: ${model} is not valid for chat completion!`
      );

    const textResponse = await this.openai
      .createChatCompletion({
        model,
        temperature: 0.7,
        n: 1,
        messages: [
          { role: "system", content: "" },
          ...chatHistory,
          { role: "user", content: prompt },
        ],
      })
      .then((json) => {
        const res = json.data;
        if (!res.hasOwnProperty("choices"))
          throw new Error("OpenAI chat: No results!");
        if (res.choices.length === 0)
          throw new Error("OpenAI chat: No results length!");
        return res.choices[0].message.content;
      });

    return textResponse;
  }
}

module.exports = {
  OpenAi,
};
@ -1,30 +1,41 @@
const { PineconeClient } = require("@pinecone-database/pinecone");
const { PineconeStore } = require("langchain/vectorstores/pinecone");
const { OpenAI } = require("langchain/llms/openai");
const { ChatOpenAI } = require("langchain/chat_models/openai");
const {
  VectorDBQAChain,
  LLMChain,
  RetrievalQAChain,
  ConversationalRetrievalQAChain,
} = require("langchain/chains");
const { OpenAIEmbeddings } = require("langchain/embeddings/openai");
const {
  VectorStoreRetrieverMemory,
  BufferMemory,
} = require("langchain/memory");
const { PromptTemplate } = require("langchain/prompts");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../files");
const { Configuration, OpenAIApi } = require("openai");
const { v4: uuidv4 } = require("uuid");

const toChunks = (arr, size) => {
  return Array.from({ length: Math.ceil(arr.length / size) }, (_v, i) =>
    arr.slice(i * size, i * size + size)
  );
};

function curateSources(sources = []) {
  const knownDocs = [];
  const documents = [];
  for (const source of sources) {
    const { metadata = {} } = source;
    if (
      Object.keys(metadata).length > 0 &&
      !knownDocs.includes(metadata.title)
    ) {
      documents.push({ ...metadata });
      knownDocs.push(metadata.title);
    }
  }

@ -32,6 +43,7 @@ function curateSources(sources = []) {
}

const Pinecone = {
  name: 'Pinecone',
  connect: async function () {
    const client = new PineconeClient();
    await client.init({
@ -39,91 +51,112 @@ const Pinecone = {
      environment: process.env.PINECONE_ENVIRONMENT,
    });
    const pineconeIndex = client.Index(process.env.PINECONE_INDEX);
    const { status } = await client.describeIndex({
      indexName: process.env.PINECONE_INDEX,
    });

    if (!status.ready) throw new Error("Pinecode::Index not ready.");
    return { client, pineconeIndex, indexName: process.env.PINECONE_INDEX };
  },
  embedder: function () {
    return new OpenAIEmbeddings({ openAIApiKey: process.env.OPEN_AI_KEY });
  },
  openai: function () {
    const config = new Configuration({ apiKey: process.env.OPEN_AI_KEY });
    const openai = new OpenAIApi(config);
    return openai;
  },
  embedChunk: async function (openai, textChunk) {
    const {
      data: { data },
    } = await openai.createEmbedding({
      model: "text-embedding-ada-002",
      input: textChunk,
    });
    return data.length > 0 && data[0].hasOwnProperty("embedding")
      ? data[0].embedding
      : null;
  },
  llm: function () {
    const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
    return new OpenAI({
      openAIApiKey: process.env.OPEN_AI_KEY,
      temperature: 0.7,
      modelName: model,
    });
  },
  chatLLM: function () {
    const model = process.env.OPEN_MODEL_PREF || "gpt-3.5-turbo";
    return new ChatOpenAI({
      openAIApiKey: process.env.OPEN_AI_KEY,
      temperature: 0.7,
      modelName: model,
    });
  },
  totalIndicies: async function () {
    const { pineconeIndex } = await this.connect();
    const { namespaces } = await pineconeIndex.describeIndexStats1();
    return Object.values(namespaces).reduce(
      (a, b) => a + (b?.vectorCount || 0),
      0
    );
  },
  namespace: async function (index, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const { namespaces } = await index.describeIndexStats1();
    return namespaces.hasOwnProperty(namespace) ? namespaces[namespace] : null;
  },
  hasNamespace: async function (namespace = null) {
    if (!namespace) return false;
    const { pineconeIndex } = await this.connect();
    return await this.namespaceExists(pineconeIndex, namespace);
  },
  namespaceExists: async function (index, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const { namespaces } = await index.describeIndexStats1();
    return namespaces.hasOwnProperty(namespace);
  },
  deleteVectorsInNamespace: async function (index, namespace = null) {
    await index.delete1({ namespace, deleteAll: true });
    return true;
  },
  addDocumentToNamespace: async function (
    namespace,
    documentData = {},
    fullFilePath = null
  ) {
    const { DocumentVectors } = require("../../models/vectors");
    try {
      const { pageContent, docId, ...metadata } = documentData;
      if (!pageContent || pageContent.length == 0) return false;

      console.log("Adding new vectorized document into namespace", namespace);
      const cacheResult = await cachedVectorInformation(fullFilePath);
      if (cacheResult.exists) {
        const { pineconeIndex } = await this.connect();
        const { chunks } = cacheResult;
        const documentVectors = [];

        for (const chunk of chunks) {
          // Before sending to Pinecone and saving the records to our db
          // we need to assign the id of each chunk that is stored in the cached file.
          const newChunks = chunk.map((chunk) => {
            const id = uuidv4();
            documentVectors.push({ docId, vectorId: id });
            return { ...chunk, id };
          });

          // Push chunks with new ids to pinecone.
          await pineconeIndex.upsert({
            upsertRequest: {
              vectors: [...newChunks],
              namespace,
            },
          });
        }

        await DocumentVectors.bulkInsert(documentVectors);
        return true;
      }

      // If we are here then we are going to embed and store a novel document.
@ -131,13 +164,16 @@ const Pinecone = {
      // because we then cannot atomically control our namespace to granularly find/remove documents
      // from vectordb.
      // https://github.com/hwchase17/langchainjs/blob/2def486af734c0ca87285a48f1a04c057ab74bdf/langchain/src/vectorstores/pinecone.ts#L167
      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize: 1000,
        chunkOverlap: 20,
      });
      const textChunks = await textSplitter.splitText(pageContent);

      console.log("Chunks created from document:", textChunks.length);
      const documentVectors = [];
      const vectors = [];
      const openai = this.openai();
      for (const textChunk of textChunks) {
        const vectorValues = await this.embedChunk(openai, textChunk);

@ -149,87 +185,97 @@ const Pinecone = {
            // LangChain will be unable to find your text if you embed manually and dont include the `text` key.
            // https://github.com/hwchase17/langchainjs/blob/2def486af734c0ca87285a48f1a04c057ab74bdf/langchain/src/vectorstores/pinecone.ts#L64
            metadata: { ...metadata, text: textChunk },
          };
          vectors.push(vectorRecord);
          documentVectors.push({ docId, vectorId: vectorRecord.id });
        } else {
          console.error(
            "Could not use OpenAI to embed document chunk! This document will not be recorded."
          );
        }
      }

      if (vectors.length > 0) {
        const chunks = [];
        const { pineconeIndex } = await this.connect();
        console.log("Inserting vectorized chunks into Pinecone.");
        for (const chunk of toChunks(vectors, 100)) {
          chunks.push(chunk);
          await pineconeIndex.upsert({
            upsertRequest: {
              vectors: [...chunk],
              namespace,
            },
          });
        }
        await storeVectorResult(chunks, fullFilePath);
      }

      await DocumentVectors.bulkInsert(documentVectors);
      return true;
    } catch (e) {
      console.error("addDocumentToNamespace", e.message);
      return false;
    }
  },
  deleteDocumentFromNamespace: async function (namespace, docId) {
    const { DocumentVectors } = require("../../models/vectors");
    const { pineconeIndex } = await this.connect();
    if (!(await this.namespaceExists(pineconeIndex, namespace))) return;

    const knownDocuments = await DocumentVectors.where(`docId = '${docId}'`);
    if (knownDocuments.length === 0) return;

    const vectorIds = knownDocuments.map((doc) => doc.vectorId);
    await pineconeIndex.delete1({
      ids: vectorIds,
      namespace,
    });

    const indexes = knownDocuments.map((doc) => doc.id);
    await DocumentVectors.deleteIds(indexes);
    return true;
  },
  "namespace-stats": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    if (!namespace) throw new Error("namespace required");
    const { pineconeIndex } = await this.connect();
    if (!(await this.namespaceExists(pineconeIndex, namespace)))
      throw new Error("Namespace by that name does not exist.");
    const stats = await this.namespace(pineconeIndex, namespace);
    return stats
      ? stats
      : { message: "No stats were able to be fetched from DB" };
  },
  "delete-namespace": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    const { pineconeIndex } = await this.connect();
    if (!(await this.namespaceExists(pineconeIndex, namespace)))
      throw new Error("Namespace by that name does not exist.");

    const details = await this.namespace(pineconeIndex, namespace);
    await this.deleteVectorsInNamespace(pineconeIndex, namespace);
    return {
      message: `Namespace ${namespace} was deleted along with ${details.vectorCount} vectors.`,
    };
  },
  query: async function (reqBody = {}) {
    const { namespace = null, input } = reqBody;
    if (!namespace || !input) throw new Error("Invalid request body");

    const { pineconeIndex } = await this.connect();
    if (!(await this.namespaceExists(pineconeIndex, namespace))) {
      return {
        response: null,
        sources: [],
        message: "Invalid query - no documents found for workspace!",
      };
    }

    const vectorStore = await PineconeStore.fromExistingIndex(this.embedder(), {
      pineconeIndex,
      namespace,
    });

    const model = this.llm();
    const chain = VectorDBQAChain.fromLLM(model, vectorStore, {
@ -237,7 +283,11 @@ const Pinecone = {
      returnSourceDocuments: true,
    });
    const response = await chain.call({ query: input });
    return {
      response: response.text,
      sources: curateSources(response.sourceDocuments),
      message: false,
    };
  },
  // This implementation of chat also expands the memory of the chat itself
  // and adds more tokens to the PineconeDB instance namespace
@ -246,13 +296,16 @@ const Pinecone = {
    if (!namespace || !input) throw new Error("Invalid request body");

    const { pineconeIndex } = await this.connect();
    if (!(await this.namespaceExists(pineconeIndex, namespace)))
      throw new Error(
        "Invalid namespace - has it been collected and seeded yet?"
      );

    const vectorStore = await PineconeStore.fromExistingIndex(this.embedder(), {
      pineconeIndex,
      namespace,
    });

    const memory = new VectorStoreRetrieverMemory({
      vectorStoreRetriever: vectorStore.asRetriever(1),
      memoryKey: "history",
@ -270,10 +323,10 @@ const Pinecone = {

    const chain = new LLMChain({ llm: model, prompt, memory });
    const response = await chain.call({ input });
    return { response: response.text, sources: [], message: false };
  },
};

module.exports = {
  Pinecone,
};