Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2024-11-14 02:20:12 +01:00)
Update all vector dbs to filter duplicate source documents that may be pinned (#1122)
* Update all vector dbs to filter duplicate parents
* cleanup
Commit 9655880cf0 (parent 41978765bc)
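The change has two halves. In the chat handlers (embed, workspace, and streaming), every pinned document injected into the context also contributes an identifier to a pinnedDocIdentifiers list, and that list is passed to the vector DB as filterIdentifiers. In each vector DB provider, the similarity-response loop then drops any match whose sourceIdentifier appears in that list, so a pinned CSV (for example) cannot show up a second time through RAG. A minimal sketch of the handler side, assuming only the names visible in the diff below (buildRagContext and the pinnedDocs/VectorDb wiring are simplified stand-ins, not the project's exact code):

// Simplified stand-in for the logic shared by the chat handlers in this commit.
const { sourceIdentifier } = require("./index"); // utils/chats helper added here

async function buildRagContext({ workspace, pinnedDocs, VectorDb, LLMConnector, input }) {
  const contextTexts = [];
  const sources = [];
  const pinnedDocIdentifiers = [];

  // 1. Pinned documents always enter the context window in full, and we
  //    remember an identifier for each one.
  for (const doc of pinnedDocs) {
    pinnedDocIdentifiers.push(sourceIdentifier(doc));
    contextTexts.push(doc.pageContent);
    sources.push({ text: doc.pageContent, ...doc });
  }

  // 2. The identifiers are handed to the provider so similarity search skips
  //    chunks whose parent document is already pinned.
  const rag = await VectorDb.performSimilaritySearch({
    namespace: workspace.slug,
    input,
    LLMConnector,
    similarityThreshold: workspace?.similarityThreshold,
    topN: workspace?.topN,
    filterIdentifiers: pinnedDocIdentifiers,
  });

  return {
    contextTexts: [...contextTexts, ...rag.contextTexts],
    sources: [...sources, ...rag.sources],
  };
}

The per-file changes follow.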
[embed chat handler: streamChatWithForEmbed]

@@ -1,6 +1,6 @@
 const { v4: uuidv4 } = require("uuid");
 const { getVectorDbClass, getLLMProvider } = require("../helpers");
-const { chatPrompt } = require("./index");
+const { chatPrompt, sourceIdentifier } = require("./index");
 const { EmbedChats } = require("../../models/embedChats");
 const {
   convertToPromptHistory,
@@ -69,6 +69,7 @@ async function streamChatWithForEmbed(
   let completeText;
   let contextTexts = [];
   let sources = [];
+  let pinnedDocIdentifiers = [];
   const { rawHistory, chatHistory } = await recentEmbedChatHistory(
     sessionId,
     embed,
@@ -86,6 +87,7 @@ async function streamChatWithForEmbed(
     .then((pinnedDocs) => {
       pinnedDocs.forEach((doc) => {
         const { pageContent, ...metadata } = doc;
+        pinnedDocIdentifiers.push(sourceIdentifier(doc));
         contextTexts.push(doc.pageContent);
         sources.push({
           text:
@@ -104,6 +106,7 @@ async function streamChatWithForEmbed(
         LLMConnector,
         similarityThreshold: embed.workspace?.similarityThreshold,
         topN: embed.workspace?.topN,
+        filterIdentifiers: pinnedDocIdentifiers,
       })
     : {
         contextTexts: [],
[workspace chat handler: chatWithWorkspace, plus the new sourceIdentifier util]

@@ -79,6 +79,7 @@ async function chatWithWorkspace(
   // 2. Chatting in "query" mode and has at least 1 embedding
   let contextTexts = [];
   let sources = [];
+  let pinnedDocIdentifiers = [];
   const { rawHistory, chatHistory } = await recentChatHistory({
     user,
     workspace,
@@ -97,6 +98,7 @@ async function chatWithWorkspace(
     .then((pinnedDocs) => {
       pinnedDocs.forEach((doc) => {
         const { pageContent, ...metadata } = doc;
+        pinnedDocIdentifiers.push(sourceIdentifier(doc));
         contextTexts.push(doc.pageContent);
         sources.push({
           text:
@@ -115,6 +117,7 @@ async function chatWithWorkspace(
         LLMConnector,
         similarityThreshold: workspace?.similarityThreshold,
         topN: workspace?.topN,
+        filterIdentifiers: pinnedDocIdentifiers,
       })
     : {
         contextTexts: [],
@@ -227,7 +230,18 @@ function chatPrompt(workspace) {
   );
 }

+// We use this util function to deduplicate sources from similarity searching
+// if the document is already pinned.
+// Eg: You pin a csv, if we RAG + full-text that you will get the same data
+// points both in the full-text and possibly from RAG - result in bad results
+// even if the LLM was not even going to hallucinate.
+function sourceIdentifier(sourceDocument) {
+  if (!sourceDocument?.title || !sourceDocument?.published) return uuidv4();
+  return `title:${sourceDocument.title}-timestamp:${sourceDocument.published}`;
+}
+
 module.exports = {
+  sourceIdentifier,
   recentChatHistory,
   chatWithWorkspace,
   chatPrompt,
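The sourceIdentifier helper above keys deduplication on a document's title and published timestamp, so every chunk split from the same source file resolves to the same identifier, while a record missing either field falls back to a fresh uuidv4() and can therefore never collide with an entry in filterIdentifiers. A small usage sketch (the metadata values are made up for illustration):

const { sourceIdentifier } = require("../../chats"); // same import the vector DB providers use

// Two chunks split from the same pinned document share one identifier...
const chunkA = { title: "report.csv", published: "2024-01-01", text: "row 1" };
const chunkB = { title: "report.csv", published: "2024-01-01", text: "row 2" };
console.log(sourceIdentifier(chunkA) === sourceIdentifier(chunkB)); // true
// -> "title:report.csv-timestamp:2024-01-01"

// ...while a record with no title/published gets a random UUID, so it never
// matches anything in filterIdentifiers and is never filtered out.
console.log(sourceIdentifier({ text: "orphan chunk" })); // e.g. "a1b2c3d4-..."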
[workspace streaming chat handler: streamChatWithWorkspace]

@@ -9,6 +9,7 @@ const {
   VALID_COMMANDS,
   chatPrompt,
   recentChatHistory,
+  sourceIdentifier,
 } = require("./index");

 const VALID_CHAT_MODE = ["chat", "query"];
@@ -92,6 +93,7 @@ async function streamChatWithWorkspace(
   let completeText;
   let contextTexts = [];
   let sources = [];
+  let pinnedDocIdentifiers = [];
   const { rawHistory, chatHistory } = await recentChatHistory({
     user,
     workspace,
@@ -110,6 +112,7 @@ async function streamChatWithWorkspace(
     .then((pinnedDocs) => {
       pinnedDocs.forEach((doc) => {
         const { pageContent, ...metadata } = doc;
+        pinnedDocIdentifiers.push(sourceIdentifier(doc));
         contextTexts.push(doc.pageContent);
         sources.push({
           text:
@@ -128,6 +131,7 @@ async function streamChatWithWorkspace(
         LLMConnector,
         similarityThreshold: workspace?.similarityThreshold,
         topN: workspace?.topN,
+        filterIdentifiers: pinnedDocIdentifiers,
       })
     : {
         contextTexts: [],
[AstraDB provider]

@@ -8,6 +8,7 @@ const {
   getLLMProvider,
   getEmbeddingEngineSelection,
 } = require("../../helpers");
+const { sourceIdentifier } = require("../../chats");

 const AstraDB = {
   name: "AstraDB",
@@ -252,6 +253,7 @@ const AstraDB = {
     LLMConnector = null,
     similarityThreshold = 0.25,
     topN = 4,
+    filterIdentifiers = [],
   }) {
     if (!namespace || !input || !LLMConnector)
       throw new Error("Invalid request to performSimilaritySearch.");
@@ -272,7 +274,8 @@ const AstraDB = {
       namespace,
       queryVector,
       similarityThreshold,
-      topN
+      topN,
+      filterIdentifiers
     );

     const sources = sourceDocuments.map((metadata, i) => {
@@ -289,7 +292,8 @@ const AstraDB = {
     namespace,
     queryVector,
     similarityThreshold = 0.25,
-    topN = 4
+    topN = 4,
+    filterIdentifiers = []
   ) {
     const result = {
       contextTexts: [],
@@ -311,6 +315,12 @@ const AstraDB = {

     responses.forEach((response) => {
       if (response.$similarity < similarityThreshold) return;
+      if (filterIdentifiers.includes(sourceIdentifier(response.metadata))) {
+        console.log(
+          "AstraDB: A source was filtered from context as it's parent document is pinned."
+        );
+        return;
+      }
       result.contextTexts.push(response.metadata.text);
       result.sourceDocuments.push(response);
       result.scores.push(response.$similarity);
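Each provider repeats the same guard inside its similarity-response loop: after the score/threshold check it computes sourceIdentifier over whatever object carries the document metadata for that store (response.metadata here and for Milvus/Pinecone/Zilliz, response.payload for QDrant, response.metadatas[0][i] for Chroma, the flattened record for LanceDB and Weaviate) and skips the match when the identifier appears in filterIdentifiers. A generic sketch of that shared guard, assuming a simplified match shape (filterMatches is an illustrative name, not a function in the codebase):

const { sourceIdentifier } = require("../../chats");

// Illustrative distillation of the per-provider loop; real providers read the
// text/score/metadata from store-specific response shapes.
function filterMatches(matches, { similarityThreshold = 0.25, filterIdentifiers = [] }) {
  const result = { contextTexts: [], sourceDocuments: [], scores: [] };
  for (const match of matches) {
    if (match.score < similarityThreshold) continue;
    if (filterIdentifiers.includes(sourceIdentifier(match.metadata))) {
      // The parent document is pinned and already fully present in the context.
      continue;
    }
    result.contextTexts.push(match.metadata.text);
    result.sourceDocuments.push(match);
    result.scores.push(match.score);
  }
  return result;
}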
[Chroma provider]

@@ -9,6 +9,7 @@ const {
   getEmbeddingEngineSelection,
 } = require("../../helpers");
 const { parseAuthHeader } = require("../../http");
+const { sourceIdentifier } = require("../../chats");

 const Chroma = {
   name: "Chroma",
@@ -70,7 +71,8 @@ const Chroma = {
     namespace,
     queryVector,
     similarityThreshold = 0.25,
-    topN = 4
+    topN = 4,
+    filterIdentifiers = []
   ) {
     const collection = await client.getCollection({ name: namespace });
     const result = {
@@ -89,6 +91,15 @@ const Chroma = {
           similarityThreshold
         )
           return;
+
+        if (
+          filterIdentifiers.includes(sourceIdentifier(response.metadatas[0][i]))
+        ) {
+          console.log(
+            "Chroma: A source was filtered from context as it's parent document is pinned."
+          );
+          return;
+        }
         result.contextTexts.push(response.documents[0][i]);
         result.sourceDocuments.push(response.metadatas[0][i]);
         result.scores.push(this.distanceToSimilarity(response.distances[0][i]));
@@ -282,6 +293,7 @@ const Chroma = {
     LLMConnector = null,
     similarityThreshold = 0.25,
     topN = 4,
+    filterIdentifiers = [],
   }) {
     if (!namespace || !input || !LLMConnector)
       throw new Error("Invalid request to performSimilaritySearch.");
@@ -301,7 +313,8 @@ const Chroma = {
       namespace,
       queryVector,
       similarityThreshold,
-      topN
+      topN,
+      filterIdentifiers
     );

     const sources = sourceDocuments.map((metadata, i) => {
[LanceDb provider]

@@ -9,6 +9,7 @@ const { TextSplitter } = require("../../TextSplitter");
 const { SystemSettings } = require("../../../models/systemSettings");
 const { storeVectorResult, cachedVectorInformation } = require("../../files");
 const { v4: uuidv4 } = require("uuid");
+const { sourceIdentifier } = require("../../chats");

 const LanceDb = {
   uri: `${
@@ -64,7 +65,8 @@ const LanceDb = {
     namespace,
     queryVector,
     similarityThreshold = 0.25,
-    topN = 4
+    topN = 4,
+    filterIdentifiers = []
   ) {
     const collection = await client.openTable(namespace);
     const result = {
@@ -82,6 +84,13 @@ const LanceDb = {
     response.forEach((item) => {
       if (this.distanceToSimilarity(item.score) < similarityThreshold) return;
       const { vector: _, ...rest } = item;
+      if (filterIdentifiers.includes(sourceIdentifier(rest))) {
+        console.log(
+          "LanceDB: A source was filtered from context as it's parent document is pinned."
+        );
+        return;
+      }
+
       result.contextTexts.push(rest.text);
       result.sourceDocuments.push(rest);
       result.scores.push(this.distanceToSimilarity(item.score));
@@ -250,6 +259,7 @@ const LanceDb = {
     LLMConnector = null,
     similarityThreshold = 0.25,
     topN = 4,
+    filterIdentifiers = [],
   }) {
     if (!namespace || !input || !LLMConnector)
       throw new Error("Invalid request to performSimilaritySearch.");
@@ -269,7 +279,8 @@ const LanceDb = {
       namespace,
       queryVector,
       similarityThreshold,
-      topN
+      topN,
+      filterIdentifiers
     );

     const sources = sourceDocuments.map((metadata, i) => {
[Milvus provider]

@@ -13,6 +13,7 @@ const {
   getLLMProvider,
   getEmbeddingEngineSelection,
 } = require("../../helpers");
+const { sourceIdentifier } = require("../../chats");

 const Milvus = {
   name: "Milvus",
@@ -288,6 +289,7 @@ const Milvus = {
     LLMConnector = null,
     similarityThreshold = 0.25,
     topN = 4,
+    filterIdentifiers = [],
   }) {
     if (!namespace || !input || !LLMConnector)
       throw new Error("Invalid request to performSimilaritySearch.");
@@ -307,7 +309,8 @@ const Milvus = {
       namespace,
       queryVector,
       similarityThreshold,
-      topN
+      topN,
+      filterIdentifiers
     );

     const sources = sourceDocuments.map((metadata, i) => {
@@ -324,7 +327,8 @@ const Milvus = {
     namespace,
     queryVector,
     similarityThreshold = 0.25,
-    topN = 4
+    topN = 4,
+    filterIdentifiers = []
   ) {
     const result = {
       contextTexts: [],
@@ -338,6 +342,13 @@ const Milvus = {
     });
     response.results.forEach((match) => {
       if (match.score < similarityThreshold) return;
+      if (filterIdentifiers.includes(sourceIdentifier(match.metadata))) {
+        console.log(
+          "Milvus: A source was filtered from context as it's parent document is pinned."
+        );
+        return;
+      }
+
       result.contextTexts.push(match.metadata.text);
       result.sourceDocuments.push(match);
       result.scores.push(match.score);
[Pinecone provider]

@@ -8,6 +8,7 @@ const {
   getLLMProvider,
   getEmbeddingEngineSelection,
 } = require("../../helpers");
+const { sourceIdentifier } = require("../../chats");

 const PineconeDB = {
   name: "Pinecone",
@@ -44,7 +45,8 @@ const PineconeDB = {
     namespace,
     queryVector,
     similarityThreshold = 0.25,
-    topN = 4
+    topN = 4,
+    filterIdentifiers = []
   ) {
     const result = {
       contextTexts: [],
@@ -61,6 +63,13 @@ const PineconeDB = {

     response.matches.forEach((match) => {
       if (match.score < similarityThreshold) return;
+      if (filterIdentifiers.includes(sourceIdentifier(match.metadata))) {
+        console.log(
+          "Pinecone: A source was filtered from context as it's parent document is pinned."
+        );
+        return;
+      }
+
       result.contextTexts.push(match.metadata.text);
       result.sourceDocuments.push(match);
       result.scores.push(match.score);
@@ -233,6 +242,7 @@ const PineconeDB = {
     LLMConnector = null,
     similarityThreshold = 0.25,
     topN = 4,
+    filterIdentifiers = [],
   }) {
     if (!namespace || !input || !LLMConnector)
       throw new Error("Invalid request to performSimilaritySearch.");
@@ -249,7 +259,8 @@ const PineconeDB = {
       namespace,
       queryVector,
       similarityThreshold,
-      topN
+      topN,
+      filterIdentifiers
     );

     const sources = sourceDocuments.map((metadata, i) => {
[QDrant provider]

@@ -8,6 +8,7 @@ const {
   getLLMProvider,
   getEmbeddingEngineSelection,
 } = require("../../helpers");
+const { sourceIdentifier } = require("../../chats");

 const QDrant = {
   name: "QDrant",
@@ -55,7 +56,8 @@ const QDrant = {
     namespace,
     queryVector,
     similarityThreshold = 0.25,
-    topN = 4
+    topN = 4,
+    filterIdentifiers = []
   ) {
     const { client } = await this.connect();
     const result = {
@@ -72,6 +74,13 @@ const QDrant = {

     responses.forEach((response) => {
       if (response.score < similarityThreshold) return;
+      if (filterIdentifiers.includes(sourceIdentifier(response?.payload))) {
+        console.log(
+          "QDrant: A source was filtered from context as it's parent document is pinned."
+        );
+        return;
+      }
+
       result.contextTexts.push(response?.payload?.text || "");
       result.sourceDocuments.push({
         ...(response?.payload || {}),
@@ -146,7 +155,8 @@ const QDrant = {
     const { client } = await this.connect();
     const { chunks } = cacheResult;
     const documentVectors = [];
-    vectorDimension = chunks[0][0].vector.length || null;
+    vectorDimension =
+      chunks[0][0]?.vector?.length ?? chunks[0][0]?.values?.length ?? null;

     const collection = await this.getOrCreateCollection(
       client,
@@ -311,6 +321,7 @@ const QDrant = {
     LLMConnector = null,
     similarityThreshold = 0.25,
     topN = 4,
+    filterIdentifiers = [],
   }) {
     if (!namespace || !input || !LLMConnector)
       throw new Error("Invalid request to performSimilaritySearch.");
@@ -330,7 +341,8 @@ const QDrant = {
       namespace,
       queryVector,
       similarityThreshold,
-      topN
+      topN,
+      filterIdentifiers
     );

     const sources = sourceDocuments.map((metadata, i) => {
[Weaviate provider]

@@ -9,6 +9,7 @@ const {
   getEmbeddingEngineSelection,
 } = require("../../helpers");
 const { camelCase } = require("../../helpers/camelcase");
+const { sourceIdentifier } = require("../../chats");

 const Weaviate = {
   name: "Weaviate",
@@ -82,7 +83,8 @@ const Weaviate = {
     namespace,
     queryVector,
     similarityThreshold = 0.25,
-    topN = 4
+    topN = 4,
+    filterIdentifiers = []
   ) {
     const result = {
       contextTexts: [],
@@ -91,7 +93,8 @@ const Weaviate = {
     };

     const weaviateClass = await this.namespace(client, namespace);
-    const fields = weaviateClass.properties.map((prop) => prop.name).join(" ");
+    const fields =
+      weaviateClass.properties?.map((prop) => prop.name)?.join(" ") ?? "";
     const queryResponse = await client.graphql
       .get()
       .withClassName(camelCase(namespace))
@@ -109,6 +112,12 @@ const Weaviate = {
         ...rest
       } = response;
       if (certainty < similarityThreshold) return;
+      if (filterIdentifiers.includes(sourceIdentifier(rest))) {
+        console.log(
+          "Weaviate: A source was filtered from context as it's parent document is pinned."
+        );
+        return;
+      }
       result.contextTexts.push(rest.text);
       result.sourceDocuments.push({ ...rest, id });
       result.scores.push(certainty);
@@ -214,7 +223,7 @@ const Weaviate = {
       chunk.forEach((chunk) => {
         const id = uuidv4();
         const flattenedMetadata = this.flattenObjectForWeaviate(
-          chunk.properties
+          chunk.properties ?? chunk.metadata
         );
         documentVectors.push({ docId, vectorId: id });
         const vectorRecord = {
@@ -357,6 +366,7 @@ const Weaviate = {
     LLMConnector = null,
     similarityThreshold = 0.25,
     topN = 4,
+    filterIdentifiers = [],
   }) {
     if (!namespace || !input || !LLMConnector)
       throw new Error("Invalid request to performSimilaritySearch.");
@@ -376,7 +386,8 @@ const Weaviate = {
       namespace,
       queryVector,
       similarityThreshold,
-      topN
+      topN,
+      filterIdentifiers
     );

     const sources = sourceDocuments.map((metadata, i) => {
@@ -437,7 +448,7 @@ const Weaviate = {
     const flattenedObject = {};

     for (const key in obj) {
-      if (!Object.hasOwn(obj, key)) {
+      if (!Object.hasOwn(obj, key) || key === "id") {
        continue;
      }
      const value = obj[key];
[Zilliz provider]

@@ -13,6 +13,7 @@ const {
   getLLMProvider,
   getEmbeddingEngineSelection,
 } = require("../../helpers");
+const { sourceIdentifier } = require("../../chats");

 // Zilliz is basically a copy of Milvus DB class with a different constructor
 // to connect to the cloud
@@ -289,6 +290,7 @@ const Zilliz = {
     LLMConnector = null,
     similarityThreshold = 0.25,
     topN = 4,
+    filterIdentifiers = [],
   }) {
     if (!namespace || !input || !LLMConnector)
       throw new Error("Invalid request to performSimilaritySearch.");
@@ -308,7 +310,8 @@ const Zilliz = {
       namespace,
       queryVector,
       similarityThreshold,
-      topN
+      topN,
+      filterIdentifiers
     );

     const sources = sourceDocuments.map((metadata, i) => {
@@ -325,7 +328,8 @@ const Zilliz = {
     namespace,
     queryVector,
     similarityThreshold = 0.25,
-    topN = 4
+    topN = 4,
+    filterIdentifiers = []
   ) {
     const result = {
       contextTexts: [],
@@ -339,6 +343,12 @@ const Zilliz = {
     });
     response.results.forEach((match) => {
       if (match.score < similarityThreshold) return;
+      if (filterIdentifiers.includes(sourceIdentifier(match.metadata))) {
+        console.log(
+          "Zilliz: A source was filtered from context as it's parent document is pinned."
+        );
+        return;
+      }
       result.contextTexts.push(match.metadata.text);
       result.sourceDocuments.push(match);
       result.scores.push(match.score);