chore: Better VectorDb and Embedder error messages (#620)

* chore: propagate embedder and vectordb errors during document mutations

* add default value for errors on addDocuments
This commit is contained in:
Timothy Carambat 2024-01-18 11:40:48 -08:00 committed by GitHub
parent 0df86699e7
commit 658e7fa390
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 77 additions and 47 deletions

View File

@ -403,3 +403,7 @@ dialog::backdrop {
.tooltip { .tooltip {
@apply !bg-black !text-white !py-2 !px-3 !rounded-md; @apply !bg-black !text-white !py-2 !px-3 !rounded-md;
} }
/* Keep newline characters in toast message text as visible line breaks, so a multi-line message renders on separate lines. */
.Toastify__toast-body {
white-space: pre-line;
}

View File

@ -159,7 +159,7 @@ function workspaceEndpoints(app) {
} }
await Document.removeDocuments(currWorkspace, deletes); await Document.removeDocuments(currWorkspace, deletes);
const { failed = [] } = await Document.addDocuments( const { failedToEmbed = [], errors = [] } = await Document.addDocuments(
currWorkspace, currWorkspace,
adds adds
); );
@ -167,8 +167,10 @@ function workspaceEndpoints(app) {
response.status(200).json({ response.status(200).json({
workspace: updatedWorkspace, workspace: updatedWorkspace,
message: message:
failed.length > 0 failedToEmbed.length > 0
? `${failed.length} documents could not be embedded.` ? `${failedToEmbed.length} documents failed to add.\n\n${errors
.map((msg) => `${msg}`)
.join("\n\n")}`
: null, : null,
}); });
} catch (e) { } catch (e) {

View File

@ -39,6 +39,7 @@ const Document = {
if (additions.length === 0) return { failed: [], embedded: [] }; if (additions.length === 0) return { failed: [], embedded: [] };
const embedded = []; const embedded = [];
const failedToEmbed = []; const failedToEmbed = [];
const errors = new Set();
for (const path of additions) { for (const path of additions) {
const data = await fileData(path); const data = await fileData(path);
@ -53,14 +54,20 @@ const Document = {
workspaceId: workspace.id, workspaceId: workspace.id,
metadata: JSON.stringify(metadata), metadata: JSON.stringify(metadata),
}; };
const vectorized = await VectorDb.addDocumentToNamespace(
const { vectorized, error } = await VectorDb.addDocumentToNamespace(
workspace.slug, workspace.slug,
{ ...data, docId }, { ...data, docId },
path path
); );
if (!vectorized) { if (!vectorized) {
console.error("Failed to vectorize", path); console.error(
failedToEmbed.push(path); "Failed to vectorize",
metadata?.title || newDoc.filename
);
failedToEmbed.push(metadata?.title || newDoc.filename);
errors.add(error);
continue; continue;
} }
@ -77,7 +84,7 @@ const Document = {
Embedder: process.env.EMBEDDING_ENGINE || "inherit", Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "pinecone", VectorDbSelection: process.env.VECTOR_DB || "pinecone",
}); });
return { failed: failedToEmbed, embedded }; return { failedToEmbed, errors: Array.from(errors), embedded };
}, },
removeDocuments: async function (workspace, removals = []) { removeDocuments: async function (workspace, removals = []) {

View File

@ -46,7 +46,12 @@ class AzureOpenAiEmbedder {
resolve({ data: res.data, error: null }); resolve({ data: res.data, error: null });
}) })
.catch((e) => { .catch((e) => {
resolve({ data: [], error: e?.error }); e.type =
e?.response?.data?.error?.code ||
e?.response?.status ||
"failed_to_embed";
e.message = e?.response?.data?.error?.message || e.message;
resolve({ data: [], error: e });
}); });
}) })
); );
@ -62,11 +67,14 @@ class AzureOpenAiEmbedder {
.map((res) => res.error) .map((res) => res.error)
.flat(); .flat();
if (errors.length > 0) { if (errors.length > 0) {
let uniqueErrors = new Set();
errors.map((error) =>
uniqueErrors.add(`[${error.type}]: ${error.message}`)
);
return { return {
data: [], data: [],
error: `(${errors.length}) Embedding Errors! ${errors error: Array.from(uniqueErrors).join(", "),
.map((error) => `[${error.type}]: ${error.message}`)
.join(", ")}`,
}; };
} }
return { return {

View File

@ -41,7 +41,12 @@ class LocalAiEmbedder {
resolve({ data: res.data?.data, error: null }); resolve({ data: res.data?.data, error: null });
}) })
.catch((e) => { .catch((e) => {
resolve({ data: [], error: e?.error }); e.type =
e?.response?.data?.error?.code ||
e?.response?.status ||
"failed_to_embed";
e.message = e?.response?.data?.error?.message || e.message;
resolve({ data: [], error: e });
}); });
}) })
); );
@ -57,11 +62,14 @@ class LocalAiEmbedder {
.map((res) => res.error) .map((res) => res.error)
.flat(); .flat();
if (errors.length > 0) { if (errors.length > 0) {
let uniqueErrors = new Set();
errors.map((error) =>
uniqueErrors.add(`[${error.type}]: ${error.message}`)
);
return { return {
data: [], data: [],
error: `(${errors.length}) Embedding Errors! ${errors error: Array.from(uniqueErrors).join(", "),
.map((error) => `[${error.type}]: ${error.message}`)
.join(", ")}`,
}; };
} }
return { return {

View File

@ -37,7 +37,12 @@ class OpenAiEmbedder {
resolve({ data: res.data?.data, error: null }); resolve({ data: res.data?.data, error: null });
}) })
.catch((e) => { .catch((e) => {
resolve({ data: [], error: e?.error }); e.type =
e?.response?.data?.error?.code ||
e?.response?.status ||
"failed_to_embed";
e.message = e?.response?.data?.error?.message || e.message;
resolve({ data: [], error: e });
}); });
}) })
); );
@ -53,11 +58,14 @@ class OpenAiEmbedder {
.map((res) => res.error) .map((res) => res.error)
.flat(); .flat();
if (errors.length > 0) { if (errors.length > 0) {
let uniqueErrors = new Set();
errors.map((error) =>
uniqueErrors.add(`[${error.type}]: ${error.message}`)
);
return { return {
data: [], data: [],
error: `(${errors.length}) Embedding Errors! ${errors error: Array.from(uniqueErrors).join(", "),
.map((error) => `[${error.type}]: ${error.message}`)
.join(", ")}`,
}; };
} }
return { return {

View File

@ -171,7 +171,7 @@ const Chroma = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} }
// If we are here then we are going to embed and store a novel document. // If we are here then we are going to embed and store a novel document.
@ -242,11 +242,10 @@ const Chroma = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} catch (e) { } catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message); console.error("addDocumentToNamespace", e.message);
return false; return { vectorized: false, error: e.message };
} }
}, },
deleteDocumentFromNamespace: async function (namespace, docId) { deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -172,7 +172,7 @@ const LanceDb = {
await this.updateOrCreateCollection(client, submissions, namespace); await this.updateOrCreateCollection(client, submissions, namespace);
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} }
// If we are here then we are going to embed and store a novel document. // If we are here then we are going to embed and store a novel document.
@ -229,11 +229,10 @@ const LanceDb = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} catch (e) { } catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message); console.error("addDocumentToNamespace", e.message);
return false; return { vectorized: false, error: e.message };
} }
}, },
performSimilaritySearch: async function ({ performSimilaritySearch: async function ({

View File

@ -167,7 +167,7 @@ const Milvus = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
await client.flushSync({ collection_names: [namespace] }); await client.flushSync({ collection_names: [namespace] });
return true; return { vectorized: true, error: null };
} }
const textSplitter = new RecursiveCharacterTextSplitter({ const textSplitter = new RecursiveCharacterTextSplitter({
@ -231,11 +231,10 @@ const Milvus = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} catch (e) { } catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message); console.error("addDocumentToNamespace", e.message);
return false; return { vectorized: false, error: e.message };
} }
}, },
deleteDocumentFromNamespace: async function (namespace, docId) { deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -125,7 +125,7 @@ const Pinecone = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} }
// If we are here then we are going to embed and store a novel document. // If we are here then we are going to embed and store a novel document.
@ -183,11 +183,10 @@ const Pinecone = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} catch (e) { } catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message); console.error("addDocumentToNamespace", e.message);
return false; return { vectorized: false, error: e.message };
} }
}, },
deleteDocumentFromNamespace: async function (namespace, docId) { deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -190,7 +190,7 @@ const QDrant = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} }
// If we are here then we are going to embed and store a novel document. // If we are here then we are going to embed and store a novel document.
@ -272,11 +272,10 @@ const QDrant = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} catch (e) { } catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message); console.error("addDocumentToNamespace", e.message);
return false; return { vectorized: false, error: e.message };
} }
}, },
deleteDocumentFromNamespace: async function (namespace, docId) { deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -233,7 +233,7 @@ const Weaviate = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} }
// If we are here then we are going to embed and store a novel document. // If we are here then we are going to embed and store a novel document.
@ -316,11 +316,10 @@ const Weaviate = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} catch (e) { } catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message); console.error("addDocumentToNamespace", e.message);
return false; return { vectorized: false, error: e.message };
} }
}, },
deleteDocumentFromNamespace: async function (namespace, docId) { deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -168,7 +168,7 @@ const Zilliz = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
await client.flushSync({ collection_names: [namespace] }); await client.flushSync({ collection_names: [namespace] });
return true; return { vectorized: true, error: null };
} }
const textSplitter = new RecursiveCharacterTextSplitter({ const textSplitter = new RecursiveCharacterTextSplitter({
@ -232,11 +232,10 @@ const Zilliz = {
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
return true; return { vectorized: true, error: null };
} catch (e) { } catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message); console.error("addDocumentToNamespace", e.message);
return false; return { vectorized: false, error: e.message };
} }
}, },
deleteDocumentFromNamespace: async function (namespace, docId) { deleteDocumentFromNamespace: async function (namespace, docId) {