chore: Better VectorDb and Embedder error messages (#620)

* chore: propagate embedder and vectordb errors during document mutations

* add default value for errors on addDocuments
This commit is contained in:
Timothy Carambat 2024-01-18 11:40:48 -08:00 committed by GitHub
parent 0df86699e7
commit 658e7fa390
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 77 additions and 47 deletions

View File

@ -403,3 +403,7 @@ dialog::backdrop {
/* Tooltip appearance built from Tailwind utilities via @apply: black background,
   white text, vertical/horizontal padding and medium rounded corners. The leading
   `!` on each utility is Tailwind's important modifier, so these declarations are
   emitted with !important. */
.tooltip {
@apply !bg-black !text-white !py-2 !px-3 !rounded-md;
}
/* `white-space: pre-line` collapses runs of spaces but keeps newline characters as
   line breaks, so a toast body containing "\n" renders on multiple lines.
   NOTE(review): presumably added so newline-joined error lists returned by the
   server show one error per line in the toast; confirm against the toast caller. */
.Toastify__toast-body {
white-space: pre-line;
}

View File

@ -159,7 +159,7 @@ function workspaceEndpoints(app) {
}
await Document.removeDocuments(currWorkspace, deletes);
const { failed = [] } = await Document.addDocuments(
const { failedToEmbed = [], errors = [] } = await Document.addDocuments(
currWorkspace,
adds
);
@ -167,8 +167,10 @@ function workspaceEndpoints(app) {
response.status(200).json({
workspace: updatedWorkspace,
message:
failed.length > 0
? `${failed.length} documents could not be embedded.`
failedToEmbed.length > 0
? `${failedToEmbed.length} documents failed to add.\n\n${errors
.map((msg) => `${msg}`)
.join("\n\n")}`
: null,
});
} catch (e) {

View File

@ -39,6 +39,7 @@ const Document = {
if (additions.length === 0) return { failed: [], embedded: [] };
const embedded = [];
const failedToEmbed = [];
const errors = new Set();
for (const path of additions) {
const data = await fileData(path);
@ -53,14 +54,20 @@ const Document = {
workspaceId: workspace.id,
metadata: JSON.stringify(metadata),
};
const vectorized = await VectorDb.addDocumentToNamespace(
const { vectorized, error } = await VectorDb.addDocumentToNamespace(
workspace.slug,
{ ...data, docId },
path
);
if (!vectorized) {
console.error("Failed to vectorize", path);
failedToEmbed.push(path);
console.error(
"Failed to vectorize",
metadata?.title || newDoc.filename
);
failedToEmbed.push(metadata?.title || newDoc.filename);
errors.add(error);
continue;
}
@ -77,7 +84,7 @@ const Document = {
Embedder: process.env.EMBEDDING_ENGINE || "inherit",
VectorDbSelection: process.env.VECTOR_DB || "pinecone",
});
return { failed: failedToEmbed, embedded };
return { failedToEmbed, errors: Array.from(errors), embedded };
},
removeDocuments: async function (workspace, removals = []) {

View File

@ -46,7 +46,12 @@ class AzureOpenAiEmbedder {
resolve({ data: res.data, error: null });
})
.catch((e) => {
resolve({ data: [], error: e?.error });
e.type =
e?.response?.data?.error?.code ||
e?.response?.status ||
"failed_to_embed";
e.message = e?.response?.data?.error?.message || e.message;
resolve({ data: [], error: e });
});
})
);
@ -62,11 +67,14 @@ class AzureOpenAiEmbedder {
.map((res) => res.error)
.flat();
if (errors.length > 0) {
let uniqueErrors = new Set();
errors.map((error) =>
uniqueErrors.add(`[${error.type}]: ${error.message}`)
);
return {
data: [],
error: `(${errors.length}) Embedding Errors! ${errors
.map((error) => `[${error.type}]: ${error.message}`)
.join(", ")}`,
error: Array.from(uniqueErrors).join(", "),
};
}
return {

View File

@ -41,7 +41,12 @@ class LocalAiEmbedder {
resolve({ data: res.data?.data, error: null });
})
.catch((e) => {
resolve({ data: [], error: e?.error });
e.type =
e?.response?.data?.error?.code ||
e?.response?.status ||
"failed_to_embed";
e.message = e?.response?.data?.error?.message || e.message;
resolve({ data: [], error: e });
});
})
);
@ -57,11 +62,14 @@ class LocalAiEmbedder {
.map((res) => res.error)
.flat();
if (errors.length > 0) {
let uniqueErrors = new Set();
errors.map((error) =>
uniqueErrors.add(`[${error.type}]: ${error.message}`)
);
return {
data: [],
error: `(${errors.length}) Embedding Errors! ${errors
.map((error) => `[${error.type}]: ${error.message}`)
.join(", ")}`,
error: Array.from(uniqueErrors).join(", "),
};
}
return {

View File

@ -37,7 +37,12 @@ class OpenAiEmbedder {
resolve({ data: res.data?.data, error: null });
})
.catch((e) => {
resolve({ data: [], error: e?.error });
e.type =
e?.response?.data?.error?.code ||
e?.response?.status ||
"failed_to_embed";
e.message = e?.response?.data?.error?.message || e.message;
resolve({ data: [], error: e });
});
})
);
@ -53,11 +58,14 @@ class OpenAiEmbedder {
.map((res) => res.error)
.flat();
if (errors.length > 0) {
let uniqueErrors = new Set();
errors.map((error) =>
uniqueErrors.add(`[${error.type}]: ${error.message}`)
);
return {
data: [],
error: `(${errors.length}) Embedding Errors! ${errors
.map((error) => `[${error.type}]: ${error.message}`)
.join(", ")}`,
error: Array.from(uniqueErrors).join(", "),
};
}
return {

View File

@ -171,7 +171,7 @@ const Chroma = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
}
// If we are here then we are going to embed and store a novel document.
@ -242,11 +242,10 @@ const Chroma = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
} catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message);
return false;
return { vectorized: false, error: e.message };
}
},
deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -172,7 +172,7 @@ const LanceDb = {
await this.updateOrCreateCollection(client, submissions, namespace);
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
}
// If we are here then we are going to embed and store a novel document.
@ -229,11 +229,10 @@ const LanceDb = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
} catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message);
return false;
return { vectorized: false, error: e.message };
}
},
performSimilaritySearch: async function ({

View File

@ -167,7 +167,7 @@ const Milvus = {
}
await DocumentVectors.bulkInsert(documentVectors);
await client.flushSync({ collection_names: [namespace] });
return true;
return { vectorized: true, error: null };
}
const textSplitter = new RecursiveCharacterTextSplitter({
@ -231,11 +231,10 @@ const Milvus = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
} catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message);
return false;
return { vectorized: false, error: e.message };
}
},
deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -125,7 +125,7 @@ const Pinecone = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
}
// If we are here then we are going to embed and store a novel document.
@ -183,11 +183,10 @@ const Pinecone = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
} catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message);
return false;
return { vectorized: false, error: e.message };
}
},
deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -190,7 +190,7 @@ const QDrant = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
}
// If we are here then we are going to embed and store a novel document.
@ -272,11 +272,10 @@ const QDrant = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
} catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message);
return false;
return { vectorized: false, error: e.message };
}
},
deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -233,7 +233,7 @@ const Weaviate = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
}
// If we are here then we are going to embed and store a novel document.
@ -316,11 +316,10 @@ const Weaviate = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
} catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message);
return false;
return { vectorized: false, error: e.message };
}
},
deleteDocumentFromNamespace: async function (namespace, docId) {

View File

@ -168,7 +168,7 @@ const Zilliz = {
}
await DocumentVectors.bulkInsert(documentVectors);
await client.flushSync({ collection_names: [namespace] });
return true;
return { vectorized: true, error: null };
}
const textSplitter = new RecursiveCharacterTextSplitter({
@ -232,11 +232,10 @@ const Zilliz = {
}
await DocumentVectors.bulkInsert(documentVectors);
return true;
return { vectorized: true, error: null };
} catch (e) {
console.error(e);
console.error("addDocumentToNamespace", e.message);
return false;
return { vectorized: false, error: e.message };
}
},
deleteDocumentFromNamespace: async function (namespace, docId) {