diff --git a/frontend/src/index.css b/frontend/src/index.css
index e8d7e2d8..729cccb5 100644
--- a/frontend/src/index.css
+++ b/frontend/src/index.css
@@ -403,3 +403,7 @@ dialog::backdrop {
 .tooltip {
   @apply !bg-black !text-white !py-2 !px-3 !rounded-md;
 }
+
+.Toastify__toast-body {
+  white-space: pre-line;
+}
diff --git a/server/endpoints/workspaces.js b/server/endpoints/workspaces.js
index 7873ef76..7119297f 100644
--- a/server/endpoints/workspaces.js
+++ b/server/endpoints/workspaces.js
@@ -159,7 +159,7 @@ function workspaceEndpoints(app) {
         }
 
         await Document.removeDocuments(currWorkspace, deletes);
-        const { failed = [] } = await Document.addDocuments(
+        const { failedToEmbed = [], errors = [] } = await Document.addDocuments(
           currWorkspace,
           adds
         );
@@ -167,8 +167,10 @@ function workspaceEndpoints(app) {
         response.status(200).json({
           workspace: updatedWorkspace,
           message:
-            failed.length > 0
-              ? `${failed.length} documents could not be embedded.`
+            failedToEmbed.length > 0
+              ? `${failedToEmbed.length} documents failed to add.\n\n${errors
+                  .map((msg) => `${msg}`)
+                  .join("\n\n")}`
               : null,
         });
       } catch (e) {
diff --git a/server/models/documents.js b/server/models/documents.js
index 4505089d..8f3b88fb 100644
--- a/server/models/documents.js
+++ b/server/models/documents.js
@@ -39,6 +39,7 @@ const Document = {
     if (additions.length === 0) return { failed: [], embedded: [] };
     const embedded = [];
     const failedToEmbed = [];
+    const errors = new Set();
 
     for (const path of additions) {
       const data = await fileData(path);
@@ -53,14 +54,20 @@ const Document = {
         workspaceId: workspace.id,
         metadata: JSON.stringify(metadata),
       };
-      const vectorized = await VectorDb.addDocumentToNamespace(
+
+      const { vectorized, error } = await VectorDb.addDocumentToNamespace(
         workspace.slug,
         { ...data, docId },
         path
       );
+
       if (!vectorized) {
-        console.error("Failed to vectorize", path);
-        failedToEmbed.push(path);
+        console.error(
+          "Failed to vectorize",
+          metadata?.title || newDoc.filename
+        );
+        failedToEmbed.push(metadata?.title || newDoc.filename);
+        errors.add(error);
         continue;
       }
 
@@ -77,7 +84,7 @@ const Document = {
       Embedder: process.env.EMBEDDING_ENGINE || "inherit",
       VectorDbSelection: process.env.VECTOR_DB || "pinecone",
     });
-    return { failed: failedToEmbed, embedded };
+    return { failedToEmbed, errors: Array.from(errors), embedded };
   },
 
   removeDocuments: async function (workspace, removals = []) {
diff --git a/server/utils/EmbeddingEngines/azureOpenAi/index.js b/server/utils/EmbeddingEngines/azureOpenAi/index.js
index e80b4b73..4193e860 100644
--- a/server/utils/EmbeddingEngines/azureOpenAi/index.js
+++ b/server/utils/EmbeddingEngines/azureOpenAi/index.js
@@ -46,7 +46,12 @@ class AzureOpenAiEmbedder {
             resolve({ data: res.data, error: null });
           })
           .catch((e) => {
-            resolve({ data: [], error: e?.error });
+            e.type =
+              e?.response?.data?.error?.code ||
+              e?.response?.status ||
+              "failed_to_embed";
+            e.message = e?.response?.data?.error?.message || e.message;
+            resolve({ data: [], error: e });
           });
       })
     );
@@ -62,11 +67,14 @@ class AzureOpenAiEmbedder {
       .map((res) => res.error)
       .flat();
     if (errors.length > 0) {
+      let uniqueErrors = new Set();
+      errors.map((error) =>
+        uniqueErrors.add(`[${error.type}]: ${error.message}`)
+      );
+
       return {
         data: [],
-        error: `(${errors.length}) Embedding Errors! ${errors
-          .map((error) => `[${error.type}]: ${error.message}`)
-          .join(", ")}`,
+        error: Array.from(uniqueErrors).join(", "),
       };
     }
     return {
diff --git a/server/utils/EmbeddingEngines/localAi/index.js b/server/utils/EmbeddingEngines/localAi/index.js
index 1480755d..2c9db2c7 100644
--- a/server/utils/EmbeddingEngines/localAi/index.js
+++ b/server/utils/EmbeddingEngines/localAi/index.js
@@ -41,7 +41,12 @@ class LocalAiEmbedder {
             resolve({ data: res.data?.data, error: null });
           })
           .catch((e) => {
-            resolve({ data: [], error: e?.error });
+            e.type =
+              e?.response?.data?.error?.code ||
+              e?.response?.status ||
+              "failed_to_embed";
+            e.message = e?.response?.data?.error?.message || e.message;
+            resolve({ data: [], error: e });
           });
       })
     );
@@ -57,11 +62,14 @@ class LocalAiEmbedder {
       .map((res) => res.error)
       .flat();
     if (errors.length > 0) {
+      let uniqueErrors = new Set();
+      errors.map((error) =>
+        uniqueErrors.add(`[${error.type}]: ${error.message}`)
+      );
+
       return {
         data: [],
-        error: `(${errors.length}) Embedding Errors! ${errors
-          .map((error) => `[${error.type}]: ${error.message}`)
-          .join(", ")}`,
+        error: Array.from(uniqueErrors).join(", "),
       };
     }
     return {
diff --git a/server/utils/EmbeddingEngines/openAi/index.js b/server/utils/EmbeddingEngines/openAi/index.js
index 105be9d7..1f9ba432 100644
--- a/server/utils/EmbeddingEngines/openAi/index.js
+++ b/server/utils/EmbeddingEngines/openAi/index.js
@@ -37,7 +37,12 @@ class OpenAiEmbedder {
             resolve({ data: res.data?.data, error: null });
           })
           .catch((e) => {
-            resolve({ data: [], error: e?.error });
+            e.type =
+              e?.response?.data?.error?.code ||
+              e?.response?.status ||
+              "failed_to_embed";
+            e.message = e?.response?.data?.error?.message || e.message;
+            resolve({ data: [], error: e });
           });
       })
     );
@@ -53,11 +58,14 @@ class OpenAiEmbedder {
       .map((res) => res.error)
       .flat();
     if (errors.length > 0) {
+      let uniqueErrors = new Set();
+      errors.map((error) =>
+        uniqueErrors.add(`[${error.type}]: ${error.message}`)
+      );
+
       return {
         data: [],
-        error: `(${errors.length}) Embedding Errors! ${errors
-          .map((error) => `[${error.type}]: ${error.message}`)
-          .join(", ")}`,
+        error: Array.from(uniqueErrors).join(", "),
       };
     }
     return {
diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js
index 878cf05f..28af39e6 100644
--- a/server/utils/vectorDbProviders/chroma/index.js
+++ b/server/utils/vectorDbProviders/chroma/index.js
@@ -171,7 +171,7 @@ const Chroma = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     }
 
     // If we are here then we are going to embed and store a novel document.
@@ -242,11 +242,10 @@ const Chroma = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js
index 5e58ef1c..8f243cf9 100644
--- a/server/utils/vectorDbProviders/lance/index.js
+++ b/server/utils/vectorDbProviders/lance/index.js
@@ -172,7 +172,7 @@ const LanceDb = {
 
       await this.updateOrCreateCollection(client, submissions, namespace);
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     }
 
     // If we are here then we are going to embed and store a novel document.
@@ -229,11 +229,10 @@ const LanceDb = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   performSimilaritySearch: async function ({
diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js
index cc934a9a..79a13241 100644
--- a/server/utils/vectorDbProviders/milvus/index.js
+++ b/server/utils/vectorDbProviders/milvus/index.js
@@ -167,7 +167,7 @@ const Milvus = {
       }
       await DocumentVectors.bulkInsert(documentVectors);
       await client.flushSync({ collection_names: [namespace] });
-      return true;
+      return { vectorized: true, error: null };
     }
 
     const textSplitter = new RecursiveCharacterTextSplitter({
@@ -231,11 +231,10 @@ const Milvus = {
       }
 
      await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js
index 7a7f862c..594a9aaf 100644
--- a/server/utils/vectorDbProviders/pinecone/index.js
+++ b/server/utils/vectorDbProviders/pinecone/index.js
@@ -125,7 +125,7 @@ const Pinecone = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     }
 
     // If we are here then we are going to embed and store a novel document.
@@ -183,11 +183,10 @@ const Pinecone = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js
index 2783cde9..70c069e8 100644
--- a/server/utils/vectorDbProviders/qdrant/index.js
+++ b/server/utils/vectorDbProviders/qdrant/index.js
@@ -190,7 +190,7 @@ const QDrant = {
       }
 
      await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     }
 
     // If we are here then we are going to embed and store a novel document.
@@ -272,11 +272,10 @@ const QDrant = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js
index 91faff64..ac89315a 100644
--- a/server/utils/vectorDbProviders/weaviate/index.js
+++ b/server/utils/vectorDbProviders/weaviate/index.js
@@ -233,7 +233,7 @@ const Weaviate = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     }
 
     // If we are here then we are going to embed and store a novel document.
@@ -316,11 +316,10 @@ const Weaviate = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {
diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js
index b8493e1c..31afab35 100644
--- a/server/utils/vectorDbProviders/zilliz/index.js
+++ b/server/utils/vectorDbProviders/zilliz/index.js
@@ -168,7 +168,7 @@ const Zilliz = {
       }
       await DocumentVectors.bulkInsert(documentVectors);
       await client.flushSync({ collection_names: [namespace] });
-      return true;
+      return { vectorized: true, error: null };
     }
 
     const textSplitter = new RecursiveCharacterTextSplitter({
@@ -232,11 +232,10 @@ const Zilliz = {
       }
 
       await DocumentVectors.bulkInsert(documentVectors);
-      return true;
+      return { vectorized: true, error: null };
     } catch (e) {
-      console.error(e);
       console.error("addDocumentToNamespace", e.message);
-      return false;
+      return { vectorized: false, error: e.message };
     }
   },
   deleteDocumentFromNamespace: async function (namespace, docId) {