From 122f29069c965e286e60831cb952c832419bcd2a Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Mon, 14 Aug 2023 17:42:17 -0700 Subject: [PATCH] Enable optional Telemetry integration via posthog (#190) * Enable optional Telemetry integration via posthog --- README.md | 25 ++++++++++++- server/endpoints/chat.js | 6 +++ server/endpoints/system.js | 2 + server/endpoints/utils.js | 2 +- server/endpoints/workspaces.js | 7 ++++ server/index.js | 6 ++- server/models/documents.js | 9 +++++ server/models/systemSettings.js | 1 + server/models/telemetry.js | 65 +++++++++++++++++++++++++++++++++ server/package.json | 1 + server/utils/database/index.js | 33 +++++++++++++++++ server/yarn.lock | 23 +++++++++++- 12 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 server/models/telemetry.js diff --git a/README.md b/README.md index cc68ec62a..0f36576fe 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,30 @@ Next, you will need some content to embed. This could be a Youtube Channel, Medi [Learn about vector caching](./server/storage/vector-cache/VECTOR_CACHE.md) -### Contributing +## Contributing - create issue - create PR with branch name format of `-` - yee haw let's merge + +## Telemetry +AnythingLLM by Mintplex Labs Inc contains a telemetry feature that collects anonymous usage information. + +### Why? +We use this information to help us understand how AnythingLLM is used, to help us prioritize work on new features and bug fixes, and to help us improve AnythingLLM's performance and stability. + +### Opting out +Set `DISABLE_TELEMETRY` in your server or docker .env settings to "true" to opt out of telemetry. + +``` +DISABLE_TELEMETRY="true" +``` + +### What do you explicitly track? +We will only track usage details that help us make product and roadmap decisions, specifically: +- Version of your installation +- When a document is added or removed. No information _about_ the document. Just that the event occurred. This gives us an idea of use. 
+- Type of vector database in use. Lets us know which vector database provider is the most used so we can prioritize changes when updates arrive for that provider. +- Type of LLM in use. Lets us know the most popular choice so we can prioritize changes when updates arrive for that provider. +- Chat is sent. This is the most regular "event" and gives us an idea of the daily activity of this project across all installations. Again, only the event is sent - we have no information on the nature or content of the chat itself. + +You can verify these claims by finding all locations where `Telemetry.sendTelemetry` is called. Additionally, these events are written to the output log so you can also see the specific data which was sent - if enabled. No IP or other identifying information is collected. The Telemetry provider is [PostHog](https://posthog.com/) - an open-source telemetry collection service. \ No newline at end of file diff --git a/server/endpoints/chat.js b/server/endpoints/chat.js index 0a9544d5d..5582dd919 100644 --- a/server/endpoints/chat.js +++ b/server/endpoints/chat.js @@ -5,6 +5,7 @@ const { chatWithWorkspace } = require("../utils/chats"); const { validatedRequest } = require("../utils/middleware/validatedRequest"); const { WorkspaceChats } = require("../models/workspaceChats"); const { SystemSettings } = require("../models/systemSettings"); +const { Telemetry } = require("../models/telemetry"); function chatEndpoints(app) { if (!app) return; @@ -55,6 +56,11 @@ function chatEndpoints(app) { } const result = await chatWithWorkspace(workspace, message, mode, user); + await Telemetry.sendTelemetry("sent_chat", { + multiUserMode: multiUserMode(response), + LLMSelection: process.env.LLM_PROVIDER || "openai", + VectorDbSelection: process.env.VECTOR_DB || "pinecone", + }); response.status(200).json({ ...result }); } catch (e) { response.status(500).json({ diff --git a/server/endpoints/system.js b/server/endpoints/system.js index 9cb0ee92b..73041e183 100644 --- 
a/server/endpoints/system.js +++ b/server/endpoints/system.js @@ -34,6 +34,7 @@ const { removeCustomLogo, DARK_LOGO_FILENAME, } = require("../utils/files/logo"); +const { Telemetry } = require("../models/telemetry"); function systemEndpoints(app) { if (!app) return; @@ -325,6 +326,7 @@ function systemEndpoints(app) { }); process.env.AUTH_TOKEN = null; process.env.JWT_SECRET = process.env.JWT_SECRET ?? v4(); // Make sure JWT_SECRET is set for JWT issuance. + await Telemetry.sendTelemetry("enabled_multi_user_mode"); response.status(200).json({ success: !!user, error }); } catch (e) { console.log(e.message, e); diff --git a/server/endpoints/utils.js b/server/endpoints/utils.js index 0e6eb986c..c9b946dfd 100644 --- a/server/endpoints/utils.js +++ b/server/endpoints/utils.js @@ -49,4 +49,4 @@ function utilEndpoints(app) { }); } -module.exports = { utilEndpoints }; +module.exports = { utilEndpoints, getGitVersion }; diff --git a/server/endpoints/workspaces.js b/server/endpoints/workspaces.js index f103a1c59..c1b468fbd 100644 --- a/server/endpoints/workspaces.js +++ b/server/endpoints/workspaces.js @@ -15,6 +15,7 @@ const { } = require("../utils/files/documentProcessor"); const { validatedRequest } = require("../utils/middleware/validatedRequest"); const { SystemSettings } = require("../models/systemSettings"); +const { Telemetry } = require("../models/telemetry"); const { handleUploads } = setupMulter(); function workspaceEndpoints(app) { @@ -25,6 +26,11 @@ function workspaceEndpoints(app) { const user = await userFromSession(request, response); const { name = null } = reqBody(request); const { workspace, message } = await Workspace.new(name, user?.id); + await Telemetry.sendTelemetry("workspace_created", { + multiUserMode: multiUserMode(response), + LLMSelection: process.env.LLM_PROVIDER || "openai", + VectorDbSelection: process.env.VECTOR_DB || "pinecone", + }); response.status(200).json({ workspace, message }); } catch (e) { console.log(e.message, e); @@ -87,6 +93,7 
@@ function workspaceEndpoints(app) { console.log( `Document ${originalname} uploaded processed and successfully. It is now available in documents.` ); + await Telemetry.sendTelemetry("document_uploaded"); return; } ); diff --git a/server/index.js b/server/index.js index 5c788daf7..f309e8aca 100644 --- a/server/index.js +++ b/server/index.js @@ -12,10 +12,11 @@ const { systemEndpoints } = require("./endpoints/system"); const { workspaceEndpoints } = require("./endpoints/workspaces"); const { chatEndpoints } = require("./endpoints/chat"); const { getVectorDbClass } = require("./utils/helpers"); -const { validateTablePragmas } = require("./utils/database"); +const { validateTablePragmas, setupTelemetry } = require("./utils/database"); const { adminEndpoints } = require("./endpoints/admin"); const { inviteEndpoints } = require("./endpoints/invite"); const { utilEndpoints } = require("./endpoints/utils"); +const { Telemetry } = require("./models/telemetry"); const app = express(); const apiRouter = express.Router(); @@ -86,15 +87,18 @@ app.all("*", function (_, response) { app .listen(process.env.SERVER_PORT || 3001, async () => { await validateTablePragmas(); + await setupTelemetry(); console.log( `Example app listening on port ${process.env.SERVER_PORT || 3001}` ); }) .on("error", function (err) { process.once("SIGUSR2", function () { + Telemetry.flush(); process.kill(process.pid, "SIGUSR2"); }); process.on("SIGINT", function () { + Telemetry.flush(); process.kill(process.pid, "SIGINT"); }); }); diff --git a/server/models/documents.js b/server/models/documents.js index e51071a1a..143885b1f 100644 --- a/server/models/documents.js +++ b/server/models/documents.js @@ -2,6 +2,7 @@ const { fileData } = require("../utils/files"); const { v4: uuidv4 } = require("uuid"); const { getVectorDbClass } = require("../utils/helpers"); const { checkForMigrations } = require("../utils/database"); +const { Telemetry } = require("./telemetry"); const Document = { tablename: 
/**
 * Anonymous, opt-out usage telemetry sent to PostHog.
 *
 * Telemetry is skipped entirely when `DISABLE_TELEMETRY` is "true" or when running with
 * NODE_ENV=development while `stubDevelopmentEvents` is set. Each installation is identified
 * only by a random UUID stored in the system settings under the `telemetry_id` label.
 */
const Telemetry = {
  // Write-only key. It can't read events or any of your other data, so it's safe to use in public apps.
  pubkey: "phc_9qu7QLpV8L84P3vFmEiZxL020t2EqIubP7HHHxrSsqS",
  stubDevelopmentEvents: true, // [DO NOT TOUCH] Core team only.
  label: "telemetry_id",
  // Lazily created PostHog client. It is shared between sendTelemetry() and flush() so that
  // flush() shuts down the same instance that queued the events instead of a fresh, empty one.
  cachedClient: null,

  /**
   * Look up the stored installation id.
   * @returns {Promise<string|undefined>} The stored id, or a falsy value when none is stored.
   */
  id: async function () {
    const result = await SystemSettings.get(`label = '${this.label}'`);
    return result?.value;
  },

  /**
   * Resolve both the PostHog client and the installation id.
   * @returns {Promise<{client: object|null, distinctId: string}>} `client` is null when
   *   telemetry is disabled or stubbed.
   */
  connect: async function () {
    const client = this.client();
    const distinctId = await this.findOrCreateId();
    return { client, distinctId };
  },

  /**
   * @returns {boolean} True when events should be stubbed because NODE_ENV is "development"
   *   and `stubDevelopmentEvents` is set.
   */
  isDev: function () {
    if (process.env.NODE_ENV === "development")
      return this.stubDevelopmentEvents;
    return false;
  },

  /**
   * Get the shared PostHog client, creating it on first use.
   * The opt-out checks run on every call, so disabling telemetry at runtime takes effect
   * immediately even if a client was created earlier.
   * @returns {object|null} The PostHog client, or null when telemetry is disabled or stubbed.
   */
  client: function () {
    if (process.env.DISABLE_TELEMETRY === "true" || this.isDev()) return null;
    if (!this.cachedClient) {
      // Required lazily so installs with telemetry disabled never load the SDK.
      const { PostHog } = require("posthog-node");
      this.cachedClient = new PostHog(this.pubkey);
    }
    return this.cachedClient;
  },

  /**
   * Queue a telemetry event. Never throws: telemetry must not break the calling request.
   * @param {string} event - Event name.
   * @param {object} [properties={}] - Extra properties attached to the event.
   * @returns {Promise<void>}
   */
  sendTelemetry: async function (event, properties = {}) {
    try {
      // Check the client first so a disabled/stubbed install performs no settings reads or writes.
      const client = this.client();
      if (!client) return;

      const distinctId = await this.findOrCreateId();
      console.log(`\x1b[32m[TELEMETRY SENT]\x1b[0m`, {
        event,
        properties,
      });
      client.capture({
        event,
        distinctId,
        properties,
      });
    } catch {
      // Deliberately best-effort: a telemetry failure is ignored rather than surfaced to the caller.
      return;
    }
  },

  /**
   * Shut down the shared client, if one was ever created, so queued events are delivered.
   * A later sendTelemetry() call creates a fresh client.
   * @returns {Promise<void>}
   */
  flush: async function () {
    const client = this.cachedClient;
    if (!client) return;
    this.cachedClient = null;
    await client.shutdownAsync();
  },

  /**
   * Generate a new random installation id and persist it in the system settings.
   * @returns {Promise<string>} The newly generated id.
   */
  setUid: async function () {
    const newId = v4();
    await SystemSettings.updateSettings({ [this.label]: newId });
    return newId;
  },

  /**
   * Return the stored installation id, creating and persisting one if none exists.
   * @returns {Promise<string>}
   */
  findOrCreateId: async function () {
    const currentId = await this.id();
    if (currentId) return currentId;
    return this.setUid();
  },
};
/**
 * Log which telemetry mode the server is booting in and, when telemetry is live, make sure an
 * installation id exists and report a `server_boot` event carrying the current git version.
 *
 * Telemetry is anonymized and your data is never read. It can be disabled by setting
 * DISABLE_TELEMETRY=true in the `.env` of however you run the server. To audit what is sent,
 * search the codebase for `Telemetry.sendTelemetry` calls.
 *
 * @returns {Promise<true|undefined>} `true` when telemetry is disabled through the environment,
 *   otherwise `undefined`.
 */
async function setupTelemetry() {
  const optedOut = process.env.DISABLE_TELEMETRY === "true";
  if (optedOut) {
    const disabledNotice = `\x1b[31m[TELEMETRY DISABLED]\x1b[0m Telemetry is marked as disabled - no events will send. Telemetry helps Mintplex Labs Inc improve AnythingLLM.`;
    console.log(disabledNotice);
    return true;
  }

  if (!Telemetry.isDev()) {
    const enabledNotice = `\x1b[32m[TELEMETRY ENABLED]\x1b[0m Anonymous Telemetry enabled. Telemetry helps Mintplex Labs Inc improve AnythingLLM.`;
    console.log(enabledNotice);

    // Create the installation id up front, then report the boot with the running commit.
    await Telemetry.findOrCreateId();
    const commit = getGitVersion();
    await Telemetry.sendTelemetry("server_boot", { commit });
    return;
  }

  const stubbedNotice = `\x1b[33m[TELEMETRY STUBBED]\x1b[0m Anonymous Telemetry stubbed in development.`;
  console.log(stubbedNotice);
}
"https://registry.yarnpkg.com/prettier/-/prettier-2.8.8.tgz#e8c5d7e98a4305ffe3de2e1fc4aca1a71c28b1da" @@ -2172,6 +2188,11 @@ rimraf@^3.0.2: dependencies: glob "^7.1.3" +rusha@^0.8.14: + version "0.8.14" + resolved "https://registry.yarnpkg.com/rusha/-/rusha-0.8.14.tgz#a977d0de9428406138b7bb90d3de5dcd024e2f68" + integrity sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA== + safe-buffer@5.2.1, safe-buffer@^5.0.1, safe-buffer@~5.2.0: version "5.2.1" resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"