From a30fa9b2ed4f223a79c8a07967fffcae1fa9142a Mon Sep 17 00:00:00 2001
From: Timothy Carambat
Date: Mon, 16 Sep 2024 12:10:44 -0700
Subject: [PATCH] 1943 add fireworksai support (#2300)

* Issue #1943: Add support for LLM provider - Fireworks AI

* Update UI selection boxes
Update base AI keys for future embedder support if needed
Add agent capabilities for FireworksAI

* class only return

---------

Co-authored-by: Aaron Van Doren
---
 .vscode/settings.json                         |   1 +
 README.md                                     |   1 +
 docker/.env.example                           |   4 +
 .../LLMSelection/FireworksAiOptions/index.jsx |  99 +++++++++++++
 frontend/src/hooks/useGetProvidersModels.js   |   5 +-
 .../src/media/llmprovider/fireworksai.jpeg    | Bin 0 -> 3795 bytes
 .../GeneralSettings/LLMPreference/index.jsx   |  11 ++
 .../Steps/DataHandling/index.jsx              |   9 ++
 .../Steps/LLMPreference/index.jsx             |  10 ++
 .../AgentConfig/AgentLLMSelection/index.jsx   |   1 +
 locales/README.ja-JP.md                       |   1 +
 locales/README.zh-CN.md                       |   1 +
 server/.env.example                           |   4 +
 server/models/systemSettings.js               |   4 +
 server/utils/AiProviders/fireworksAi/index.js | 135 ++++++++++++++++++
 .../utils/AiProviders/fireworksAi/models.js   | 112 +++++++++++++++
 .../fireworksAi/scripts/.gitignore            |   1 +
 .../fireworksAi/scripts/chat_models.txt       |  20 +++
 .../AiProviders/fireworksAi/scripts/parse.mjs |  46 ++++++
 server/utils/agents/aibitat/index.js          |   2 +
 .../agents/aibitat/providers/ai-provider.js   |   5 +
 .../agents/aibitat/providers/fireworksai.js   | 118 +++++++++++++++
 .../utils/agents/aibitat/providers/index.js   |   2 +
 server/utils/agents/index.js                  |  11 ++
 server/utils/helpers/customModels.js          |  19 +++
 server/utils/helpers/index.js                 |   6 +
 server/utils/helpers/updateENV.js             |  11 ++
 27 files changed, 637 insertions(+), 2 deletions(-)
 create mode 100644 frontend/src/components/LLMSelection/FireworksAiOptions/index.jsx
 create mode 100644 frontend/src/media/llmprovider/fireworksai.jpeg
 create mode 100644 server/utils/AiProviders/fireworksAi/index.js
 create mode 100644 server/utils/AiProviders/fireworksAi/models.js
 create mode 100644 server/utils/AiProviders/fireworksAi/scripts/.gitignore
 create mode 100644 server/utils/AiProviders/fireworksAi/scripts/chat_models.txt
 create mode 100644 server/utils/AiProviders/fireworksAi/scripts/parse.mjs
 create mode 100644 server/utils/agents/aibitat/providers/fireworksai.js

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 4769a939c..d60238c72 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -47,6 +47,7 @@
     "streamable",
     "textgenwebui",
     "togetherai",
+    "fireworksai",
     "Unembed",
     "vectordbs",
     "Weaviate",
diff --git a/README.md b/README.md
index aa36f8bb8..d42f6fe91 100644
--- a/README.md
+++ b/README.md
@@ -84,6 +84,7 @@ AnythingLLM divides your documents into objects called `workspaces`.
A Workspace - [LM Studio (all models)](https://lmstudio.ai) - [LocalAi (all models)](https://localai.io/) - [Together AI (chat models)](https://www.together.ai/) +- [Fireworks AI (chat models)](https://fireworks.ai/) - [Perplexity (chat models)](https://www.perplexity.ai/) - [OpenRouter (chat models)](https://openrouter.ai/) - [Mistral](https://mistral.ai/) diff --git a/docker/.env.example b/docker/.env.example index 1521a307a..e67ac5ddd 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -101,6 +101,10 @@ GID='1000' # AWS_BEDROCK_LLM_MODEL_PREFERENCE=meta.llama3-1-8b-instruct-v1:0 # AWS_BEDROCK_LLM_MODEL_TOKEN_LIMIT=8191 +# LLM_PROVIDER='fireworksai' +# FIREWORKS_AI_LLM_API_KEY='my-fireworks-ai-key' +# FIREWORKS_AI_LLM_MODEL_PREF='accounts/fireworks/models/llama-v3p1-8b-instruct' + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/frontend/src/components/LLMSelection/FireworksAiOptions/index.jsx b/frontend/src/components/LLMSelection/FireworksAiOptions/index.jsx new file mode 100644 index 000000000..e61e4fc4b --- /dev/null +++ b/frontend/src/components/LLMSelection/FireworksAiOptions/index.jsx @@ -0,0 +1,99 @@ +import System from "@/models/system"; +import { useState, useEffect } from "react"; + +export default function FireworksAiOptions({ settings }) { + return ( +
<div className="flex gap-[36px] mt-1.5">
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">
+          Fireworks AI API Key
+        </label>
+        <input
+          type="password"
+          name="FireworksAiLLMApiKey"
+          className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
+          placeholder="Fireworks AI API Key"
+          defaultValue={settings?.FireworksAiLLMApiKey ? "*".repeat(20) : ""}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+        />
+      </div>
+      {!settings?.credentialsOnly && (
+        <FireworksAiModelSelection settings={settings} />
+      )}
+    </div>
+ ); +} +function FireworksAiModelSelection({ settings }) { + const [groupedModels, setGroupedModels] = useState({}); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findCustomModels() { + setLoading(true); + const { models } = await System.customModels("fireworksai"); + + if (models?.length > 0) { + const modelsByOrganization = models.reduce((acc, model) => { + acc[model.organization] = acc[model.organization] || []; + acc[model.organization].push(model); + return acc; + }, {}); + + setGroupedModels(modelsByOrganization); + } + + setLoading(false); + } + findCustomModels(); + }, []); + + if (loading || Object.keys(groupedModels).length === 0) { + return ( +
<div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-3">
+          Chat Model Selection
+        </label>
+        <select
+          name="FireworksAiLLMModelPref"
+          disabled={true}
+          className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            -- loading available models --
+          </option>
+        </select>
+      </div>
+ ); + } + + return ( +
<div className="flex flex-col w-60">
+      <label className="text-white text-sm font-semibold block mb-3">
+        Chat Model Selection
+      </label>
+      <select
+        name="FireworksAiLLMModelPref"
+        required={true}
+        className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+      >
+        {Object.keys(groupedModels)
+          .sort()
+          .map((organization) => (
+            <optgroup key={organization} label={organization}>
+              {groupedModels[organization].map((model) => (
+                <option
+                  key={model.id}
+                  value={model.id}
+                  selected={settings?.FireworksAiLLMModelPref === model.id}
+                >
+                  {model.name}
+                </option>
+              ))}
+            </optgroup>
+          ))}
+      </select>
+    </div>
+  );
+}
diff --git a/frontend/src/hooks/useGetProvidersModels.js b/frontend/src/hooks/useGetProvidersModels.js
index ca9c42ca5..ece31c2b5 100644
--- a/frontend/src/hooks/useGetProvidersModels.js
+++ b/frontend/src/hooks/useGetProvidersModels.js
@@ -35,6 +35,7 @@ const PROVIDER_DEFAULT_MODELS = {
   localai: [],
   ollama: [],
   togetherai: [],
+  fireworksai: [],
   groq: [],
   native: [],
   cohere: [
@@ -50,7 +51,7 @@
   bedrock: [],
 };

-// For togetherAi, which has a large model list - we subgroup the options
+// For providers with large model lists (e.g. togetherAi) - we subgroup the options
 // by their creator organization (eg: Meta, Mistral, etc)
 // which makes selection easier to read.
 function groupModels(models) {
@@ -61,7 +62,7 @@
   }, {});
 }

-const groupedProviders = ["togetherai", "openai", "openrouter"];
+const groupedProviders = ["togetherai", "fireworksai", "openai", "openrouter"];
 export default function useGetProviderModels(provider = null) {
   const [defaultModels, setDefaultModels] = useState([]);
   const [customModels, setCustomModels] = useState([]);
diff --git a/frontend/src/media/llmprovider/fireworksai.jpeg b/frontend/src/media/llmprovider/fireworksai.jpeg
new file mode 100644
index 0000000000000000000000000000000000000000..46a25812cce2a62e05264d811f745625cd2f5d05
Binary files /dev/null and b/frontend/src/media/llmprovider/fireworksai.jpeg differ
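
The `groupModels` helper above reduces the flat model list into a map keyed by creator organization, and `FireworksAiModelSelection` runs the same reduce client-side to feed one `<optgroup>` per key. A minimal sketch of the transformation (illustrative input only — at runtime the list comes from `System.customModels("fireworksai")`; the ids, names, and organizations below are taken from models.js later in this patch):

    // Sketch: how groupModels() shapes the list the <select> consumes.
    const models = [
      { id: "accounts/fireworks/models/llama-v3p1-8b-instruct", organization: "Meta", name: "Llama 3.1 8B Instruct" },
      { id: "accounts/fireworks/models/mixtral-8x7b-instruct", organization: "mistralai", name: "Mixtral MoE 8x7B Instruct" },
      { id: "accounts/fireworks/models/llama-v3p1-70b-instruct", organization: "Meta", name: "Llama 3.1 70B Instruct" },
    ];

    // Same reduce used by groupModels() and FireworksAiModelSelection.
    const grouped = models.reduce((acc, model) => {
      acc[model.organization] = acc[model.organization] || [];
      acc[model.organization].push(model);
      return acc;
    }, {});

    console.log(Object.keys(grouped)); // ["Meta", "mistralai"]
    console.log(grouped.Meta.length);  // 2
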
diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
--- a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
+++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx
@@ ... @@ import TogetherAILogo from "@/media/llmprovider/togetherai.png";
+import FireworksAILogo from "@/media/llmprovider/fireworksai.jpeg";
@@ ... @@ import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions";
+import FireworksAiOptions from "@/components/LLMSelection/FireworksAiOptions";
@@ ... @@ const AVAILABLE_LLM_PROVIDERS = [
+  {
+    name: "Fireworks AI",
+    value: "fireworksai",
+    logo: FireworksAILogo,
+    options: (settings) => <FireworksAiOptions settings={settings} />,
+    description:
+      "The fastest and most efficient inference engine to build production-ready, compound AI systems.",
+    requiredConfig: ["FireworksAiLLMApiKey"],
+  },
   {
     name: "Mistral",
     value: "mistral",
diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
index 372a65236..b739d502a 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx
@@ -8,6 +8,7 @@ import AnthropicLogo from "@/media/llmprovider/anthropic.png";
 import GeminiLogo from "@/media/llmprovider/gemini.png";
 import OllamaLogo from "@/media/llmprovider/ollama.png";
 import TogetherAILogo from "@/media/llmprovider/togetherai.png";
+import FireworksAILogo from "@/media/llmprovider/fireworksai.jpeg";
 import LMStudioLogo from "@/media/llmprovider/lmstudio.png";
 import LocalAiLogo from "@/media/llmprovider/localai.png";
 import MistralLogo from "@/media/llmprovider/mistral.jpeg";
@@ -107,6 +108,14 @@ export const LLM_SELECTION_PRIVACY = {
     ],
     logo: TogetherAILogo,
   },
+  fireworksai: {
+    name: "FireworksAI",
+    description: [
+      "Your chats will not be used for training",
+      "Your prompts and document text used in response creation are visible to Fireworks AI",
+    ],
+    logo: FireworksAILogo,
+  },
   mistral: {
     name: "Mistral",
     description: [
diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
index aab026896..52996b695 100644
---
a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -9,6 +9,7 @@ import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; import TogetherAILogo from "@/media/llmprovider/togetherai.png"; +import FireworksAILogo from "@/media/llmprovider/fireworksai.jpeg"; import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png"; import MistralLogo from "@/media/llmprovider/mistral.jpeg"; import HuggingFaceLogo from "@/media/llmprovider/huggingface.png"; @@ -33,6 +34,7 @@ import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions"; import MistralOptions from "@/components/LLMSelection/MistralOptions"; import HuggingFaceOptions from "@/components/LLMSelection/HuggingFaceOptions"; import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions"; +import FireworksAiOptions from "@/components/LLMSelection/FireworksAiOptions"; import PerplexityOptions from "@/components/LLMSelection/PerplexityOptions"; import OpenRouterOptions from "@/components/LLMSelection/OpenRouterOptions"; import GroqAiOptions from "@/components/LLMSelection/GroqAiOptions"; @@ -132,6 +134,14 @@ const LLMS = [ options: (settings) => , description: "Run open source models from Together AI.", }, + { + name: "Fireworks AI", + value: "fireworksai", + logo: FireworksAILogo, + options: (settings) => , + description: + "The fastest and most efficient inference engine to build production-ready, compound AI systems.", + }, { name: "Mistral", value: "mistral", diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx index 02c9a671c..00a0aef95 100644 --- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx @@ -22,6 +22,7 @@ const ENABLED_PROVIDERS = [ "textgenwebui", "generic-openai", "bedrock", + "fireworksai", // TODO: More agent support. // "cohere", // Has tool calling and will need to build explicit support // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested. 
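
One pattern worth noting before the server-side changes below: the chat provider (server/utils/AiProviders/fireworksAi), the aibitat agent provider, and the LangChain wiring in ai-provider.js all rely on Fireworks AI exposing an OpenAI-compatible endpoint, so the integration is the stock `openai` client with only the baseURL and key swapped. A minimal standalone sketch of that pattern (assumes `FIREWORKS_AI_LLM_API_KEY` is set; the model string is this patch's default preference):

    const { OpenAI } = require("openai");

    // Fireworks AI speaks the OpenAI chat-completions protocol; only the
    // baseURL and API key differ from a vanilla OpenAI client.
    const client = new OpenAI({
      baseURL: "https://api.fireworks.ai/inference/v1",
      apiKey: process.env.FIREWORKS_AI_LLM_API_KEY,
    });

    (async () => {
      const result = await client.chat.completions.create({
        model: "accounts/fireworks/models/llama-v3p1-8b-instruct",
        messages: [{ role: "user", content: "Say hello." }],
        temperature: 0.7,
      });
      console.log(result.choices[0].message.content);
    })();
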
diff --git a/locales/README.ja-JP.md b/locales/README.ja-JP.md index 952bf1f94..e273576af 100644 --- a/locales/README.ja-JP.md +++ b/locales/README.ja-JP.md @@ -82,6 +82,7 @@ AnythingLLMのいくつかのクールな機能 - [LM Studio (すべてのモデル)](https://lmstudio.ai) - [LocalAi (すべてのモデル)](https://localai.io/) - [Together AI (チャットモデル)](https://www.together.ai/) +- [Fireworks AI (チャットモデル)](https://fireworks.ai/) - [Perplexity (チャットモデル)](https://www.perplexity.ai/) - [OpenRouter (チャットモデル)](https://openrouter.ai/) - [Mistral](https://mistral.ai/) diff --git a/locales/README.zh-CN.md b/locales/README.zh-CN.md index fbdb4e05a..03e9ece13 100644 --- a/locales/README.zh-CN.md +++ b/locales/README.zh-CN.md @@ -78,6 +78,7 @@ AnythingLLM的一些酷炫特性 - [LM Studio (所有模型)](https://lmstudio.ai) - [LocalAi (所有模型)](https://localai.io/) - [Together AI (聊天模型)](https://www.together.ai/) +- [Fireworks AI (聊天模型)](https://fireworks.ai/) - [Perplexity (聊天模型)](https://www.perplexity.ai/) - [OpenRouter (聊天模型)](https://openrouter.ai/) - [Mistral](https://mistral.ai/) diff --git a/server/.env.example b/server/.env.example index f942d6832..80009cfe8 100644 --- a/server/.env.example +++ b/server/.env.example @@ -44,6 +44,10 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. # TOGETHER_AI_API_KEY='my-together-ai-key' # TOGETHER_AI_MODEL_PREF='mistralai/Mixtral-8x7B-Instruct-v0.1' +# LLM_PROVIDER='fireworksai' +# FIREWORKS_AI_LLM_API_KEY='my-fireworks-ai-key' +# FIREWORKS_AI_LLM_MODEL_PREF='accounts/fireworks/models/llama-v3p1-8b-instruct' + # LLM_PROVIDER='perplexity' # PERPLEXITY_API_KEY='my-perplexity-key' # PERPLEXITY_MODEL_PREF='codellama-34b-instruct' diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index c2c03ffa0..e3012ec4e 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -441,6 +441,10 @@ const SystemSettings = { TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY, TogetherAiModelPref: process.env.TOGETHER_AI_MODEL_PREF, + // Fireworks AI API Keys + FireworksAiLLMApiKey: !!process.env.FIREWORKS_AI_LLM_API_KEY, + FireworksAiLLMModelPref: process.env.FIREWORKS_AI_LLM_MODEL_PREF, + // Perplexity AI Keys PerplexityApiKey: !!process.env.PERPLEXITY_API_KEY, PerplexityModelPref: process.env.PERPLEXITY_MODEL_PREF, diff --git a/server/utils/AiProviders/fireworksAi/index.js b/server/utils/AiProviders/fireworksAi/index.js new file mode 100644 index 000000000..2332965fc --- /dev/null +++ b/server/utils/AiProviders/fireworksAi/index.js @@ -0,0 +1,135 @@ +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { + handleDefaultStreamResponseV2, +} = require("../../helpers/chat/responses"); + +function fireworksAiModels() { + const { MODELS } = require("./models.js"); + return MODELS || {}; +} + +class FireworksAiLLM { + constructor(embedder = null, modelPreference = null) { + if (!process.env.FIREWORKS_AI_LLM_API_KEY) + throw new Error("No FireworksAI API key was set."); + const { OpenAI: OpenAIApi } = require("openai"); + this.openai = new OpenAIApi({ + baseURL: "https://api.fireworks.ai/inference/v1", + apiKey: process.env.FIREWORKS_AI_LLM_API_KEY ?? null, + }); + this.model = modelPreference || process.env.FIREWORKS_AI_LLM_MODEL_PREF; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + this.embedder = !embedder ? 
new NativeEmbedder() : embedder; + this.defaultTemp = 0.7; + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + allModelInformation() { + return fireworksAiModels(); + } + + streamingEnabled() { + return "streamGetChatCompletion" in this; + } + + static promptWindowLimit(modelName) { + const availableModels = fireworksAiModels(); + return availableModels[modelName]?.maxLength || 4096; + } + + // Ensure the user set a value for the token limit + // and if undefined - assume 4096 window. + promptWindowLimit() { + const availableModels = this.allModelInformation(); + return availableModels[this.model]?.maxLength || 4096; + } + + async isValidChatCompletionModel(model = "") { + const availableModels = this.allModelInformation(); + return availableModels.hasOwnProperty(model); + } + + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [prompt, ...chatHistory, { role: "user", content: userPrompt }]; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `FireworksAI chat: ${this.model} is not valid for chat completion!` + ); + + const result = await this.openai.chat.completions.create({ + model: this.model, + messages, + temperature, + }); + + if (!result.hasOwnProperty("choices") || result.choices.length === 0) + return null; + return result.choices[0].message.content; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `FireworksAI chat: ${this.model} is not valid for chat completion!` + ); + + const streamRequest = await this.openai.chat.completions.create({ + model: this.model, + stream: true, + messages, + temperature, + }); + return streamRequest; + } + + handleStream(response, stream, responseProps) { + return handleDefaultStreamResponseV2(response, stream, responseProps); + } + + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +module.exports = { + FireworksAiLLM, + fireworksAiModels, +}; diff --git a/server/utils/AiProviders/fireworksAi/models.js b/server/utils/AiProviders/fireworksAi/models.js new file mode 100644 index 000000000..c28f98af4 --- /dev/null +++ b/server/utils/AiProviders/fireworksAi/models.js @@ -0,0 +1,112 @@ +const MODELS = { + "accounts/fireworks/models/llama-v3p1-405b-instruct": { + id: "accounts/fireworks/models/llama-v3p1-405b-instruct", + organization: "Meta", + name: "Llama 3.1 405B Instruct", + maxLength: 131072, + }, + "accounts/fireworks/models/llama-v3p1-70b-instruct": { + id: "accounts/fireworks/models/llama-v3p1-70b-instruct", + organization: "Meta", + name: "Llama 3.1 70B Instruct", + 
maxLength: 131072, + }, + "accounts/fireworks/models/llama-v3p1-8b-instruct": { + id: "accounts/fireworks/models/llama-v3p1-8b-instruct", + organization: "Meta", + name: "Llama 3.1 8B Instruct", + maxLength: 131072, + }, + "accounts/fireworks/models/llama-v3-70b-instruct": { + id: "accounts/fireworks/models/llama-v3-70b-instruct", + organization: "Meta", + name: "Llama 3 70B Instruct", + maxLength: 8192, + }, + "accounts/fireworks/models/mixtral-8x22b-instruct": { + id: "accounts/fireworks/models/mixtral-8x22b-instruct", + organization: "mistralai", + name: "Mixtral MoE 8x22B Instruct", + maxLength: 65536, + }, + "accounts/fireworks/models/mixtral-8x7b-instruct": { + id: "accounts/fireworks/models/mixtral-8x7b-instruct", + organization: "mistralai", + name: "Mixtral MoE 8x7B Instruct", + maxLength: 32768, + }, + "accounts/fireworks/models/firefunction-v2": { + id: "accounts/fireworks/models/firefunction-v2", + organization: "Fireworks AI", + name: "Firefunction V2", + maxLength: 8192, + }, + "accounts/fireworks/models/firefunction-v1": { + id: "accounts/fireworks/models/firefunction-v1", + organization: "Fireworks AI", + name: "FireFunction V1", + maxLength: 32768, + }, + "accounts/fireworks/models/gemma2-9b-it": { + id: "accounts/fireworks/models/gemma2-9b-it", + organization: "Google", + name: "Gemma 2 9B Instruct", + maxLength: 8192, + }, + "accounts/fireworks/models/llama-v3-70b-instruct-hf": { + id: "accounts/fireworks/models/llama-v3-70b-instruct-hf", + organization: "Hugging Face", + name: "Llama 3 70B Instruct (HF version)", + maxLength: 8192, + }, + "accounts/fireworks/models/llama-v3-8b-instruct": { + id: "accounts/fireworks/models/llama-v3-8b-instruct", + organization: "Hugging Face", + name: "Llama 3 8B Instruct", + maxLength: 8192, + }, + "accounts/fireworks/models/llama-v3-8b-instruct-hf": { + id: "accounts/fireworks/models/llama-v3-8b-instruct-hf", + organization: "Hugging Face", + name: "Llama 3 8B Instruct (HF version)", + maxLength: 8192, + }, + "accounts/fireworks/models/mixtral-8x7b-instruct-hf": { + id: "accounts/fireworks/models/mixtral-8x7b-instruct-hf", + organization: "Hugging Face", + name: "Mixtral MoE 8x7B Instruct (HF version)", + maxLength: 32768, + }, + "accounts/fireworks/models/mythomax-l2-13b": { + id: "accounts/fireworks/models/mythomax-l2-13b", + organization: "Gryphe", + name: "MythoMax L2 13b", + maxLength: 4096, + }, + "accounts/fireworks/models/phi-3-vision-128k-instruct": { + id: "accounts/fireworks/models/phi-3-vision-128k-instruct", + organization: "Microsoft", + name: "Phi 3.5 Vision Instruct", + maxLength: 8192, + }, + "accounts/fireworks/models/starcoder-16b": { + id: "accounts/fireworks/models/starcoder-16b", + organization: "BigCode", + name: "StarCoder 15.5B", + maxLength: 8192, + }, + "accounts/fireworks/models/starcoder-7b": { + id: "accounts/fireworks/models/starcoder-7b", + organization: "BigCode", + name: "StarCoder 7B", + maxLength: 8192, + }, + "accounts/fireworks/models/yi-01-ai/yi-large": { + id: "accounts/fireworks/models/yi-01-ai/yi-large", + organization: "01.AI", + name: "Yi-Large", + maxLength: 32768, + }, +}; + +module.exports.MODELS = MODELS; diff --git a/server/utils/AiProviders/fireworksAi/scripts/.gitignore b/server/utils/AiProviders/fireworksAi/scripts/.gitignore new file mode 100644 index 000000000..94a2dd146 --- /dev/null +++ b/server/utils/AiProviders/fireworksAi/scripts/.gitignore @@ -0,0 +1 @@ +*.json \ No newline at end of file diff --git a/server/utils/AiProviders/fireworksAi/scripts/chat_models.txt 
b/server/utils/AiProviders/fireworksAi/scripts/chat_models.txt new file mode 100644 index 000000000..a1afec9d6 --- /dev/null +++ b/server/utils/AiProviders/fireworksAi/scripts/chat_models.txt @@ -0,0 +1,20 @@ +| Organization | Model Name | Model String for API | Context length | +|--------------|------------|----------------------|----------------| +| Meta | Llama 3.1 405B Instruct | accounts/fireworks/models/llama-v3p1-405b-instruct | 131072 | +| Meta | Llama 3.1 70B Instruct | accounts/fireworks/models/llama-v3p1-70b-instruct | 131072 | +| Meta | Llama 3.1 8B Instruct | accounts/fireworks/models/llama-v3p1-8b-instruct | 131072 | +| Meta | Llama 3 70B Instruct | accounts/fireworks/models/llama-v3-70b-instruct | 8192 | +| mistralai | Mixtral MoE 8x22B Instruct | accounts/fireworks/models/mixtral-8x22b-instruct | 65536 | +| mistralai | Mixtral MoE 8x7B Instruct | accounts/fireworks/models/mixtral-8x7b-instruct | 32768 | +| Fireworks AI | Firefunction V2 | accounts/fireworks/models/firefunction-v2 | 8192 | +| Fireworks AI | FireFunction V1 | accounts/fireworks/models/firefunction-v1 | 32768 | +| Google | Gemma 2 9B Instruct | accounts/fireworks/models/gemma2-9b-it | 8192 | +| Hugging Face | Llama 3 70B Instruct (HF version) | accounts/fireworks/models/llama-v3-70b-instruct-hf | 8192 | +| Hugging Face | Llama 3 8B Instruct | accounts/fireworks/models/llama-v3-8b-instruct | 8192 | +| Hugging Face | Llama 3 8B Instruct (HF version) | accounts/fireworks/models/llama-v3-8b-instruct-hf | 8192 | +| Hugging Face | Mixtral MoE 8x7B Instruct (HF version) | accounts/fireworks/models/mixtral-8x7b-instruct-hf | 32768 | +| Gryphe | MythoMax L2 13b | accounts/fireworks/models/mythomax-l2-13b | 4096 | +| Microsoft | Phi 3.5 Vision Instruct | accounts/fireworks/models/phi-3-vision-128k-instruct | 8192 | +| BigCode | StarCoder 15.5B | accounts/fireworks/models/starcoder-16b | 8192 | +| BigCode | StarCoder 7B | accounts/fireworks/models/starcoder-7b | 8192 | +| 01.AI | Yi-Large | accounts/fireworks/models/yi-01-ai/yi-large | 32768 | \ No newline at end of file diff --git a/server/utils/AiProviders/fireworksAi/scripts/parse.mjs b/server/utils/AiProviders/fireworksAi/scripts/parse.mjs new file mode 100644 index 000000000..33581d159 --- /dev/null +++ b/server/utils/AiProviders/fireworksAi/scripts/parse.mjs @@ -0,0 +1,46 @@ +// Fireworks AI does not provide a simple REST API to get models, +// so we have a table which we copy from their documentation +// at https://fireworks.ai/models that we can +// then parse and get all models from in a format that makes sense +// Why this does not exist is so bizarre, but whatever. + +// To run, cd into this directory and run `node parse.mjs` +// copy outputs into the export in ../models.js + +// Update the date below if you run this again because Fireworks AI added new models. + +// Last Collected: Sep 15, 2024 +// NOTE: Only managed to collect 18 out of ~100 models! +// https://fireworks.ai/models lists almost 100 chat language models. +// If you want to add models, please manually add them to chat_models.txt... +// ... I tried to write a script to grab them all but gave up after a few hours... 
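+
+// Illustrative example (derived from the table above): a chat_models.txt row like
+//   | Meta | Llama 3.1 8B Instruct | accounts/fireworks/models/llama-v3p1-8b-instruct | 131072 |
+// is parsed into the following chat_models.json / models.js entry:
+//   "accounts/fireworks/models/llama-v3p1-8b-instruct": {
+//     id: "accounts/fireworks/models/llama-v3p1-8b-instruct",
+//     organization: "Meta",
+//     name: "Llama 3.1 8B Instruct",
+//     maxLength: 131072,
+//   },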
+ +import fs from "fs"; + +function parseChatModels() { + const fixed = {}; + const tableString = fs.readFileSync("chat_models.txt", { encoding: "utf-8" }); + const rows = tableString.split("\n").slice(2); + + rows.forEach((row) => { + const [provider, name, id, maxLength] = row.split("|").slice(1, -1); + const data = { + provider: provider.trim(), + name: name.trim(), + id: id.trim(), + maxLength: Number(maxLength.trim()), + }; + + fixed[data.id] = { + id: data.id, + organization: data.provider, + name: data.name, + maxLength: data.maxLength, + }; + }); + + fs.writeFileSync("chat_models.json", JSON.stringify(fixed, null, 2), "utf-8"); + return fixed; +} + +parseChatModels(); diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index 56da25eb1..90d6069c0 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -781,6 +781,8 @@ ${this.getHistory({ to: route.to }) return new Providers.TextWebGenUiProvider({}); case "bedrock": return new Providers.AWSBedrockProvider({}); + case "fireworksai": + return new Providers.FireworksAIProvider({ model: config.model }); default: throw new Error( diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js index 034c67ad0..23d107647 100644 --- a/server/utils/agents/aibitat/providers/ai-provider.js +++ b/server/utils/agents/aibitat/providers/ai-provider.js @@ -125,6 +125,11 @@ class Provider { }, ...config, }); + case "fireworksai": + return new ChatOpenAI({ + apiKey: process.env.FIREWORKS_AI_LLM_API_KEY, + ...config, + }); // OSS Model Runners // case "anythingllm_ollama": diff --git a/server/utils/agents/aibitat/providers/fireworksai.js b/server/utils/agents/aibitat/providers/fireworksai.js new file mode 100644 index 000000000..5802fc6d7 --- /dev/null +++ b/server/utils/agents/aibitat/providers/fireworksai.js @@ -0,0 +1,118 @@ +const OpenAI = require("openai"); +const Provider = require("./ai-provider.js"); +const InheritMultiple = require("./helpers/classes.js"); +const UnTooled = require("./helpers/untooled.js"); + +/** + * The agent provider for the FireworksAI provider. + * We wrap FireworksAI in UnTooled because its tool-calling may not be supported for specific models and this normalizes that. + */ +class FireworksAIProvider extends InheritMultiple([Provider, UnTooled]) { + model; + + constructor(config = {}) { + const { model = "accounts/fireworks/models/llama-v3p1-8b-instruct" } = + config; + super(); + const client = new OpenAI({ + baseURL: "https://api.fireworks.ai/inference/v1", + apiKey: process.env.FIREWORKS_AI_LLM_API_KEY, + maxRetries: 0, + }); + + this._client = client; + this.model = model; + this.verbose = true; + } + + get client() { + return this._client; + } + + async #handleFunctionCallChat({ messages = [] }) { + return await this.client.chat.completions + .create({ + model: this.model, + temperature: 0, + messages, + }) + .then((result) => { + if (!result.hasOwnProperty("choices")) + throw new Error("FireworksAI chat: No results!"); + if (result.choices.length === 0) + throw new Error("FireworksAI chat: No results length!"); + return result.choices[0].message.content; + }) + .catch((_) => { + return null; + }); + } + + /** + * Create a completion based on the received messages. + * + * @param messages A list of messages to send to the API. + * @param functions + * @returns The completion. 
+   */
+  async complete(messages, functions = []) {
+    try {
+      let completion;
+      if (functions.length > 0) {
+        const { toolCall, text } = await this.functionCall(
+          messages,
+          functions,
+          this.#handleFunctionCallChat.bind(this)
+        );
+
+        if (toolCall !== null) {
+          this.providerLog(`Valid tool call found - running ${toolCall.name}.`);
+          this.deduplicator.trackRun(toolCall.name, toolCall.arguments);
+          return {
+            result: null,
+            functionCall: {
+              name: toolCall.name,
+              arguments: toolCall.arguments,
+            },
+            cost: 0,
+          };
+        }
+        completion = { content: text };
+      }
+
+      if (!completion?.content) {
+        this.providerLog(
+          "Will assume chat completion without tool call inputs."
+        );
+        const response = await this.client.chat.completions.create({
+          model: this.model,
+          messages: this.cleanMsgs(messages),
+        });
+        completion = response.choices[0].message;
+      }
+
+      // The UnTooled class inherited Deduplicator is mostly useful to prevent the agent
+      // from calling the exact same function over and over in a loop within a single chat exchange
+      // _but_ we should enable it to call previously used tools in a new chat interaction.
+      this.deduplicator.reset("runs");
+      return {
+        result: completion.content,
+        cost: 0,
+      };
+    } catch (error) {
+      throw error;
+    }
+  }
+
+  /**
+   * Get the cost of the completion.
+   *
+   * @param _usage The completion to get the cost for.
+   * @returns The cost of the completion.
+   */
+  getCost(_usage) {
+    return 0;
+  }
+}
+
+module.exports = FireworksAIProvider;
diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js
index 6f784ae5f..dd95bb54a 100644
--- a/server/utils/agents/aibitat/providers/index.js
+++ b/server/utils/agents/aibitat/providers/index.js
@@ -13,6 +13,7 @@ const GenericOpenAiProvider = require("./genericOpenAi.js");
 const PerplexityProvider = require("./perplexity.js");
 const TextWebGenUiProvider = require("./textgenwebui.js");
 const AWSBedrockProvider = require("./bedrock.js");
+const FireworksAIProvider = require("./fireworksai.js");

 module.exports = {
   OpenAIProvider,
@@ -30,4 +31,5 @@
   PerplexityProvider,
   TextWebGenUiProvider,
   AWSBedrockProvider,
+  FireworksAIProvider,
 };
diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js
index 521b9e9ca..9fdfdd1ba 100644
--- a/server/utils/agents/index.js
+++ b/server/utils/agents/index.js
@@ -156,6 +156,15 @@ class AgentHandler {
           "AWS Bedrock Access Keys, model and region must be provided to use agents."
         );
         break;
+      case "fireworksai":
+        if (
+          !process.env.FIREWORKS_AI_LLM_API_KEY ||
+          !process.env.FIREWORKS_AI_LLM_MODEL_PREF
+        )
+          throw new Error(
+            "FireworksAI API Key & model must be provided to use agents."
+          );
+        break;

       default:
         throw new Error(
@@ -198,6 +207,8 @@ class AgentHandler {
         return null;
       case "bedrock":
         return null;
+      case "fireworksai":
+        return null;
       default:
         return "unknown";
     }
diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js
index 27afa150f..a25896ef4 100644
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@@ -1,6 +1,7 @@
 const { fetchOpenRouterModels } = require("../AiProviders/openRouter");
 const { perplexityModels } = require("../AiProviders/perplexity");
 const { togetherAiModels } = require("../AiProviders/togetherAi");
+const { fireworksAiModels } = require("../AiProviders/fireworksAi");
 const { ElevenLabsTTS } = require("../TextToSpeech/elevenLabs");
 const SUPPORT_CUSTOM_MODELS = [
   "openai",
@@ -8,6 +9,7 @@ const SUPPORT_CUSTOM_MODELS = [
   "ollama",
   "native-llm",
   "togetherai",
+  "fireworksai",
   "mistral",
   "perplexity",
   "openrouter",
@@ -31,6 +33,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
       return await ollamaAIModels(basePath);
     case "togetherai":
       return await getTogetherAiModels();
+    case "fireworksai":
+      return await getFireworksAiModels(apiKey);
     case "mistral":
       return await getMistralModels(apiKey);
     case "native-llm":
@@ -304,6 +308,21 @@ async function getTogetherAiModels() {
   return { models, error: null };
 }

+async function getFireworksAiModels() {
+  const knownModels = fireworksAiModels();
+  if (Object.keys(knownModels).length === 0)
+    return { models: [], error: null };
+
+  const models = Object.values(knownModels).map((model) => {
+    return {
+      id: model.id,
+      organization: model.organization,
+      name: model.name,
+    };
+  });
+  return { models, error: null };
+}
+
 async function getPerplexityModels() {
   const knownModels = perplexityModels();
   if (!Object.keys(knownModels).length === 0)
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 6ec0b2a31..71e352c30 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -120,6 +120,9 @@ function getLLMProvider({ provider = null, model = null } = {}) {
     case "togetherai":
       const { TogetherAiLLM } = require("../AiProviders/togetherAi");
       return new TogetherAiLLM(embedder, model);
+    case "fireworksai":
+      const { FireworksAiLLM } = require("../AiProviders/fireworksAi");
+      return new FireworksAiLLM(embedder, model);
     case "perplexity":
       const { PerplexityLLM } = require("../AiProviders/perplexity");
       return new PerplexityLLM(embedder, model);
@@ -240,6 +243,9 @@ function getLLMProviderClass({ provider = null } = {}) {
     case "togetherai":
       const { TogetherAiLLM } = require("../AiProviders/togetherAi");
       return TogetherAiLLM;
+    case "fireworksai":
+      const { FireworksAiLLM } = require("../AiProviders/fireworksAi");
+      return FireworksAiLLM;
     case "perplexity":
       const { PerplexityLLM } = require("../AiProviders/perplexity");
       return PerplexityLLM;
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index bd29a746f..7b70efa23 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -350,6 +350,16 @@ const KEY_MAPPING = {
     checks: [isNotEmpty],
   },

+  // Fireworks AI Options
+  FireworksAiLLMApiKey: {
+    envKey: "FIREWORKS_AI_LLM_API_KEY",
+    checks: [isNotEmpty],
+  },
+  FireworksAiLLMModelPref: {
+    envKey: "FIREWORKS_AI_LLM_MODEL_PREF",
+    checks: [isNotEmpty],
+  },
+
   // Perplexity Options
   PerplexityApiKey: {
     envKey: "PERPLEXITY_API_KEY",
@@ -580,6 +590,7 @@ function supportedLLM(input = "") {
     "ollama",
     "native",
"togetherai", + "fireworksai", "mistral", "huggingface", "perplexity",