From 1d39b8a2ceca4bb62362eb5b7ad8f66750a317a5 Mon Sep 17 00:00:00 2001 From: Sean Hatfield Date: Wed, 10 Jan 2024 12:35:30 -0800 Subject: [PATCH] add Together AI LLM support (#560) * add Together AI LLM support * update readme to support together ai * Patch togetherAI implementation * add model sorting/option labels by organization for model selection * linting + add data handling for TogetherAI * change truthy statement patch validLLMSelection method --------- Co-authored-by: timothycarambat --- README.md | 31 ++- docker/.env.example | 4 + .../LLMSelection/TogetherAiOptions/index.jsx | 95 ++++++++ frontend/src/media/llmprovider/togetherai.png | Bin 0 -> 8356 bytes .../GeneralSettings/LLMPreference/index.jsx | 9 + .../Steps/DataHandling/index.jsx | 15 +- .../Steps/LLMPreference/index.jsx | 9 + server/.env.example | 4 + server/models/systemSettings.js | 18 ++ server/utils/AiProviders/togetherAi/index.js | 198 +++++++++++++++ server/utils/AiProviders/togetherAi/models.js | 226 ++++++++++++++++++ .../AiProviders/togetherAi/scripts/.gitignore | 1 + .../togetherAi/scripts/chat_models.txt | 39 +++ .../AiProviders/togetherAi/scripts/parse.mjs | 41 ++++ server/utils/chats/stream.js | 90 +++++++ server/utils/helpers/customModels.js | 26 +- server/utils/helpers/index.js | 3 + server/utils/helpers/updateENV.js | 14 +- 18 files changed, 809 insertions(+), 14 deletions(-) create mode 100644 frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx create mode 100644 frontend/src/media/llmprovider/togetherai.png create mode 100644 server/utils/AiProviders/togetherAi/index.js create mode 100644 server/utils/AiProviders/togetherAi/models.js create mode 100644 server/utils/AiProviders/togetherAi/scripts/.gitignore create mode 100644 server/utils/AiProviders/togetherAi/scripts/chat_models.txt create mode 100644 server/utils/AiProviders/togetherAi/scripts/parse.mjs diff --git a/README.md b/README.md index 62d58d87..5af9278b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 
@@ +

AnythingLLM logo

@@ -38,13 +39,14 @@ A full-stack application that enables you to turn any document, resource, or pie - ### Product Overview + AnythingLLM is a full-stack application where you can use commercial off-the-shelf LLMs or popular open source LLMs and vectorDB solutions to build a private ChatGPT with no compromises that you can run locally as well as host remotely and be able to chat intelligently with any documents you provide it. AnythingLLM divides your documents into objects called `workspaces`. A Workspace functions a lot like a thread, but with the addition of containerization of your documents. Workspaces can share documents, but they do not talk to each other so you can keep your context for each workspace clean. Some cool features of AnythingLLM + - **Multi-user instance support and permissioning** - Multiple document type support (PDF, TXT, DOCX, etc) - Manage documents in your vector database from a simple UI @@ -57,7 +59,9 @@ Some cool features of AnythingLLM - Full Developer API for custom integrations! 
### Supported LLMs, Embedders, and Vector Databases + **Supported LLMs:** + - [Any open-source llama.cpp compatible model](/server/storage/models/README.md#text-generation-llm-selection) - [OpenAI](https://openai.com) - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) @@ -66,8 +70,10 @@ Some cool features of AnythingLLM - [Ollama (chat models)](https://ollama.ai/) - [LM Studio (all models)](https://lmstudio.ai) - [LocalAi (all models)](https://localai.io/) +- [Together AI (chat models)](https://www.together.ai/) **Supported Embedding models:** + - [AnythingLLM Native Embedder](/server/storage/models/README.md) (default) - [OpenAI](https://openai.com) - [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) @@ -75,42 +81,43 @@ Some cool features of AnythingLLM - [LocalAi (all)](https://localai.io/) **Supported Vector Databases:** + - [LanceDB](https://github.com/lancedb/lancedb) (default) - [Pinecone](https://pinecone.io) - [Chroma](https://trychroma.com) - [Weaviate](https://weaviate.io) - [QDrant](https://qdrant.tech) - ### Technical Overview + This monorepo consists of three main sections: + - `frontend`: A viteJS + React frontend that you can run to easily create and manage all your content the LLM can use. - `server`: A NodeJS express server to handle all the interactions and do all the vectorDB management and LLM interactions. - `docker`: Docker instructions and build process + information for building from source. - `collector`: NodeJS express server that process and parses documents from the UI. ## 🛳 Self Hosting -Mintplex Labs & the community maintain a number of deployment methods, scripts, and templates that you can use to run AnythingLLM locally. Refer to the table below to read how to deploy on your preferred environment or to automatically deploy. 
-| Docker | AWS | GCP | Digital Ocean | Render.com | -|----------------------------------------|----:|-----|---------------|------------| -| [![Deploy on Docker][docker-btn]][docker-deploy] | [![Deploy on AWS][aws-btn]][aws-deploy] | [![Deploy on GCP][gcp-btn]][gcp-deploy] | [![Deploy on DigitalOcean][do-btn]][aws-deploy] | [![Deploy on Render.com][render-btn]][render-deploy] | +Mintplex Labs & the community maintain a number of deployment methods, scripts, and templates that you can use to run AnythingLLM locally. Refer to the table below to read how to deploy on your preferred environment or to automatically deploy. +| Docker | AWS | GCP | Digital Ocean | Render.com | +|----------------------------------------|----:|-----|---------------|------------| +| [![Deploy on Docker][docker-btn]][docker-deploy] | [![Deploy on AWS][aws-btn]][aws-deploy] | [![Deploy on GCP][gcp-btn]][gcp-deploy] | [![Deploy on DigitalOcean][do-btn]][aws-deploy] | [![Deploy on Render.com][render-btn]][render-deploy] | ## How to setup for development + - `yarn setup` To fill in the required `.env` files you'll need in each of the application sections (from root of repo). - Go fill those out before proceeding. Ensure `server/.env.development` is filled or else things won't work right. - `yarn dev:server` To boot the server locally (from root of repo). - `yarn dev:frontend` To boot the frontend locally (from root of repo). - `yarn dev:collector` To then run the document collector (from root of repo). - - - [Learn about documents](./server/storage/documents/DOCUMENTS.md) [Learn about vector caching](./server/storage/vector-cache/VECTOR_CACHE.md) ## Contributing + - create issue - create PR with branch name format of `-` - yee haw let's merge @@ -119,12 +126,15 @@ Mintplex Labs & the community maintain a number of deployment methods, scripts, Telemetry for AnythingLLM ## Telemetry + AnythingLLM by Mintplex Labs Inc contains a telemetry feature that collects anonymous usage information. ### Why? 
+ We use this information to help us understand how AnythingLLM is used, to help us prioritize work on new features and bug fixes, and to help us improve AnythingLLM's performance and stability. ### Opting out + Set `DISABLE_TELEMETRY` in your server or docker .env settings to "true" to opt out of telemetry. ``` @@ -132,7 +142,9 @@ DISABLE_TELEMETRY="true" ``` ### What do you explicitly track? + We will only track usage details that help us make product and roadmap decisions, specifically: + - Version of your installation - When a document is added or removed. No information _about_ the document. Just that the event occurred. This gives us an idea of use. - Type of vector database in use. Let's us know which vector database provider is the most used to prioritize changes when updates arrive for that provider. @@ -160,6 +172,7 @@ Copyright © 2023 [Mintplex Labs][profile-link].
This project is [MIT](./LICENSE) licensed. + [back-to-top]: https://img.shields.io/badge/-BACK_TO_TOP-222628?style=flat-square [profile-link]: https://github.com/mintplex-labs [vector-admin]: https://github.com/mintplex-labs/vector-admin diff --git a/docker/.env.example b/docker/.env.example index 9b2b24c3..2f8b2ff3 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -40,6 +40,10 @@ GID='1000' # OLLAMA_MODEL_PREF='llama2' # OLLAMA_MODEL_TOKEN_LIMIT=4096 +# LLM_PROVIDER='togetherai' +# TOGETHER_AI_API_KEY='my-together-ai-key' +# TOGETHER_AI_MODEL_PREF='mistralai/Mixtral-8x7B-Instruct-v0.1' + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx b/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx new file mode 100644 index 00000000..e526b3af --- /dev/null +++ b/frontend/src/components/LLMSelection/TogetherAiOptions/index.jsx @@ -0,0 +1,95 @@ +import System from "@/models/system"; +import { useState, useEffect } from "react"; + +export default function TogetherAiOptions({ settings }) { + return ( +
+
+ + +
+ +
+ ); +} +function TogetherAiModelSelection({ settings }) { + const [groupedModels, setGroupedModels] = useState({}); + const [loading, setLoading] = useState(true); + + useEffect(() => { + async function findCustomModels() { + setLoading(true); + const { models } = await System.customModels("togetherai"); + + if (models?.length > 0) { + const modelsByOrganization = models.reduce((acc, model) => { + acc[model.organization] = acc[model.organization] || []; + acc[model.organization].push(model); + return acc; + }, {}); + + setGroupedModels(modelsByOrganization); + } + + setLoading(false); + } + findCustomModels(); + }, []); + + if (loading || Object.keys(groupedModels).length === 0) { + return ( +
+ + +
+ ); + } + + return ( +
+ + +
+ ); +} diff --git a/frontend/src/media/llmprovider/togetherai.png b/frontend/src/media/llmprovider/togetherai.png new file mode 100644 index 0000000000000000000000000000000000000000..95e796a6b8209b7b932cf93751e4dce33b9729d1 GIT binary patch literal 8356 zcmeHN_fwNyx21?65fBxW{t6-j0i=sGQFr`}ocMAMV_l?}ukH$>ijm*=O&y*IGNDUg)UN(X!H#k&)49s4MG}k&%D- z`_NDTpD;{ir2;SHp89If$j7?DA;6n!uhdOF$;d#r{yyYnDQS1f$o|pLP*yU0leRf^ z9KtkI-?_V=IqCWNfd1V98AaCntoOJ1xhW{JU^n2|6y$m-12GcM5dzwIQFJr{{8#SZ zD0-T!to~(Ety+4yzt=3_sDBzmnR)DdNs+AJ+*~od!B6U;CgUX@qVJHAU2vnxzAIUi zzj%+K_;xjv>M0!s=q>9NW->_{FqzX0DB0jY|NX*$1(Eox(orU zH~jc?GXg_;8V=V6g^n@{1Q3EVq`cs6wFJ_|KJFv#Bj(v+U#htDatLwre^=NL%KL{( zE!%$N6dR%{1^D^r_qowG{H_cq@Q8@$jvHj0*PAT~u&}VirE`Rab@8Ax*wyH{gipOh z@ms;}J&9fYF~E;9LUm&hSd*_uRf!{=pFeGEWV1nix2LsrA^YHhkW0y;3f0kEmhC#M zn7DY5PAJCTaz3*#^DKtH8nDX!V*{1+$EeF+yJfhmP-KlV&={8N_f1k@nZ# zhSs>x#B4PFXA#X%X?jDw&zgCu*Xq-@A|;oNqTJlZ#>T%Pd3P%`?9t-#vV)ynpy$r) zk43vxK2^Bz73MS=uw40nmcOz4&kj3X+~`?l>$y0lIZXW?DP3Jx+GsNTaLsD>=70#u zcWbh{tLxn@nyBu@7w`~Mq9!LND+dQ9 zi%oRz9{aQ?1j+iYJQUxY+W)23rq-J&yWQa4(%`r6FgNEuSvyx(J~lRXyfp=a50?i4 z5p0>Pa(0;i9M=|p?DxaO8WG^Nj(lXNK3@r6A1O(2<%X#unnpd) zmLHjqRV2RB!iQK-H3gU$J5Hrk-J0Q4{1!qgO@HF+G-Ekzf6%2D#Vjm$u@GD1HcbmZ z|Jrp`#&2(tus(wOs$e0N*vpNcvZIVqzx&|Oairfu-{{$;4YtxTtYNsDWM^-$;fyuOe%Zq zae^MCB_;ILd1F06%1$#(%yXC`gB*uB{bGM_gI_|7`<1q{Pfv*r+ZVm+)qi3(C+uO= z!5USG0!#w*tdC|f#7knV4M+wPOk6GC6lr`>o%p)>m+ec|A$PO@f8p$dM9~~ozfu&t z6fyQjauqv9EE4qxPa?+JBd|%rJZi-JXtv8qDjuWZABI~2P84~I_+N4(FAs~1V&G@j zQ3sWkN$1V?i*3i^UCpW|8v=CobY$WS*rZ*)(CKGsnPOEFv;{PTQjfdhc*+ZdbOJZn z-2L5d@jK!&gya^J#T`4EiK;`I2oTPYLSo~P_7l5^K3Bc(rqc`!RjPcoPx^5|qafj? 
zBA?+{K?clv{M>-2nVXRw^=*Rt-?0|=%phj{WIJi;SZD6kA(9>Bj>tX2m&C-xY*hM& zBAn`EY$>#fsGq42I^)X=m5+#%io)40L~OSt9p&)y$}`Uv-o2K?6>{=3xDsMe^RdCD zJNs7*{3;z7&ME?{KS?Gt@%|S)n|7E2&7)es9s)q^R( z@HDl2p|5LIDOyEmJ?F3(Y~>8J)^Jox`F^CD-Ow)SaGKrulrN%IBjbaBQ7_QF??(yL~$dIp84 z!P#kfD=#abA77YzTB$&}nM~8{yf(JZLq`_|#mhn)+FNYWZt4rIgKGO$;q;u`(X2RA zxmiq#Y(PIy$~z8&B@mNL`G4V~nq26Q>%7Ulo1k;V2gYk{S~(D1;|>nFS6}Rz_2|Hb zX70^L{-bdgwS#4BsR)F93S;Qf+{x}jew)JQy|b^PL=pJr!Ja?u-ZLaTU@utD08W8E+mt!7`Y2I@; zXNs!e&GE)uxC$d*Q!2Q-rkV!#Q_cO_2l-aVwBc4iTApJ4As<&Rh zZcm$45*huE{97MfCq(igtUNIYuC4fypB`LD8y2-xR5VE0i3y!AW!I;gsP>z-q;fV4 zFlOb!B83sdzw0HP^LS^nH4M&^&6%wi!s(8N;*7sG9Z~1=&^fSZ`K0zr+jQONjuGS4 zJd?SYht-JooedTm#8@TLCehmW1WPy@FR$XMf?%eGEcUInHQK&<8gV@PJqW=V77csv z4GIH>!AfkrajBOU-qD)M$9$bxh`M<%vhvg7Y@uG2*>B{Z&#QQ$2AA_{S0w*nv4M%1 z{6GVLt=F6N<&w%l!__CI!UtYXGd~Bjs;?kdTkYp=Hepy5-O}K)iNcmIS7z_&!)SFz zJ$xDgJm34Xc&FhQ4R)QGZjvYDJx2H2rwf_`z{Q8}Q8X#b0D$gwjsN|(U^cyMCHg54 z0Vmth)7>3cFWC9k-dv`5R5pCz{*O$b#oJB3#~Cvrq^2F{P3}Ve!fmaT`;u*zR=i+2 zf*{*miF9X5htuS&48Cp)r$|b4wXAUv!q;vI59L9U*c&i|`p(RE2zA)Wnu(KZD6^sB zGOLEI`Cm~~WA`GWH#(OfNg7k=&ky%f#R<}0_PVb=%0^Ak*P-OPWY*`#yh-}IY#$gEc2v`z)p`l!VnQkY@YZA;js zDO=#yBeG{!gE!ZVAcu%s`Ikmmk40=ZUjLlbFglig>Nq@7YU&^)>)x~2eKd1IzcJ&! 
z?e>)dl5L#7EfaZQKUs1=!YY_JHdu}L`sG7;Ea#By6y*{kSR}QaheTuuM8*^y{MKdh z4+DCk_^%HV0~`b<=`pK(u6GUIt80EDOY62Bqa&5yx=fV^Hls>lxD5Gql0w-IyNpM> z*P+u6MoG!tDFGo_@~c{;`4xk%^`$LlbrfzdLitNTtw2zk*98*i?l(MMob7!6`6^Ne{GL`w8aCONEs^D2Ho=72Ei9o zR7r0#I(2Vc8CC=zqp!cIYr%;l(uTE~#8>q=_7d`U_gDL3`R zdp)V%r$MDkyQjF_h%PajAZ(xU^Yd@H!>&fMCmg-b;eZ;<1SqcDF0ZIqACa^ax2y{c z3rIwvz4XrTzfz0eoe<#7g0lvL~Zo8 zD`!>&dtK~dMe;e9Cfu8rI5RM36C+kZ_N?+NLP_*iL8p0zAtq%y7RR8Q39%1gPFV}0 zqO!?}@Z*51$b)6A3i?$9(bEwpCDhWS#yo^6GHR;dUdpsh=}PQQm}*NkD<7z;oLt#< zILkww7>I72M{kBQ%@lc8ikPUh?Jjg`C%x#5=2-u`ZEB^WcFu%5!83vQZdEw6oEMT; zkS7yf6UbR^({y&P&+JJo&e*TR(OQGncXhP8GL3Hle~CNZKU~)?M*A&1z<$)DLuE|W zpQn9XdiL3_-qsqfoFrg8={Y+(?j6`KI$QN8M|^Fj1EX@#Z?jdmy}m}Y_8 z0eg6^n*@G+{Q{sS`Js*>CpPN)2-O7kH3iD`-ehs4tRjz5INyCJ^W&n>B@%)MR`cNo zw83X9@<=ry{$R{8=G2U$`1ynnV+bScjoVaXl|6pBpW}%pq~)V7m=UU)By1rTEx0-K zVkRKTy#%&J*cg}Raz|B_vNgOHn`&93M*;NuTTLo+)89g|hv%?Dx%yI;Jh|^tA(b^M zrOiH%SOg$7c`YEB&aCn`jevZBkD>hjWM(qlrocX2XupQ9H4-qqZfgTJX=iVNLiqXz z*boUu-z$*SHH6&BP(+}F47cv)c$9>c5QJ?A6Ti?&P<}>>`WDNgmJ2pVV??bW*7B1{ zN?4Z5uDP}&$E&RmuORg_H6!8X@Uie8j|NyowSVv0ZXT|W2r;}ntvl@hc~ErA*V=T8 zGSb>FZw8CZ3sRn5<bIs@Vrsiqz*~9hWuqW$ByRPzlOZ2Gimh&xg_BYS1I9yE4D6oc;Wew;n z#=q6pa|i$%2_DcK-$V2u+;6Z6RyC>M9jNkNzbDkLY=M<1*Ug7z@U;M+Zmms&`+fiY znkQRJuGz>LkIB&Gg95rKWgf=??TnAMXjMIAL%}MfxK*BjkW|BWp&-T6c8OO&tK+Gf z>Q9qd_*%w`u6CoZ%gpRq zM}zRjSH>s=4Bs|7d*hYfnc~*JB)xA2^?WcehNo zix;6qIbn5c&1Y?pAMYzT8Opoa>h_(ovjw+n?f3J zQa(G1^h0N9JMp)~f8KP_8E))r($-t4R_T(@_0m&+55dtuz!7TUYUGX?~dQORw z+oARqbNe{$oRPq4fAyKhi?iY~@JRTiP!dlBL#hMEt)4#RmxDWuL`ulPI(|f=G1C`& zJG}!_@4KC`+Vp+^O0ytko5@*)seeUmT=fW;kirEt2jlwoUfc<__asirVC}&qU5bgB z;n_M<``g%K>h+Q6LqA6C{tIO6d%vs5)hv5{fMg!j^{8ZtqLp!OQ`&81u!`aBo`@Aa z5GsLBH)T3?955PWY662IWnPa;sWt`={1_t`_ zo#lzPIqipNDElZCY>F`eSDqN+~hyjDM@7vaPmOQ z!;pueya8hpfvHF#5zEFE$9%w+$Y2xNnfE*1`2#{tHeQsq;J6fcS{7C=e0cV)&L4&B zPhX^2TWQP0Ww`kl`j&CZYb%1DKrO{D7q22E%rCWU0R529uWm52FaC%gY1xzrfJ}-d z8Mn~mb4ogoo^o@l7NA=#F}+1%;-{lm!zT4r>Df3BqIt!__!Ib2nH4tfoJhh4AFsf| 
zDLFN*yB$QEDqcv1vBQ<{#?`6pw)x5a#PPd%n0V`yiYF~Lq>HmlB0rsFf98%xaE3P_ zlXz%i5T#IU?5b#$jj#b*x%>4}3d+h(R4(gBylYOCBN5v$`MwqHlOd{|L{A$xw^Bnl zh#ktJsY9YPEe6c6azzuk&h63Wsa*w)PesveQUDcISfMn}2<25sk;FloMasy#_vKyEEBjyHKCvh|>?s@Oz2kXW}=WD4Zl`{vS-12J~~@|a*&QwBkFF6-cT zvpxx=1pCbb?F6Zh=iPoX{RPd(&GwyrT4{5pM%j!|lMtM{9EPWaLZv>UC3!n1-l!t* zSvH`306R>3K{MpxirS`K&=ik?LMX3R$-&oiX_W8gpy`t!kN%kk$FfRIxfy@(lrSFV z42tx6)Y-hdJlqG1;pKw}&Q>r9HUJ9VKL_?ree=5inB~o%dtF|CqWIB!+rH}BB6^_2 zo+Ux`OhdDe`c!#=bD2e4*L!$wqNt3wRU$HDpT7s@>7ql_MmhYzSxE zk>q@7J=S8$IPFtmE)cD1?KW+Hofdn+^7ay6z4LKq^`ZkoMeFyVWk)zYuwP?9LmPpsWm3SI2-yO|KwpXw~tnI5X(W zy><;1b7zC#L%%-@e9OR={TcGkbxvCkf!cj91YgoTFKi=Z$=dxc&IEXExY|JGr>gq{Sc`|bC zd3bG$vjtRwVtxGZLbdFJAD!USqO_%mQ#nIaI0Vn`yWOUi`E1`g+G1{13z(pDzB9zb)bU@SJ6b01fDB{x zYECZDg=MtUn^BmoJim-2bkMN{>X80i2fqeV0za{Pd0RuUutq85cH}_WhixIztHYC$ zr@^c}5g`|%jSv=(cUk~L@Gh3${uO|t5G0XU)q&6{n3wky&Z<}*iU?^b0_;m&|D}6IOfr0j8^cosI(9u;RI1Erg^5gvpv$P@ zNf0z_b>hzXf5$R00NsesDX1`B$ zzdBd=YE$L?YhnjOKL6|5;2YZM5+mGi+(JnV>Ou$zrJ70QXR7TBFx9NqvJ0CEpi*HtP2W+ombtyRs}eWSc*HS+ABW@nvzjbo-+!k zbJP`b)PMPOwhOr~gD?ObEs?W}-3c&h_tQ7i7l4KQ37-#OnW*nk8qOH?*LPG^?MRpP zz0lQWbxRVlng%dR>z+;57oE)s2phDYtX$ikLE~dr`x2PXqA22gH~ud>phv74Tn0bO zf7^~!CS`|#)b3vo$ns3WoZm~8Z~_-7!=LG7Jl2qMFmrNJhYfH8mcLK3;ZhigSzW4| z0&9#g9ssmM^JEpoCz*iP8!59=w4EjCr{tEvxdDe61Q*IJ>awX0nNcu3v3VZhv-Mj~ zK*PF0qnrU?-DwxOMe3AtgHfsFvRWY1_6ilu=44AL)S{l%!;t9#gxlL;Yhz5`@kzBa zpR>^VG~n?RDz@Fa$sBm`!%(j?@5xwehM>3bdYWus9zUTeHG8M`l#&j`hX*Vn?SiTm z`9zQ$k_Po%d}`P*$EnJ;yJI4Typ|CD;f7Gu=-^loEr$SiuY+~Cqt;conNjn0Dy{jZifIP3GC(_wOIBbGG}r*9U5@X zsV5=1{K2b+#wOc0r+fBT*+*=OBahzIn3Czz0BT|7xr&jX5z&r#0f>%`4Hu2ffa9&D zq{k0q$b-a(;(&;oOaz=M^BWHR7I0+^YSj) zl->Hq6v{MNXi#v!qBVfac;CZ!xr{mX?+spKbw8z4~3T z#Z%*wNvp}v?o$qn=(*zfcrh`ti$*EVx4QqGjsAB={QqTE-a(?+x3LO%XsPWA{L_F; NLq$iq^qKX${{dl*iT?lq literal 0 HcmV?d00001 diff --git a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx index d72cf3c2..28771622 100644 --- 
a/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/LLMPreference/index.jsx @@ -11,6 +11,7 @@ import GeminiLogo from "@/media/llmprovider/gemini.png"; import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; +import TogetherAILogo from "@/media/llmprovider/togetherai.png"; import PreLoader from "@/components/Preloader"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions"; @@ -22,6 +23,7 @@ import GeminiLLMOptions from "@/components/LLMSelection/GeminiLLMOptions"; import OllamaLLMOptions from "@/components/LLMSelection/OllamaLLMOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import { MagnifyingGlass } from "@phosphor-icons/react"; +import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions"; export default function GeneralLLMPreference() { const [saving, setSaving] = useState(false); @@ -127,6 +129,13 @@ export default function GeneralLLMPreference() { options: , description: "Run LLMs locally on your own machine.", }, + { + name: "Together AI", + value: "togetherai", + logo: TogetherAILogo, + options: , + description: "Run open source models from Together AI.", + }, { name: "Native", value: "native", diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index db285f12..d9fea4c6 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -6,6 +6,7 @@ import AzureOpenAiLogo from "@/media/llmprovider/azure.png"; import AnthropicLogo from "@/media/llmprovider/anthropic.png"; import GeminiLogo from "@/media/llmprovider/gemini.png"; import OllamaLogo from "@/media/llmprovider/ollama.png"; +import TogetherAILogo 
from "@/media/llmprovider/togetherai.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; import ChromaLogo from "@/media/vectordbs/chroma.png"; @@ -25,7 +26,7 @@ const LLM_SELECTION_PRIVACY = { name: "OpenAI", description: [ "Your chats will not be used for training", - "Your prompts and document text used in responses are visible to OpenAI", + "Your prompts and document text used in response creation are visible to OpenAI", ], logo: OpenAiLogo, }, @@ -41,7 +42,7 @@ const LLM_SELECTION_PRIVACY = { name: "Anthropic", description: [ "Your chats will not be used for training", - "Your prompts and document text used in responses are visible to Anthropic", + "Your prompts and document text used in response creation are visible to Anthropic", ], logo: AnthropicLogo, }, @@ -49,7 +50,7 @@ const LLM_SELECTION_PRIVACY = { name: "Google Gemini", description: [ "Your chats are de-identified and used in training", - "Your prompts and document text are visible in responses to Google", + "Your prompts and document text used in response creation are visible to Google", ], logo: GeminiLogo, }, @@ -81,6 +82,14 @@ const LLM_SELECTION_PRIVACY = { ], logo: AnythingLLMIcon, }, + togetherai: { + name: "TogetherAI", + description: [ + "Your chats will not be used for training", + "Your prompts and document text used in response creation are visible to TogetherAI", + ], + logo: TogetherAILogo, + }, }; const VECTOR_DB_PRIVACY = { diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index 88768198..dc060594 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -7,6 +7,7 @@ import GeminiLogo from "@/media/llmprovider/gemini.png"; import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from 
"@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; +import TogetherAILogo from "@/media/llmprovider/togetherai.png"; import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import AzureAiOptions from "@/components/LLMSelection/AzureAiOptions"; @@ -21,6 +22,7 @@ import System from "@/models/system"; import paths from "@/utils/paths"; import showToast from "@/utils/toast"; import { useNavigate } from "react-router-dom"; +import TogetherAiOptions from "@/components/LLMSelection/TogetherAiOptions"; const TITLE = "LLM Preference"; const DESCRIPTION = @@ -100,6 +102,13 @@ export default function LLMPreference({ options: , description: "Run LLMs locally on your own machine.", }, + { + name: "Together AI", + value: "togetherai", + logo: TogetherAILogo, + options: , + description: "Run open source models from Together AI.", + }, { name: "Native", value: "native", diff --git a/server/.env.example b/server/.env.example index 5b159a03..e41ab63d 100644 --- a/server/.env.example +++ b/server/.env.example @@ -37,6 +37,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # OLLAMA_MODEL_PREF='llama2' # OLLAMA_MODEL_TOKEN_LIMIT=4096 +# LLM_PROVIDER='togetherai' +# TOGETHER_AI_API_KEY='my-together-ai-key' +# TOGETHER_AI_MODEL_PREF='mistralai/Mixtral-8x7B-Instruct-v0.1' + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index a66f93e1..29c2238f 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -133,6 +133,18 @@ const SystemSettings = { OllamaLLMModelPref: process.env.OLLAMA_MODEL_PREF, OllamaLLMTokenLimit: process.env.OLLAMA_MODEL_TOKEN_LIMIT, + // For embedding credentials when ollama is selected. 
+ OpenAiKey: !!process.env.OPEN_AI_KEY, + AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT, + AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY, + AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, + } + : {}), + ...(llmProvider === "togetherai" + ? { + TogetherAiApiKey: !!process.env.TOGETHER_AI_API_KEY, + TogetherAiModelPref: process.env.TOGETHER_AI_MODEL_PREF, + // For embedding credentials when ollama is selected. OpenAiKey: !!process.env.OPEN_AI_KEY, AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT, @@ -143,6 +155,12 @@ const SystemSettings = { ...(llmProvider === "native" ? { NativeLLMModelPref: process.env.NATIVE_LLM_MODEL_PREF, + + // For embedding credentials when native is selected. + OpenAiKey: !!process.env.OPEN_AI_KEY, + AzureOpenAiEndpoint: process.env.AZURE_OPENAI_ENDPOINT, + AzureOpenAiKey: !!process.env.AZURE_OPENAI_KEY, + AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF, } : {}), }; diff --git a/server/utils/AiProviders/togetherAi/index.js b/server/utils/AiProviders/togetherAi/index.js new file mode 100644 index 00000000..df64c413 --- /dev/null +++ b/server/utils/AiProviders/togetherAi/index.js @@ -0,0 +1,198 @@ +const { chatPrompt } = require("../../chats"); + +function togetherAiModels() { + const { MODELS } = require("./models.js"); + return MODELS || {}; +} + +class TogetherAiLLM { + constructor(embedder = null) { + const { Configuration, OpenAIApi } = require("openai"); + if (!process.env.TOGETHER_AI_API_KEY) + throw new Error("No TogetherAI API key was set."); + + const config = new Configuration({ + basePath: "https://api.together.xyz/v1", + apiKey: process.env.TOGETHER_AI_API_KEY, + }); + this.openai = new OpenAIApi(config); + this.model = process.env.TOGETHER_AI_MODEL_PREF; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + if (!embedder) + throw new Error( + "INVALID TOGETHER AI SETUP.
No embedding engine has been set. Go to instance settings and set up an embedding interface to use Together AI as your LLM." + ); + this.embedder = embedder; + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + allModelInformation() { + return togetherAiModels(); + } + + streamingEnabled() { + return "streamChat" in this && "streamGetChatCompletion" in this; + } + + // Ensure the user set a value for the token limit + // and if undefined - assume 4096 window. + promptWindowLimit() { + const availableModels = this.allModelInformation(); + return availableModels[this.model]?.maxLength || 4096; + } + + async isValidChatCompletionModel(model = "") { + const availableModels = this.allModelInformation(); + return availableModels.hasOwnProperty(model); + } + + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [prompt, ...chatHistory, { role: "user", content: userPrompt }]; + } + + async isSafe(_input = "") { + // Not implemented so must be stubbed + return { safe: true, reasons: [] }; + } + + async sendChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `Together AI chat: ${this.model} is not valid for chat completion!` + ); + + const textResponse = await this.openai + .createChatCompletion({ + model: this.model, + temperature: Number(workspace?.openAiTemp ?? 
0.7), + n: 1, + messages: await this.compressMessages( + { + systemPrompt: chatPrompt(workspace), + userPrompt: prompt, + chatHistory, + }, + rawHistory + ), + }) + .then((json) => { + const res = json.data; + if (!res.hasOwnProperty("choices")) + throw new Error("Together AI chat: No results!"); + if (res.choices.length === 0) + throw new Error("Together AI chat: No results length!"); + return res.choices[0].message.content; + }) + .catch((error) => { + throw new Error( + `TogetherAI::createChatCompletion failed with: ${error.message}` + ); + }); + + return textResponse; + } + + async streamChat(chatHistory = [], prompt, workspace = {}, rawHistory = []) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `TogetherAI chat: ${this.model} is not valid for chat completion!` + ); + + const streamRequest = await this.openai.createChatCompletion( + { + model: this.model, + stream: true, + temperature: Number(workspace?.openAiTemp ?? 0.7), + n: 1, + messages: await this.compressMessages( + { + systemPrompt: chatPrompt(workspace), + userPrompt: prompt, + chatHistory, + }, + rawHistory + ), + }, + { responseType: "stream" } + ); + return { type: "togetherAiStream", stream: streamRequest }; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `TogetherAI chat: ${this.model} is not valid for chat completion!` + ); + + const { data } = await this.openai.createChatCompletion({ + model: this.model, + messages, + temperature, + }); + + if (!data.hasOwnProperty("choices")) return null; + return data.choices[0].message.content; + } + + async streamGetChatCompletion(messages = null, { temperature = 0.7 }) { + if (!(await this.isValidChatCompletionModel(this.model))) + throw new Error( + `TogetherAI chat: ${this.model} is not valid for chat completion!` + ); + + const streamRequest = await this.openai.createChatCompletion( + { + model: this.model, + 
// Chat models served by Together AI, keyed by the model string sent to the API.
// Each row below is [organization, display name, API model string, max sequence length];
// the rows are expanded into `{ id, organization, name, maxLength }` records.
const MODELS = Object.fromEntries(
  [
    ["Stanford", "Alpaca (7B)", "togethercomputer/alpaca-7b", 2048],
    ["Austism", "Chronos Hermes (13B)", "Austism/chronos-hermes-13b", 2048],
    ["Meta", "Code Llama Instruct (13B)", "togethercomputer/CodeLlama-13b-Instruct", 8192],
    ["Meta", "Code Llama Instruct (34B)", "togethercomputer/CodeLlama-34b-Instruct", 8192],
    ["Meta", "Code Llama Instruct (7B)", "togethercomputer/CodeLlama-7b-Instruct", 8192],
    ["DiscoResearch", "DiscoLM Mixtral 8x7b", "DiscoResearch/DiscoLM-mixtral-8x7b-v2", 32768],
    ["TII UAE", "Falcon Instruct (40B)", "togethercomputer/falcon-40b-instruct", 2048],
    ["TII UAE", "Falcon Instruct (7B)", "togethercomputer/falcon-7b-instruct", 2048],
    ["Together", "GPT-NeoXT-Chat-Base (20B)", "togethercomputer/GPT-NeoXT-Chat-Base-20B", 2048],
    ["Meta", "LLaMA-2 Chat (13B)", "togethercomputer/llama-2-13b-chat", 4096],
    ["Meta", "LLaMA-2 Chat (70B)", "togethercomputer/llama-2-70b-chat", 4096],
    ["Meta", "LLaMA-2 Chat (7B)", "togethercomputer/llama-2-7b-chat", 4096],
    ["Together", "LLaMA-2-7B-32K-Instruct (7B)", "togethercomputer/Llama-2-7B-32K-Instruct", 32768],
    ["MistralAI", "Mistral (7B) Instruct v0.1", "mistralai/Mistral-7B-Instruct-v0.1", 4096],
    ["MistralAI", "Mistral (7B) Instruct v0.2", "mistralai/Mistral-7B-Instruct-v0.2", 32768],
    ["MistralAI", "Mixtral-8x7B Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", 32768],
    ["Gryphe", "MythoMax-L2 (13B)", "Gryphe/MythoMax-L2-13b", 4096],
    ["NousResearch", "Nous Hermes LLaMA-2 (7B)", "NousResearch/Nous-Hermes-llama-2-7b", 4096],
    ["NousResearch", "Nous Hermes Llama-2 (13B)", "NousResearch/Nous-Hermes-Llama2-13b", 4096],
    ["NousResearch", "Nous Hermes Llama-2 (70B)", "NousResearch/Nous-Hermes-Llama2-70b", 4096],
    ["NousResearch", "Nous Hermes-2 Yi (34B)", "NousResearch/Nous-Hermes-2-Yi-34B", 4096],
    ["NousResearch", "Nous Capybara v1.9 (7B)", "NousResearch/Nous-Capybara-7B-V1p9", 8192],
    ["OpenChat", "OpenChat 3.5 1210 (7B)", "openchat/openchat-3.5-1210", 8192],
    ["teknium", "OpenHermes-2-Mistral (7B)", "teknium/OpenHermes-2-Mistral-7B", 4096],
    ["teknium", "OpenHermes-2.5-Mistral (7B)", "teknium/OpenHermes-2p5-Mistral-7B", 4096],
    ["OpenOrca", "OpenOrca Mistral (7B) 8K", "Open-Orca/Mistral-7B-OpenOrca", 8192],
    ["garage-bAInd", "Platypus2 Instruct (70B)", "garage-bAInd/Platypus2-70B-instruct", 4096],
    ["Together", "Pythia-Chat-Base (7B)", "togethercomputer/Pythia-Chat-Base-7B-v0.16", 2048],
    ["Qwen", "Qwen-Chat (7B)", "togethercomputer/Qwen-7B-Chat", 8192],
    ["Together", "RedPajama-INCITE Chat (3B)", "togethercomputer/RedPajama-INCITE-Chat-3B-v1", 2048],
    ["Together", "RedPajama-INCITE Chat (7B)", "togethercomputer/RedPajama-INCITE-7B-Chat", 2048],
    ["Upstage", "SOLAR v0 (70B)", "upstage/SOLAR-0-70b-16bit", 4096],
    ["Together", "StripedHyena Nous (7B)", "togethercomputer/StripedHyena-Nous-7B", 32768],
    ["LM Sys", "Vicuna v1.5 (7B)", "lmsys/vicuna-7b-v1.5", 4096],
    ["LM Sys", "Vicuna v1.5 (13B)", "lmsys/vicuna-13b-v1.5", 4096],
    ["LM Sys", "Vicuna v1.5 16K (13B)", "lmsys/vicuna-13b-v1.5-16k", 16384],
    ["01.AI", "01-ai Yi Chat (34B)", "zero-one-ai/Yi-34B-Chat", 4096],
  ].map(([organization, name, id, maxLength]) => [
    id,
    { id, organization, name, maxLength },
  ])
);
------------- | ---------------------------- | -------------------------------------------- | -------------- | +| Stanford | Alpaca (7B) | togethercomputer/alpaca-7b | 2048 | +| Austism | Chronos Hermes (13B) | Austism/chronos-hermes-13b | 2048 | +| Meta | Code Llama Instruct (13B) | togethercomputer/CodeLlama-13b-Instruct | 8192 | +| Meta | Code Llama Instruct (34B) | togethercomputer/CodeLlama-34b-Instruct | 8192 | +| Meta | Code Llama Instruct (7B) | togethercomputer/CodeLlama-7b-Instruct | 8192 | +| DiscoResearch | DiscoLM Mixtral 8x7b | DiscoResearch/DiscoLM-mixtral-8x7b-v2 | 32768 | +| TII UAE | Falcon Instruct (40B) | togethercomputer/falcon-40b-instruct | 2048 | +| TII UAE | Falcon Instruct (7B) | togethercomputer/falcon-7b-instruct | 2048 | +| Together | GPT-NeoXT-Chat-Base (20B) | togethercomputer/GPT-NeoXT-Chat-Base-20B | 2048 | +| Meta | LLaMA-2 Chat (13B) | togethercomputer/llama-2-13b-chat | 4096 | +| Meta | LLaMA-2 Chat (70B) | togethercomputer/llama-2-70b-chat | 4096 | +| Meta | LLaMA-2 Chat (7B) | togethercomputer/llama-2-7b-chat | 4096 | +| Together | LLaMA-2-7B-32K-Instruct (7B) | togethercomputer/Llama-2-7B-32K-Instruct | 32768 | +| MistralAI | Mistral (7B) Instruct v0.1 | mistralai/Mistral-7B-Instruct-v0.1 | 4096 | +| MistralAI | Mistral (7B) Instruct v0.2 | mistralai/Mistral-7B-Instruct-v0.2 | 32768 | +| MistralAI | Mixtral-8x7B Instruct | mistralai/Mixtral-8x7B-Instruct-v0.1 | 32768 | +| Gryphe | MythoMax-L2 (13B) | Gryphe/MythoMax-L2-13b | 4096 | +| NousResearch | Nous Hermes LLaMA-2 (7B) | NousResearch/Nous-Hermes-llama-2-7b | 4096 | +| NousResearch | Nous Hermes Llama-2 (13B) | NousResearch/Nous-Hermes-Llama2-13b | 4096 | +| NousResearch | Nous Hermes Llama-2 (70B) | NousResearch/Nous-Hermes-Llama2-70b | 4096 | +| NousResearch | Nous Hermes-2 Yi (34B) | NousResearch/Nous-Hermes-2-Yi-34B | 4096 | +| NousResearch | Nous Capybara v1.9 (7B) | NousResearch/Nous-Capybara-7B-V1p9 | 8192 | +| OpenChat | OpenChat 3.5 1210 (7B) | 
openchat/openchat-3.5-1210 | 8192 | +| teknium | OpenHermes-2-Mistral (7B) | teknium/OpenHermes-2-Mistral-7B | 4096 | +| teknium | OpenHermes-2.5-Mistral (7B) | teknium/OpenHermes-2p5-Mistral-7B | 4096 | +| OpenOrca | OpenOrca Mistral (7B) 8K | Open-Orca/Mistral-7B-OpenOrca | 8192 | +| garage-bAInd | Platypus2 Instruct (70B) | garage-bAInd/Platypus2-70B-instruct | 4096 | +| Together | Pythia-Chat-Base (7B) | togethercomputer/Pythia-Chat-Base-7B-v0.16 | 2048 | +| Qwen | Qwen-Chat (7B) | togethercomputer/Qwen-7B-Chat | 8192 | +| Together | RedPajama-INCITE Chat (3B) | togethercomputer/RedPajama-INCITE-Chat-3B-v1 | 2048 | +| Together | RedPajama-INCITE Chat (7B) | togethercomputer/RedPajama-INCITE-7B-Chat | 2048 | +| Upstage | SOLAR v0 (70B) | upstage/SOLAR-0-70b-16bit | 4096 | +| Together | StripedHyena Nous (7B) | togethercomputer/StripedHyena-Nous-7B | 32768 | +| LM Sys | Vicuna v1.5 (7B) | lmsys/vicuna-7b-v1.5 | 4096 | +| LM Sys | Vicuna v1.5 (13B) | lmsys/vicuna-13b-v1.5 | 4096 | +| LM Sys | Vicuna v1.5 16K (13B) | lmsys/vicuna-13b-v1.5-16k | 16384 | +| 01.AI | 01-ai Yi Chat (34B) | zero-one-ai/Yi-34B-Chat | 4096 | \ No newline at end of file diff --git a/server/utils/AiProviders/togetherAi/scripts/parse.mjs b/server/utils/AiProviders/togetherAi/scripts/parse.mjs new file mode 100644 index 00000000..b96d40ab --- /dev/null +++ b/server/utils/AiProviders/togetherAi/scripts/parse.mjs @@ -0,0 +1,41 @@ +// Together AI does not provide a simple REST API to get models, +// so we have a table which we copy from their documentation +// https://docs.together.ai/edit/inference-models that we can +// then parse and get all models from in a format that makes sense +// Why this does not exist is so bizarre, but whatever. + +// To run, cd into this directory and run `node parse.mjs` +// copy outputs into the export in ../models.js + +// Update the date below if you run this again because TogetherAI added new models. 
// Last Collected: Jan 10, 2024

/**
 * Convert the Markdown model table copied from the Together AI documentation
 * into an object keyed by model id, write it to disk as pretty-printed JSON
 * and return it.
 *
 * The first two rows of the table (header and separator) are ignored. Blank
 * lines and rows with fewer than four cells (for example the empty string left
 * by a trailing newline) are skipped instead of crashing the script.
 *
 * @param {string} [inputPath="chat_models.txt"] Path of the Markdown table, resolved against the current working directory.
 * @param {string} [outputPath="chat_models.json"] Path the JSON result is written to.
 * @returns {Object<string, {id: string, organization: string, name: string, maxLength: number}>}
 *   The parsed models keyed by their API model string.
 */
function parseChatModels(
  inputPath = "chat_models.txt",
  outputPath = "chat_models.json"
) {
  const fixed = {};
  const tableString = fs.readFileSync(inputPath, { encoding: "utf-8" });
  // Drop the header row and the `| --- |` separator row.
  const rows = tableString.split("\n").slice(2);

  for (const row of rows) {
    // A table row looks like `| a | b | c | d |`, so the first and last
    // pieces produced by the split are the text outside the outer pipes.
    const cells = row
      .split("|")
      .slice(1, -1)
      .map((cell) => cell.trim());
    if (cells.length < 4) continue;

    const [organization, name, id, maxLength] = cells;
    if (id === "") continue;

    fixed[id] = {
      id,
      organization,
      name,
      maxLength: Number(maxLength),
    };
  }

  fs.writeFileSync(outputPath, JSON.stringify(fixed, null, 2), "utf-8");
  return fixed;
}
+ // ref: https://github.com/Mintplex-Labs/anything-llm/issues/416 + try { + chunk += message; + } catch (e) { + console.error(`Chunk appending error`, e); + chunk = ""; + } + continue; + } else { + chunk = ""; + } + } + + if (message == "[DONE]") { + writeResponseChunk(response, { + uuid, + sources, + type: "textResponseChunk", + textResponse: "", + close: true, + error: false, + }); + resolve(fullText); + } else { + let finishReason = null; + let token = ""; + try { + const json = JSON.parse(message); + token = json?.choices?.[0]?.delta?.content; + finishReason = json?.choices?.[0]?.finish_reason || null; + } catch { + continue; + } + + if (token) { + fullText += token; + writeResponseChunk(response, { + uuid, + sources: [], + type: "textResponseChunk", + textResponse: token, + close: false, + error: false, + }); + } + + if (finishReason !== null) { + writeResponseChunk(response, { + uuid, + sources, + type: "textResponseChunk", + textResponse: "", + close: true, + error: false, + }); + resolve(fullText); + } + } + } + }); + }); + } + // If stream is not a regular OpenAI Stream (like if using native model, Ollama, or most LangChain interfaces) // we can just iterate the stream content instead. 
/**
 * List the Together AI chat models returned by `togetherAiModels()` in the
 * shape used for custom model listings.
 *
 * The previous guard, `!Object.keys(knownModels).length === 0`, compared a
 * boolean with a number and was therefore never true. It is not needed:
 * mapping an empty model table already produces an empty list.
 *
 * @returns {Promise<{models: Array<{id: string, organization: string, name: string}>, error: null}>}
 *   One entry per known model, in table order; `error` is always `null`.
 */
async function getTogetherAiModels() {
  const knownModels = togetherAiModels();
  // Only expose the fields needed to label a model; `maxLength` is left out.
  const models = Object.values(knownModels).map(
    ({ id, organization, name }) => ({ id, organization, name })
  );
  return { models, error: null };
}
const { NativeLLM } = require("../AiProviders/native"); return new NativeLLM(embedder); diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 0f891f1b..e6e97df5 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -170,6 +170,16 @@ const KEY_MAPPING = { checks: [], }, + // Together Ai Options + TogetherAiApiKey: { + envKey: "TOGETHER_AI_API_KEY", + checks: [isNotEmpty], + }, + TogetherAiModelPref: { + envKey: "TOGETHER_AI_MODEL_PREF", + checks: [isNotEmpty], + }, + // System Settings AuthToken: { envKey: "AUTH_TOKEN", @@ -233,7 +243,7 @@ function validOllamaLLMBasePath(input = "") { } function supportedLLM(input = "") { - return [ + const validSelection = [ "openai", "azure", "anthropic", @@ -242,7 +252,9 @@ function supportedLLM(input = "") { "localai", "ollama", "native", + "togetherai", ].includes(input); + return validSelection ? null : `${input} is not a valid LLM provider.`; } function validGeminiModel(input = "") {