From 0df86699e7b4b8a76c83da796c1295864da583e3 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Wed, 17 Jan 2024 18:00:54 -0800 Subject: [PATCH] feat: Add support for Zilliz Cloud by Milvus (#615) * feat: Add support for Zilliz Cloud by Milvus * update placeholder text update data handling stmt * update zilliz descriptor --- .vscode/settings.json | 5 +- README.md | 1 + docker/.env.example | 5 + .../ZillizCloudOptions/index.jsx | 38 ++ frontend/src/media/vectordbs/zilliz.png | Bin 0 -> 14336 bytes .../GeneralSettings/VectorDatabase/index.jsx | 11 +- .../Steps/DataHandling/index.jsx | 8 + .../Steps/VectorDatabaseConnection/index.jsx | 10 + server/.env.example | 5 + server/models/systemSettings.js | 6 + server/utils/helpers/index.js | 3 + server/utils/helpers/updateENV.js | 11 + .../utils/vectorDbProviders/zilliz/index.js | 365 ++++++++++++++++++ 13 files changed, 466 insertions(+), 2 deletions(-) create mode 100644 frontend/src/components/VectorDBSelection/ZillizCloudOptions/index.jsx create mode 100644 frontend/src/media/vectordbs/zilliz.png create mode 100644 server/utils/vectorDbProviders/zilliz/index.js diff --git a/.vscode/settings.json b/.vscode/settings.json index 82165a17..ab66c194 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,10 +2,13 @@ "cSpell.words": [ "Dockerized", "Langchain", + "Milvus", "Ollama", "openai", "Qdrant", - "Weaviate" + "vectordbs", + "Weaviate", + "Zilliz" ], "eslint.experimental.useFlatConfig": true } \ No newline at end of file diff --git a/README.md b/README.md index 6e3df0df..c3eb429c 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,7 @@ Some cool features of AnythingLLM - [Weaviate](https://weaviate.io) - [QDrant](https://qdrant.tech) - [Milvus](https://milvus.io) +- [Zilliz](https://zilliz.com) ### Technical Overview diff --git a/docker/.env.example b/docker/.env.example index 8d33a809..f3eba241 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -99,6 +99,11 @@ GID='1000' # MILVUS_USERNAME= # 
MILVUS_PASSWORD= +# Enable all below if you are using vector database: Zilliz Cloud. +# VECTOR_DB="zilliz" +# ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com" +# ZILLIZ_API_TOKEN=api-token-here + # CLOUD DEPLOYMENT VARIRABLES ONLY # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting. diff --git a/frontend/src/components/VectorDBSelection/ZillizCloudOptions/index.jsx b/frontend/src/components/VectorDBSelection/ZillizCloudOptions/index.jsx new file mode 100644 index 00000000..5a26b437 --- /dev/null +++ b/frontend/src/components/VectorDBSelection/ZillizCloudOptions/index.jsx @@ -0,0 +1,38 @@ +export default function ZillizCloudOptions({ settings }) { + return ( +
+
+
+ + +
+ +
+ + +
+
+
+ ); +} diff --git a/frontend/src/media/vectordbs/zilliz.png b/frontend/src/media/vectordbs/zilliz.png new file mode 100644 index 0000000000000000000000000000000000000000..e755b0f12085c61990ce8ae45d62fec9fcb3c987 GIT binary patch literal 14336 zcmdtJWmwzI)-Q|{cXudKqku7P49I20>hyf_pn#oJQ6xVt;WC4u4=XmLB? z-tz3dpZ7f<&N*Ms$#vz*WY(HBzh#q|`L9I1(on?1rp87>Lc&v4l6#GWgsk%T#zaT- z_;HYTZjgonI=hpdaUGYIT~}9yO@t8+g}`Fi1uSQFPjwh)4#tY+-z*c zUdt)`ofYv-lI@L$2SkjQ*W25h$6J8M#m$bFUsP0-mk+=T0B|EXxZQoh9+p1bV0UJO zq<DXlh|1Ef}B8)hH>ZR z=i&c1ls4949`+tip#K0a63G@AztiKKUZ;}+O5Q2mNLI7bz)C=;9hzbGu z{yG0&#Q!!#$Hmk7&7)dUfPZrSSKoiJ|Npr8-_7+8RsY?r|A!n6`Ipps^zvUh`{-W; z0pVYPfAcS*PfW(sLPxfJscZSZ-yU4W#(=@Xp3j5bOq*_Ep)~G~1#6su=tdn0RDb;7#Ptt9zlm*nS zt%y5UQ}dbf1&gTUWb9Qai!Ja>NC4FMN*#?Cg%RZzKzTpGt0i ziqh57(_;!F>`Kaa!gv}+sHK=s`Vtd^MTId8^5XyYrb;#4y8&}@%`p3*V3l_nCk9kO zqo@2{m&d9>7>AmeYU4yCkQeHoP$CKaU= zrdQc4nJs-&2egd&?;c0&f3G(*eDCzg_T@(tjbhCzQ$GU1=M3ekL!Tl-Eg6I;*cN%O z1C^Z`RndET;v&oxk94$Wg&cCE2tH_`A58&3m5A&SN~(eCl)d2x7LNR|zl;6{U~shp zLa_d$Y6hckm!hZS3SZjLM7@3zon`2P^;(V&(Nqx|4nar}dX!Y>rx9Y?DnIsAEBZy% z$1|@HLD~1}nDvkNZ0+z!H|Nw*azwo9VOW;pb8YNnlb=asXC4Zcgs*PEVZ)HhWgi}0mg8FtF@kqfiwo~3GKW--4GY26q*`*; zDMU6`#_%3p`2Ft)meRQ0-dO7ky^IGNJ=t2eqNN<__Cp`gd=<83C}0S)%9m{9mec<} zF#tX7b~%Is)<85-2^#2ZdYY(p-596!p8azy0EKoQ_4pR`Q!Bl&zfGdO;qASeuc+FC z;w67GlP!I_uD7_*G%3?__JR>WE_ay7acD)oLF{y6gZ_2$Lgf)E@HC7N<^27A@8-uD zA|eGUP#Q-WPIG-m>~!xpZ(VMIQOcGxLLFLM=|J()Mp#6BVQO6y~iX5$+4FshnCl;W6XQ&at4QDSgM4j;TVDv z3nh$lm>)xrxku~n^9t5#)7P)!M>;5+YmsEXyAq_y`sYQL$9k>?k4c!GBfwY>5CH#} z_m2B)rbH5~F@F*|Xjb{8N<>2@9i&77?^TEG6?RoIhV&DTwL@)-6!$_BI9xjQB_` z&yW7vemf$x{3^UsK7Rxm-3;H8x{Lp;dn>H@eQA%R5K|zIW@}Be$XRK&nWvS(^iFFGRSJQ$Hh25TeMO^5oyF9%di;Xh3 z0E%|@%(0`b>Ef=-bj#(SJY5=KqsSjoye2PIX)t4wmO`erpacFL@2T)AhAv2#Dl})w za%j4<@9oaDu%Cul@3EXb+Q>)W%Ue7ei(5{}HXYQLk~o-8{Mr}w z@+D%lieu-*amFz3$&Rf_f4R-lCfvDN8n+*cCscxSsu^h2X2zhyadz<*uR*@#*fhu^wh z7|xXA1>9UZ=dnLw8KSkj^!}`H_-)0hk`QC|G~*A;!D{B!rNpo}hnIMrOC*6*&1@_ajR5`5|gIKF4=5IMSDvO}2=|nL{^bHyRd7cC`QP 
zoInY8jPh4=RQ`((?7!d16nu7d-v-y|dW*P@WaHAz+6W6Px-P}(FV4GNI*u_0uv?&hI z0xfA=o)th~P&w&%&gK zjQRRBIpHggf5y$gfHL&ms8HmiZeYF|T;bc=X@ zfXEWphQyWJoB0mAW~RQ6agdc`*vFd9a_mRl(TFIcj@#YZmR?Y;DH{l4-f zp*=Mf?v5P=AmHGMh^cVpC8#8y-H=MhDayLaTxkP3r#18V)zJhU2HVvzsLXGdthP8F(U$zHzr{(x0=gVK*?BBsJl=DQMl^|j{ zL$y5Ez}550xApis)3q`v_hMgPj}1vpY}$FGLeH~R*I^V{&a`fb$4LFeicA(=2Z zIQJHh=YfpNFw_yN=`dPaV0b42#~lHz7(@dM2y1*_=_5!oJ6pzgA1;Uj5jC_HnoJp4 z5?JwG7#(w5#eGYktUyye(uNAJr+{V~!)hA~pUH?Q%0H91eVf5O%#9Fa_Y2gg*099G zn_=geY!*CfZ$k@iUa*}~t-Kd7sG15ewRHx%zq^>P8Fh4zbsdN_6AsJd;4tzWX1gVr zb-}L)Xbh9f+@AT0vz_3zHd;3{J2V7~QfxI{dhX{b*+&^PK0`(B+3+35V#mUz$}TxU zzkA1=%Jh)8KC$ky#cP=JIV|z3Wi6oo13S(;8QDB%i~ zu_q1dSml$4#Z9*(M=)XWChuUcvew=lJmKSa&G%>&YiP$69l1}YfeL5AG~-w$WsMPX zhDfAjottw;c9s^5JS2%y*YB$&dbzzkGtHzyEpC(j@J-w82DK$Z>ErM3D{{CvBOf#X zO$)qZz}p0aR~w06702V@2b$2f;XL1N$YQo*ij6oD5&lYic=Ao>?Lyz0t()kdH%TYY zh8$-*W;h_{eA|w!!{g^~EySd-;ah)FfD*I1CQt0nz2k+9M-1L9yES+*M{de@Y+kO7 z(sXK$h5kOZaJHXNN zR*^IQqH-y=m)+N6t`Va{c#v^xK0p4SH4+kOl5Hincny0V4d+f@ayIr=x?h?eqvYGW z1MO16D5ttr9IS5S4TIfQ7l^VBSFdUr2eiBmlmO6^fxK2| zZ8LO>D5;PlMT?xWfG2{=rMs@|s9LGi-OLX!0zX_>_(rUWD)NV4w$##QBRQ3QtJs7z z)5=gN8T>%!aw(=VfFcrvk>}CaTpOGlfclmpwNrtNaEKiz_G2wHOTS^9DcFI8E0yww z{(f6}Y)&A#Fzb6Q0KK`b?hC5-3BfK-k&sCsA2Q(Bx&f}y;K${(rf!i8uvJV(hnEgR zi&nnEHDI82QjgTqRdpFie#e}V7ydzEjD6FnzYeCo-V{P@u{lEA{V$iT);_? 
zbPmKTZTik)c=%D6Kk>Ex`pzWUV(sZkrw_E$8PYEeoqC8-1tcf)5GV)q>TtxT=NHGq-+yg>i%;Y)T1+qC#?N&$fN4bB-?Q6q+pe67- z-)N0F6iAjR!ijAhFgRxpQKSL|YZs7~w+yiw7c{*X1&oHK!e0%!F_w&sSGUbv6q~KF zG^^7->xJk^n=?c)yNpG;yM$wWQctp>Zayn?|KW@s)JomA!lE1z1yGS$YlDpN!zYP1 zTzEH92gkY1Pz3C{JK7@my|mN!)uy*v-QW4Om4Nq;MhOpZh*|)y6|vnC&*cpc zd_-sC_n^*2qsqco?eCk81jUyEhX=_*GiUgTnrR)X3Eh2o3-N_wbmCnLc2~I;R}KOa z(w%wukxk!P=8Cg&jP&o`PCWFe1u*rkC?>SycY}jViZhe|vdI9weykj5qcLoC_}QRS zK`cNH@#`D278x}z+h0E6g87FE^Ngvwd30yk;+kuJZA|0#a7HL_?-`|7&z67g%E9w9 z)*SaO`^;vL>$z2fU3S%Q!q)-J^5qtyWif08gM#!A{9mx;D z8=4V;p?Uf)?1SsAgLpJup!BZx{-%9feP1==+0{Gvct5P+h&A7X()y2pnJ*Z{F=fHwI8%Zg6-q_xlks?=p~2S> z&P=c(jre``?D$IgdM31dt-}xJa0A;Y(5&fzzR-p!i!bmQj<8{nx_{U+-f zY%38m`L)l5I_eFxRz!#MTvxg|6zqnwuU1otp6!o~ON-m85w?!xVrbls+!lmGE3xC; zGBr+Vohl|VuH)++~P5BYp?>RazbwNA%-Vh2pe{7uh4#Lx4 z#T54fDFtR8RQK<5azxlagrVbmZ?s7D&&zplq+OCAs}E(Qir0pT^G==HHO9(OJtZT9 zOnY10cx||p@;3i;)TMVGQ%$j!pD4hra}>+hu%l z!S5K;^byp`%!i;&_%9mxgleUu=Ivl5tE1IKNomz`iKC+&ecoU;($$XaHgEH80nU{# z6~%HBomqwjE!`4LmqY-Pplf48qdvh-5cbI1ai;hg#dO!J!)k(kW8EYH3Tp44-|w03 zH?~mfK_bSqL|kg!5{JKN1T_R z6u1BYbx$|9HyMK2jw3#qTKwFG90Psqv|e{meSGiweC{jS99}qQ?VX`n!NU*a6W9&% z0B@-2d9;epF>1yG@=Q4zsz!JXhD^oA$kKE*l?edllTUBf?X5@6;UdIFb!VRX$Pf2Z zw$(DW%5AGl2>;<^*Tm4|IjeL$`Tq=hl$)9e!cB(}jvF-5q3K!a( ze+nNiV5F}+$*OI=d&H#<$?&b;ru;{Og?UKnX$zz zZiXn`Y+32^aK2yZ;Ph5MJD}gw?4JzO!hB{y0b;jXGTw%z`d6Yk)_N2t+uf82POF$; zNyI`!#zYJn_-^Km>As~lZ(H3qr76;EL^qf~FaPn@NSXi7E}AUH%ZFsxb#8Z3>a6G0 zpdk0shbfKqW63|1P@i9UhvN|rH#IL2_Z`G!WN;V;UjX_@4#k7>;iY4y5qS!5JfZSi z4^ik_&(G?r22DeLrn@#Diw$kwuN+vi+205-YAJ-Y`dJ>C`1ql78PS?QjnljWRU@`e zpK`%jGD9Ob|_H*H>U-ZX*=pEljOG|v2l;aR^Nxe)}QiitI=ukpoc5yB}R zdG~WaZu>S~|N5x9WM;dFXMG$)lHRZ#e;N34DHrQPP9deHq@!*Wu}{VuIyufb+Yj73 zy)~<;Uic*!v43qNG-6EofF=XrYJbI(%=3#!zGWFOU`Ax_cz!`{4{LX z;>8uhZu~4`dvnDH@71}tUEs^asVN;W7FV?HTI_a5I+<1kP26u2lQw-UizviwYE5W zTX8w+H-PoK;J%9Svhwxeo@w8E$!?X0(w-n(L$Wf^80RQ(+1MCn5Gt+ z$U4u<@0`h}?#75Talgl*d!bf^*+*(5su&f^&KReL;lecf{#IgO_7VH?Z0eXaAJZSb+mfmvvg9cG-24p# 
zf9)Hjg0~%&M-v^p^`0%aK?k}lqHhE3i3Ce`tBx=UskEZunPLoq4^RB-I7Ybu&#Q4n ziz#0jp=unXY}29MiNA~rtm3aoWXJhHwhwJ{<#SO^YhExLHjMA~sAffv+wT}Cz_X_> zU|=6sMVub2Zg_m)zCP#N>=k^UhWBjLb)FnBvDP~>H4BDoDR#dW@ zAFNdTP;rvy_R_ixb6~e%hzgAyK9klK$G&jaT|(!di{K1daw@WuA|h?A#FM41%$7P3 zd1kQV4=@sQbH|tRi9ALoMZ{G|Y{f;Fw0T6oc_=CdGs0z1p?ePw|2n$9AWTk?B8KFm zr>M^v)=(5YgJC(JE%A9WIam-_)>EnA9n+P*ewdC-$-|baB1QgcG(IO{1-Fyie6B=l zx0rQ@uIBo}K{KTa{J5gT3SJSx&Uq1O)Q{U$f=dY6vH-c15n2!6gZ$eMFAhIe){op> z$qZhK6hQMy2t?{zP;3LRNyxjGaw!))`)US|78k#eXD?wXc!tm`K4n5&dw2WSlHLCy zRrAO2lVPP7xZP~4CP44-PU>xCLd=G9 zsJkQsU_Mv^`-t1|fqMX@2-f`>;6Sp2Q8}WPwR%pAZ_8?DlK#jCw{Kiilugy@3T)nR zsOEI#B(8Qz*~c4vwp2q=y7Zeh?wtnS!4gY|B4UkS&Bkc=qm^14VU&3ZOcvs%?slf= zYHW=wIj4VjZ*4JtZ&ALj3w+iK-}8jnkeF0KP}K~uD|jPwGQqOwdk%&atMr@Y+)M^P zaD^2Os_>8KaDJy#2H81P_x1YLs=%7$l713C-Z-IN3e>o9Plpcig3o$Y?ZWRz5YO5|M$BxMy&4>3{gYVl(Xg$nx89>ehOZ!6 z<~NjV+yl3C{N+PDW5$hSpRSM;*tjWFBIDy=TiS8ftLPb(D8=6+Du!G8f7SZGrBMc` zuouJVD9lx_p7pxYqV0!KLG0mANyd zOtO#_u{>LAMuw+hRQNX1Ak-M& zTq;MFNDq~g+EtEtnTOfF6Hls^Mtauo;f>~E{VKQ`jy%c=M2LgwIm5fEWN4zk#aggn zwNmc-f!~kGQK_NrC!LoU0u;&GS9{v9<73*uRb6uX)5U<}7wzY-s86}vR;VF?Y#Z|p z7_8;tCytgsdG-wyz41j9VECS`vKB)omvFA`&ST12=<%j_zl5=ec?-tX2x{1&*b?EQ(X_ZLSK)1 zD3V73LQ@#RO4i;*eu^~p3s9Rac~)qW-tp(f3gui3|Zj3>)kG6M=P&c5bnV~DZo=SShQz?mGHc59@o;7GN zhHHpEAx#~sIoCl_kIt<8PMLvaC7gJINcCQP_hH278Dzhsa67Y@p6*1`J!DE&oX=+4Nhay)*sqyqb(7R$Uzs;-41W$awBu;bwP9Mta z+BaP1$fRu}YRhMG8#{dMY<9Ep0FA|eW&Ch~lfDx~mY}0U+3t0jUSg?wdqiq(Sg2U_ zXU8H|_8i+a!sGVk#YrRgl5HYu(i32vsEH3(%a570+V*AC1g?-QrfL%krzp@S@C3&+ z%W$rsF`GQqYO>fMzno5nUmtvXjccuLV~^Lcu7>!eOE#y({n%Iv<;)USZm}8s6ePYB zR_qDAUWNCIvTpmmw||hiyhFL1e$hOOWBF8TvGN!xHV5;8-X(=`nd@}K400ns!4orV z$m*}8yHGe8pV9xjG*$cZ`~-gc+Ms@VOOj_Od9JIzEiBpCK-e7fMg9+)Q`*Z7l0N6i z4|#*s>D!*p*30Pu-}(>70i&y^@U7QVtFK2->TAk|$V1d#8R1GFVu+}n(`!3ieL{mz zL2mZ37`2KkJ>p!KU?Ct5!vbx@=0UAmE8r5Q)F2plI35BWEmF1){*cKvrO28&^5Gm} zB%ZP1o_+>ef;DQwh`A|yF;FOX2Wp#yl)C!aQqKRiM?Kkd60$>ZNf)uwqWK zpBZBVR;ux=HA1y`W#reR1Ro}^_pK;b>GyVzC@p0=W2#%04REgDF@1{zm28gJbU4;L 
z;TqhCm{+T^rx-B{XS*uYn4Kv{_8VSlopV&cnrP%V@xqm8oaq;`sysuI_`|;(7}C5S z-Bs`z@#G8x@&Md8qSjn-5&=d27W)w<&J!#n3HbSO1h8As{qPD{+y6X9qLfn&l9Zuo zxaz~y(j?Yqni*qIHl*o~o$e8Vf3{+(PfAWiiG`CZl-uoj(RFswxyOsY7Gp|P`qrgx z_ilmz`|+fgD}!6B804mx4@NPW%2Mdj%pGsQ6eB3QC=@|rlzhJZOD_e_u?w%&WY&&E zQ55pPG|rIBPKf=yV7a!x;Sl~+BsM2-qe;ZyTiH z$TTh;v6oaWx+c8j0nJ*0HflnX>FiCIze-p$kKAKl3wwK;Y5_^r6t23A`POZH4r;qE zY_mX#k&vvs2H;!oaEc)G_9ltzq$iZQ+3LkML0*P7r~IcoJ|HP6k$U4}3X0C0);m*9 z*X0__AUu<^Mr>-v`$UU`DkB4nw2C)VF; zC(E8jbw)YoIm;%`E#n%pq?)p(AZ0INYq)IlNHu5Z9Y&Ps`(5?OF}~uuG)tH#aC8k-Y(JOokHw@BZl2@an0z5gYbk0 zuO8?zr^Qn6-dRjnk%ins9$74KXp`tR%kFOZ|UKc?trkn8k&D9Ft)ASIcXB`AQAxT!bD^4*Rmm2UtbdU${ zJl_KNVv6#4T4z=+R0`q5u^n8zB#~ptjTs4Y$?xBv=jcfukoj{TBy)`kIK7IA-VLZ? ztE4z*b?@YzNKf4n7y0L92}Rtr;f$|hv3X$E2xZz0xyh#Osw7dy?xgAD#XTuaRgV`; zQrVk*XB=PuWxAWv9u0ClVqvM<-^?KxB3KoJ7aUw}Jkfi@z*O_HrOc6T3i>;A@x7US70TV0b7ax-v z%=pr{iSi{UT#_l&Vj7&$if8325r`7hh;PJ@m@^-0HT62zDyDM)th@i?psP8>^&{3Zq}KR% zL}vFiB|V7-=FOH2v8MaXgozZSh3qjg&~i@5-KM<>L*SLO&n^$JC`-4jag-C>yh5I< z9oYxTUZCTq2TVCcY6iM}Khdr?tNHa@T8K6@5)lh`_Zviwsxr~UpS`^)7YLSkHI=bU ze92o*jI9#}@+Izmq`8D>3U6@Uh_4+m5Hc*-v)U}=3!5zcysIU)QcLOdxxIdRYgyE8 zAO}t^F%R%w4*be*Ykyuu1<9^*?WIe9akprQCjen(@#4Qoyo=j$&}tlxo%w!X7Ypdr z&B;sccFWU|(Mtr~o@~ZdDh-EYr-hJ%5Wf^PlR8&Z9!_Wp%eW6sSYirJ2?I~Rd4B%iP`olB{v(prTIMoJxeQS5maO^hpDR zq9q^UCS@7t8rxo%3IMAQbrwk8cPwniiP&XxiWt~jAirrpo5^mMzn^1)cybdb3gL65 z`vz7s$vk~?c73eZJyHCs85r6bHJ}ZrdiG)(J)C1u5(M8;@dx?a{fK;9BuwF&m}rzL zkn##;q7~VMcFmMs$oPA;R zjl7~^-*5#u+}DUlhY#^QQRR@h0Y>^pC+ko1N)8>*I-|IdI6yQr`LhC1Y!vTHZ}BDP zXT#>@qPH|~rgp^PKIK{nhS3T;Lxeh#pfP_EXt2KJ5K9TDxlWux?Y@+X0p!fgew zs;8n{5IAn4(&%F$4O}>CAFs+*@ zL+%!%#OP=;eYFK*Hm-0JH>>m z-k05upkIZ0sR5wk?%ywMRe?>PE}A`G=lc0ks7<@&waDj_cET(r0N<&Ob)v1|F&YNr z{u_MpnSVazy1aAarWjI1)S7aE1%|kWsx?t5S4kZW7bW{Gg9Dow?wdukGvpf(C3--A z%xVCOGV7(GAG`Xsq^oAN+S?-nb7}z?Q#+!bwX%UWK`o_JiM@w!X(sHrUgH6r)3Xte z-C*vgw?&7jmc{23T?8d^Y=m06$#TBY`gp%07S-Qh*V%v5m0J zq3mNh|L(!5n!JsV;vGm7c*_Jt0Hg(@*)wKWvbrZV!ZCNq)_N8k8b8n&j;-R5{4ilh 
zRC&o)8mxTESPT#%(MW4(&WXcy4Qk#PZx6XMBj1i0VA6_)LK5W{)w6A4NcOsV6@hl~ z8ixkv_ypi#GjX}C$MJ=tv))Q#hE{5h)+5zJ8TQnNmm7NXUp8NgCOqQ%o~3@~+RpCT ziflCVF3UKBOaF8}UNRM~%KdWO_YonFfFd7-rtl-gI;tO>p1H`OzT%%2(2ngJUuhsp zi(j~MQ{kN0Bu`N;uTX1tqv=&V=oo??DIIWtsuOD_J46F@Cmo?qj+aiKMJKY4gTB|| z)7!_PCjiy##eg{MiyvY6JpoYrAIvA8%)y~6Mm#CW2=^FBcxaaO-BpiH)n;*pZ zzJq@%QTdv7^k6h6qG+glsb1*E`A&lzKL#U8(N+wRf#sS9eB}g<@1?H z%@MtScEDkWN&V5<{QCz(>iR<8q~FYcMK@WSXr!y6^e>_3azLJ&6=*tQ5=ne?sg1r~ zO?M+-X7Y$8jm&SWeJ|XvXve~l7&Nle>j!<$|7P1Cd|BVtqueN30*P+(`F}1>7Lc+E zhAWSLE@5>#MMtQw0CFSW)k_;3m(S+h>jhDUGOI!nRTySCz#6?eW|GWemL=r(5^RQV zSyyzvh;^HC*Te#YoNyn+k=80oG7tP8E77flvvQhZ`}WRE;A3Bf;$(k`Jo+X1q`&8P zS|(`fcjE`G=q^NMf3RgOCe0UN)$D89!GxDKRFCE1OI%a0(_YFUA=;7sC(6^{2124o zR@XP#lNxElC}fXyyiBh~Y<8Mxc;w^|!3-%}jo+!F;jxTHl&(g<;7`p%)N{u(BCN}| zI>S)HzTJ zKv;juNFJDd6+g3J8L6-9pu|O^5NKiQ_dMt16e48_BNX0QbNYYbZxR3b=MLxt^6B8- V=Mc1O`p5s)Da&ifmA|wI`5#n5e98a- literal 0 HcmV?d00001 diff --git a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx index f49054b9..02887b86 100644 --- a/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx +++ b/frontend/src/pages/GeneralSettings/VectorDatabase/index.jsx @@ -9,6 +9,7 @@ import LanceDbLogo from "@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; import MilvusLogo from "@/media/vectordbs/milvus.png"; +import ZillizLogo from "@/media/vectordbs/zilliz.png"; import PreLoader from "@/components/Preloader"; import ChangeWarningModal from "@/components/ChangeWarning"; import { MagnifyingGlass } from "@phosphor-icons/react"; @@ -19,6 +20,7 @@ import QDrantDBOptions from "@/components/VectorDBSelection/QDrantDBOptions"; import WeaviateDBOptions from "@/components/VectorDBSelection/WeaviateDBOptions"; import VectorDBItem from "@/components/VectorDBSelection/VectorDBItem"; import MilvusDBOptions from 
"@/components/VectorDBSelection/MilvusDBOptions"; +import ZillizCloudOptions from "@/components/VectorDBSelection/ZillizCloudOptions"; export default function GeneralVectorDatabase() { const [saving, setSaving] = useState(false); @@ -33,7 +35,6 @@ export default function GeneralVectorDatabase() { useEffect(() => { async function fetchKeys() { const _settings = await System.keys(); - console.log(_settings); setSettings(_settings); setSelectedVDB(_settings?.VectorDB || "lancedb"); setHasEmbeddings(_settings?.HasExistingEmbeddings || false); @@ -66,6 +67,14 @@ export default function GeneralVectorDatabase() { options: , description: "100% cloud-based vector database for enterprise use cases.", }, + { + name: "Zilliz Cloud", + value: "zilliz", + logo: ZillizLogo, + options: , + description: + "Cloud hosted vector database built for enterprise with SOC 2 compliance.", + }, { name: "QDrant", value: "qdrant", diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index 3b004638..ae573027 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -10,6 +10,7 @@ import TogetherAILogo from "@/media/llmprovider/togetherai.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import LocalAiLogo from "@/media/llmprovider/localai.png"; import MistralLogo from "@/media/llmprovider/mistral.jpeg"; +import ZillizLogo from "@/media/vectordbs/zilliz.png"; import ChromaLogo from "@/media/vectordbs/chroma.png"; import PineconeLogo from "@/media/vectordbs/pinecone.png"; import LanceDbLogo from "@/media/vectordbs/lancedb.png"; @@ -139,6 +140,13 @@ const VECTOR_DB_PRIVACY = { ], logo: MilvusLogo, }, + zilliz: { + name: "Zilliz Cloud", + description: [ + "Your vectors and document text are stored on your Zilliz cloud cluster.", + ], + logo: ZillizLogo, + }, lancedb: { name: "LanceDB", description: [ diff 
--git a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx index 37e0e5b7..af0b5662 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/VectorDatabaseConnection/index.jsx @@ -6,6 +6,7 @@ import LanceDbLogo from "@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; import MilvusLogo from "@/media/vectordbs/milvus.png"; +import ZillizLogo from "@/media/vectordbs/zilliz.png"; import System from "@/models/system"; import paths from "@/utils/paths"; import PineconeDBOptions from "@/components/VectorDBSelection/PineconeDBOptions"; @@ -14,6 +15,7 @@ import QDrantDBOptions from "@/components/VectorDBSelection/QDrantDBOptions"; import WeaviateDBOptions from "@/components/VectorDBSelection/WeaviateDBOptions"; import LanceDBOptions from "@/components/VectorDBSelection/LanceDBOptions"; import MilvusOptions from "@/components/VectorDBSelection/MilvusDBOptions"; +import ZillizCloudOptions from "@/components/VectorDBSelection/ZillizCloudOptions"; import showToast from "@/utils/toast"; import { useNavigate } from "react-router-dom"; import VectorDBItem from "@/components/VectorDBSelection/VectorDBItem"; @@ -68,6 +70,14 @@ export default function VectorDatabaseConnection({ options: , description: "100% cloud-based vector database for enterprise use cases.", }, + { + name: "Zilliz Cloud", + value: "zilliz", + logo: ZillizLogo, + options: , + description: + "Cloud hosted vector database built for enterprise with SOC 2 compliance.", + }, { name: "QDrant", value: "qdrant", diff --git a/server/.env.example b/server/.env.example index 26c51927..23e20bb1 100644 --- a/server/.env.example +++ b/server/.env.example @@ -96,6 +96,11 @@ VECTOR_DB="lancedb" # MILVUS_USERNAME= # MILVUS_PASSWORD= +# Enable all below if you are 
using vector database: Zilliz Cloud. +# VECTOR_DB="zilliz" +# ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com" +# ZILLIZ_API_TOKEN=api-token-here + # CLOUD DEPLOYMENT VARIRABLES ONLY # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting. # STORAGE_DIR= # absolute filesystem path with no trailing slash diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 1c4069ac..90de463f 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -63,6 +63,12 @@ const SystemSettings = { MilvusPassword: !!process.env.MILVUS_PASSWORD, } : {}), + ...(vectorDB === "zilliz" + ? { + ZillizEndpoint: process.env.ZILLIZ_ENDPOINT, + ZillizApiToken: process.env.ZILLIZ_API_TOKEN, + } + : {}), LLMProvider: llmProvider, ...(llmProvider === "openai" ? { diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index 2eed9057..b72bb797 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -19,6 +19,9 @@ function getVectorDbClass() { case "milvus": const { Milvus } = require("../vectorDbProviders/milvus"); return Milvus; + case "zilliz": + const { Zilliz } = require("../vectorDbProviders/zilliz"); + return Zilliz; default: throw new Error("ENV: No VECTOR_DB value found in environment!"); } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index f44b040b..9e89047f 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -199,6 +199,16 @@ const KEY_MAPPING = { checks: [isNotEmpty], }, + // Zilliz Cloud Options + ZillizEndpoint: { + envKey: "ZILLIZ_ENDPOINT", + checks: [isValidURL], + }, + ZillizApiToken: { + envKey: "ZILLIZ_API_TOKEN", + checks: [isNotEmpty], + }, + // Together Ai Options TogetherAiApiKey: { envKey: "TOGETHER_AI_API_KEY", @@ -316,6 +326,7 @@ function supportedVectorDB(input = "") { "weaviate", "qdrant", "milvus", + "zilliz", ]; return supported.includes(input) ? 
// ---------------------------------------------------------------------------
// server/utils/vectorDbProviders/zilliz/index.js
// (new file introduced by this patch; shown here as plain, formatted source)
// ---------------------------------------------------------------------------
const {
  DataType,
  MetricType,
  IndexType,
  MilvusClient,
} = require("@zilliz/milvus2-sdk-node");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { v4: uuidv4 } = require("uuid");
const { storeVectorResult, cachedVectorInformation } = require("../../files");
const {
  toChunks,
  getLLMProvider,
  getEmbeddingEngineSelection,
} = require("../../helpers");

/**
 * Throws when an insert response does not report success.
 * Shared by both insert paths of `addDocumentToNamespace`.
 *
 * @param {object} insertResult - response returned by `client.insert`.
 * @throws {Error} when `status.error_code` is anything other than "Success".
 */
function assertInsertSucceeded(insertResult) {
  if (insertResult?.status.error_code !== "Success") {
    throw new Error(
      `Error embedding into Zilliz! Reason:${insertResult?.status.reason}`
    );
  }
}

// Zilliz is basically a copy of Milvus DB class with a different constructor
// to connect to the cloud
const Zilliz = {
  name: "Zilliz",

  /**
   * Builds a MilvusClient from ZILLIZ_ENDPOINT / ZILLIZ_API_TOKEN and verifies
   * it with a health check.
   *
   * @returns {Promise<{client: MilvusClient}>}
   * @throws {Error} when VECTOR_DB is not "zilliz" or the health check fails.
   */
  connect: async function () {
    if (process.env.VECTOR_DB !== "zilliz")
      throw new Error("Zilliz::Invalid ENV settings");

    const client = new MilvusClient({
      address: process.env.ZILLIZ_ENDPOINT,
      token: process.env.ZILLIZ_API_TOKEN,
    });

    const { isHealthy } = await client.checkHealth();
    if (!isHealthy)
      throw new Error(
        "Zilliz::Invalid Heartbeat received - is the instance online?"
      );

    return { client };
  },

  /**
   * Connects (which throws on failure) and returns the current timestamp.
   *
   * @returns {Promise<{heartbeat: number}>} milliseconds since epoch.
   */
  heartbeat: async function () {
    await this.connect();
    return { heartbeat: Number(new Date()) };
  },

  /**
   * Sums the reported row count of every collection on the instance.
   *
   * @returns {Promise<number>}
   */
  totalVectors: async function () {
    const { client } = await this.connect();
    const { collection_names = [] } = await client.listCollections();

    // The statistics call is async, so the counts are accumulated in a plain
    // loop. Using `reduce` with an async callback turns the accumulator into
    // a Promise and the sum becomes NaN after the first collection.
    let total = 0;
    for (const collection_name of collection_names) {
      const statistics = await client.getCollectionStatistics({
        collection_name,
      });
      total += Number(statistics?.data?.row_count ?? 0);
    }
    return total;
  },

  /**
   * Reported row count for a single collection.
   *
   * @param {string|null} _namespace - collection name.
   * @returns {Promise<number>} 0 when no row count is present in the response.
   */
  namespaceCount: async function (_namespace = null) {
    const { client } = await this.connect();
    const statistics = await client.getCollectionStatistics({
      collection_name: _namespace,
    });
    return Number(statistics?.data?.row_count ?? 0);
  },

  /**
   * Fetches collection statistics for a namespace.
   *
   * @param {MilvusClient} client
   * @param {string|null} namespace - collection name.
   * @returns {Promise<object|null>} the statistics response, or null when the
   *   statistics request rejects.
   * @throws {Error} when no namespace is given.
   */
  namespace: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const collection = await client
      .getCollectionStatistics({ collection_name: namespace })
      .catch(() => null);
    return collection;
  },

  /**
   * Connects and reports whether the collection exists.
   *
   * @param {string|null} namespace - collection name.
   * @returns {Promise<boolean>} false when no namespace is given.
   */
  hasNamespace: async function (namespace = null) {
    if (!namespace) return false;
    const { client } = await this.connect();
    return await this.namespaceExists(client, namespace);
  },

  /**
   * Reports whether the collection exists, using an existing client.
   * A failed `hasCollection` request is logged and treated as "does not exist".
   *
   * @param {MilvusClient} client
   * @param {string|null} namespace - collection name.
   * @returns {Promise<boolean>}
   * @throws {Error} when no namespace is given.
   */
  namespaceExists: async function (client, namespace = null) {
    if (!namespace) throw new Error("No namespace value provided.");
    const { value } = await client
      .hasCollection({ collection_name: namespace })
      .catch((e) => {
        console.error("Zilliz::namespaceExists", e.message);
        return { value: false };
      });
    return value;
  },

  /**
   * Drops the whole collection backing the namespace.
   *
   * @param {MilvusClient} client
   * @param {string|null} namespace - collection name.
   * @returns {Promise<true>}
   */
  deleteVectorsInNamespace: async function (client, namespace = null) {
    await client.dropCollection({ collection_name: namespace });
    return true;
  },

  // Zilliz requires a dimension aspect for collection creation
  // we pass this in from the first chunk to infer the dimensions like other
  // providers do.
  /**
   * Creates, indexes and loads the collection when it does not exist yet.
   * Does nothing when the collection already exists.
   *
   * @param {MilvusClient} client
   * @param {string} namespace - collection name.
   * @param {number|null} dimensions - vector length used for the schema.
   * @returns {Promise<void>}
   * @throws {Error} when the collection must be created but no dimension is given.
   */
  getOrCreateCollection: async function (client, namespace, dimensions = null) {
    const isExists = await this.namespaceExists(client, namespace);
    if (isExists) return;

    if (!dimensions)
      throw new Error(
        `Zilliz:getOrCreateCollection Unable to infer vector dimension from input. Open an issue on Github for support.`
      );

    await client.createCollection({
      collection_name: namespace,
      fields: [
        {
          name: "id",
          description: "id",
          data_type: DataType.VarChar,
          max_length: 255,
          is_primary_key: true,
        },
        {
          name: "vector",
          description: "vector",
          data_type: DataType.FloatVector,
          dim: dimensions,
        },
        {
          name: "metadata",
          // Was misspelled `decription`; the other fields use `description`.
          description: "metadata",
          data_type: DataType.JSON,
        },
      ],
    });
    await client.createIndex({
      collection_name: namespace,
      field_name: "vector",
      index_type: IndexType.AUTOINDEX,
      metric_type: MetricType.COSINE,
    });
    await client.loadCollectionSync({
      collection_name: namespace,
    });
  },

  /**
   * Embeds a document (or reuses its cached vectors) and inserts the vectors
   * into the namespace's collection, recording docId -> vectorId pairs through
   * `DocumentVectors.bulkInsert`.
   *
   * @param {string} namespace - collection name.
   * @param {object} documentData - must contain `pageContent` and `docId`;
   *   every other key is stored as chunk metadata.
   * @param {string|null} fullFilePath - key used for the vector cache.
   * @returns {Promise<boolean>} true on success; false when there is no page
   *   content or when any step throws (the error is logged, not rethrown).
   */
  addDocumentToNamespace: async function (
    namespace,
    documentData = {},
    fullFilePath = null
  ) {
    const { DocumentVectors } = require("../../../models/vectors");
    try {
      let vectorDimension = null;
      const { pageContent, docId, ...metadata } = documentData;
      if (!pageContent || pageContent.length === 0) return false;

      console.log("Adding new vectorized document into namespace", namespace);
      const cacheResult = await cachedVectorInformation(fullFilePath);
      if (cacheResult.exists) {
        const { client } = await this.connect();
        const { chunks } = cacheResult;
        const documentVectors = [];
        vectorDimension = chunks[0][0].values.length || null;

        await this.getOrCreateCollection(client, namespace, vectorDimension);
        for (const chunk of chunks) {
          // Before sending to Zilliz and saving the records to our db
          // we need to assign the id of each chunk that is stored in the cached file.
          const newChunks = chunk.map((cachedVector) => {
            const id = uuidv4();
            documentVectors.push({ docId, vectorId: id });
            return {
              id,
              vector: cachedVector.values,
              metadata: cachedVector.metadata,
            };
          });
          const insertResult = await client.insert({
            collection_name: namespace,
            data: newChunks,
          });
          assertInsertSucceeded(insertResult);
        }
        await DocumentVectors.bulkInsert(documentVectors);
        await client.flushSync({ collection_names: [namespace] });
        return true;
      }

      const textSplitter = new RecursiveCharacterTextSplitter({
        chunkSize:
          getEmbeddingEngineSelection()?.embeddingMaxChunkLength || 1_000,
        chunkOverlap: 20,
      });
      const textChunks = await textSplitter.splitText(pageContent);

      console.log("Chunks created from document:", textChunks.length);
      const LLMConnector = getLLMProvider();
      const documentVectors = [];
      const vectors = [];
      const vectorValues = await LLMConnector.embedChunks(textChunks);

      if (!!vectorValues && vectorValues.length > 0) {
        for (const [i, vector] of vectorValues.entries()) {
          if (!vectorDimension) vectorDimension = vector.length;
          const vectorRecord = {
            id: uuidv4(),
            values: vector,
            // [DO NOT REMOVE]
            // LangChain will be unable to find your text if you embed manually and dont include the `text` key.
            metadata: { ...metadata, text: textChunks[i] },
          };

          vectors.push(vectorRecord);
          documentVectors.push({ docId, vectorId: vectorRecord.id });
        }
      } else {
        throw new Error(
          "Could not embed document chunks! This document will not be recorded."
        );
      }

      if (vectors.length > 0) {
        const chunks = [];
        const { client } = await this.connect();
        await this.getOrCreateCollection(client, namespace, vectorDimension);

        console.log("Inserting vectorized chunks into Zilliz.");
        for (const chunk of toChunks(vectors, 100)) {
          chunks.push(chunk);
          const insertResult = await client.insert({
            collection_name: namespace,
            data: chunk.map((item) => ({
              id: item.id,
              vector: item.values,
              // Metadata lives on each record. `chunk` is the batch array, so
              // `chunk.metadata` was always undefined and rows lost their text.
              metadata: item.metadata,
            })),
          });
          assertInsertSucceeded(insertResult);
        }
        await storeVectorResult(chunks, fullFilePath);
        await client.flushSync({ collection_names: [namespace] });
      }

      await DocumentVectors.bulkInsert(documentVectors);
      return true;
    } catch (e) {
      console.error(e);
      console.error("addDocumentToNamespace", e.message);
      return false;
    }
  },

  /**
   * Deletes every vector recorded for `docId` from the collection and removes
   * the matching `DocumentVectors` rows.
   *
   * @param {string} namespace - collection name.
   * @param {string} docId - document id used to look up its vector ids.
   * @returns {Promise<true|undefined>} undefined when the namespace does not
   *   exist or no vectors are recorded for the document.
   */
  deleteDocumentFromNamespace: async function (namespace, docId) {
    const { DocumentVectors } = require("../../../models/vectors");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) return;
    const knownDocuments = await DocumentVectors.where({ docId });
    if (knownDocuments.length === 0) return;

    const vectorIds = knownDocuments.map((doc) => doc.vectorId);
    const queryIn = vectorIds.map((v) => `'${v}'`).join(",");
    await client.deleteEntities({
      collection_name: namespace,
      expr: `id in [${queryIn}]`,
    });

    const indexes = knownDocuments.map((doc) => doc.id);
    await DocumentVectors.deleteIds(indexes);

    // Even after flushing Zilliz can take some time to re-calc the count
    // so all we can hope to do is flushSync so that the count can be correct
    // on a later call.
    await client.flushSync({ collection_names: [namespace] });
    return true;
  },

  /**
   * Embeds `input` with the given connector and searches the namespace.
   *
   * @param {object} params
   * @param {string|null} params.namespace - collection name.
   * @param {string} params.input - text to embed and search with.
   * @param {object|null} params.LLMConnector - must expose `embedTextInput`.
   * @param {number} params.similarityThreshold - matches scoring below this
   *   value are dropped.
   * @returns {Promise<{contextTexts: string[], sources: object[], message: string|false}>}
   * @throws {Error} when namespace, input or LLMConnector is missing.
   */
  performSimilaritySearch: async function ({
    namespace = null,
    input = "",
    LLMConnector = null,
    similarityThreshold = 0.25,
  }) {
    if (!namespace || !input || !LLMConnector)
      throw new Error("Invalid request to performSimilaritySearch.");

    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace))) {
      return {
        contextTexts: [],
        sources: [],
        message: "Invalid query - no documents found for workspace!",
      };
    }

    const queryVector = await LLMConnector.embedTextInput(input);
    const { contextTexts, sourceDocuments } = await this.similarityResponse(
      client,
      namespace,
      queryVector,
      similarityThreshold
    );

    const sources = sourceDocuments.map((metadata, i) => {
      return { ...metadata, text: contextTexts[i] };
    });
    return {
      contextTexts,
      sources: this.curateSources(sources),
      message: false,
    };
  },

  /**
   * Runs the vector search and keeps only matches at or above the threshold.
   *
   * @param {MilvusClient} client
   * @param {string} namespace - collection name.
   * @param {number[]} queryVector - embedded query.
   * @param {number} similarityThreshold - minimum accepted `match.score`.
   * @returns {Promise<{contextTexts: string[], sourceDocuments: object[], scores: number[]}>}
   */
  similarityResponse: async function (
    client,
    namespace,
    queryVector,
    similarityThreshold = 0.25
  ) {
    const result = {
      contextTexts: [],
      sourceDocuments: [],
      scores: [],
    };
    const response = await client.search({
      collection_name: namespace,
      vectors: queryVector,
    });
    response.results.forEach((match) => {
      if (match.score < similarityThreshold) return;
      result.contextTexts.push(match.metadata.text);
      result.sourceDocuments.push(match);
      result.scores.push(match.score);
    });
    return result;
  },

  /**
   * Returns collection statistics for the namespace named in the request body.
   *
   * @param {{namespace?: string|null}} reqBody
   * @returns {Promise<object>} the statistics, or a `{ message }` object when
   *   none could be fetched.
   * @throws {Error} when the namespace is missing or does not exist.
   */
  "namespace-stats": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    if (!namespace) throw new Error("namespace required");
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error("Namespace by that name does not exist.");
    const stats = await this.namespace(client, namespace);
    return stats
      ? stats
      : { message: "No stats were able to be fetched from DB for namespace" };
  },

  /**
   * Drops the namespace's collection and reports how many vectors it held.
   *
   * @param {{namespace?: string|null}} reqBody
   * @returns {Promise<{message: string}>}
   * @throws {Error} when the namespace is missing or does not exist.
   */
  "delete-namespace": async function (reqBody = {}) {
    const { namespace = null } = reqBody;
    const { client } = await this.connect();
    if (!(await this.namespaceExists(client, namespace)))
      throw new Error("Namespace by that name does not exist.");

    // Read the statistics before dropping so the count can be reported.
    const statistics = await this.namespace(client, namespace);
    await this.deleteVectorsInNamespace(client, namespace);
    const vectorCount = Number(statistics?.data?.row_count ?? 0);
    return {
      message: `Namespace ${namespace} was deleted along with ${vectorCount} vectors.`,
    };
  },

  /**
   * Flattens search results into source documents: each entry with non-empty
   * `metadata` becomes that metadata, plus `text` taken from `pageContent`
   * when the source has that property. Entries without metadata are skipped.
   *
   * @param {object[]} sources
   * @returns {object[]}
   */
  curateSources: function (sources = []) {
    const documents = [];
    for (const source of sources) {
      const { metadata = {} } = source;
      if (Object.keys(metadata).length > 0) {
        documents.push({
          ...metadata,
          ...(Object.prototype.hasOwnProperty.call(source, "pageContent")
            ? { text: source.pageContent }
            : {}),
        });
      }
    }

    return documents;
  },
};

module.exports.Zilliz = Zilliz;