From cf0b24af0275bf8cb821be15462976c9149fe024 Mon Sep 17 00:00:00 2001 From: Timothy Carambat Date: Tue, 15 Aug 2023 15:26:44 -0700 Subject: [PATCH] Add Qdrant support for embedding, chat, and conversation (#192) * Add Qdrant support for embedding, chat, and conversation * Change comments --- .vscode/settings.json | 1 + docker/.env.example | 5 + .../Modals/Settings/VectorDbs/index.jsx | 45 ++ frontend/src/media/vectordbs/qdrant.png | Bin 0 -> 15073 bytes server/.env.example | 5 + server/endpoints/system.js | 6 + server/package.json | 1 + server/utils/helpers/index.js | 3 + server/utils/helpers/updateENV.js | 10 +- .../utils/vectorDbProviders/qdrant/index.js | 397 ++++++++++++++++++ server/yarn.lock | 28 +- 11 files changed, 499 insertions(+), 2 deletions(-) create mode 100644 frontend/src/media/vectordbs/qdrant.png create mode 100644 server/utils/vectorDbProviders/qdrant/index.js diff --git a/.vscode/settings.json b/.vscode/settings.json index c8c7ea99..dde2d134 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,7 @@ { "cSpell.words": [ "openai", + "Qdrant", "Weaviate" ] } \ No newline at end of file diff --git a/docker/.env.example b/docker/.env.example index 77550b6f..70c61ef9 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -37,6 +37,11 @@ PINECONE_INDEX= # WEAVIATE_ENDPOINT="http://localhost:8080" # WEAVIATE_API_KEY= +# Enable all below if you are using vector database: Qdrant. +# VECTOR_DB="qdrant" +# QDRANT_ENDPOINT="http://localhost:6333" +# QDRANT_API_KEY= + # CLOUD DEPLOYMENT VARIRABLES ONLY # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting. # NO_DEBUG="true" diff --git a/frontend/src/components/Modals/Settings/VectorDbs/index.jsx b/frontend/src/components/Modals/Settings/VectorDbs/index.jsx index b1a5a97b..ec25e0a7 100644 --- a/frontend/src/components/Modals/Settings/VectorDbs/index.jsx +++ b/frontend/src/components/Modals/Settings/VectorDbs/index.jsx @@ -4,6 +4,7 @@ import ChromaLogo from "../../../../media/vectordbs/chroma.png"; import PineconeLogo from "../../../../media/vectordbs/pinecone.png"; import LanceDbLogo from "../../../../media/vectordbs/lancedb.png"; import WeaviateLogo from "../../../../media/vectordbs/weaviate.png"; +import QDrantLogo from "../../../../media/vectordbs/qdrant.png"; const noop = () => false; export default function VectorDBSelection({ @@ -80,6 +81,15 @@ export default function VectorDBSelection({ image={PineconeLogo} onClick={updateVectorChoice} /> + )} + {vectorDB === "qdrant" && ( + <> +
+ + +
+
+ + +
+ + )} {vectorDB === "weaviate" && ( <>
diff --git a/frontend/src/media/vectordbs/qdrant.png b/frontend/src/media/vectordbs/qdrant.png new file mode 100644 index 0000000000000000000000000000000000000000..d63e720c5402f3abd7246ffdd0ad6f853063c2a5 GIT binary patch literal 15073 zcmd_RWmH_<(jc5CLV!TfU;!Enu1#YNG{M~(g1fsmZV4{If_rcX!5snwcL_D%ZuZ%`c6rsV+9yIqNg5lI4D-Q*2iUSQ67UBP{*t@@ zprZjJK1@VUfG-Ru8Ew}G51#kjfBs5hdHxa@6hugBA;cZ+?adt!4?JC5)HDQ9~x z9xi8xH&A|78z>(?KbRlP!^y|X58>wFhVXFlvLc{hZcctK9x!JS9^J& z6cwPRj`r-vW{xK2?4I^c0Q?6Jggl|ZPkVEOF_ovior5dXQ<&lJ8BpN&{V)du43p|V z|Abu3ETC`+ssEe`{3py{g+Mq#IXFB#JlH+B*&SUhIk*G_1UNXs9AGdTFoVt2%K>5R z$>!in3qbl;P!i^@rY_b_2x~_Ns(aAJCXQ|hVFreO%=!tC=mGle3o5q9SPhPbF5;@?K>tnVp-+8H}o3Nv`JnVDM{yV)TaU~e4$ zUT;QaX6|Zj=|IIs#UsRVkD#6PKSB6=au@ zd-(zn0QqwNi+q7GsHhvl%FzX;Zf#E`ZtP-gZ(@uPx}W^tAN(6}|L3X*IE;VM!+&7t zY3%f0G!U2!{aafcUDO>N?O@`@4(`T69REK2|Api~+|S<}2K>)IX)eb<{1=SA1Lw_y z2Rw$d5~Av!nfnWyj^x(azb+36V1D4ig2p^XU3fF^4|j_eOOJ+X?xs$*s-N8}w_&X! zJ5FEQS{|KYZCE(YyPemS9)Ov(BMYn&w*4$5q@?zu5^aj?hT;@HTwXo6sh~qEU2CLW zx$Gri#_GnU4#r)Qd3pL5I(3g1LoT6MI5l0O7&=#QFdUsaG)@JRx(63ujyjk`OpGZQ zS6qfM1lI%|qYzgNi~9eK$?sJ`Z%GVC8~Xh#MBBd?fgT%PEin?JQU|*f0@F%K(&3iA ziDEL0y`;~?(F5Q5mX1#QC;7xa6A~<0J@0@eJ|JKU+tmcmBL`|YGpI~nGGY|*ivxUj z`u>!fy@L}AbwxdzEZsc4e17gCG|N0?=fiQjiLK9-5q+G0$O_JhPLTV_pZu;!ma}&| zK_yb{Md+_9KQjARvBK{BQT+49$nP1Hej>l#z)DQPd*my~?H-4*W}f*WXhkxUP3MWwmt<}}u@t1OnjI8#WbQDg#L zE+GRmmUxRn&XscH{1R(Timi$x0z(O>bFZ78~KWw>S4VI`bgSUiIo?!jJ@*Dy5byW0Gxjhi|id+wJH=(3H}I4Px$y? z;hR=!%yIrXk(Pl>{Cnh9%Lk*#6q;N@@epH~ahwt!J>Hx*7dLI>{k=qO26stlebS0E zgoDAO(VMFh9AoTm`WgehjewjT2}7xG{AK!2k;0!*;~2ojD%;0zm;t5YcsAzy{TVR*s-C94D_57=T@{J7j-GkoHA! zto8t3?yrlRw=QkN8%iPlbLQFmGgfXs`LNIN-`pfEXu7hZnJUT^syH)x8qDUVof&e; zO*sz4DT&gB=1YI~`$$7F7DIFfVQfXY&h|W``_a}K#A*cuF?&joKDtCxMs%^33gx!- z8jtV4x_tz)DW=)?pih%<3z&5{nh0MxUQAV{r zAkf-iif)qu=AmKSO0^Q?q#M$0F(5#32)Hp(oCihEl7bl&j9m=Y8WWyn8!8gHS zSTea*IlF?zqG*+MjIPPg{6lm-R;ZAm|F#SxM?5F_ml0dr%AuQ48fA~N#7|kOplPit zwygDLI*$s=<`yAQdWv|lakG{|a#r6Et#6!bZc;l{(dqmTWM&3TXF^nzJGg%F z1mFyn!5-M4!tJ8Fjr=?(xr9%+<-l|Az;%K&RBD7vD_#ERpqz9%g(176^(n^26RO-V z4*<3Jv5hJdA^(kN6lNogRjyDNumOCxs$SD{ zvr3A6iAvLM_0+6z13Uqdgj(0~6IgnK$NmgGepZV?jxBa~RthN0c6=X_jmsB#n0BFEgFtrXYt`*s83o_r@ zYQ%i?`d8bNWDA0VgqZgP(W${M5`RR)cP|sm^eJI%sQ#w{pRkFSaQY76S6<|7! z{0q;^wK6LRaw}g3&t8;c+NRSGacS!-9Nq4FZ4^Jvc=8f|BtA|hyPC=qhNFFW32Sak?sdajD3$)ZnA{!OKQGt+P&-C#u=Xlbbn??VLtROlAUp9R8*?1bxCULebW*@pw)*@wL$_oMcXLr#dfalSrbhC zlu_QX$lxQaFP?MDAWHJu%g`d-49WeM|(sWg;408l~zsB8yFrPKpZ#toEvM0wk+mScd$iMK*x)Ck34ty5J$ z;?~M3?FKVI+Hk7$)fD*{hYNqBgug|MI~{LwlrZN{eLpC{dfy+qmn@_k#j6rD8!R^p zy~FV;*_@R##TthnSn9ppRn}L-p6n=kTx-)q6uhvoIEsy???Jy}Cy(Shh<($~Gxe;AC%U zN9>CwZtBO>o3%T|I{GS2L6TFElmUu?Kg37)m|B0Pcy{Y(`FMZa#x2F4v;8=(VeXY0 zdriYlYGIJRNjaJhZsyP{x07u5I9R75`nJZd0F^u{>!ok5l?-_gVS(;;6O-z3EX+?F?0}ujCEq+V7^$LlJ?M zZG$A2`luz)lr!k-i4eVfznvY-?A9trB0q-cM7rgOj8SqXLS;yyx~-*iafNwQ!iCio zW4u2ikG?j+c`?8zCPR}xVz#dYXICs z%TA|EeSkTknJGM8cOkR%E9Y=gCixw+%)?t%COM)*pmTUQ%Z9^rT#@0yjBZ9FMM$D}ao#NRm3*#+LW^0n)aAfGd`01pA#FtU_?W}05+6***I z=#C{OIr!-1#ZBjJTt@2)z_C@=8s>9Xa2#QZpZMpPwGAO){X4Y`Bb1=!CSu#`!5Ozt zr5NrS6(2HmKk^GiTh0aB2nCheMn)udFYIdbYH$Ni35`SO<>|XDar_|;jIKDPM(K)g zE6W;bOG|=zJQXQF-%cMAY}XAYMzQYO*S1_GYL$8Iupo6-I^%k#- z=Ri|3Xh8H}8E%yk6ri=+0{kl$qO?k*n>%65Z$CiI=gj_^4 z!?`y9EW?|!yz?7*w}=}s;m4779ZSXVQYQpL zqFT#{_htXFw_$DJk!QThL%E=E)7)KXj456$RDGAtCYtv~{?GU5Gv9g3Wv7&opiTv| zHS7kFR)soFx@iZKLyw4tIR&C)a@Nl(17p9S@v5N3Qs9*7-KLbQ@==RbrF?qJT~I-h zKOa`-Dk^qaNz$SuwDapH{X}@QeHySqi$kouD+?c11mBJVvC9Z(pUb08xkbl9XvVuE z?cqrKxB|6wjlnB5Cg-ZEk%_&B>k7&3{sQd%-JCtgLUM)p&3Z9nxvz_-+J$n?321V! z@e2uyjN_F0!ug3}>{V^+wh+R4;|%<((UeM&m}uwJYW0}MVXdF{9JsXEb8)<|gWiVE znOg^h4Z6G=guly5avMrx(y4yj?##tEj~@77#?883=ey0Dv-9)PA2ugfM_w0D{2|AW ziCxufL>Am;^egJNY|N)b`|iY*n*@4s6V#{Ie#i)^XymH+bVCm38)l=^FTe>TB=4cvFsAM8QjYP$;e>Iqio6!ND%*g@kx> zU4!XnlNg^)iF#eKq{@O0bCz&YFvZ^+#Cq0_%`a1G=D_IG*l;vzIJUG=ULT+QHE5BQ z5eqZ7w@g?l)`ePXGhxTz~G}7jD^6- zigm>(4J|?|Sgcv8a9RjtnKEwJ8#fB7Mx9VcU5$sk&yx5nKlH*>Cowq{qW@%`mqmm$ z$u8cVK^nBljg#M0{rSaOqnvMG84!@;oBO)#N8<3_eJsIFFRB{p42(6+C`eGnEbLsr zDCvt($Hlef(V9b=0by#;<*&_P!;M$%*+kFLrY&P}!s|24j-~DNoBL2BLVn%kgIrHU zU$-u7{QmuRb)GBQ;?el=8u*GwmeFu3s&?3_r>_Q!ra`iNqDaSa!p>uyHMM1lR==Fn z$M-AX@c|q?Yu``nncb0GI_U zQK?2*-5mJfxQH&%ajTbKm6?k~BUX}vY?Qa&B21&q)#jYz4KA+ZcP;*(<}i=1W=zU8 z2HkPaIq{QSap7~uKx|ZK9zbwOJ6L4jguEC^(DqLDV>aZ&5>FqcUw27^m&z2vb0@51 zsFcHlj|vz=?9+0VTHp82s@q_bm9uBXc(^!PV0Dk5hv@+uoFeH>cAx<7V<`7f_&mcfbk=pFN2=?#CZc15OXda!jnlMlLj>V9H!| zdll9h>xKnB-QQmAP5|m@g0q%@CZ=6eht8*p%n$~>J3ZZJP#}&$>g%i`hoDIjo!90i z+m)y1p~f=b4srVYj=TUvvx1P-sJP0}B^FCCg;KRYQp7xavlGo*u{jlxRQ;Zl zoRz3<4$wRow_|GVOIo`|ZX&@eJt9mQ$nL!7HR-Xmk6pRGsn02Mw0e5Bar-3l-r>z0 zm(1+!?ZNWn#;11m^ZM<^sWH}>;BvXvxUo`BL{DSIh=$#EM%n$xLEZutfe)q*@hWG2 z{`CDiTU*?t6D?moe8CTYyLc`;!#wZ*n@g?Nlyb32oLW$PCIV|=j1B#v?@}?+?!)jp zKUApThxJs)+EqyQQ~V({UJ{L`YTIUTXA{*howggHRQoQf>mecyX7q1?unBh{pS1k! zC`_ZqWZbIXOzhRkgm#>w)Y#oML0Re@0tmNZx>5wB!PimK>P-@w6MIFz=|WCPF=+)^ zV?Dx>WgH(c%9LTqvrZ*l_2Se`WDdGY;eh>SMhEf_)&mjK<=>=fa%nQfuPozJ-mhHp ztF}oMT!kaA@%pEZg>(&oocQ)QB@3I#7P{sl;L=Q9uY*0~wqBafO`j`d8stnvA#7u+ zi-yt)mMRl3gcln!*VeUN`WC0tOt=$tgE|xu{Slbr>{8PzRe?)_vbvT^iX?its)@Q2o7dJhbfWeiBE@3X+t@Z@ z&LZmPOfQpVxODL4oGrw@@+s&{kSJFy8hKn|o|<`c3YG+HtHKJmUZRzIW>(ZnIopJ8 zhtN6*wfCEqzTIywp^7t#tgJz0w8bf{7uJxOZ3hRL!9 zLBoZKBu(~c)J86RSV-t8y)WSsFiM)6e8^g7+2g2yk0~r`pS`r_AhIFb*Ltr+CniW% zoIY0dXOGFX`H|hYaA!X8iXl*5jv$*4gOt2tU{tgaNZ0@6gg-rAPJH~Xz9B9oQA2w)U?+EYhnTYr21N+47B*TiXKNfGx&WfR`M?9MKwy?RIM--wyaQocM(i1fV6gmsBH>i1h^%?kDzfRAhNxMeXKc4~|Dkp2n7J|3Ia5Si z&;^;qFV++98SW)%?h$p_t)G|GzlXzFvBVOm z6&)Org5b zmQ?g%sH*1Jtm@8uJV7gR956=W$1R-)=P)Bzw;<-eU;1n**SGrUi8Nd>DHXqasxD-& z8b4nMkI3KpMWp~uA;SXeo*B(FL&NUU`jYSIh!%GLyQ8*#%iryLuzmM1mu7{7E%f3z z)9(FhbiKu6E7YfZoD$(yp`xrro$jpgLB>M4AeZ`gGshT#Q31&>LaB{OR<2-$@Q9lm zKOoP{-UP2UpEO@A<@0Obb3pE05fkg^*Ka)Jb2M3zs;o1m>KP~3QCEycKwsj@&!&F% z_%#ubJ07*$p~`{1bg>sjDmX_;V$458N)M*kKy>y05bxEuh~j>8XcXY*|EMycRyl(b zid8v~j0757CX9uIEpQ21-Uf3MZTcjeAh|Zh#@+0zXFoU}kGnEy4v|w(!da888g=ny zP?Pt1!&JzLl@+0mH3trtup=9NUhbX}vzyg=l>)@=Z3GZh)jz$E>!krG^1oYIgyQA{ z9;H%&Ns@I3*V%4zkfdHJL#Iu$cA;?hOv=BAD3m&T6JGX%F=;4KPV&3CppigK6dHh4G*e-fLJ)Er z65$yvRxMYK>~!<7Z1^>Gcn*;2-8R3&O0auK6i>xFgPik#`8|wdTHkZww*D|B z#D{8MF393Gf*%T9`}OOsMXhWl-yffXm81KfPI02x%!?R{52JJuQg*=cCP^njqxGt< zY}FX|+W~@+5OeL=i#Z9r;GhWzLJe>>#THKP)^XnfLDP}OezS8c{hnUQQ$_jsuPmi7 z-`iA^YMC~J;su9c^9Fu5z@P0v?0=IK*r)a6J)a0yY@Yq7iuk*zlkz=!+T#7!t6X7wjMa9k;Gw5o4iy0~sSyc+NKizKI@P-T8$Zlsq9_%D7} z4zy{CIU33+l}}h2{rZ*CcbC`ZxQ`L7`Zo1s09YV&XDM0sNVE4P1;gLJudoFYVH4F1 z?^?xNt>OurDH{(fNIH=Dy8x%YT4Y?39Tyy5jtefwq#zD@PatWBxlR^vy%jfpAX1=F zxqQ_w0e_zf0Gf|l0BRL9q*34OrmY*uj_exv`PjF7qs;S|(cXE=qq-n*#Sq{$hl=QHvrST|T1m6Rw|{&NCtLiZzj3}U zy+5&Pod~yYx?NkMUK4 z=>p$o2&WvLK){K@n7aveq+KfCrdQ{mh*v5|H{N=K!g3GXN;yctkQ0fn2NHINdykp4qYGTA%Wo7)Xj<7 zkjJ5l-!88Mz4%hKsG!I&l?4g@vp;&r!v1^po{clFNFS#L*J#liL;x`n{NphHw6*69 zCp@vtj8ZuFVo@-0sYQ|_p6dH)d_`PG(}=;T@NJJh%Dr+};n!*~;Gw($Q?17uW4|Ja zMoZqsFa05Vu8>)cj$LpJt(LU_9v>thgmoRMS9Qd~zd42lGq!)~m zi;f5qpY#b!oStFS*De(1@yaV(ouD%6_V6m6S#}>cuNO%TOH8tJzW2H6RyS`wOT|9x z(6$ApaHe;gH@9Z9M0@7z0g5Q#X8|@W zBK&v-tCovi$GIC=8+*Qjv3sr>oEmKQ2j+O%@?~k)Y1AVP>!CmtAuSM2 zXmh)8TNgSJ`g!ZjYP#bkW8*c=4Pr%8>l{$UgSt$jP>v~)l_>smc>C)QtLw+}ln7DRBjM;YHH{NCopx);6z zwbW}duSLcup`siah1p6|Eq)nW8}!?TH~5ww#BD#sqLEh(AtXE)>t2P{!41)l>5#+u z&a5>0)43)5y9oyg}yB!@(lC)@pldcr{X9e1P__0XMs z0uxrC4;r|cvg%JS&O(&WCm?&Z^9Dc1S7ER75n_ho6c5w6RF;t=^;?7U=H_LeIgA+@fwlt4~gUxR@^oy+UU)^P|dh4G!3l{z%i7#9wTqv4g=K z@tD|Gu<=2O6|{qw_PX|k`EXT#dXG#dlT#OBY!_;Zu`~8-HUBVv?sZv4L+ZsL-{Lnl7dU>K%?#@qebsl!uok*-uTuC)0=ZDVj36LAiDT zRC$#mWJr;d)+t}St*A8VcM{yQ2%9j$D_$f&UG|eiuS=9tnnLoj9;7`7rQ^w^E0xg2 zt*iru#K0opCXK8RF9|hsqHs(kdvtQ_ZZwj#2>v)m{Iq-yqNx$Szt4@jz}Vimq&s@D zcd!Kt!y?TTrQAWw2<>Gk>b49OSK;zpw30t>1yz;lF@6socV1?VhnB#~SxfPIy7zLhyr`3e~ z1Zm>MZtLkra0y!1x_-->{q5}2=~*5pvc$??#Y#3DWml{Qfwhj$OXqwTN~zK(zIpFY z<@d3S@X6>e4~yWAT2t~J`Uq^y_$0NdLNat0YbWKsg$4K|?t za0&9P=Fc=)ke{P&{H#r$ix+HXAFVIq(cA57Q)E=M4NcDy##68MVDVDVx_Js_uJ2%>-Y(FNvaWxsf+<3l+weug z)IE9jn)MV(YC$PX?dBQ$W4ryk;c6cLH|$#@sf(WoPovp7NOVGLD)CANKxsT2vwvDU z(>Yw)EpLn}xjaVMah}Nc?N@Lq2is(3RxXcl?|j=+g}pue^676K#vZ4{t1v*z96zIX z)R?IAC~DO7rhQP!{!3p?p@Mh25tHLgFn&scCgG4A@8Ju?yY2`g~t^pO|{w z;eX^#Gd>SnD*oahlBO(TdbVu(UMD(viJJK2@@Lfr152OyR1ur_s!1?iqUBa^Rg391 zs~H9T2l~QK!=2UgWmCnQPgmVSORXOl$(s5nk4dCo?`QN-3axY=!q)D}`MmNFHtABz zOwGhVz9A;?Q0|>EBYsPasbs1GQhk?FP?{p0EvMSMF$EsnqQpT|c<^(LBCKPaxVf=e zynA=BO~#pBhEZCEDa8DF_HSpqFp2p!&ZSqH2x||HvyG<8z1}I|33RvNzN9BFonwF? z|1JQdDAbzq16PpVV4=#{R_yAiJoOkU#=1ZrA>b=x3h|=1fbXOhzU<0;+!b=*CWClr zZoI5EoeA1{x@h|z-0{r1&i9&YeoQf3H9;j?P41n!&4>aNkOAP!x&!6QC-PU(Tx8jM zNCDxv2EA~J-e$^(NbGgCuKKBFf8p82^^yVKL;}bM-oihUuwINPOAi#V#>lfj?bp{? zxV_j9VCo?SE=Ezwy~||*)VC0cz8yTMnmcTqD_=<*vpyv(miYv@3w2_WX+>j=U(G`g zC%S5WzOJK8c2mE5R$1=QGJETG1?@V-LjJM?BlUT77N6n%1q0Zri+f@pC zYEdqCmSXi6&fbojgvQI>@g;K?n!jFqUSS^Gu#w%(E&YQ1>>E$o(zN+y*q+i(8aigj zJ9@qMSmbJ3_xifuLtA@*#_yUKIHK1>6uHtGWO@{L6mz}&S zXm#ekqb|aL%gsYnv50q6njPj8C*21_;IirxV3DWSx8~rYDLcVE@;as&A5R0P<+G0O z5TKq_Rg<{han5qC7jgd%!+j*|EN$d7eVpDt;(^&xO@!nu1qBNcsI86uJVrf zRyF?Pk9C^G9*RAvd37CUZ_eGx^Sj&4LvFz=`W3A$!&?FL*J4beDwv4EZwgrm_-bg@ zM=jLPVYd<d9*yVu1Xw@k;iq?4tLEfus3@k@$4t+OTg|4R78yIsM zWyCa3$q$V37$fq5D54fTrZ2Z~9F7+nXOK-`X;zyKZIVPWB+mLM~se9xA&Qk7a z6OkDk$rgqqP#}y#tK^%wb1|h|sYAHW0;z*!mK%~fIg7>I@GC~ru zq8el~RjlRi?30ge(NpcYS^j3Ry}rxGW5#Q@4__^m>FS}bWyYT9k z?Te4{_U$eAwOE?!QcrY0`C?0T&w?(?e0zJ3k4FWkaFRsA(`&;tU?)6%O}bV3>TuDb zeS+96^&Md#ZCfOLI@O>!om}j7I7TLN^NZ+kL7O(|eYQ7AomTLG$5L0jU((_R`~EIV z(>d?d3w1NacC^juyF;CO<_P!t z)hVjjXZ|5ADq^ZhkUdz`TVA@|p1bbNX$vyVaKSlxeBV-;1TJ9;q;{~r5q=)inrK6* ze~uXY1k|d_accUL$AO7v3guX7b1?fzj-yW19-niD9v?YPcC++Gq~>qS(p zW?#>!MM}_gb&Wg@I10+RUfDc!x!IvfhvN?dHzo)fluA2EH@*i;3faVQGz$steLGb; z*qem7fwxHlZl_wW*K<0?M{VUL$O#Iq^FgkvHdWmSm!W5w9VXDJ+Y9~y2}-i~@SdAS*L;xL_4D<=Z2D9k5Y=`=f5tYgrE1o~#N zlx`E^@n_plE8FkZfB9`dt4l_Yx*p$Mid-`-`FI2kDmx?BZUkKHf?QW;9}<+3BwG(?uQ36t zuEujws8O&I8BSqtF`2cb4t-%J`zWy;ph&605o-$bE6i zHkVLGC@^xihe7S92jB1Py~#fT{<51XAUw$Im%X*yU(-_W8yzc;qH;RAb@?>3GKUA+ z2Grti};&txJ|%8XuWK$jly66LwcP$_r6+b~Swm@;`@yz63;{ALw}TR!%Y!-@YhD05=3D(^$do%_3N_?{mKy_$3UHt(A#z8M~dyDPhz=|I$KnwkYD(Lzh zC{)1A{;C56cRVfm?tO8CJ9FxJViqMSxpKtm{0#G$n}8S=w?uE%;4I?x{>muh z { + result.contextTexts.push(response?.payload?.text || ""); + result.sourceDocuments.push({ + ...(response?.payload || {}), + id: response.id, + }); + }); + + return result; + }, + namespace: async function (client, namespace = null) { + if (!namespace) throw new Error("No namespace value provided."); + const collection = await client.getCollection(namespace).catch(() => null); + if (!collection) return null; + + return { + name: namespace, + ...collection, + vectorCount: collection.vectors_count, + }; + }, + hasNamespace: async function (namespace = null) { + if (!namespace) return false; + const { client } = await this.connect(); + return await this.namespaceExists(client, namespace); + }, + namespaceExists: async function (client, namespace = null) { + if (!namespace) throw new Error("No namespace value provided."); + const collection = await client.getCollection(namespace).catch((e) => { + console.error("QDrant::namespaceExists", e.message); + return null; + }); + return !!collection; + }, + deleteVectorsInNamespace: async function (client, namespace = null) { + await client.deleteCollection(namespace); + return true; + }, + getOrCreateCollection: async function (client, namespace) { + if (await this.namespaceExists(client, namespace)) { + return await client.getCollection(namespace); + } + await client.createCollection(namespace, { + vectors: { + size: 1536, //TODO: Fixed to OpenAI models - when other embeddings exist make variable. + distance: "Cosine", + }, + }); + return await client.getCollection(namespace); + }, + addDocumentToNamespace: async function ( + namespace, + documentData = {}, + fullFilePath = null + ) { + const { DocumentVectors } = require("../../../models/vectors"); + try { + const { pageContent, docId, ...metadata } = documentData; + if (!pageContent || pageContent.length == 0) return false; + + console.log("Adding new vectorized document into namespace", namespace); + const cacheResult = await cachedVectorInformation(fullFilePath); + if (cacheResult.exists) { + const { client } = await this.connect(); + const collection = await this.getOrCreateCollection(client, namespace); + if (!collection) + throw new Error("Failed to create new QDrant collection!", { + namespace, + }); + + const { chunks } = cacheResult; + const documentVectors = []; + + for (const chunk of chunks) { + const submission = { + ids: [], + vectors: [], + payloads: [], + }; + + // Before sending to Qdrant and saving the records to our db + // we need to assign the id of each chunk that is stored in the cached file. + chunk.forEach((chunk) => { + const id = uuidv4(); + const { id: _id, ...payload } = chunk.payload; + documentVectors.push({ docId, vectorId: id }); + submission.ids.push(id); + submission.vectors.push(chunk.vector); + submission.payloads.push(payload); + }); + + const additionResult = await client.upsert(namespace, { + wait: true, + batch: { ...submission }, + }); + if (additionResult?.status !== "completed") + throw new Error("Error embedding into QDrant", additionResult); + } + + await DocumentVectors.bulkInsert(documentVectors); + return true; + } + + // If we are here then we are going to embed and store a novel document. + // We have to do this manually as opposed to using LangChains `Qdrant.fromDocuments` + // because we then cannot atomically control our namespace to granularly find/remove documents + // from vectordb. + const textSplitter = new RecursiveCharacterTextSplitter({ + chunkSize: 1000, + chunkOverlap: 20, + }); + const textChunks = await textSplitter.splitText(pageContent); + + console.log("Chunks created from document:", textChunks.length); + const LLMConnector = getLLMProvider(); + const documentVectors = []; + const vectors = []; + const vectorValues = await LLMConnector.embedChunks(textChunks); + const submission = { + ids: [], + vectors: [], + payloads: [], + }; + + if (!!vectorValues && vectorValues.length > 0) { + for (const [i, vector] of vectorValues.entries()) { + const vectorRecord = { + id: uuidv4(), + vector: vector, + // [DO NOT REMOVE] + // LangChain will be unable to find your text if you embed manually and dont include the `text` key. + // https://github.com/hwchase17/langchainjs/blob/2def486af734c0ca87285a48f1a04c057ab74bdf/langchain/src/vectorstores/pinecone.ts#L64 + payload: { ...metadata, text: textChunks[i] }, + }; + + submission.ids.push(vectorRecord.id); + submission.vectors.push(vectorRecord.vector); + submission.payloads.push(vectorRecord.payload); + + vectors.push(vectorRecord); + documentVectors.push({ docId, vectorId: vectorRecord.id }); + } + } else { + console.error( + "Could not use OpenAI to embed document chunks! This document will not be recorded." + ); + } + + const { client } = await this.connect(); + const collection = await this.getOrCreateCollection(client, namespace); + if (!collection) + throw new Error("Failed to create new QDrant collection!", { + namespace, + }); + + if (vectors.length > 0) { + const chunks = []; + + console.log("Inserting vectorized chunks into QDrant collection."); + for (const chunk of toChunks(vectors, 500)) chunks.push(chunk); + + const additionResult = await client.upsert(namespace, { + wait: true, + batch: { + ids: submission.ids, + vectors: submission.vectors, + payloads: submission.payloads, + }, + }); + if (additionResult?.status !== "completed") + throw new Error("Error embedding into QDrant", additionResult); + + await storeVectorResult(chunks, fullFilePath); + } + + await DocumentVectors.bulkInsert(documentVectors); + return true; + } catch (e) { + console.error("addDocumentToNamespace", e.message); + return false; + } + }, + deleteDocumentFromNamespace: async function (namespace, docId) { + const { DocumentVectors } = require("../../../models/vectors"); + const { client } = await this.connect(); + if (!(await this.namespaceExists(client, namespace))) return; + + const knownDocuments = await DocumentVectors.where(`docId = '${docId}'`); + if (knownDocuments.length === 0) return; + + const vectorIds = knownDocuments.map((doc) => doc.vectorId); + await client.delete(namespace, { + wait: true, + points: vectorIds, + }); + + const indexes = knownDocuments.map((doc) => doc.id); + await DocumentVectors.deleteIds(indexes); + return true; + }, + query: async function (reqBody = {}) { + const { namespace = null, input, workspace = {} } = reqBody; + if (!namespace || !input) throw new Error("Invalid request body"); + + const { client } = await this.connect(); + if (!(await this.namespaceExists(client, namespace))) { + return { + response: null, + sources: [], + message: "Invalid query - no documents found for workspace!", + }; + } + + const LLMConnector = getLLMProvider(); + const queryVector = await LLMConnector.embedTextInput(input); + const { contextTexts, sourceDocuments } = await this.similarityResponse( + client, + namespace, + queryVector + ); + const prompt = { + role: "system", + content: `${chatPrompt(workspace)} + Context: + ${contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, + }; + const memory = [prompt, { role: "user", content: input }]; + const responseText = await LLMConnector.getChatCompletion(memory, { + temperature: workspace?.openAiTemp ?? 0.7, + }); + + return { + response: responseText, + sources: this.curateSources(sourceDocuments), + message: false, + }; + }, + // This implementation of chat uses the chat history and modifies the system prompt at execution + // this is improved over the regular langchain implementation so that chats do not directly modify embeddings + // because then multi-user support will have all conversations mutating the base vector collection to which then + // the only solution is replicating entire vector databases per user - which will very quickly consume space on VectorDbs + chat: async function (reqBody = {}) { + const { + namespace = null, + input, + workspace = {}, + chatHistory = [], + } = reqBody; + if (!namespace || !input) throw new Error("Invalid request body"); + + const { client } = await this.connect(); + if (!(await this.namespaceExists(client, namespace))) { + return { + response: null, + sources: [], + message: "Invalid query - no documents found for workspace!", + }; + } + + const LLMConnector = getLLMProvider(); + const queryVector = await LLMConnector.embedTextInput(input); + const { contextTexts, sourceDocuments } = await this.similarityResponse( + client, + namespace, + queryVector + ); + const prompt = { + role: "system", + content: `${chatPrompt(workspace)} + Context: + ${contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("")}`, + }; + const memory = [prompt, ...chatHistory, { role: "user", content: input }]; + const responseText = await LLMConnector.getChatCompletion(memory, { + temperature: workspace?.openAiTemp ?? 0.7, + }); + + return { + response: responseText, + sources: this.curateSources(sourceDocuments), + message: false, + }; + }, + "namespace-stats": async function (reqBody = {}) { + const { namespace = null } = reqBody; + if (!namespace) throw new Error("namespace required"); + const { client } = await this.connect(); + if (!(await this.namespaceExists(client, namespace))) + throw new Error("Namespace by that name does not exist."); + const stats = await this.namespace(client, namespace); + return stats + ? stats + : { message: "No stats were able to be fetched from DB for namespace" }; + }, + "delete-namespace": async function (reqBody = {}) { + const { namespace = null } = reqBody; + const { client } = await this.connect(); + if (!(await this.namespaceExists(client, namespace))) + throw new Error("Namespace by that name does not exist."); + + const details = await this.namespace(client, namespace); + await this.deleteVectorsInNamespace(client, namespace); + return { + message: `Namespace ${namespace} was deleted along with ${details?.vectorCount} vectors.`, + }; + }, + reset: async function () { + const { client } = await this.connect(); + const response = await client.getCollections(); + for (const collection of response.collections) { + await client.deleteCollection(collection.name); + } + return { reset: true }; + }, + curateSources: function (sources = []) { + const documents = []; + for (const source of sources) { + if (Object.keys(source).length > 0) { + documents.push({ + ...source, + }); + } + } + + return documents; + }, +}; + +module.exports.QDrant = QDrant; diff --git a/server/yarn.lock b/server/yarn.lock index 2ff2aec4..6a9e1669 100644 --- a/server/yarn.lock +++ b/server/yarn.lock @@ -173,6 +173,25 @@ dependencies: cross-fetch "^3.1.5" +"@qdrant/js-client-rest@^1.4.0": + version "1.4.0" + resolved "https://registry.yarnpkg.com/@qdrant/js-client-rest/-/js-client-rest-1.4.0.tgz#efd341a9a30b241e7e11f773b581b3102db1adc6" + integrity sha512-I3pCKnaVdqiVpZ9+XtEjCx7IQSJnerXffD/g8mj/fZsOOJH3IFM+nF2izOfVIByufAArW+drGcAPrxHedba99w== + dependencies: + "@qdrant/openapi-typescript-fetch" "^1.2.1" + "@sevinf/maybe" "^0.5.0" + undici "^5.22.1" + +"@qdrant/openapi-typescript-fetch@^1.2.1": + version "1.2.1" + resolved "https://registry.yarnpkg.com/@qdrant/openapi-typescript-fetch/-/openapi-typescript-fetch-1.2.1.tgz#6e232899ca0a7fbc769f0c3a229b56f93da39f19" + integrity sha512-oiBJRN1ME7orFZocgE25jrM3knIF/OKJfMsZPBbtMMKfgNVYfps0MokGvSJkBmecj6bf8QoLXWIGlIoaTM4Zmw== + +"@sevinf/maybe@^0.5.0": + version "0.5.0" + resolved "https://registry.yarnpkg.com/@sevinf/maybe/-/maybe-0.5.0.tgz#e59fcea028df615fe87d708bb30e1f338e46bb44" + integrity sha512-ARhyoYDnY1LES3vYI0fiG6e9esWfTNcXcO6+MPJJXcnyMV3bim4lnFt45VXouV7y82F4x3YH8nOQ6VztuvUiWg== + "@tootallnate/once@1": version "1.1.2" resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82" @@ -526,7 +545,7 @@ buffer@^5.5.0: base64-js "^1.3.1" ieee754 "^1.1.13" -busboy@^1.0.0: +busboy@^1.0.0, busboy@^1.6.0: version "1.6.0" resolved "https://registry.yarnpkg.com/busboy/-/busboy-1.6.0.tgz#966ea36a9502e43cdb9146962523b92f531f6893" integrity sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA== @@ -2505,6 +2524,13 @@ undefsafe@^2.0.5: resolved "https://registry.yarnpkg.com/undefsafe/-/undefsafe-2.0.5.tgz#38733b9327bdcd226db889fb723a6efd162e6e2c" integrity sha512-WxONCrssBM8TSPRqN5EmsjVrsv4A8X12J4ArBiiayv3DyyG3ZlIg6yysuuSYdZsVz3TKcTg2fd//Ujd4CHV1iA== +undici@^5.22.1: + version "5.23.0" + resolved "https://registry.yarnpkg.com/undici/-/undici-5.23.0.tgz#e7bdb0ed42cebe7b7aca87ced53e6eaafb8f8ca0" + integrity sha512-1D7w+fvRsqlQ9GscLBwcAJinqcZGHUKjbOmXdlE/v8BvEGXjeWAax+341q44EuTcHXXnfyKNbKRq4Lg7OzhMmg== + dependencies: + busboy "^1.6.0" + unique-filename@^1.1.1: version "1.1.1" resolved "https://registry.yarnpkg.com/unique-filename/-/unique-filename-1.1.1.tgz#1d69769369ada0583103a1e6ae87681b56573230"