From 5bf4b4db58040e367c3ce5a76d4f482aa7dd1a11 Mon Sep 17 00:00:00 2001 From: Sean Hatfield Date: Sun, 19 May 2024 11:20:23 -0700 Subject: [PATCH] [FEAT] Add support for Voyage AI embedder (#1401) * add support for voyageai embedder * remove unneeded import * linting * Add ENV examples Update how chunks are processed for Voyage use correct langchain import Add data handling --------- Co-authored-by: Timothy Carambat --- docker/.env.example | 4 ++ .../VoyageAiOptions/index.jsx | 50 ++++++++++++++++++ .../src/media/embeddingprovider/voyageai.png | Bin 0 -> 20060 bytes .../EmbeddingPreference/index.jsx | 10 ++++ .../Steps/DataHandling/index.jsx | 9 ++++ server/.env.example | 4 ++ server/endpoints/api/workspace/index.js | 9 ++-- server/models/systemSettings.js | 3 ++ server/swagger/openapi.json | 22 ++++---- .../utils/EmbeddingEngines/voyageAi/index.js | 45 ++++++++++++++++ server/utils/helpers/index.js | 3 ++ server/utils/helpers/updateENV.js | 7 +++ 12 files changed, 150 insertions(+), 16 deletions(-) create mode 100644 frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx create mode 100644 frontend/src/media/embeddingprovider/voyageai.png create mode 100644 server/utils/EmbeddingEngines/voyageAi/index.js diff --git a/docker/.env.example b/docker/.env.example index 7fedf944..23789af4 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -124,6 +124,10 @@ GID='1000' # COHERE_API_KEY= # EMBEDDING_MODEL_PREF='embed-english-v3.0' +# EMBEDDING_ENGINE='voyageai' +# VOYAGEAI_API_KEY= +# EMBEDDING_MODEL_PREF='voyage-large-2-instruct' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx new file mode 100644 index 00000000..33ce693d --- /dev/null +++ b/frontend/src/components/EmbeddingSelection/VoyageAiOptions/index.jsx @@ -0,0 +1,50 @@ +export default function VoyageAiOptions({ settings }) { + return ( +
+
+
+ + +
+
+ + +
+
+
+ ); +} diff --git a/frontend/src/media/embeddingprovider/voyageai.png b/frontend/src/media/embeddingprovider/voyageai.png new file mode 100644 index 0000000000000000000000000000000000000000..4fd57eaac410eebba278abadf2a2ed882de7959c GIT binary patch literal 20060 zcmeIacRbhs{y(h9h|I_)t7Pv@Hlae<`|XYFO$edPD4E$LWN)&|D0|Du%!tga3jH3h z&pF@oJLi0_>-yvR{jT5b`r~vv-9FCSd%WJS=i~W!+}E@4ySJ4uU8KHjP)^2TqQR$XlCRIC;2BUAy*=XZ`JxEK&dX7-tVx`@cTK62*hIM?0V$-Q9Tj zxcUC&DlJjs?hoAU(f@Ygvi9!(_K5uh$)dUGwI$L|d7A*t=hozVGn#~A z(QXf{9T~Y8`6PLe3$%amPw(*8BmerQJpUIT{nyj}>6`w)ya}@I?ye6k{&6E5-MG#F zdc*^{d)%FEaTk*G7iOX8L{nLdb-$Wbj@^6p8%|L$PZ0_oYzW2}>Ep<%` z?dIg+ibDT$1^oRD2ow0YS8tg_VearNuwq>u)dlZ||gJ0dFM4FT^h_B*rJm zCn6>!%KMMk{PoNK_L6%}9w-~+Yf1C}|C?q0*W2}9U-e&a>;I;OcK)Yb zg{<;l+Z9<~_yw%5z`v|7JSHyd;cnyPDy@w+cVtvCcei?gwzrf-p8a3H`L}QPzg~;b zG5)0x|8a@l=Fb1E1mVfzf0c`q>pdqYdue%dM^AIc>sn|>OSCK6m0gnO-=FdSbiM!8 zivI06(3JkEoO%Ay)TGOTEzB`6R0&iSWbb*WejjVQ)XGTQS~h&@CH-{%aoBgfy`ERe zKQRLHPHQnS0^ObxVx0TW-~Zf$|G(4#Oz%H`zNbC(zO4AGt7S}}UY?QoJxR1PhwBr0 zv-)7#b6-h~xMeRW8o5s>%04-;)K_f1fBA~+O|cISA%S5t5i=J*YSgjtJ^y{%>dnSc z*U8(>PqfLZE8WNb>DP#QUym5WpSib3k2bhS-Gdl$gLclT4C28L?{OGqui?TE41ri_ z@HnF%#($phf95_oOQgEpxze^e=<^N(OP07j1G8Vu)y>UiYewuc1;zV;0itI#B1)&f z4y23)Gg?tWB@-K);?aEA&y+P?yawc?BeEp@l)N_4tyffH@+nLs!dXPRet$k#yh{a+>^v>e*Wb2)PbHu zm2|4wE|gU*!CT`={FCdeiye{Wj$gzDDr0pD70t{T&tqbe(9lG-{5joz&QhQ?LnBM9 z@NglOnS%rW^!JgnqhpEcT%F63Qq_Zz&ca`^O(ro{<)1Q2NK6+oTp{ZV48*{~!C}6B z9ak-ZQ}!9PkfaY0eR5V$MO0&>WQxH3elZr+x98g+91}HqaFD!dyIUY?F#kWi^w%WZ- zi8}6*IeQ!yhI@x19ySKo$BHQ5qpgK$rxu}dm52Zy)8;4v>wa(JfVoUrOiBW5fnw6r z`s+?9!VZlVt}C9hi7a?SLqoVkM70~o!#VZca!N{1JBFxNY2oY2%{4!gx_blm+0h5e z4?{W1FJ8T}WeTx9H&<eV>9py#FFDZtMQHksiTP z=YE%sajrFY@t72T+GSXqZUWDapNLJ(c##le|ApJ7rh%B-U@?atCXV7jO(KXn5~{@w3XH3aITsO zpKWF0$%$X8^rI=xZsVMxOywB*wV8T>a4}ipfQzE5$6F2C&dc589pw7d!EJ$2Q3OzK zqjtZ1^b<6T4TLbpg>Kb7-Qi}LML&UriYnak%exmzWLUd-fqYNDezi=M@FEO6V6^7@^r^f1eL1SDkxf@wMMWjq z|4~3d0PEoNhOQ!O^+<42V!ND5t0swNrX>%{+u|9R&02kZ}IB(Tnod42MAZMW{KDdEQtgMQvDcxeq(5xu9jq;18YgpDRICIL%i1G07=taP3qX#Sd5behFBK7 z`-M6kYCKI}KP{4A$=1R5-TU!lcejgy<0TIt`v$xNz0cZJD8k9FzCv>CA`Vj_`}^*- ziKfq#a@Yle(yZ1fY{uS_TUb~Wy~=%}!VP<6!)gtq%gD6tXti}f{9;1Abud01vWPE9qFGeqN}h{l+;4DmM*GE-cbJ+%WTJPh$PNj_B}t_ z&Nn-+d_*lfVOyQVhr)(ZoZ_+z)8Z5IPdwUghbSPY1UH#a`<>&~OReU+6>UI-qzS&6Z)EsUS zwpY%UqeA)Z$b&v-E8+_}OkGZO!@|P){GCRzY|?A)Q7CkSn6We|M=hs|5kI2GcK+N^ zS0_NGigw=ZWDnbQ90mr4m~<&$7;OVf1Zw>)TC6|#f6U&we#bL_ImMykvI9q4Z^QPoJt5@-^z$|mcqZn z-QE_A6YseU6`pYP9YUtgWTxywrKgsM(K7QBjeXk|4tLek{vN!gBx9 zUtwWkH*IXL!<`Ko9K5?cW0B^Hwn<5Ki;Ii9Svnz~fQByr_3Kwg78Wca+tD5OU8C-J z4qfM8`wuG5P3|mwKz6ihEMpHp{ll*$etVy;b(_%H4pIDRaa$#X(eng8qW)A`!LU%h z{Ei9p+TvsBUu!v{KKuC{2LahR$3Isyc44|?Y33-=-`HeWMmo_QnNut%o?cHX)XvdS z7e{I1;xK_+wm&S(^SHQ>m)Gtrb}s4Q|TS^$C1CFJG1*7ZEe25OO?V@aq>}aQX0Ha zUS5uihbOD4`9g8-N!azdGILB5-%TN+_&p**2I;50?HNTF1>;yY31C z)!4gwdSS5h&Ed|esHw@rt1VdXiYR!{MX<8il9}DUP3ZsY+cjD#pUdaYoy*tE!N$Hs z9fEX6c+vfm)$1M`;~gP*WV{deEG^M!b~+lfpRCVW@L*6;3fW$Md#_-z=89qh`t|GA zX3O32Rt~pES81FY7xpzgV8X5!mZ}#?MDd-R{1~Wv_=C-$%4VU;uKwbXF!Y{sj}0CB zsVY3L?O8>C|K>?sq62khuKC4TwUo)ohzRGOduXG2*9+4%4)Ql|Vx;@+sW$EptC72+ z@LXnF0%R&GDvTBv%UJF@OjXGi>Jd^;De8YxmrN|Z+fIgi@gl#kRi*Vn z=VMG9SQd*FC2|*))q#vI=;cFscNk*A@}wsADKw96$XZ8TWnwx%Ao+tp`sY`&NS-Ha zEQ?D^d{AMd1=?NAJh-d0Po6wE-XFQsN1fefcGJixb;MdP`$Ge45K>aoG60w_A5C=N zV_-af{5b!9AqCuJQs;c5a#YHanB%(&%Xi$o| zvM5DSK3?fd;g~49^5Uw~x8Yo5Uhn?iZkgVHf@j|3d&F%}WB<+O;9eLYx0`t*?3ncD zPy_;2y{)insZ;%y`bt(-Y_MZ2Exs(I%IA$Nd?1o(k7ZSB9G|&1wYWK53nM2CU^ony z3#|+e3cWUpO|Zz9pdH*aGt0E_5uzlxe*OAQS=n>^{QS>fzCA&eXX`bF8G&Bs#IOvAt4fX}^{6`LkYR zfG!}x{qK`x06wd|Z)=98h&U+?q)UC-a%Fdr-S=#~HFyqn;dYAfCBRwAR#tEBoBF`V z*Emc+{Wg;4`#QCPb7{UU7%3eL?U$bMH}3P=j$Y3*y%WoVR1R!kX!8k^_F4&OLA!AR zWWf0BuVvxZ6JIdr7NjKj*x%oIcKW;As*jq5m6g4cWg+kDOcJHV*xUPhE&ft$8m~gG znEDM})vgmQLH8tlu6MhJO+Xpg+pGRE+2@-tCp&xF+xvw(XTP@_mRU*D`k{(4$80rf zyuPNQR(g|@L|w8+Ol>)nk=iag&Z0Nnc zy~{26`PZ(-0+MPkGygyg9K`2fo!!oJEa=))8Vm+4Ez;&cXTE+v`b2J7skm&##*)6y z&0Q!gZGI12r2ABo5(OnK2MtYQeRkO8?CcB|7xyN#HDY>t!e>gsJ3Gz*QbBIQNTtas z9~mBg9v}Y%E(~_(^*5eF&9$sVR@Fwa`Mqo{#eb`Qm6)1({hsLt6kM5E+xhR)wF8#eEaIKgkA5K_8Uf-!)-=S` zJjALDxZrzTy3}#JyToVy9_!?IDduzILxIE1=}_pS%TDb-BO@cr-M?w2in^SK>Q{iC zKu0Rt8TBc6)^}Sbp;N@`E1hf23cPZMnuK`s8*r23wVW8|#df@xHQDLmT^$|f^R16<=9-&G zm9w|j)R?EV;bVtt91PVnr0K5RNml@L4r-vh%L%2m69g^fMsVPakneVSfNTia!|pU$Yk^HNq;R=$4a8(m3d-Bz(Kcy4@{ zyXF;xVu}H&qtr2BPF2!(Ufa=tJeFpge02uW5)(s#6P+(@{`GcXJWF6l{CAAc=$o?= z!BjVx8h9-ydyx{6d2xA}nHa!#gs7Z>dIE8_SR2d|*62B8>fc?8k+ImBZ|fcwUBJ9- zCy~f)90620)3DZ&&B}j>y`WENJ`dHnKaCnkCQ3 zC{b(U#WWP0dYQeVYBIXjq6m3egWqfj;GdZV0mCB^_P%>PF!*tD6 zph>p%lmb>tm&;kwl_W!I3b9*B<024^i{`275!LV(ps`zuR`Z9~# zFoXwz-oiVPFe^v5!!C+Ix=P1eT3x_7TXQWjft+cWOLMmUdCX<(ySvx{os$>u z=}R<~af7EKY{DlrL2EV$qX;>;$Kl(;A~A6j8Q6pzLSUwN5Dn z{BTotfW^N1{ti3%TzGi6)tQeF6gUi;?WUu2weAJnR2qSRKDcM4O^2_TzOH+8BJ(_y z!9PqflA>*H&h*#=-K>*_=ml$8zb1#(f>aW}kkGch~eeM(!--dQklKRe!n zVixuMo;0oG0M{fWL|&*{DvbYX2zaL81Fa`ym%@H@c6FipQ^mT!YlantVVjm)T$rkQ zuoTyhy(rDZ%)E<8B#(aW^QEX3mdXU2`4z!NF*=a8?x$5yQktSAhPm3C#D}R{V&tNK zc@OC{HbYFy0%o-Cy|0Au0T;MTuKB@uRk?MmLf>m~iblk-{a|B4>XnLW>BQ0(uelD` z8NDa77{s)+QNS`RK0d$7R;h2DnO;srCyppj3ZQoY_h`PDdjTDl1b@U}^EwN!Nat?O7?w zqZv1=4YYDJZ8&tMckNySV#+=~;=m_yTir7)zv=C6j~5$qVH4C-lk6P)+9*Z+KHS7G z<1Yhv$7N7;Q%>&uE5QfnTRq;h_S2A)KS`7D%9al$)T^=Af6}iHgWdY_=AD5T%*s1G zJT2EPIDZB6p*k~_o>6-?sa0^IK9`wOpx%sjAsDVJn$GQz>zKfVW6k28wDbJUlpJA_ ztnY-TIcvv9-zQPAyh>5L89wU;7fpewz?R15Y1$`+fvGNj*SyJ|onvZ~5eTJ`ZdObIUgjF-5a9J@6B=$Fu8UZPWU7gkH; z?&wPq(GZV;`Pg{87|ES-DYE8ed_4N#D7WYBkj?X?B;2&ytFiq0^6gjrPIkLMJmLM_ zS)F?-r|r2q_unVe;F*xrLw;;7Hkuku&_kV!nr* zFqiYN?uLhcsdGW0i#;EbEN4h~abqOm;jhv{*Qt8+9FvZgd zp}0Rlq0y4c^@}OR&_Bz|@HhBrMD20!2XNEWLq8Gw&w+q8lJp+F)hUjEp>grWix=D} zX_&15HRCzM78{;kqA{qJ19Awu3JQ?QZJw9K6%mXqBx|p06wck9;pXGxb2Qbt79{)U z=RknY^}$}MJ2Es)<8yCU%^ooi1Iayg*;puyTWTI)s^vz%xO7_=joNIxlX%*tSB2zD;v)vH&@eRh1B=$)E>vH{CC z6F!gbtz*-%H(M&fhB_5-UmyL%dFib<(`n~38ogS_G!)02{s?QWE|yy6rg0L&3V&OuB^ z=fFoH=V#LBUH&2X&E9*;4=|T@fzle%*#IuUdG!0xm~|7j8N`65Q~vJP&D4O^{xo_Q`G*oFTx&sydd$T1elg5obK(>( z(U{F0fbC4ZTQ|R$N1z@MS@pfkiHX2dEMN<=k+h%hqy#!RRAMYq8(#0s+ITQ-NP)wV zy+wsWC>0`FhUUx`LW2N7fdMdl^{=3ao9zYKc}yG}g+|$Iy&i5Wy_)@36ATZwzR)an z#prHzOgcNN4+}sSU9Ox5)kpc>z01Fjww;7+v=|DZkbRFFYwO}+%kA_E3?$_$-Z%B_ zj$_xs)ZxHyYiko!E5%@EXFrB65&yW>rcmRRplpNZCT)pfXVlMgPYI|jhHLXl<`_hs zZ%%r(-M`mavw2XguVCT@i)O2HjpSTCD0j=10wm{uL)%Tp;t1Ct2eii#H>1=@VCw%f zcaiMgTyRo-y)TE3EZ|vQ*RLum&0B!}IQ7a}7ujfQ%%ZYngRzq&yhZqnYlctNh2j(G8+|M>spm$|RJOVOjE_atUbl zR8bP-rcE~2I!v!PpNd~x?X2pqb^OB5UU;0QXE6#L`PSq@0Z9V)t7Wlh&K_uaR7f-W zC8rw8>$ai}T_ACzF^Z3XT2KW~uWChY;L|5fPKR~|@1{&lCD|091&WnLtzb*Q8Y3FX zNGXqc56ovaRb33!?KP7opZoddE{?s;2d77KhCgg|F3v73F3zv6TK}j+ z^bx>)gkJkmeEaKT_)yED7)h55Ml)KD7x6(Q6qi(!@jnq2c9^ns)zDAivm573>=#s= zTv}R+AISa=eL)GB+f`N4x4Na#pI%<$Kyy4ZDeXe!G+2$q>T{aou=8HoFESHb*^>0? zO%B;sTe|Y#Ky3on{M_JG%0Ef`G94kM5Ba(=Q3P;L5mFjIkPv+?YWMp3vvMO@G)U2y3de*23ju$ppU{BZ2gU&6*jsC6Y5?FG;Z0f=zp zf4+g=NeR3*T-;;*dWIUa)wgJF6d6dH<^T(TBUFy1_NUSuC8bMvIVNw^jRhQm$g}_r zM0oe$LA3$W%-J?JjnA#ph7!_gxw7(F&6tV_3SNd`pyj&c)=zTfO6dAnq2Xpb^Z*c> zih3iA+CW)y{;{GFso5UIC+Ijs30HCup8P_57vEe4XwTYEPF=vCX4TY4U}V5{(wKrk zdI6QLrjJ}0bT?7wc?>Ce^7yxGZ*HdP#(1Aq^A9iu(7v~=6k|@>Vy1;@V3iueSx9^ z=bmpmn$siyxVDVva*lIsEAMm#_!yBll!&l;x#_wtkQx` z4zQ13{~IPoF!m)m=&`B7sC2|4JUJ;MBnJBi8DO-c&aKu{mWl;}L4ddA2cl8kapOm1)z?Us#R)WId`=I7of4Q zHySDLM(3&GX`X=ebepS?MfG?glp*eM zS*~>#rToTtahj*GQ|rj~J+z!&>UgQC(~86qsI9C301o%H8 zCHP4iuLPn?sHD%n&1i}7w3Tocm}Gv3lh(KEVW*3bKR$o9hJnb)Nl8f;y>RR6>qQ^# zz3%JO%e_Tjy5Eu6$w>u@ zG*a;1iE26%xtBc;300c;@t}vhHXsXC7 zJW4Ju<=PVEBYAhG)hi8~d_0!F{J7A#koMNzsL6*sk;fFqo8{FV8xF^p<$QWu2cT<7Ez+-*7E3SzX@LhfxCBNAS> zCxd5(x6gexdcVLuikVz7T=pg}4_W;gj}J*MsLl7P20pIN9y;7(ZOdLXYRdqR7=J-b& z#rl>W;&h%{GpcIDnxC-fc%|E6R2igIN8r4A@!~N!N%0MjKUG8lY}MeCzZk;14_9Ec z%JmF)zH;0Jx*;XyTAbbe%l*%iQ|+7TQfbIuzRt_j73Eb*K=-CgOHg<8maj696Frrt$r50E4NbM+N zOoQK+%DrN7{%E4*tXlobECAwoNuvJO6_5TVK;X=9nnbM;)+o z;;Y6wfB($)(mluGL1#{k^jp6Y+^BMXYkj8oGc@z59KKD{r0!tu0`t3fj0R$ZiA~U2 zIUsE(g!7nkAJH~|NOyrZy91O;_oqp_MB~XG((mef`id6Q;ZuSUB|kbJe44c{4YkH> z8$g~ZfeO_;NCTE|-A%`VC!206qKt@;u*99t=pl(m(Zq)4-yDGDYdkze1qB7rh~9(J z>jInDezuVccx5|aQC^Srd+i+^Ss>FuU%v>7ILE1a8Xq)G;mp^s+b!98DL;(dk#P`W zj}`ES=s=u$R>Ew6e!`2iv}XVo>R&(7VVh9TthD18Bq@m! zT*=JL%+~ce1DcS+4Gs0|qA@g(1O&~(t&429%irnz=j;)pi~>35I(nqzc_TM7GSLC} z_ve;?mJX7Nu)1dNT_*6ma*IrP@cW;AQW=Nc<7)CZ!5GxcF9nr%ebrqK-D|dPCjD`E zrZ@KDbHChIqd(s(2aqrNb#*nR={WZS$nK#3n}aujNS21ZR;g~l8kfL9V=Lr^i3+8w zW2MC+^gcio)k*MIN6GsrrVLJOjpLUHDgmpwhTaNJ!!J3;8sHz*muOOvoQCGgM)|Z} zkFHD*$poIo%~Qw$NgN3Y8iq&vFD50f&BVeY)ru!okw-F4rI!W5fQ;-JXfID_H+n>X zB0%;MfT>hz?^4dn{afs7fxGB+(3Wk-i^4(pj_iV{0SL7AU*3@;qMq^Ua_SPW$zz!B z8oisTZb+p77x0*p0);X#Fpxjy2b*lI$e(HstSacI-XI1*`IugxDEn%P9W~(pYyX?g zR8_%9k9j0d%Zb)xpGF)jKR>DbQv#Q|+mtB86VoZudkp$FJ3wzOKvZ0t zC^gM3Ioi`IxFE&JNdVS$hs1Uxs{Cst2^kBTsZ^&?c#^pWjcngLqR&NXTc&r*6@peu}wo zbw}BJ03rVF;T7Wi-&oMDVf=MGdIN@PKxIV*cpF;7B2#*qg-+8jOKY4bxgw`^Hd-$Q`wZj1k;$n#9pmL?|jfQ(GvP$jvc z(sy8~9B$RP12@UMElutQy)!^FOhDWj&@O#KWSwxqx(-EG|WY4>!D(Uz8?T*B|CH7B0f z{8&`3;(PIs7gS{^0eN8E;I<4wpMX(z4hsHYo=!9rxuLzu0#=*1Vp*^*T(}C^GSEZ7 z5>W(8UrtUgb&rge*8Y%;k`fmLh!oWh2zWsSgEfB1>;4N}%?H+NtqepvL&^*tpa9v9 zVf9dc01CGYh%x6@{0-p_Hfd>kKyqwU-Ee21I=0+jSVYCwcN!6>xQnOtZK{2FBEC35 z+FxPznF;6<$ZL0l<~bLlMq8qh;sT+q|8?aHG!;ZIO7F&10``rFcn?m%V^Glb2yho? z>T+g~Zfw;{8Cu#VrtIv@i_zmqAPe^L@bIu?%|?buc6!0DSK+z}KR4XPD~0(i+HuvB z`4#H}bVp^sU3Y`b)!n;+aPido`uY^pZ#jED4h$&!`AH&PKwf-q#odAn#>X1J4mLtz zzTErSAFICyI*=$3h_MjfRGN_|Ta%StmYa}_Ba+&GE>F^Z2>lv1mSy?Ox+^TmJZu-6 z&t+N45|>DlO?|lDQfo@(Jy`SU=?21OpR_3$He4X8gZpdvz^k*M(SHt-l75;}nILu&z6Q1yGJ|8A>tn z$QW5zhz8IUt@V2Z1p6JQXFy|15cCDcdUO`g)u zrEqT*4Sq0`eE~X7FrYv^BO-P`V@NCpAij0vs(BRqWYcKS)gFFu3Dzpo>OrfuaMNKC zF93t=y8e`8c$-<-WO;FA_6AyaT-9@pFVwJd+f6~hj>*0G_88vMgt0?_aHk6 z(wlhNZT$_-?10nL)1kNbIS|jzRv+$LC|ulPL0-D+8a*W{EIeHE%*nwF4+F-(JbW`+ z@fHD55HOoTm0%qR)LUdxjg4iNfieQ>ua!_hx-36hsFHW1nG8WYAmY$-Ohdc}$^F3e zFf^_(f`|feaC2}}$(v2kds(_ZvN%!M(!PiGv=W{KK)H1!e#6kV05Xn%uuu3rSf(u8 zcVqU!_KyH3zu|Q-$V0$yAyZRnGfvtC+!3;51PszM-_Xuj{bCS8(qQ_-MM6UI_{ROW zb@rRJB28vdd;rGu!!dO6k3b@b_l_F=Z16tE(@vD%89WLYYB*3}tAiMEd`ta0upM=d z$cc~Gw8#?)Puy4mnnN1kGbA4S99~eNY)0NZGwlAl@pWaz0{R2nICpvB6e>mvyH!9y zx04c@Gq24cqaTPT6u+;M40_MEoKhA=b)mHT?jF$MiQEFl z%{ICN76osR-K`v{TkFKH!aRkXo0SaofR8;r^8WsCpywh)L-H*d$n0FkFx9FjM-22u zSL>V?&POxIZ0$S>dHNKzX$s*9G6>5jvA zV)^koKN%6NjQ@<~&u7PH(3OOfje+BXCv)TBw6QA0OdUMAA&UbS-*S4GqD+&hl+*U5 z!K)`#=y2=HbQa5v=xT|8*OpPC zfP-3?nWL4E@nV_Q1#Vz6R)u9 zB5ICCweLVn{+J{C0y;fdBX<|Giu7ASQ35VtQ(^cfCyu-_N4lS82k&J=X274be1qy6 z5OU(|w*pWEKa{yH-)cOZvg=kehEfm4A(G1(x=#0svj>K4aD_(!$G6!g3F$v4ZiUIu zMMad}nKpWpAaSFf9u(fLoN2vlN48nSyEqm+c&TT7QsTk+dU1^c&K{LJce;Wu;Hwvj zzhBpC@O0>q%pbFj50P9nwVUj)ZxajEDeM5r6G8xJ9ir_1Qqz`bXy7OiqC90Fv=j&o zYYE6uWztQB88--}A#S!+-z%|uhY^e0+lX6&Y|zGQol(gL-#=|ImR+9Mu~v>{p%rt* zLSmC!mjFV7RDtNpN6vW9077}qKEwgFjAF@0;kO3R9!N~^nzaVHE_dduTM!;U2OdXE zOdJU4Kwmu`@8*Nd-Q8V?Q80n+*h={Uf*;QzOuR^wz=8*1j@Pb^y2D_Hj27w=LD5&P zlR=CTWK>E%ix9OQi%Xg~Q2#Yaq}p*VU}$sSslH)BZ+|*Fwvh`$IfMZ+N4{M)hqnTc z{TLERLq+-mDQB~LCfW&TgolAf&iA{C@-8HF?B|;40K=6}PjIP{Lc|e44l_-@6K@{K zz~p?%rt!ASY1pK_^*Tg^poQx6EN^db-_7GHVhUl0Ov_W9LLt6dw|+jy8BvU~295ex z5L|Od~rAauGYdKG%WTk{q?rq;ZZJkZuDf2;++D{=0it<8$>S)lfDi*WIE5|ff<7cta~t|`<*%IkZagK7`vDlKHDI)46G`hNf_sHm&EH&9M>*K~;~-@Qu=d%}404#CBy0)a!pRGMI?fU}?0WPH}W zNrMGoKcnAS&OEvP0WhEgp)}lU5Tu?bC+nAN#Kn8b8+92@y|{z-4mS$i_Lwn3Q-zys zY-~Yp(I7~_VqqtKk!s<&RpJ^$Ep!plL_`a<@jD@=1RwzbQla|c5w`}&*1OPX zCd*%pruqfSl*_^T{oG(OifjlWOxgzK2oP6}L1=i2zzj@x1lx~|k&TBvdsZ#>9Nn8_ z^8gSTg~+foE81ah9U{4d1jd{J%ux<+^7B7hx-vH>qNzZ;lXG{k8qpTl#p!u2 zzuu5tovHhv|6s13MRg4#xJW=Qb|ZmX9`X%C;AC{n6)Q>RKzq9YgSMif829BFgz?m^ zY;sF4VL;XcAE}9uhH|3*hsgzgbaX1@SFZRz`}uhUCg9o0Lr_>gan_9hLkB-K*7(8O zFX3S+Nx}}V#;o&&8z5WyWx-#&bYi;d7H?;xt=XD(5aG!S8!O1YP@E36G6eAgjf2hJ z&M9gIMmwYrU4BPG_AY>Rt#6pqI00>GWn}>q!7+v(VjG#U97b^(?wPSH=HR2RjTI&u zcB5lVd*TfrZB5tSDPPC98=DeEE%a7hH&B4s@B<+wZ|h>Kk|Z<$od`a zNBcv{IzRd%2|dA|IEEhfWjG85S-WwwU;UnboZ<-$J$*EoYVQ<#DN)_cpK9*nmsAq_ zPauZsvc1!6qu5lf({_y~vnnau?NmK0vniMw#n`d>a&?3d`uM{L+MlfiRv^Tv!IAo$ zbxvNf4828OBIvu3;Z|a~$Q(s;la+-fsqw?2u^UU>@RzVQ2WWFy6B|A5mEzQfC|*kY zvrDKbtS@vBZXt%5T4iV%qK5<<0qFctSpo&)6189}Fo*anD#nBdA0Hy_@Gz_HRjc;^ zEETo1Vw0UTP9%^KnAXBUoMUBZ3Do5D9q@J*7Q~!llZ}g_5-E4S?_;LM+j$pcR>^}= zZ5@riR5#$9i>7~m=_YeW3her5d|a z+j4pdhK(Z~#-xD!K!3mIx)9hWkokk;NuQ3w{@M`cU#7>w_s4us0bc3%bzhsg+7-jl zH6Zin`&|i=H{;Mm4WDseS?PoyP>AK>RgzsKykbMc$<1A&;^KE6^tRa8AOPS=dZO)a zw0ss=Al5dydU6x1af5b9)apuedHg9LrK=$HY^gvDgRKO^zcDr~HAK&5E1t8m)9oMg~JCMD8B`Skb?9ncRf}!cX*&ObZ0@Bro#HDlLF_ihyggPUYK; zO>}lXKJP|)^ro;1ONMi|FJn7yZRuX>qgKo1`%q9kyou^q44Iy z_Mgp;TyK{WQ%BA;2Nu;Jc+ITO z1{Y#a>R78_?7-fc2mT}n8`8qS_41HSM~NF8Oqq)(yc3qBlFbjEG9;FIP?gVsV|Lh@ z;q{9lC>7$LdGRg~QK<9#SuuIr4bmQNhMj_Ha2HxmmU$2%s;_z})eXtIEKkjc@}c-s zK+HmNN)&UfL!Jp-h6s;cc>e^y+rBA#;3gIu9dz^crgP5Te*(d-OKH(fG;CzZ^=j8H z)uuRL@{fp&bb1!F-h8sBhUC@*AB1)$xpWlBT6VP~?N>l2`~U>i!TEwQy~l9-#r6q-HUTm;XnG?W z13oh3?)l9~&YOUwS5(9ahcXzYr0C#_)IoY25W@4+)F=pGE~+aw89DvWcdSFR_a@IQO04=@-|W8_MI>ufT#4CJ!yZfiUFBr!N$+ zJ&;&EdDd!0lf@Sr{k^)+p0I#FFwoY=_+bV@U5! z*Az1;43oD;yFP=$wtiS#`$8Ai% zs}J=9VF+a-BcpoH52)U{-Cm$J&XD9%hj{pf3l|1$t8euN*~mcX5+(^5ocsy`=!LLi zr{T#OAV_)0F=4pbg}{aeE$OSvV^{FrRogf( z?7WR3kxK(nXf@rp--3pC?n{$zZLcZ`s#Bv7kmTyVuj}QEEAE|?)FO^iFnY`Z(6R9( zd)pwvUBg7$cpRWkfEarrM*89DC3#S~paM%d=d==y;Z8v62sxAjhO{Q|I>Z)-Lj~Qq zLNYQ85W`WqeOp*8)F)XBkUDZ~1Pn8B0JD44_?AJGUY=^+IjiM631|!b7t{+AT1k$pMrm4YM?G{qQd1G)RkrFpf>EscR zF3B@OyuuMkKAMfOk`fcozc@~cXcbciS?N0*9)1d~&KyWpSafuakG-w!+dg5Yevh-0 zpL-P31t;uSbQqxDgd(Kl^|uilLGSs;ID>#)OR0pWgZNO?oq!;-0mQ@2M0k7{5H9wW zfnlb^jEor2JJ2j9)I*G8XSZRUU@{^xRmAjQ+M69DiI0n$2Z(V?UqS2TaE=Y+JYaQQ zQ}EIHO6KNQR|YbiTRIH#cXQ$k{4*)a3otG~iSS7@OhNchkjG zpr(+9B~ABPXFu8bK%`gad=28?IGH#*={P;K;ESm`}#HIeIjCvrTLq& zEQojv3#GHC9%oH61)Us_xIxvuI8W>Nqn`;|+DO_DHn{g_2-hM;`yQ7an#Ax#hfU$n zb!@-&R)qtB%X8LnOtyN$}^1mV!+zD{;&ZMP9$Rdf5&tBX%6 z*Q*eTe*(=1VYKEIa0L+QTU3-1)|KnXgA#=l zZBTIVP?kJS?r~q;zNtXBDB*z>xkg<5&V|o?@ zzWO3^6v$-}3kon4dMi5zX-E<-oRS=V@fJtTRyP7do7#DV0O4bhN&ujU%+v7j2Fc}d z$gqH|0d6%Y2=KqVMn**h$2}4DH6g#8M*ZDf20)#;nXI;L^o&;o2Mdc)SXk#%g6=O9 zNH~GiXxvO*QOOP`X+VGDdN(#D$xb#mDex7;ffe(k7 zMDW+nWeTuU(yae_zvw0zc{EyN?Z-o@8}$ zxM@QN`mgFI-y2S;q3xy*wbHF^H>ByrGDJovGHwm3d0mwIj2Glg)O>>lYvIyK!QcP= zhWPG{C^!|%h%5K_KmVe`|JVPr#jyqu9fFD{J12%m9SYHgDGrPHl!OQ55 zK}jr*L4Zq#fgM7KfhmU*crFXGHSj;b{?BdsKTr;jF)mKiKimqjNQTnHP*uFGP$Fj* G^gjUK3_@f8 literal 0 HcmV?d00001 diff --git a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx index 8f234b5a..5a0f51c1 100644 --- a/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx +++ b/frontend/src/pages/GeneralSettings/EmbeddingPreference/index.jsx @@ -10,6 +10,8 @@ import LocalAiLogo from "@/media/llmprovider/localai.png"; import OllamaLogo from "@/media/llmprovider/ollama.png"; import LMStudioLogo from "@/media/llmprovider/lmstudio.png"; import CohereLogo from "@/media/llmprovider/cohere.png"; +import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; + import PreLoader from "@/components/Preloader"; import ChangeWarningModal from "@/components/ChangeWarning"; import OpenAiOptions from "@/components/EmbeddingSelection/OpenAiOptions"; @@ -19,6 +21,7 @@ import NativeEmbeddingOptions from "@/components/EmbeddingSelection/NativeEmbedd import OllamaEmbeddingOptions from "@/components/EmbeddingSelection/OllamaOptions"; import LMStudioEmbeddingOptions from "@/components/EmbeddingSelection/LMStudioOptions"; import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOptions"; +import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions"; import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem"; import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react"; @@ -78,6 +81,13 @@ const EMBEDDERS = [ options: (settings) => , description: "Run powerful embedding models from Cohere.", }, + { + name: "Voyage AI", + value: "voyageai", + logo: VoyageAiLogo, + options: (settings) => , + description: "Run powerful embedding models from Voyage AI.", + }, ]; export default function GeneralEmbeddingPreference() { diff --git a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx index b6ae8cb2..35358636 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/DataHandling/index.jsx @@ -28,6 +28,8 @@ import LanceDbLogo from "@/media/vectordbs/lancedb.png"; import WeaviateLogo from "@/media/vectordbs/weaviate.png"; import QDrantLogo from "@/media/vectordbs/qdrant.png"; import MilvusLogo from "@/media/vectordbs/milvus.png"; +import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png"; + import React, { useState, useEffect } from "react"; import paths from "@/utils/paths"; import { useNavigate } from "react-router-dom"; @@ -292,6 +294,13 @@ export const EMBEDDING_ENGINE_PRIVACY = { ], logo: CohereLogo, }, + voyageai: { + name: "Voyage AI", + description: [ + "Data sent to Voyage AI's servers is shared according to the terms of service of voyageai.com.", + ], + logo: VoyageAiLogo, + }, }; export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) { diff --git a/server/.env.example b/server/.env.example index 4be9ab75..e38250be 100644 --- a/server/.env.example +++ b/server/.env.example @@ -121,6 +121,10 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea # COHERE_API_KEY= # EMBEDDING_MODEL_PREF='embed-english-v3.0' +# EMBEDDING_ENGINE='voyageai' +# VOYAGEAI_API_KEY= +# EMBEDDING_MODEL_PREF='voyage-large-2-instruct' + ########################################### ######## Vector Database Selection ######## ########################################### diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 7cd2dd47..cbbf1f23 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -498,15 +498,18 @@ function apiWorkspaceEndpoints(app) { const { slug = null } = request.params; const { docPath, pinStatus = false } = reqBody(request); const workspace = await Workspace.get({ slug }); - + const document = await Document.get({ workspaceId: workspace.id, docpath: docPath, }); if (!document) return response.sendStatus(404).end(); - + await Document.update(document.id, { pinned: pinStatus }); - return response.status(200).json({ message: 'Pin status updated successfully' }).end(); + return response + .status(200) + .json({ message: "Pin status updated successfully" }) + .end(); } catch (error) { console.error("Error processing the pin status update:", error); return response.status(500).end(); diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index c8e239f1..a5bb6a23 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -426,6 +426,9 @@ const SystemSettings = { // Cohere API Keys CohereApiKey: !!process.env.COHERE_API_KEY, CohereModelPref: process.env.COHERE_MODEL_PREF, + + // VoyageAi API Keys + VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY, }; }, diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index b98891c9..8616943c 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -1999,7 +1999,8 @@ } } } - },"/v1/workspace/{slug}/update-pin": { + }, + "/workspace/{slug}/update-pin": { "post": { "tags": [ "Workspaces" @@ -2037,6 +2038,9 @@ } } }, + "403": { + "description": "Forbidden" + }, "404": { "description": "Document not found" }, @@ -2047,20 +2051,12 @@ "requestBody": { "description": "JSON object with the document path and pin status to update.", "required": true, + "type": "object", "content": { "application/json": { - "schema": { - "type": "object", - "properties": { - "docPath": { - "type": "string", - "example": "custom-documents/my-pdf.pdf-hash.json" - }, - "pinStatus": { - "type": "boolean", - "example": true - } - } + "example": { + "docPath": "custom-documents/my-pdf.pdf-hash.json", + "pinStatus": true } } } diff --git a/server/utils/EmbeddingEngines/voyageAi/index.js b/server/utils/EmbeddingEngines/voyageAi/index.js new file mode 100644 index 00000000..b25d3208 --- /dev/null +++ b/server/utils/EmbeddingEngines/voyageAi/index.js @@ -0,0 +1,45 @@ +class VoyageAiEmbedder { + constructor() { + if (!process.env.VOYAGEAI_API_KEY) + throw new Error("No Voyage AI API key was set."); + + const { + VoyageEmbeddings, + } = require("@langchain/community/embeddings/voyage"); + const voyage = new VoyageEmbeddings({ + apiKey: process.env.VOYAGEAI_API_KEY, + }); + + this.voyage = voyage; + this.model = process.env.EMBEDDING_MODEL_PREF || "voyage-large-2-instruct"; + + // Limit of how many strings we can process in a single pass to stay with resource or network limits + this.batchSize = 128; // Voyage AI's limit per request is 128 https://docs.voyageai.com/docs/rate-limits#use-larger-batches + this.embeddingMaxChunkLength = 4000; // https://docs.voyageai.com/docs/embeddings - assume a token is roughly 4 letters with some padding + } + + async embedTextInput(textInput) { + const result = await this.voyage.embedDocuments( + Array.isArray(textInput) ? textInput : [textInput], + { modelName: this.model } + ); + return result || []; + } + + async embedChunks(textChunks = []) { + try { + const embeddings = await this.voyage.embedDocuments(textChunks, { + modelName: this.model, + batchSize: this.batchSize, + }); + return embeddings; + } catch (error) { + console.error("Voyage AI Failed to embed:", error); + throw error; + } + } +} + +module.exports = { + VoyageAiEmbedder, +}; diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js index d9a1ba09..e60202a6 100644 --- a/server/utils/helpers/index.js +++ b/server/utils/helpers/index.js @@ -125,6 +125,9 @@ function getEmbeddingEngineSelection() { case "cohere": const { CohereEmbedder } = require("../EmbeddingEngines/cohere"); return new CohereEmbedder(); + case "voyageai": + const { VoyageAiEmbedder } = require("../EmbeddingEngines/voyageAi"); + return new VoyageAiEmbedder(); default: return new NativeEmbedder(); } diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 48c98e95..40154163 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -350,6 +350,12 @@ const KEY_MAPPING = { checks: [isNotEmpty], }, + // VoyageAi Options + VoyageAiApiKey: { + envKey: "VOYAGEAI_API_KEY", + checks: [isNotEmpty], + }, + // Whisper (transcription) providers WhisperProvider: { envKey: "WHISPER_PROVIDER", @@ -545,6 +551,7 @@ function supportedEmbeddingModel(input = "") { "ollama", "lmstudio", "cohere", + "voyageai", ]; return supported.includes(input) ? null