mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2024-11-04 22:10:12 +01:00
Docker support (#34)
* Updates for Linux for frontend/server * frontend/server docker * updated Dockerfile for deps related to node vectordb * updates for collector in docker * docker deps for ODT processing * ignore another collector dir * storage mount improvements; run as UID * fix pypandoc version typo * permissions fixes
This commit is contained in:
parent
ebd3a62866
commit
9f33b3dfcb
13
.dockerignore
Normal file
13
.dockerignore
Normal file
@ -0,0 +1,13 @@
|
||||
server/storage/documents/**
|
||||
server/storage/vector-cache/**
|
||||
server/storage/*.db
|
||||
server/storage/lancedb
|
||||
collector/hotdir/**
|
||||
collector/v-env/**
|
||||
collector/outputs/**
|
||||
**/node_modules/
|
||||
**/dist/
|
||||
**/v-env/
|
||||
**/__pycache__/
|
||||
**/.env
|
||||
**/.env.*
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -5,6 +5,5 @@ v-env
|
||||
node_modules
|
||||
__pycache__
|
||||
v-env
|
||||
*.lock
|
||||
.DS_Store
|
||||
|
||||
|
@ -52,9 +52,9 @@ Next, you will need some content to embed. This could be a Youtube Channel, Medi
|
||||
|
||||
[Go set up and run collector scripts](./collector/README.md)
|
||||
|
||||
[Learn about documents](./server/documents/DOCUMENTS.md)
|
||||
[Learn about documents](./server/storage/documents/DOCUMENTS.md)
|
||||
|
||||
[Learn about vector caching](./server/vector-cache/VECTOR_CACHE.md)
|
||||
[Learn about vector caching](./server/storage/vector-cache/VECTOR_CACHE.md)
|
||||
|
||||
### Contributing
|
||||
- create issue
|
||||
|
@ -1,5 +1,5 @@
|
||||
import os
|
||||
from whaaaaat import prompt, Separator
|
||||
from InquirerPy import inquirer
|
||||
from scripts.youtube import youtube
|
||||
from scripts.link import link, links
|
||||
from scripts.substack import substack
|
||||
@ -20,57 +20,46 @@ def main():
|
||||
selection = input("Your selection: ")
|
||||
method = methods.get(str(selection))
|
||||
else:
|
||||
questions = [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "collector",
|
||||
"message": "What kind of data would you like to add to convert into long-term memory?",
|
||||
"choices": [
|
||||
"YouTube Channel",
|
||||
"Substack",
|
||||
"Medium",
|
||||
"Article or Blog Link(s)",
|
||||
"Gitbook",
|
||||
Separator(),
|
||||
{"name": "Twitter", "disabled": "Needs PR"},
|
||||
"Abort",
|
||||
],
|
||||
},
|
||||
]
|
||||
method = prompt(questions).get('collector')
|
||||
|
||||
if('Article or Blog Link' in method):
|
||||
questions = [
|
||||
{
|
||||
"type": "list",
|
||||
"name": "collector",
|
||||
"message": "Do you want to scrape a single article/blog/url or many at once?",
|
||||
"choices": [
|
||||
'Single URL',
|
||||
'Multiple URLs',
|
||||
'Abort',
|
||||
],
|
||||
},
|
||||
]
|
||||
method = prompt(questions).get('collector')
|
||||
if(method == 'Single URL'):
|
||||
method = inquirer.select(
|
||||
message="What kind of data would you like to add to convert into long-term memory?",
|
||||
choices=[
|
||||
{"name": "YouTube Channel", "value": "YouTube Channel"},
|
||||
{"name": "Substack", "value": "Substack"},
|
||||
{"name": "Medium", "value": "Medium"},
|
||||
{"name": "Article or Blog Link(s)", "value": "Article or Blog Link(s)"},
|
||||
{"name": "Gitbook", "value": "Gitbook"},
|
||||
{"name": "Twitter", "value": "Twitter", "disabled": "Needs PR"},
|
||||
{"name": "Abort", "value": "Abort"},
|
||||
],
|
||||
).execute()
|
||||
|
||||
if 'Article or Blog Link' in method:
|
||||
method = inquirer.select(
|
||||
message="Do you want to scrape a single article/blog/url or many at once?",
|
||||
choices=[
|
||||
{"name": "Single URL", "value": "Single URL"},
|
||||
{"name": "Multiple URLs", "value": "Multiple URLs"},
|
||||
{"name": "Abort", "value": "Abort"},
|
||||
],
|
||||
).execute()
|
||||
if method == 'Single URL':
|
||||
link()
|
||||
exit(0)
|
||||
if(method == 'Multiple URLs'):
|
||||
if method == 'Multiple URLs':
|
||||
links()
|
||||
exit(0)
|
||||
|
||||
if(method == 'Abort'): exit(0)
|
||||
if(method == 'YouTube Channel'):
|
||||
if method == 'Abort': exit(0)
|
||||
if method == 'YouTube Channel':
|
||||
youtube()
|
||||
exit(0)
|
||||
if(method == 'Substack'):
|
||||
if method == 'Substack':
|
||||
substack()
|
||||
exit(0)
|
||||
if(method == 'Medium'):
|
||||
if method == 'Medium':
|
||||
medium()
|
||||
exit(0)
|
||||
if(method == 'Gitbook'):
|
||||
if method == 'Gitbook':
|
||||
gitbook()
|
||||
exit(0)
|
||||
|
||||
|
@ -20,6 +20,7 @@ cryptography==41.0.1
|
||||
cssselect==1.2.0
|
||||
dataclasses-json==0.5.7
|
||||
Deprecated==1.2.14
|
||||
docx2txt==0.8
|
||||
et-xmlfile==1.1.0
|
||||
exceptiongroup==1.1.1
|
||||
fake-useragent==1.1.3
|
||||
@ -30,6 +31,7 @@ h11==0.14.0
|
||||
httpcore==0.16.3
|
||||
httpx==0.23.3
|
||||
idna==3.4
|
||||
InquirerPy==0.3.4
|
||||
importlib-metadata==6.6.0
|
||||
importlib-resources==5.12.0
|
||||
install==1.3.5
|
||||
@ -54,132 +56,13 @@ pandas==1.5.3
|
||||
parse==1.19.0
|
||||
pdfminer.six==20221105
|
||||
Pillow==9.5.0
|
||||
prompt-toolkit==1.0.14
|
||||
prompt-toolkit==3.0.38
|
||||
pycparser==2.21
|
||||
pydantic==1.10.8
|
||||
pyee==8.2.2
|
||||
Pygments==2.15.1
|
||||
pyobjc==9.1.1
|
||||
pyobjc-core==9.1.1
|
||||
pyobjc-framework-Accounts==9.1.1
|
||||
pyobjc-framework-AddressBook==9.1.1
|
||||
pyobjc-framework-AdSupport==9.1.1
|
||||
pyobjc-framework-AppleScriptKit==9.1.1
|
||||
pyobjc-framework-AppleScriptObjC==9.1.1
|
||||
pyobjc-framework-ApplicationServices==9.1.1
|
||||
pyobjc-framework-AudioVideoBridging==9.1.1
|
||||
pyobjc-framework-AuthenticationServices==9.1.1
|
||||
pyobjc-framework-AutomaticAssessmentConfiguration==9.1.1
|
||||
pyobjc-framework-Automator==9.1.1
|
||||
pyobjc-framework-AVFoundation==9.1.1
|
||||
pyobjc-framework-AVKit==9.1.1
|
||||
pyobjc-framework-BusinessChat==9.1.1
|
||||
pyobjc-framework-CalendarStore==9.1.1
|
||||
pyobjc-framework-CFNetwork==9.1.1
|
||||
pyobjc-framework-CloudKit==9.1.1
|
||||
pyobjc-framework-Cocoa==9.1.1
|
||||
pyobjc-framework-Collaboration==9.1.1
|
||||
pyobjc-framework-ColorSync==9.1.1
|
||||
pyobjc-framework-Contacts==9.1.1
|
||||
pyobjc-framework-ContactsUI==9.1.1
|
||||
pyobjc-framework-CoreAudio==9.1.1
|
||||
pyobjc-framework-CoreAudioKit==9.1.1
|
||||
pyobjc-framework-CoreBluetooth==9.1.1
|
||||
pyobjc-framework-CoreData==9.1.1
|
||||
pyobjc-framework-CoreHaptics==9.1.1
|
||||
pyobjc-framework-CoreLocation==9.1.1
|
||||
pyobjc-framework-CoreMedia==9.1.1
|
||||
pyobjc-framework-CoreMediaIO==9.1.1
|
||||
pyobjc-framework-CoreMIDI==9.1.1
|
||||
pyobjc-framework-CoreML==9.1.1
|
||||
pyobjc-framework-CoreMotion==9.1.1
|
||||
pyobjc-framework-CoreServices==9.1.1
|
||||
pyobjc-framework-CoreSpotlight==9.1.1
|
||||
pyobjc-framework-CoreText==9.1.1
|
||||
pyobjc-framework-CoreWLAN==9.1.1
|
||||
pyobjc-framework-CryptoTokenKit==9.1.1
|
||||
pyobjc-framework-DeviceCheck==9.1.1
|
||||
pyobjc-framework-DictionaryServices==9.1.1
|
||||
pyobjc-framework-DiscRecording==9.1.1
|
||||
pyobjc-framework-DiscRecordingUI==9.1.1
|
||||
pyobjc-framework-DiskArbitration==9.1.1
|
||||
pyobjc-framework-DVDPlayback==9.1.1
|
||||
pyobjc-framework-EventKit==9.1.1
|
||||
pyobjc-framework-ExceptionHandling==9.1.1
|
||||
pyobjc-framework-ExecutionPolicy==9.1.1
|
||||
pyobjc-framework-ExternalAccessory==9.1.1
|
||||
pyobjc-framework-FileProvider==9.1.1
|
||||
pyobjc-framework-FileProviderUI==9.1.1
|
||||
pyobjc-framework-FinderSync==9.1.1
|
||||
pyobjc-framework-FSEvents==9.1.1
|
||||
pyobjc-framework-GameCenter==9.1.1
|
||||
pyobjc-framework-GameController==9.1.1
|
||||
pyobjc-framework-GameKit==9.1.1
|
||||
pyobjc-framework-GameplayKit==9.1.1
|
||||
pyobjc-framework-ImageCaptureCore==9.1.1
|
||||
pyobjc-framework-IMServicePlugIn==9.1.1
|
||||
pyobjc-framework-InputMethodKit==9.1.1
|
||||
pyobjc-framework-InstallerPlugins==9.1.1
|
||||
pyobjc-framework-InstantMessage==9.1.1
|
||||
pyobjc-framework-Intents==9.1.1
|
||||
pyobjc-framework-IOBluetooth==9.1.1
|
||||
pyobjc-framework-IOBluetoothUI==9.1.1
|
||||
pyobjc-framework-IOSurface==9.1.1
|
||||
pyobjc-framework-iTunesLibrary==9.1.1
|
||||
pyobjc-framework-LatentSemanticMapping==9.1.1
|
||||
pyobjc-framework-LaunchServices==9.1.1
|
||||
pyobjc-framework-libdispatch==9.1.1
|
||||
pyobjc-framework-libxpc==9.1.1
|
||||
pyobjc-framework-LinkPresentation==9.1.1
|
||||
pyobjc-framework-LocalAuthentication==9.1.1
|
||||
pyobjc-framework-MapKit==9.1.1
|
||||
pyobjc-framework-MediaAccessibility==9.1.1
|
||||
pyobjc-framework-MediaLibrary==9.1.1
|
||||
pyobjc-framework-MediaPlayer==9.1.1
|
||||
pyobjc-framework-MediaToolbox==9.1.1
|
||||
pyobjc-framework-Metal==9.1.1
|
||||
pyobjc-framework-MetalKit==9.1.1
|
||||
pyobjc-framework-MetalPerformanceShaders==9.1.1
|
||||
pyobjc-framework-ModelIO==9.1.1
|
||||
pyobjc-framework-MultipeerConnectivity==9.1.1
|
||||
pyobjc-framework-NaturalLanguage==9.1.1
|
||||
pyobjc-framework-NetFS==9.1.1
|
||||
pyobjc-framework-Network==9.1.1
|
||||
pyobjc-framework-NetworkExtension==9.1.1
|
||||
pyobjc-framework-NotificationCenter==9.1.1
|
||||
pyobjc-framework-OpenDirectory==9.1.1
|
||||
pyobjc-framework-OSAKit==9.1.1
|
||||
pyobjc-framework-OSLog==9.1.1
|
||||
pyobjc-framework-PencilKit==9.1.1
|
||||
pyobjc-framework-Photos==9.1.1
|
||||
pyobjc-framework-PhotosUI==9.1.1
|
||||
pyobjc-framework-PreferencePanes==9.1.1
|
||||
pyobjc-framework-PushKit==9.1.1
|
||||
pyobjc-framework-Quartz==9.1.1
|
||||
pyobjc-framework-QuickLookThumbnailing==9.1.1
|
||||
pyobjc-framework-SafariServices==9.1.1
|
||||
pyobjc-framework-SceneKit==9.1.1
|
||||
pyobjc-framework-ScreenSaver==9.1.1
|
||||
pyobjc-framework-ScriptingBridge==9.1.1
|
||||
pyobjc-framework-SearchKit==9.1.1
|
||||
pyobjc-framework-Security==9.1.1
|
||||
pyobjc-framework-SecurityFoundation==9.1.1
|
||||
pyobjc-framework-SecurityInterface==9.1.1
|
||||
pyobjc-framework-ServiceManagement==9.1.1
|
||||
pyobjc-framework-Social==9.1.1
|
||||
pyobjc-framework-SoundAnalysis==9.1.1
|
||||
pyobjc-framework-Speech==9.1.1
|
||||
pyobjc-framework-SpriteKit==9.1.1
|
||||
pyobjc-framework-StoreKit==9.1.1
|
||||
pyobjc-framework-SyncServices==9.1.1
|
||||
pyobjc-framework-SystemConfiguration==9.1.1
|
||||
pyobjc-framework-SystemExtensions==9.1.1
|
||||
pyobjc-framework-UserNotifications==9.1.1
|
||||
pyobjc-framework-VideoSubscriberAccount==9.1.1
|
||||
pyobjc-framework-VideoToolbox==9.1.1
|
||||
pyobjc-framework-Vision==9.1.1
|
||||
pyobjc-framework-WebKit==9.1.1
|
||||
pypandoc==1.4
|
||||
pypdf==3.9.0
|
||||
pyppeteer==1.0.2
|
||||
pyquery==2.0.0
|
||||
python-dateutil==2.8.2
|
||||
@ -199,6 +82,7 @@ six==1.16.0
|
||||
sniffio==1.3.0
|
||||
soupsieve==2.4.1
|
||||
SQLAlchemy==2.0.15
|
||||
tabulate==0.9.0
|
||||
tenacity==8.2.2
|
||||
text-unidecode==1.3
|
||||
tiktoken==0.4.0
|
||||
@ -212,10 +96,9 @@ uuid==1.30
|
||||
w3lib==2.1.1
|
||||
wcwidth==0.2.6
|
||||
websockets==10.4
|
||||
whaaaaat==0.5.2
|
||||
wrapt==1.14.1
|
||||
xlrd==2.0.1
|
||||
XlsxWriter==3.1.2
|
||||
yarl==1.9.2
|
||||
youtube-transcript-api==0.6.0
|
||||
zipp==3.15.0
|
||||
zipp==3.15.0
|
@ -14,7 +14,7 @@ def gitbook():
|
||||
|
||||
primary_source = urlparse(url)
|
||||
output_path = f"./outputs/gitbook-logs/{primary_source.netloc}"
|
||||
transaction_output_dir = f"../server/documents/gitbook-{primary_source.netloc}"
|
||||
transaction_output_dir = f"../server/storage/documents/gitbook-{primary_source.netloc}"
|
||||
|
||||
if os.path.exists(output_path) == False:os.makedirs(output_path)
|
||||
if os.path.exists(transaction_output_dir) == False: os.makedirs(transaction_output_dir)
|
||||
|
@ -36,7 +36,7 @@ def link():
|
||||
output_path = f"./outputs/website-logs"
|
||||
|
||||
transaction_output_filename = f"article-{source.path.replace('/','_')}.json"
|
||||
transaction_output_dir = f"../server/documents/website-{source.netloc}"
|
||||
transaction_output_dir = f"../server/storage/documents/website-{source.netloc}"
|
||||
|
||||
if os.path.isdir(output_path) == False:
|
||||
os.makedirs(output_path)
|
||||
@ -109,7 +109,7 @@ def links():
|
||||
output_path = f"./outputs/website-logs"
|
||||
|
||||
transaction_output_filename = f"article-{source.path.replace('/','_')}.json"
|
||||
transaction_output_dir = f"../server/documents/website-{source.netloc}"
|
||||
transaction_output_dir = f"../server/storage/documents/website-{source.netloc}"
|
||||
|
||||
if os.path.isdir(output_path) == False:
|
||||
os.makedirs(output_path)
|
||||
|
@ -23,7 +23,7 @@ def medium():
|
||||
exit(1)
|
||||
|
||||
totalTokenCount = 0
|
||||
transaction_output_dir = f"../server/documents/medium-{handle}"
|
||||
transaction_output_dir = f"../server/storage/documents/medium-{handle}"
|
||||
if os.path.isdir(transaction_output_dir) == False:
|
||||
os.makedirs(transaction_output_dir)
|
||||
|
||||
|
@ -27,7 +27,7 @@ def substack():
|
||||
print(f"{len(valid_publications)} of {len(publications)} publications are readable publically text posts - collecting those.")
|
||||
|
||||
totalTokenCount = 0
|
||||
transaction_output_dir = f"../server/documents/substack-{subdomain}"
|
||||
transaction_output_dir = f"../server/storage/documents/substack-{subdomain}"
|
||||
if os.path.isdir(transaction_output_dir) == False:
|
||||
os.makedirs(transaction_output_dir)
|
||||
|
||||
|
@ -24,7 +24,7 @@ def move_source(working_dir='hotdir', new_destination_filename= ''):
|
||||
return
|
||||
|
||||
def write_to_server_documents(data, filename):
|
||||
destination = f"../server/documents/custom-documents"
|
||||
destination = f"../server/storage/documents/custom-documents"
|
||||
if os.path.exists(destination) == False: os.makedirs(destination)
|
||||
with open(f"{destination}/{filename}.json", 'w', encoding='utf-8') as file:
|
||||
json.dump(data, file, ensure_ascii=True, indent=4)
|
||||
|
@ -17,7 +17,7 @@ def youtube():
|
||||
exit(1)
|
||||
|
||||
channel_data = fetch_channel_video_information(channel_id)
|
||||
transaction_output_dir = f"../server/documents/youtube-{channel_data.get('channelTitle')}"
|
||||
transaction_output_dir = f"../server/storage/documents/youtube-{channel_data.get('channelTitle')}"
|
||||
|
||||
if os.path.isdir(transaction_output_dir) == False:
|
||||
os.makedirs(transaction_output_dir)
|
||||
|
25
docker/.env.example
Normal file
25
docker/.env.example
Normal file
@ -0,0 +1,25 @@
|
||||
SERVER_PORT=3001
|
||||
OPEN_AI_KEY=
|
||||
OPEN_MODEL_PREF='gpt-3.5-turbo'
|
||||
CACHE_VECTORS="true"
|
||||
|
||||
# Enable all below if you are using vector database: Chroma.
|
||||
# VECTOR_DB="chroma"
|
||||
# CHROMA_ENDPOINT='http://localhost:8000'
|
||||
|
||||
# Enable all below if you are using vector database: Pinecone.
|
||||
VECTOR_DB="pinecone"
|
||||
PINECONE_ENVIRONMENT=
|
||||
PINECONE_API_KEY=
|
||||
PINECONE_INDEX=
|
||||
|
||||
# Enable all below if you are using vector database: LanceDB.
|
||||
# VECTOR_DB="lancedb"
|
||||
|
||||
# CLOUD DEPLOYMENT VARIABLES ONLY
|
||||
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
|
||||
# JWT_SECRET="my-random-string-for-seeding" # Only needed if AUTH_TOKEN is set. Please generate a random string at least 12 chars long.
|
||||
STORAGE_DIR="./server/storage"
|
||||
GOOGLE_APIS_KEY=
|
||||
UID='1000'
|
||||
GID='1000'
|
94
docker/Dockerfile
Normal file
94
docker/Dockerfile
Normal file
@ -0,0 +1,94 @@
|
||||
# Setup base image
|
||||
FROM ubuntu:jammy-20230522 AS base
|
||||
|
||||
# Build arguments
|
||||
ARG ARG_UID
|
||||
ARG ARG_GID
|
||||
|
||||
# Install system dependencies
|
||||
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
|
||||
DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \
|
||||
curl libgfortran5 python3 python3-pip tzdata netcat \
|
||||
libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 \
|
||||
libgcc1 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libx11-6 libx11-xcb1 libxcb1 \
|
||||
libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 \
|
||||
libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release \
|
||||
xdg-utils && \
|
||||
curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
|
||||
apt-get install -yq --no-install-recommends nodejs && \
|
||||
curl -LO https://github.com/yarnpkg/yarn/releases/download/v1.22.19/yarn_1.22.19_all.deb \
|
||||
&& dpkg -i yarn_1.22.19_all.deb \
|
||||
&& rm yarn_1.22.19_all.deb && \
|
||||
curl -LO https://github.com/jgm/pandoc/releases/download/3.1.3/pandoc-3.1.3-1-amd64.deb \
|
||||
&& dpkg -i pandoc-3.1.3-1-amd64.deb \
|
||||
&& rm pandoc-3.1.3-1-amd64.deb && \
|
||||
rm -rf /var/lib/apt/lists/* /usr/share/icons && \
|
||||
dpkg-reconfigure -f noninteractive tzdata && \
|
||||
python3 -m pip install --no-cache-dir virtualenv
|
||||
|
||||
# Create a group and user with specific UID and GID
|
||||
RUN groupadd -g $ARG_GID anythingllm && \
|
||||
useradd -u $ARG_UID -m -d /app -s /bin/bash -g anythingllm anythingllm && \
|
||||
mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app
|
||||
|
||||
# Copy the docker entrypoint and healthcheck scripts
|
||||
COPY ./docker/docker-entrypoint.sh /usr/local/bin/
|
||||
COPY ./docker/docker-healthcheck.sh /usr/local/bin/
|
||||
|
||||
# Ensure the scripts are executable
|
||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \
|
||||
chmod +x /usr/local/bin/docker-healthcheck.sh
|
||||
|
||||
USER anythingllm
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install frontend dependencies
|
||||
FROM base as frontend-deps
|
||||
|
||||
COPY ./frontend/package.json ./frontend/yarn.lock ./frontend/
|
||||
RUN cd ./frontend/ && yarn install && yarn cache clean
|
||||
|
||||
# Install server dependencies
|
||||
FROM base as server-deps
|
||||
COPY ./server/package.json ./server/yarn.lock ./server/
|
||||
RUN cd ./server/ && yarn install --production && yarn cache clean && \
|
||||
rm /app/server/node_modules/vectordb/x86_64-apple-darwin.node && \
|
||||
rm /app/server/node_modules/vectordb/aarch64-apple-darwin.node
|
||||
|
||||
# Build the frontend
|
||||
FROM frontend-deps as build-stage
|
||||
COPY ./frontend/ ./frontend/
|
||||
RUN cd ./frontend/ && yarn build && yarn cache clean
|
||||
|
||||
# Setup the server
|
||||
FROM server-deps as production-stage
|
||||
COPY ./server/ ./server/
|
||||
|
||||
# Copy built static frontend files to the server public directory
|
||||
COPY --from=build-stage /app/frontend/dist ./server/public
|
||||
|
||||
# Copy the collector
|
||||
COPY ./collector/ ./collector/
|
||||
|
||||
# Install collector dependencies
|
||||
RUN cd /app/collector && \
|
||||
python3 -m virtualenv v-env && \
|
||||
. v-env/bin/activate && \
|
||||
pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Setup the environment
|
||||
ENV NODE_ENV=production
|
||||
ENV PATH=/app/collector/v-env/bin:$PATH
|
||||
|
||||
# Expose the server port
|
||||
EXPOSE 3001
|
||||
|
||||
# Setup the healthcheck
|
||||
HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \
|
||||
CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1
|
||||
|
||||
# Run the server
|
||||
ENTRYPOINT ["docker-entrypoint.sh"]
|
||||
|
||||
CMD ["node", "/app/server/index.js"]
|
30
docker/docker-compose.yml
Normal file
30
docker/docker-compose.yml
Normal file
@ -0,0 +1,30 @@
|
||||
version: '3.9'
|
||||
|
||||
networks:
|
||||
anything-llm:
|
||||
driver: bridge
|
||||
# chroma_net:
|
||||
# external: true
|
||||
|
||||
services:
|
||||
anything-llm:
|
||||
container_name: anything-llm
|
||||
image: anything-llm:latest
|
||||
build:
|
||||
context: ../.
|
||||
dockerfile: ./docker/Dockerfile
|
||||
args:
|
||||
ARG_UID: ${UID}
|
||||
ARG_GID: ${GID}
|
||||
volumes:
|
||||
- "../server/storage:/app/server/storage"
|
||||
- "../collector/hotdir/:/app/collector/hotdir"
|
||||
- "../collector/outputs/:/app/collector/outputs"
|
||||
user: "${UID}:${GID}"
|
||||
ports:
|
||||
- "3001:3001"
|
||||
env_file:
|
||||
- .env
|
||||
networks:
|
||||
- anything-llm
|
||||
# - chroma_net
|
3
docker/docker-entrypoint.sh
Executable file
3
docker/docker-entrypoint.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
exec "$@"
|
13
docker/docker-healthcheck.sh
Normal file
13
docker/docker-healthcheck.sh
Normal file
@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Send a request to the specified URL
|
||||
response=$(curl --write-out '%{http_code}' --silent --output /dev/null http://localhost:3001/api/ping)
|
||||
|
||||
# If the HTTP response code is 200 (OK), the server is up
|
||||
if [ $response -eq 200 ]; then
|
||||
echo "Server is up"
|
||||
exit 0
|
||||
else
|
||||
echo "Server is down"
|
||||
exit 1
|
||||
fi
|
@ -3,6 +3,7 @@
|
||||
"private": false,
|
||||
"version": "0.0.1-beta",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
"start": "vite --open",
|
||||
"build": "vite build",
|
||||
|
@ -1,2 +1,2 @@
|
||||
export const API_BASE =
|
||||
import.meta.env.VITE_API_BASE || "http://localhost:3001";
|
||||
import.meta.env.VITE_API_BASE || "http://localhost:3001/api";
|
||||
|
2604
frontend/yarn.lock
Normal file
2604
frontend/yarn.lock
Normal file
File diff suppressed because it is too large
Load Diff
@ -10,7 +10,7 @@
|
||||
},
|
||||
"scripts": {
|
||||
"lint": "cd server && yarn lint && cd .. && cd frontend && yarn lint",
|
||||
"setup": "cd server && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"",
|
||||
"setup": "cd server && yarn && cd ../frontend && yarn && cd .. && yarn setup:envs && echo \"Please run yarn dev:server and yarn dev:frontend in separate terminal tabs.\"",
|
||||
"setup:envs": "cd server && cp -n .env.example .env.development && cd ../collector && cp -n .env.example .env && cd ..",
|
||||
"dev:server": "cd server && yarn dev",
|
||||
"dev:frontend": "cd frontend && yarn start",
|
||||
|
9
server/.gitignore
vendored
9
server/.gitignore
vendored
@ -1,8 +1,9 @@
|
||||
.env.production
|
||||
.env.development
|
||||
documents/*
|
||||
vector-cache/*.json
|
||||
!documents/DOCUMENTS.md
|
||||
storage/documents/*
|
||||
storage/vector-cache/*.json
|
||||
!storage/documents/DOCUMENTS.md
|
||||
logs/server.log
|
||||
*.db
|
||||
lancedb
|
||||
storage/lancedb
|
||||
public/
|
@ -5,6 +5,7 @@ process.env.NODE_ENV === "development"
|
||||
const express = require("express");
|
||||
const bodyParser = require("body-parser");
|
||||
const cors = require("cors");
|
||||
const path = require("path");
|
||||
const { validatedRequest } = require("./utils/middleware/validatedRequest");
|
||||
const { reqBody } = require("./utils/http");
|
||||
const { systemEndpoints } = require("./endpoints/system");
|
||||
@ -12,6 +13,7 @@ const { workspaceEndpoints } = require("./endpoints/workspaces");
|
||||
const { chatEndpoints } = require("./endpoints/chat");
|
||||
const { getVectorDbClass } = require("./utils/helpers");
|
||||
const app = express();
|
||||
const apiRouter = express.Router();
|
||||
|
||||
app.use(cors({ origin: true }));
|
||||
app.use(bodyParser.text());
|
||||
@ -22,13 +24,13 @@ app.use(
|
||||
})
|
||||
);
|
||||
|
||||
app.use("/system/*", validatedRequest);
|
||||
app.use("/workspace/*", validatedRequest);
|
||||
systemEndpoints(app);
|
||||
workspaceEndpoints(app);
|
||||
chatEndpoints(app);
|
||||
apiRouter.use("/system/*", validatedRequest);
|
||||
apiRouter.use("/workspace/*", validatedRequest);
|
||||
systemEndpoints(apiRouter);
|
||||
workspaceEndpoints(apiRouter);
|
||||
chatEndpoints(apiRouter);
|
||||
|
||||
app.post("/v/:command", async (request, response) => {
|
||||
apiRouter.post("/v/:command", async (request, response) => {
|
||||
try {
|
||||
const VectorDb = getVectorDbClass();
|
||||
const { command } = request.params;
|
||||
@ -56,14 +58,24 @@ app.post("/v/:command", async (request, response) => {
|
||||
}
|
||||
});
|
||||
|
||||
app.use("/api", apiRouter);
|
||||
|
||||
if (process.env.NODE_ENV !== "development") {
|
||||
app.use(express.static(path.resolve(__dirname, 'public'), {extensions: ["js"]}));
|
||||
|
||||
app.use("/", function (_, response) {
|
||||
response.sendFile(path.join(__dirname, "public", "index.html"));
|
||||
})
|
||||
}
|
||||
|
||||
app.all("*", function (_, response) {
|
||||
response.sendStatus(404);
|
||||
});
|
||||
|
||||
app
|
||||
.listen(process.env.SERVER_PORT || 5000, () => {
|
||||
.listen(process.env.SERVER_PORT || 3001, () => {
|
||||
console.log(
|
||||
`Example app listening on port ${process.env.SERVER_PORT || 5000}`
|
||||
`Example app listening on port ${process.env.SERVER_PORT || 3001}`
|
||||
);
|
||||
})
|
||||
.on("error", function (err) {
|
||||
|
@ -20,7 +20,7 @@ const Document = {
|
||||
|
||||
const db = await open({
|
||||
filename: `${
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : ""
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
|
||||
}anythingllm.db`,
|
||||
driver: sqlite3.Database,
|
||||
});
|
||||
|
@ -18,7 +18,7 @@ const DocumentVectors = {
|
||||
|
||||
const db = await open({
|
||||
filename: `${
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : ""
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
|
||||
}anythingllm.db`,
|
||||
driver: sqlite3.Database,
|
||||
});
|
||||
|
@ -17,7 +17,7 @@ const Workspace = {
|
||||
|
||||
const db = await open({
|
||||
filename: `${
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : ""
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
|
||||
}anythingllm.db`,
|
||||
driver: sqlite3.Database,
|
||||
});
|
||||
|
@ -15,7 +15,7 @@ const WorkspaceChats = {
|
||||
|
||||
const db = await open({
|
||||
filename: `${
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : ""
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "storage/"
|
||||
}anythingllm.db`,
|
||||
driver: sqlite3.Database,
|
||||
});
|
||||
|
@ -31,7 +31,7 @@
|
||||
"sqlite3": "^5.1.6",
|
||||
"uuid": "^9.0.0",
|
||||
"jsonwebtoken": "^8.5.1",
|
||||
"vectordb": "0.1.5-beta"
|
||||
"vectordb": "0.1.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"nodemon": "^2.0.22",
|
||||
|
@ -6,7 +6,7 @@ async function collectDocumentData(folderName = null) {
|
||||
if (!folderName) throw new Error("No docPath provided in request");
|
||||
const folder =
|
||||
process.env.NODE_ENV === "development"
|
||||
? path.resolve(__dirname, `../../documents/${folderName}`)
|
||||
? path.resolve(__dirname, `../../storage/documents/${folderName}`)
|
||||
: path.resolve(process.env.STORAGE_DIR, `documents/${folderName}`);
|
||||
|
||||
const dirExists = fs.existsSync(folder);
|
||||
@ -35,7 +35,7 @@ async function fileData(filePath = null) {
|
||||
|
||||
const fullPath =
|
||||
process.env.NODE_ENV === "development"
|
||||
? path.resolve(__dirname, `../../documents/${filePath}`)
|
||||
? path.resolve(__dirname, `../../storage/documents/${filePath}`)
|
||||
: path.resolve(process.env.STORAGE_DIR, `documents/${filePath}`);
|
||||
const fileExists = fs.existsSync(fullPath);
|
||||
if (!fileExists) return null;
|
||||
@ -47,7 +47,7 @@ async function fileData(filePath = null) {
|
||||
async function viewLocalFiles() {
|
||||
const folder =
|
||||
process.env.NODE_ENV === "development"
|
||||
? path.resolve(__dirname, `../../documents`)
|
||||
? path.resolve(__dirname, `../../storage/documents`)
|
||||
: path.resolve(process.env.STORAGE_DIR, `documents`);
|
||||
const dirExists = fs.existsSync(folder);
|
||||
if (!dirExists) fs.mkdirSync(folder);
|
||||
@ -63,7 +63,7 @@ async function viewLocalFiles() {
|
||||
|
||||
const folderPath =
|
||||
process.env.NODE_ENV === "development"
|
||||
? path.resolve(__dirname, `../../documents/${file}`)
|
||||
? path.resolve(__dirname, `../../storage/documents/${file}`)
|
||||
: path.resolve(process.env.STORAGE_DIR, `documents/${file}`);
|
||||
|
||||
const isFolder = fs.lstatSync(folderPath).isDirectory();
|
||||
@ -106,7 +106,7 @@ async function cachedVectorInformation(filename = null, checkOnly = false) {
|
||||
const digest = uuidv5(filename, uuidv5.URL);
|
||||
const file =
|
||||
process.env.NODE_ENV === "development"
|
||||
? path.resolve(__dirname, `../../vector-cache/${digest}.json`)
|
||||
? path.resolve(__dirname, `../../storage/vector-cache/${digest}.json`)
|
||||
: path.resolve(process.env.STORAGE_DIR, `vector-cache/${digest}.json`);
|
||||
const exists = fs.existsSync(file);
|
||||
|
||||
@ -130,7 +130,7 @@ async function storeVectorResult(vectorData = [], filename = null) {
|
||||
);
|
||||
const folder =
|
||||
process.env.NODE_ENV === "development"
|
||||
? path.resolve(__dirname, `../../vector-cache`)
|
||||
? path.resolve(__dirname, `../../storage/vector-cache`)
|
||||
: path.resolve(process.env.STORAGE_DIR, `vector-cache`);
|
||||
|
||||
if (!fs.existsSync(folder)) fs.mkdirSync(folder);
|
||||
|
@ -27,7 +27,7 @@ function curateLanceSources(sources = []) {
|
||||
|
||||
const LanceDb = {
|
||||
uri: `${
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "./"
|
||||
!!process.env.STORAGE_DIR ? `${process.env.STORAGE_DIR}/` : "./storage/"
|
||||
}lancedb`,
|
||||
name: "LanceDb",
|
||||
connect: async function () {
|
||||
|
2054
server/yarn.lock
Normal file
2054
server/yarn.lock
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user