diff --git a/.hadolint.yaml b/.hadolint.yaml new file mode 100644 index 00000000..b76a5107 --- /dev/null +++ b/.hadolint.yaml @@ -0,0 +1,8 @@ +failure-threshold: warning +ignored: + - DL3008 + - DL3013 +format: tty +trustedRegistries: + - docker.io + - gcr.io diff --git a/.vscode/settings.json b/.vscode/settings.json index 096f1c9f..72b612b8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,16 +4,20 @@ "Astra", "Dockerized", "Embeddable", + "GROQ", "hljs", + "inferencing", "Langchain", "Milvus", "Mintplex", "Ollama", "openai", + "openrouter", "Qdrant", "vectordbs", "Weaviate", "Zilliz" ], - "eslint.experimental.useFlatConfig": true -} \ No newline at end of file + "eslint.experimental.useFlatConfig": true, + "docker.languageserver.formatter.ignoreMultilineInstructions": true +} diff --git a/collector/utils/files/index.js b/collector/utils/files/index.js index 3e6ce344..4bca62f9 100644 --- a/collector/utils/files/index.js +++ b/collector/utils/files/index.js @@ -1,28 +1,16 @@ const fs = require("fs"); const path = require("path"); -const { getType } = require("mime"); +const { MimeDetector } = require("./mime"); function isTextType(filepath) { - if (!fs.existsSync(filepath)) return false; - // These are types of mime primary classes that for sure - // cannot also for forced into a text type. - const nonTextTypes = ["multipart", "image", "model", "audio", "video"]; - // These are full-mimes we for sure cannot parse or interpret as text - // documents - const BAD_MIMES = [ - "application/octet-stream", - "application/zip", - "application/pkcs8", - "application/vnd.microsoft.portable-executable", - "application/x-msdownload", - ]; - try { - const mime = getType(filepath); - if (BAD_MIMES.includes(mime)) return false; + if (!fs.existsSync(filepath)) return false; + const mimeLib = new MimeDetector(); + const mime = mimeLib.getType(filepath); + if (mimeLib.badMimes.includes(mime)) return false; const type = mime.split("/")[0]; - if (nonTextTypes.includes(type)) return false; + if (mimeLib.nonTextTypes.includes(type)) return false; return true; } catch { return false; diff --git a/collector/utils/files/mime.js b/collector/utils/files/mime.js new file mode 100644 index 00000000..feabd620 --- /dev/null +++ b/collector/utils/files/mime.js @@ -0,0 +1,37 @@ +const MimeLib = require("mime"); + +class MimeDetector { + nonTextTypes = ["multipart", "image", "model", "audio", "video"]; + badMimes = [ + "application/octet-stream", + "application/zip", + "application/pkcs8", + "application/vnd.microsoft.portable-executable", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // XLSX are binaries and need to be handled explicitly. + "application/x-msdownload", + ]; + + constructor() { + this.lib = MimeLib; + this.setOverrides(); + } + + setOverrides() { + // the .ts extension maps to video/mp2t because of https://en.wikipedia.org/wiki/MPEG_transport_stream + // which has had this extension far before TS was invented. So need to force re-map this MIME map. + this.lib.define( + { + "text/plain": ["ts", "py", "opts", "lock", "jsonl"], + }, + true + ); + } + + getType(filepath) { + return this.lib.getType(filepath); + } +} + +module.exports = { + MimeDetector, +}; diff --git a/docker/.env.example b/docker/.env.example index ba33bd5c..ae4913dc 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -61,6 +61,10 @@ GID='1000' # HUGGING_FACE_LLM_API_KEY=hf_xxxxxx # HUGGING_FACE_LLM_TOKEN_LIMIT=8000 +# LLM_PROVIDER='groq' +# GROQ_API_KEY=gsk_abcxyz +# GROQ_MODEL_PREF=llama2-70b-4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/docker/Dockerfile b/docker/Dockerfile index b1ea62a6..2edbadb2 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,12 +1,17 @@ # Setup base image -FROM ubuntu:jammy-20230522 AS base +FROM ubuntu:jammy-20230916 AS base +# Build arguments ARG ARG_UID=1000 ARG ARG_GID=1000 FROM base AS build-arm64 RUN echo "Preparing build of AnythingLLM image for arm64 architecture" +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Install system dependencies +# hadolint ignore=DL3008,DL3013 RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \ unzip curl gnupg libgfortran5 libgbm1 tzdata netcat \ @@ -25,8 +30,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ && rm yarn_1.22.19_all.deb # Create a group and user with specific UID and GID -RUN groupadd -g $ARG_GID anythingllm && \ - useradd -u $ARG_UID -m -d /app -s /bin/bash -g anythingllm anythingllm && \ +RUN groupadd -g "$ARG_GID" anythingllm && \ + useradd -l -u "$ARG_UID" -m -d /app -s /bin/bash -g anythingllm anythingllm && \ mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app # Copy docker helper scripts @@ -61,6 +66,10 @@ RUN echo "Done running arm64 specific installtion steps" FROM base AS build-amd64 RUN echo "Preparing build of AnythingLLM image for non-ARM architecture" +SHELL ["/bin/bash", "-o", "pipefail", "-c"] + +# Install system dependencies +# hadolint ignore=DL3008,DL3013 RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \ curl gnupg libgfortran5 libgbm1 tzdata netcat \ @@ -79,8 +88,8 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update && \ && rm yarn_1.22.19_all.deb # Create a group and user with specific UID and GID -RUN groupadd -g $ARG_GID anythingllm && \ - useradd -u $ARG_UID -m -d /app -s /bin/bash -g anythingllm anythingllm && \ +RUN groupadd -g "$ARG_GID" anythingllm && \ + useradd -l -u "$ARG_UID" -m -d /app -s /bin/bash -g anythingllm anythingllm && \ mkdir -p /app/frontend/ /app/server/ /app/collector/ && chown -R anythingllm:anythingllm /app # Copy docker helper scripts @@ -95,6 +104,8 @@ RUN chmod +x /usr/local/bin/docker-entrypoint.sh && \ ############################################# # COMMON BUILD FLOW FOR ALL ARCHS ############################################# + +# hadolint ignore=DL3006 FROM build-${TARGETARCH} AS build RUN echo "Running common build flow of AnythingLLM image for all architectures" @@ -102,43 +113,54 @@ USER anythingllm WORKDIR /app # Install frontend dependencies -FROM build as frontend-deps +FROM build AS frontend-deps COPY ./frontend/package.json ./frontend/yarn.lock ./frontend/ -RUN cd ./frontend/ && yarn install --network-timeout 100000 && yarn cache clean +WORKDIR /app/frontend +RUN yarn install --network-timeout 100000 && yarn cache clean +WORKDIR /app # Install server dependencies -FROM build as server-deps +FROM build AS server-deps COPY ./server/package.json ./server/yarn.lock ./server/ -RUN cd ./server/ && yarn install --production --network-timeout 100000 && yarn cache clean +WORKDIR /app/server +RUN yarn install --production --network-timeout 100000 && yarn cache clean +WORKDIR /app # Compile Llama.cpp bindings for node-llama-cpp for this operating system. USER root -RUN cd ./server && npx --no node-llama-cpp download +WORKDIR /app/server +RUN npx --no node-llama-cpp download +WORKDIR /app USER anythingllm # Build the frontend -FROM frontend-deps as build-stage +FROM frontend-deps AS build-stage COPY ./frontend/ ./frontend/ -RUN cd ./frontend/ && yarn build && yarn cache clean +WORKDIR /app/frontend +RUN yarn build && yarn cache clean +WORKDIR /app # Setup the server -FROM server-deps as production-stage +FROM server-deps AS production-stage COPY --chown=anythingllm:anythingllm ./server/ ./server/ # Copy built static frontend files to the server public directory -COPY --from=build-stage /app/frontend/dist ./server/public +COPY --chown=anythingllm:anythingllm --from=build-stage /app/frontend/dist ./server/public # Copy the collector COPY --chown=anythingllm:anythingllm ./collector/ ./collector/ # Install collector dependencies +WORKDIR /app/collector ENV PUPPETEER_DOWNLOAD_BASE_URL=https://storage.googleapis.com/chrome-for-testing-public -RUN cd /app/collector && yarn install --production --network-timeout 100000 && yarn cache clean +RUN yarn install --production --network-timeout 100000 && yarn cache clean # Migrate and Run Prisma against known schema -RUN cd ./server && npx prisma generate --schema=./prisma/schema.prisma -RUN cd ./server && npx prisma migrate deploy --schema=./prisma/schema.prisma +WORKDIR /app/server +RUN npx prisma generate --schema=./prisma/schema.prisma && \ + npx prisma migrate deploy --schema=./prisma/schema.prisma +WORKDIR /app # Setup the environment ENV NODE_ENV=production @@ -152,4 +174,4 @@ HEALTHCHECK --interval=1m --timeout=10s --start-period=1m \ CMD /bin/bash /usr/local/bin/docker-healthcheck.sh || exit 1 # Run the server -ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"] \ No newline at end of file +ENTRYPOINT ["/bin/bash", "/usr/local/bin/docker-entrypoint.sh"] diff --git a/docker/docker-entrypoint.sh b/docker/docker-entrypoint.sh index 3d890db1..1ac69e5b 100755 --- a/docker/docker-entrypoint.sh +++ b/docker/docker-entrypoint.sh @@ -1,9 +1,10 @@ #!/bin/bash -{ cd /app/server/ &&\ - npx prisma generate --schema=./prisma/schema.prisma &&\ - npx prisma migrate deploy --schema=./prisma/schema.prisma &&\ - node /app/server/index.js +{ + cd /app/server/ && + npx prisma generate --schema=./prisma/schema.prisma && + npx prisma migrate deploy --schema=./prisma/schema.prisma && + node /app/server/index.js } & { node /app/collector/index.js; } & wait -n -exit $? \ No newline at end of file +exit $? diff --git a/docker/docker-healthcheck.sh b/docker/docker-healthcheck.sh index 45a88477..49bee3e1 100644 --- a/docker/docker-healthcheck.sh +++ b/docker/docker-healthcheck.sh @@ -4,10 +4,10 @@ response=$(curl --write-out '%{http_code}' --silent --output /dev/null http://localhost:3001/api/ping) # If the HTTP response code is 200 (OK), the server is up -if [ $response -eq 200 ]; then - echo "Server is up" - exit 0 +if [ "$response" -eq 200 ]; then + echo "Server is up" + exit 0 else - echo "Server is down" - exit 1 + echo "Server is down" + exit 1 fi diff --git a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx index 3d493f1c..6bc18a5a 100644 --- a/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx +++ b/frontend/src/components/LLMSelection/AnthropicAiOptions/index.jsx @@ -48,7 +48,13 @@ export default function AnthropicAiOptions({ settings, showAlert = false }) { required={true} className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" > - {["claude-2", "claude-instant-1"].map((model) => { + {[ + "claude-instant-1.2", + "claude-2.0", + "claude-2.1", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + ].map((model) => { return (