Stirling-PDF/scripts/init.sh

58 lines
1.9 KiB
Bash

#!/bin/bash
# Copy the original tesseract-ocr files to the volume directory without overwriting existing files
echo "Copying original files without overwriting existing files"
mkdir -p /usr/share/tessdata
cp -rn /usr/share/tessdata-original/* /usr/share/tessdata
if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then
cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tessdata || true;
fi
if [ -d /usr/share/tesseract-ocr/5/tessdata ]; then
cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true;
fi
# Update the user and group IDs as per environment variables
if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
usermod -o -u "$PUID" stirlingpdfuser
fi
if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
groupmod -o -g "$PGID" stirlingpdfgroup
fi
umask "$UMASK"
echo "Setting permissions and ownership for necessary directories..."
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar
# Check if TESSERACT_LANGS environment variable is set and is not empty
if [[ -n "$TESSERACT_LANGS" ]]; then
# Convert comma-separated values to a space-separated list
LANGS=$(echo $TESSERACT_LANGS | tr ',' ' ')
pattern='^[a-zA-Z]{2,4}(_[a-zA-Z]{2,4})?$'
# Install each language pack
for LANG in $LANGS; do
if [[ $LANG =~ $pattern ]]; then
apk add --no-cache "tesseract-ocr-data-$LANG"
else
echo "Skipping invalid language code"
fi
done
fi
if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" ]]; then
apk add --no-cache calibre@testing
fi
/scripts/download-security-jar.sh
# Run the main command and switch to stirling user for rest of run
exec su-exec stirlingpdfuser "$@"