mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2024-11-23 15:21:25 +01:00
71e93e3cb5
* Changes! * lang * fake scan init, print init and pdf to text for exe * Hardening suggestions for Stirling-PDF / changes (#1099) * Switch order of literals to prevent NullPointerException * Introduced protections against predictable RNG abuse --------- Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com> * Update README.md * install custom fonts * Formats etc * version bump * disable WIP work * remove chinese font --------- Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com> Co-authored-by: systo <systo@host.docker.internal>
31 lines
1.0 KiB
Bash
31 lines
1.0 KiB
Bash
#!/bin/bash
# Prepares the tesseract OCR data directory, optionally installs extra
# language packs with apk, then hands off to /scripts/init-without-ocr.sh.
#
# Environment:
#   TESSERACT_LANGS  Optional list of language codes separated by commas
#                    and/or whitespace, e.g. "deu,fra,chi_sim".

#######################################
# Installs the tesseract-ocr-data-<code> package for every valid code.
# Arguments: $1 - language codes separated by commas and/or whitespace
# Outputs:   apk output, plus one "Skipping invalid language code" line
#            on stdout for each code that fails validation
# Returns:   status of the last command run in the loop (0 if no codes)
#######################################
install_tesseract_langs() {
  local pattern='^[a-zA-Z]{2,4}(_[a-zA-Z]{2,4})?$'
  local code
  local -a codes=()

  # Split into an array instead of relying on unquoted expansion, so glob
  # characters in the input are never expanded against the filesystem.
  # With -d '' read hits end-of-input and returns non-zero; that is expected.
  IFS=$' \t\n' read -r -d '' -a codes <<<"${1//,/ }" || true

  # The loop variable is deliberately local and NOT named LANG: assigning to
  # LANG would change the locale seen by apk and by everything started later.
  for code in "${codes[@]}"; do
    if [[ $code =~ $pattern ]]; then
      apk add --no-cache "tesseract-ocr-data-$code"
    else
      echo "Skipping invalid language code"
    fi
  done
}

# Copy the original tesseract-ocr files to the volume directory without
# overwriting existing files.
echo "Copying original files without overwriting existing files"
mkdir -p /usr/share/tessdata
cp -rn /usr/share/tessdata-original/* /usr/share/tessdata

# Merge in tessdata shipped under the versioned tesseract directories.
# Best effort: a failed copy must not stop the rest of the start-up.
if [[ -d /usr/share/tesseract-ocr/4.00/tessdata ]]; then
  cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tessdata || true
fi

if [[ -d /usr/share/tesseract-ocr/5/tessdata ]]; then
  cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true
fi

# Install each requested language pack when TESSERACT_LANGS is set and
# not empty.
if [[ -n "${TESSERACT_LANGS:-}" ]]; then
  install_tesseract_langs "$TESSERACT_LANGS"
fi

/scripts/init-without-ocr.sh "$@"