From 1f29033f179a049d8f3b98e5954564247bfb9eb1 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Sun, 10 Dec 2023 23:06:35 +0000 Subject: [PATCH] docker changes --- Dockerfile | 2 +- DockerfileBase | 52 ++++++++++---------- scripts/init.sh | 4 ++ src/main/resources/messages_ar_AR.properties | 3 -- src/main/resources/messages_bg_BG.properties | 3 -- src/main/resources/messages_ca_CA.properties | 3 -- src/main/resources/messages_de_DE.properties | 3 -- src/main/resources/messages_el_GR.properties | 3 -- src/main/resources/messages_en_US.properties | 3 -- src/main/resources/messages_es_ES.properties | 3 -- src/main/resources/messages_eu_ES.properties | 3 -- src/main/resources/messages_fr_FR.properties | 3 -- src/main/resources/messages_it_IT.properties | 3 -- src/main/resources/messages_ja_JP.properties | 3 -- src/main/resources/messages_ko_KR.properties | 3 -- src/main/resources/messages_nl_NL.properties | 3 -- src/main/resources/messages_pl_PL.properties | 3 -- src/main/resources/messages_ru_RU.properties | 3 -- src/main/resources/messages_sv_SE.properties | 3 -- src/main/resources/messages_tr_TR.properties | 3 -- src/main/resources/messages_zh_CN.properties | 3 -- 21 files changed, 32 insertions(+), 80 deletions(-) diff --git a/Dockerfile b/Dockerfile index f66390f4..ef2d6d90 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Use the base image -FROM frooodle/stirling-pdf-base:testDontUseMe +FROM frooodle/stirling-pdf-base:version6 ARG VERSION_TAG diff --git a/DockerfileBase b/DockerfileBase index 8027ac8b..7662e229 100644 --- a/DockerfileBase +++ b/DockerfileBase @@ -1,48 +1,50 @@ # Main stage FROM ubuntu:latest AS base -RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2 -RUN add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr +# JDK for app RUN apt-get update && \ apt-get install -y --no-install-recommends \ - openjdk-17-jre \ - libreoffice-core-nogui \ + openjdk-17-jre + + +# Doc conversion +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice-core-nogui \ libreoffice-common \ libreoffice-writer-nogui \ libreoffice-calc-nogui \ libreoffice-impress-nogui \ - python3-uno \ + python3-uno \ + unoconv + + +# OCR MY PDF (unpaper for descew and other advanced featues) +RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2 && \ +add-apt-repository ppa:alex-p/tesseract-ocr5 && apt install -y --no-install-recommends tesseract-ocr && \ +apt-get update && \ + apt-get install -y --no-install-recommends \ ghostscript \ python3-pip \ ocrmypdf \ - unoconv && \ + unpaper && \ pip install --upgrade pip && \ - pip install --no-cache-dir --user --upgrade ocrmypdf && \ - pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1 \ pip install --no-cache-dir --upgrade ocrmypdf && \ - pip install --no-cache-dir \ - opencv-python-headless && \ - rm -rf /var/lib/apt/lists/* && \ + pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1 + + +#CV +RUN pip install --no-cache-dir opencv-python-headless + + +# cleanup and etc +RUN rm -rf /var/lib/apt/lists/* && \ mkdir /usr/share/tesseract-ocr-original && \ cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \ rm -rf /usr/share/tesseract-ocr -# Python packages stage -FROM base AS python-packages -# Install build tools and Python libraries -RUN apt-get update && \ - apt-get install -y --no-install-recommends \ - build-essential \ - libffi-dev \ - libssl-dev \ - zlib1g-dev \ - libjpeg-dev - -# Final stage: Copy necessary files from the previous stage -FROM base -COPY --from=python-packages /usr/local /usr/local \ No newline at end of file diff --git a/scripts/init.sh b/scripts/init.sh index e65914c4..80a13785 100644 --- a/scripts/init.sh +++ b/scripts/init.sh @@ -5,6 +5,10 @@ echo "Copying original files without overwriting existing files" mkdir -p /usr/share/tesseract-ocr cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr +if [ -d /usr/share/tesseract-ocr/4.00/tessdata ]; then + cp -r /usr/share/tesseract-ocr/4.00/tessdata/* /usr/share/tesseract-ocr/5/tessdata/ || true; +fi + # Check if TESSERACT_LANGS environment variable is set and is not empty if [[ -n "$TESSERACT_LANGS" ]]; then # Convert comma-separated values to a space-separated list diff --git a/src/main/resources/messages_ar_AR.properties b/src/main/resources/messages_ar_AR.properties index 9e22527c..a3d2a451 100644 --- a/src/main/resources/messages_ar_AR.properties +++ b/src/main/resources/messages_ar_AR.properties @@ -831,8 +831,5 @@ PDFToXML.submit=تحويل #PDFToCSV PDFToCSV.title=PDF ??? CSV PDFToCSV.header=PDF ??? CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=?????? diff --git a/src/main/resources/messages_bg_BG.properties b/src/main/resources/messages_bg_BG.properties index 7114bb8f..c0c327ed 100644 --- a/src/main/resources/messages_bg_BG.properties +++ b/src/main/resources/messages_bg_BG.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Преобразуване #PDFToCSV PDFToCSV.title=PDF ??? CSV PDFToCSV.header=PDF ??? CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=???????? diff --git a/src/main/resources/messages_ca_CA.properties b/src/main/resources/messages_ca_CA.properties index fd2f5adf..4a650f98 100644 --- a/src/main/resources/messages_ca_CA.properties +++ b/src/main/resources/messages_ca_CA.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Converteix #PDFToCSV PDFToCSV.title=PDF a CSV PDFToCSV.header=PDF a CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Extracte diff --git a/src/main/resources/messages_de_DE.properties b/src/main/resources/messages_de_DE.properties index c5722ce2..1777bfdf 100644 --- a/src/main/resources/messages_de_DE.properties +++ b/src/main/resources/messages_de_DE.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Konvertieren #PDFToCSV PDFToCSV.title=PDF zu CSV PDFToCSV.header=PDF zu CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Extrakt diff --git a/src/main/resources/messages_el_GR.properties b/src/main/resources/messages_el_GR.properties index f4970b65..119ec1fc 100644 --- a/src/main/resources/messages_el_GR.properties +++ b/src/main/resources/messages_el_GR.properties @@ -831,8 +831,5 @@ PDFToXML.submit=\u039C\u03B5\u03C4\u03B1\u03C4\u03C1\u03BF\u03C0\u03AE #PDFToCSV PDFToCSV.title=PDF ?? CSV PDFToCSV.header=PDF ?? CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=????????? diff --git a/src/main/resources/messages_en_US.properties b/src/main/resources/messages_en_US.properties index 3963d55c..db116c98 100644 --- a/src/main/resources/messages_en_US.properties +++ b/src/main/resources/messages_en_US.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Convert #PDFToCSV PDFToCSV.title=PDF to CSV PDFToCSV.header=PDF to CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Extract diff --git a/src/main/resources/messages_es_ES.properties b/src/main/resources/messages_es_ES.properties index b60e2907..7828b717 100644 --- a/src/main/resources/messages_es_ES.properties +++ b/src/main/resources/messages_es_ES.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Convertir #PDFToCSV PDFToCSV.title=PDF a CSV PDFToCSV.header=PDF a CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Extracto diff --git a/src/main/resources/messages_eu_ES.properties b/src/main/resources/messages_eu_ES.properties index 4a26b24a..528f22f3 100644 --- a/src/main/resources/messages_eu_ES.properties +++ b/src/main/resources/messages_eu_ES.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Bihurtu #PDFToCSV PDFToCSV.title=PDF a CSV PDFToCSV.header=PDF a CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Extracto diff --git a/src/main/resources/messages_fr_FR.properties b/src/main/resources/messages_fr_FR.properties index d4e1fb7b..c6b59d0c 100644 --- a/src/main/resources/messages_fr_FR.properties +++ b/src/main/resources/messages_fr_FR.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Convertir #PDFToCSV PDFToCSV.title=PDF en CSV PDFToCSV.header=PDF en CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Extrait diff --git a/src/main/resources/messages_it_IT.properties b/src/main/resources/messages_it_IT.properties index ee13c595..101664bb 100644 --- a/src/main/resources/messages_it_IT.properties +++ b/src/main/resources/messages_it_IT.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Converti #PDFToCSV PDFToCSV.title=Da PDF a CSV PDFToCSV.header=Da PDF a CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Estratto diff --git a/src/main/resources/messages_ja_JP.properties b/src/main/resources/messages_ja_JP.properties index a37d25a1..68084f14 100644 --- a/src/main/resources/messages_ja_JP.properties +++ b/src/main/resources/messages_ja_JP.properties @@ -831,8 +831,5 @@ PDFToXML.submit=変換 #PDFToCSV PDFToCSV.title=PDF??CSV? PDFToCSV.header=PDF??CSV? -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=???? diff --git a/src/main/resources/messages_ko_KR.properties b/src/main/resources/messages_ko_KR.properties index a1d38f09..340b0839 100644 --- a/src/main/resources/messages_ko_KR.properties +++ b/src/main/resources/messages_ko_KR.properties @@ -831,8 +831,5 @@ PDFToXML.submit=변환 #PDFToCSV PDFToCSV.title=PDF? CSV? PDFToCSV.header=PDF? CSV? -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=?? diff --git a/src/main/resources/messages_nl_NL.properties b/src/main/resources/messages_nl_NL.properties index c08e3844..3ae32e8b 100644 --- a/src/main/resources/messages_nl_NL.properties +++ b/src/main/resources/messages_nl_NL.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Converteren #PDFToCSV PDFToCSV.title=PDF naar CSV PDFToCSV.header=PDF naar CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Extract diff --git a/src/main/resources/messages_pl_PL.properties b/src/main/resources/messages_pl_PL.properties index 89758125..2e76194a 100644 --- a/src/main/resources/messages_pl_PL.properties +++ b/src/main/resources/messages_pl_PL.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Konwertuj #PDFToCSV PDFToCSV.title=PDF na CSV PDFToCSV.header=PDF na CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Wyci?g diff --git a/src/main/resources/messages_ru_RU.properties b/src/main/resources/messages_ru_RU.properties index b3ae01db..a4d566ad 100644 --- a/src/main/resources/messages_ru_RU.properties +++ b/src/main/resources/messages_ru_RU.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Конвертировать #PDFToCSV PDFToCSV.title=PDF ? CSV PDFToCSV.header=PDF ? CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=??????? diff --git a/src/main/resources/messages_sv_SE.properties b/src/main/resources/messages_sv_SE.properties index 48ba7943..569e8d8a 100644 --- a/src/main/resources/messages_sv_SE.properties +++ b/src/main/resources/messages_sv_SE.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Konvertera #PDFToCSV PDFToCSV.title=PDF till CSV PDFToCSV.header=PDF till CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Navvit diff --git a/src/main/resources/messages_tr_TR.properties b/src/main/resources/messages_tr_TR.properties index 395f6abb..d02ae139 100644 --- a/src/main/resources/messages_tr_TR.properties +++ b/src/main/resources/messages_tr_TR.properties @@ -831,8 +831,5 @@ PDFToXML.submit=Dönüştür #PDFToCSV PDFToCSV.title=PDF to CSV PDFToCSV.header=PDF to CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=Extract diff --git a/src/main/resources/messages_zh_CN.properties b/src/main/resources/messages_zh_CN.properties index 6b18d1d9..398530fd 100644 --- a/src/main/resources/messages_zh_CN.properties +++ b/src/main/resources/messages_zh_CN.properties @@ -831,8 +831,5 @@ PDFToXML.submit=转换 #PDFToCSV PDFToCSV.title=PDF ? CSV PDFToCSV.header=PDF ? CSV -########################## -### TODO: Translate ### -########################## PDFToCSV.prompt=Choose page to extract table PDFToCSV.submit=??