docker and ocr updates

2024-11-23 15:21:25 +01:00 · 2023-12-10 22:02:30 +00:00 · 2023-12-10 22:02:30 +00:00 · 59c7978330
commit 59c7978330
parent 8b55ffff96
28 changed files with 100 additions and 110 deletions
--- a/2
+++ b/2
@ -1,5 +1,5 @@
 # Use the base image
-FROM frooodle/stirling-pdf-base:beta4
+FROM frooodle/stirling-pdf-base:testDontUseMe

 ARG VERSION_TAG

--- a/54
+++ b/54
@ -1,33 +1,35 @@
 # Main stage
-FROM bellsoft/liberica-openjdk-debian:17 AS base
+FROM ubuntu:22.04 AS base
+# software-properties-common provides add-apt-repository, which the next step uses to enable the Tesseract 5 PPA.
+RUN apt-get update && apt-get install -y --no-install-recommends software-properties-common gnupg2 && rm -rf /var/lib/apt/lists/*
+# Refresh the index in the same layer as the install so a cached layer never installs from a stale package list.
+RUN add-apt-repository -y ppa:alex-p/tesseract-ocr5 && apt-get update && apt-get install -y --no-install-recommends tesseract-ocr && rm -rf /var/lib/apt/lists/*
+# Single layer for the JRE, LibreOffice, OCRmyPDF and the pinned Python packages; every pip call is chained with "&&" so it runs as its own command.
 RUN apt-get update && \
    apt-get install -y --no-install-recommends \
+        openjdk-17-jre \
        libreoffice-core-nogui \
        libreoffice-common \
        libreoffice-writer-nogui \
        libreoffice-calc-nogui \
        libreoffice-impress-nogui \
        python3-uno \
+        ghostscript \
        python3-pip \
-        unoconv \
-        pngquant \
-        unpaper \
-        ocrmypdf && \
+        ocrmypdf \
+        unoconv && \
    pip install --upgrade pip && \
+    pip install --no-cache-dir --user --upgrade ocrmypdf && \
+    pip install --no-cache-dir --upgrade pillow==10.0.1 reportlab==3.6.13 wheel==0.38.1 setuptools==65.5.1 pyjwt==2.4.0 cryptography==39.0.1 && \
+    pip install --no-cache-dir --upgrade ocrmypdf && \
    pip install --no-cache-dir \
-        pillow==10.0.1 \
-        lxml==4.6.5 \
-        reportlab==3.6.13 \
-        setuptools==65.5.1 \
-        pikepdf==4.4.1 \
-        wheel==0.38.1 \
-        cryptography==39.0.1 \
        opencv-python-headless && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir /usr/share/tesseract-ocr-original && \
    cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \
    rm -rf /usr/share/tesseract-ocr

+
 # Python packages stage
 FROM base AS python-packages
 # Install build tools and Python libraries
@ -43,32 +45,4 @@ RUN apt-get update && \
 FROM base
 COPY --from=python-packages /usr/local /usr/local

-
-# Install wkhtmltopdf
-RUN apt-get update && \
-    apt-get install -y \
-        fontconfig \
-        libfontconfig1 \
-        libfreetype6 \
-        libx11-6 \
-        libxext6 \
-        libxrender1 \
-        xfonts-75dpi \
-        wget \
-        xfonts-base 
-
-# Set a default value for TARGETARCH if it's not provided
-ARG TARGETARCH=arm64
-
-# Conditional statement to choose the correct wkhtmltopdf package based on architecture
-RUN if [ "$TARGETARCH" = "amd64" ]; then \
-        wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6.1-3/wkhtmltox_0.12.6.1-3.bullseye_amd64.deb; \
-    elif [ "$TARGETARCH" = "arm64" ]; then \
-        wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6.1-3/wkhtmltox_0.12.6.1-3.bullseye_arm64.deb; \
-    else \
-        wget https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6.1-3/wkhtmltox_0.12.6.1-3.bullseye_amd64.deb; \
-    fi && \
-    dpkg -i wkhtmltox_0.12.6.1-3.bullseye_$TARGETARCH.deb && \
-    rm wkhtmltox_0.12.6.1-3.bullseye_$TARGETARCH.deb && \
-    rm -rf /var/lib/apt/lists/*
    
--- a/HowToUseOCR.md
+++ b/HowToUseOCR.md
@ -2,6 +2,9 @@

 This document provides instructions on how to add additional language packs for the OCR tab in Stirling-PDF, both inside and outside of Docker.

+## My OCR used to work and now doesn't!
+Please update your Tesseract Docker volume path from `/usr/share/tesseract-ocr/4.00/tessdata` to `/usr/share/tesseract-ocr/5/tessdata`.
+
 ## How does the OCR Work
 Stirling-PDF uses [OCRmyPDF](https://github.com/ocrmypdf/OCRmyPDF) which in turn uses tesseract for its text recognition.
 All credit goes to them for this awesome work! 
@ -18,7 +21,7 @@ Depending on your requirements, you can choose the appropriate language pack for
 ### Installing Language Packs

 1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
-2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/4.00/tessdata` (Debian) or `/usr/share/tesseract/tessdata` (Fedora)
+2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/5/tessdata` (Debian) or `/usr/share/tesseract/tessdata` (Fedora)

 # DO NOT REMOVE EXISTING ENG.TRAINEDDATA, IT'S REQUIRED.

@ -34,14 +37,14 @@ services:
  your_service_name:
    image: your_docker_image_name
    volumes:
-      - /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata
+      - /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata
 ```


 #### Docker run
 Add the following to your existing docker run command
 ```bash
-v /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata
+-v /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata
 ```

 #### Non-Docker
--- a/LocalRunGuide.md
+++ b/LocalRunGuide.md
@ -139,7 +139,7 @@ Easiest is to use the langpacks provided by your repositories. Skip the other st
 Manual:

 1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need.
-2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/4.00/tessdata`
+2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/5/tessdata`
 3. 
 Please view  [OCRmyPDF install guide](https://ocrmypdf.readthedocs.io/en/latest/installation.html) for more info.
 **IMPORTANT:** DO NOT REMOVE EXISTING `eng.traineddata`, IT'S REQUIRED.
--- a/README.md
+++ b/README.md
@ -113,7 +113,7 @@ Docker Run
 ```
 docker run -d \
  -p 8080:8080 \
-  -v /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata \
+  -v /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata \
  -v /location/of/extraConfigs:/configs \
  -e DOCKER_ENABLE_SECURITY=false \
  --name stirling-pdf \
@ -133,7 +133,7 @@ services:
    ports:
      - '8080:8080'
    volumes:
-      - /location/of/trainingData:/usr/share/tesseract-ocr/4.00/tessdata #Required for extra OCR languages
+      - /location/of/trainingData:/usr/share/tesseract-ocr/5/tessdata #Required for extra OCR languages
      - /location/of/extraConfigs:/configs
 #      - /location/of/customFiles:/customFiles/
    environment:
--- a/build.gradle
+++ b/build.gradle
@ -8,7 +8,7 @@ plugins {
 }

 group = 'stirling.software'
-version = '0.15.2'
+version = '0.16.0'
 sourceCompatibility = '17'

 repositories {
--- a/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
+++ b/src/main/java/stirling/software/SPDF/controller/api/misc/OCRController.java
@ -39,7 +39,7 @@ public class OCRController {
    private static final Logger logger = LoggerFactory.getLogger(OCRController.class);

    public List<String> getAvailableTesseractLanguages() {
-        String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
+        String tessdataDir = "/usr/share/tesseract-ocr/5/tessdata";
        File[] files = new File(tessdataDir).listFiles();
        if (files == null) {
            return Collections.emptyList();
--- a/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java
+++ b/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java
@ -78,7 +78,7 @@ public class OtherWebController {
    }
    
    public List<String> getAvailableTesseractLanguages() {
-        String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata";
+        String tessdataDir = "/usr/share/tesseract-ocr/5/tessdata";
        File[] files = new File(tessdataDir).listFiles();
        if (files == null) {
            return Collections.emptyList();
--- a/src/main/resources/messages_ar_AR.properties
+++ b/src/main/resources/messages_ar_AR.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Auto Redact
 home.autoRedact.desc=Auto Redacts(Blacks out) text in a PDF based on input text
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=تحويل
 #PDFToCSV
 PDFToCSV.title=PDF ??? CSV
 PDFToCSV.header=PDF ??? CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=??????
--- a/src/main/resources/messages_bg_BG.properties
+++ b/src/main/resources/messages_bg_BG.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Автоматично редактиране
 home.autoRedact.desc=Автоматично редактира (зачернява) текст в PDF въз основа на въведен текст
 showJS.tags=Редактиране,Скриване,затъмняване,черен,маркер,скрит

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Преобразуване
 #PDFToCSV
 PDFToCSV.title=PDF ??? CSV
 PDFToCSV.header=PDF ??? CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=????????
--- a/src/main/resources/messages_ca_CA.properties
+++ b/src/main/resources/messages_ca_CA.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Auto Redact
 home.autoRedact.desc=Auto Redacts(Blacks out) text in a PDF based on input text
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Converteix
 #PDFToCSV
 PDFToCSV.title=PDF a CSV
 PDFToCSV.header=PDF a CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extracte
--- a/src/main/resources/messages_de_DE.properties
+++ b/src/main/resources/messages_de_DE.properties
@ -831,4 +831,8 @@ PDFToXML.submit=Konvertieren
 #PDFToCSV
 PDFToCSV.title=PDF zu CSV
 PDFToCSV.header=PDF zu CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extrakt
--- a/src/main/resources/messages_el_GR.properties
+++ b/src/main/resources/messages_el_GR.properties
@ -336,9 +336,6 @@ home.autoRedact.title=\u0391\u03C5\u03C4\u03CC\u03BC\u03B1\u03C4\u03BF \u039C\u0
 home.autoRedact.desc=\u0391\u03C5\u03C4\u03CC\u03BC\u03B1\u03C4\u03B7 \u03B5\u03C0\u03B5\u03BE\u03B5\u03C1\u03B3\u03B1\u03C3\u03AF\u03B1 (\u039C\u03B1\u03CD\u03C1\u03B9\u03C3\u03BC\u03B1) \u03BA\u03B5\u03AF\u03BC\u03B5\u03BD\u03BF\u03C5 \u03C3\u03B5 PDF \u03BC\u03B5 \u03B2\u03AC\u03C3\u03B7 \u03C4\u03BF \u03BA\u03B5\u03AF\u03BC\u03B5\u03BD\u03BF \u03B5\u03B9\u03C3\u03B1\u03B3\u03C9\u03B3\u03AE\u03C2
 showJS.tags=Redact,Hide,black out,black,marker,hidden

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=\u039C\u03B5\u03C4\u03B1\u03C4\u03C1\u03BF\u03C0\u03AE
 #PDFToCSV
 PDFToCSV.title=PDF ?? CSV
 PDFToCSV.header=PDF ?? CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=?????????
--- a/src/main/resources/messages_en_GB.properties
+++ b/src/main/resources/messages_en_GB.properties
@ -831,4 +831,5 @@ PDFToXML.submit=Convert
 #PDFToCSV
 PDFToCSV.title=PDF to CSV
 PDFToCSV.header=PDF to CSV
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extract
--- a/src/main/resources/messages_en_US.properties
+++ b/src/main/resources/messages_en_US.properties
@ -831,4 +831,8 @@ PDFToXML.submit=Convert
 #PDFToCSV
 PDFToCSV.title=PDF to CSV
 PDFToCSV.header=PDF to CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extract
--- a/src/main/resources/messages_es_ES.properties
+++ b/src/main/resources/messages_es_ES.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Auto Redactar
 home.autoRedact.desc=Redactar automáticamente (ocultar) texto en un PDF según el texto introducido
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Convertir
 #PDFToCSV
 PDFToCSV.title=PDF a CSV
 PDFToCSV.header=PDF a CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extracto
--- a/src/main/resources/messages_eu_ES.properties
+++ b/src/main/resources/messages_eu_ES.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Auto Idatzi
 home.autoRedact.desc=Auto Idatzi testua pdf fitxategian sarrerako testuan oinarritua
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Bihurtu
 #PDFToCSV
 PDFToCSV.title=PDF a CSV
 PDFToCSV.header=PDF a CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extracto
--- a/src/main/resources/messages_fr_FR.properties
+++ b/src/main/resources/messages_fr_FR.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Caviarder automatiquement
 home.autoRedact.desc=Caviardez automatiquement les informations sensibles d\u2019un PDF.
 showJS.tags=caviarder,redact,auto

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Convertir
 #PDFToCSV
 PDFToCSV.title=PDF en CSV
 PDFToCSV.header=PDF en CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extrait
--- a/src/main/resources/messages_it_IT.properties
+++ b/src/main/resources/messages_it_IT.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Redazione automatica
 home.autoRedact.desc=Redige automaticamente (oscura) il testo in un PDF in base al testo immesso
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Converti
 #PDFToCSV
 PDFToCSV.title=Da PDF a CSV
 PDFToCSV.header=Da PDF a CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Estratto
--- a/src/main/resources/messages_ja_JP.properties
+++ b/src/main/resources/messages_ja_JP.properties
@ -336,9 +336,6 @@ home.autoRedact.title=自動塗りつぶし
 home.autoRedact.desc=入力したテキストに基づいてPDF内のテキストを自動で塗りつぶし(黒塗り)します。
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=変換
 #PDFToCSV
 PDFToCSV.title=PDF??CSV?
 PDFToCSV.header=PDF??CSV?
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=????
--- a/src/main/resources/messages_ko_KR.properties
+++ b/src/main/resources/messages_ko_KR.properties
@ -336,9 +336,6 @@ home.autoRedact.title=자동 검열
 home.autoRedact.desc=PDF 문서에서 입력된 텍스트들을 자동으로 검열(모자이크)합니다.
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=변환
 #PDFToCSV
 PDFToCSV.title=PDF? CSV?
 PDFToCSV.header=PDF? CSV?
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=??
--- a/src/main/resources/messages_nl_NL.properties
+++ b/src/main/resources/messages_nl_NL.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Auto Redact
 home.autoRedact.desc=Auto Redacts(Blacks out) text in a PDF based on input text
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Converteren
 #PDFToCSV
 PDFToCSV.title=PDF naar CSV
 PDFToCSV.header=PDF naar CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extract
--- a/src/main/resources/messages_pl_PL.properties
+++ b/src/main/resources/messages_pl_PL.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Auto Redact
 home.autoRedact.desc=Auto Redacts(Blacks out) text in a PDF based on input text
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Konwertuj
 #PDFToCSV
 PDFToCSV.title=PDF na CSV
 PDFToCSV.header=PDF na CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Wyci?g
--- a/src/main/resources/messages_ru_RU.properties
+++ b/src/main/resources/messages_ru_RU.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Auto Redact
 home.autoRedact.desc=Auto Redacts(Blacks out) text in a PDF based on input text
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Конвертировать
 #PDFToCSV
 PDFToCSV.title=PDF ? CSV
 PDFToCSV.header=PDF ? CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=???????
--- a/src/main/resources/messages_sv_SE.properties
+++ b/src/main/resources/messages_sv_SE.properties
@ -336,9 +336,6 @@ home.autoRedact.title=Auto Redact
 home.autoRedact.desc=Auto Redacts(Blacks out) text in a PDF based on input text
 showJS.tags=JS

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=Konvertera
 #PDFToCSV
 PDFToCSV.title=PDF till CSV
 PDFToCSV.header=PDF till CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Navvit
--- a/src/main/resources/messages_tr_TR.properties
+++ b/src/main/resources/messages_tr_TR.properties
@ -126,15 +126,9 @@ adminUserSettings.submit=Kullanıcıyı Kaydet
 # HOME-PAGE #
 #############
 home.desc=Yerel olarak barındırılan tüm PDF ihtiyaçlarınız için tek durak noktanız.
-##########################
-###  TODO: Translate   ###
-##########################
 home.searchBar=Search for features...


-##########################
-###  TODO: Translate   ###
-##########################
 home.viewPdf.title=View PDF
 home.viewPdf.desc=View, annotate, add text or images
 viewPdf.tags=view,read,annotate,text,image
@ -342,9 +336,6 @@ home.autoRedact.title=Otomatik Karartma
 home.autoRedact.desc=Giriş metnine dayanarak bir PDF'teki metni Otomatik Karartır (Redakte)
 showJS.tags=Karart,Gizle,karartma,siyah,markör,gizli

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -651,9 +642,6 @@ multiTool.title=PDF Çoklu Araç
 multiTool.header=PDF Çoklu Araç

 #view pdf
-##########################
-###  TODO: Translate   ###
-##########################
 viewPdf.title=View PDF
 viewPdf.header=View PDF

@ -841,9 +829,10 @@ PDFToXML.credit=Bu hizmet dosya dönüşümü için LibreOffice kullanır.
 PDFToXML.submit=Dönüştür

 #PDFToCSV
+PDFToCSV.title=PDF to CSV
+PDFToCSV.header=PDF to CSV
 ##########################
 ###  TODO: Translate   ###
 ##########################
-PDFToCSV.title=PDF to CSV
-PDFToCSV.header=PDF to CSV
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=Extract
--- a/src/main/resources/messages_zh_CN.properties
+++ b/src/main/resources/messages_zh_CN.properties
@ -336,9 +336,6 @@ home.autoRedact.title=自动删除
 home.autoRedact.desc=根据输入文本自动删除（覆盖）PDF中的文本
 showJS.tags=JavaScript

-##########################
-###  TODO: Translate   ###
-##########################
 home.tableExtraxt.title=PDF to CSV
 home.tableExtraxt.desc=Extracts Tables from a PDF converting it to CSV
 tableExtraxt.tags=CSV,Table Extraction,extract,convert
@ -834,4 +831,8 @@ PDFToXML.submit=转换
 #PDFToCSV
 PDFToCSV.title=PDF ? CSV
 PDFToCSV.header=PDF ? CSV
+##########################
+###  TODO: Translate   ###
+##########################
+PDFToCSV.prompt=Choose page to extract table
 PDFToCSV.submit=??
--- a/src/main/resources/templates/convert/pdf-to-csv.html
+++ b/src/main/resources/templates/convert/pdf-to-csv.html
@ -18,7 +18,7 @@
                        <div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
                        <button type="submit" class="btn btn-primary" th:text="#{PDFToCSV.submit}"></button>
                    </form>
-                    <p id="instruction-text" style="margin: 0; display: none">Choose page to extract table</p>
+                    <p id="instruction-text" style="margin: 0; display: none" th:text="#{PDFToCSV.prompt}"></p>

                    <div style="position: relative; display: inline-block;">
                        <div>