diff --git a/Dockerfile b/Dockerfile index 63bbc6b0..0479b5f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,32 @@ -# Use the base image -FROM frooodle/stirling-pdf-base:version8 +# Main stage +FROM alpine:3.19.0 + +# JDK for app +RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ + echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ + echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ + apk add --no-cache \ + ca-certificates \ + tzdata \ + tini \ + bash \ + curl \ + openjdk17-jre \ +# Doc conversion + libreoffice@testing \ +# OCR MY PDF (unpaper for deskew and other advanced features) + ocrmypdf \ + tesseract-ocr-data-eng \ +# CV + py3-opencv \ +# python3/pip + python3 && \ + wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \ +# uno unoconv and HTML + pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \ + mv /usr/share/tessdata /usr/share/tessdata-original + + ARG VERSION_TAG @@ -24,7 +51,7 @@ COPY build/libs/*.jar app.jar ## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \ ## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME && \ # Set up necessary directories and permissions -RUN mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ +RUN mkdir -p /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ ##&& \ ## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \ ## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original && \ diff --git a/Dockerfile-lite b/Dockerfile-lite index 979d97de..5de060e6 100644 --- a/Dockerfile-lite +++ b/Dockerfile-lite @@ -20,17 +20,19 @@ COPY src/main/resources/static/fonts/*.ttf
/usr/share/fonts/opentype/noto COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto COPY build/libs/*.jar app.jar -RUN apk add --no-cache \ +RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ + echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ + echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ + apk add --no-cache \ ca-certificates \ tzdata \ tini \ bash \ curl \ - openjdk17-jre && \ + openjdk17-jre \ # Doc conversion - apk add --no-cache libreoffice --repository http://dl-cdn.alpinelinux.org/alpine/edge/community && \ + libreoffice@testing \ # python and pip - apk add --no-cache \ python3 && \ wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \ # uno unoconv and HTML @@ -40,14 +42,11 @@ RUN apk add --no-cache \ # useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \ # mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME # Set up necessary directories and permissions - mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ + mkdir -p /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ # chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles # Set font cache and permissions fc-cache -f -v && \ - chmod +x /scripts/*.sh && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories + chmod +x /scripts/*.sh # chown stirlingpdfuser:stirlingpdfgroup /app.jar # Set environment variables diff --git a/DockerfileBase b/DockerfileBase deleted file mode 100644 
index 44e1a582..00000000 --- a/DockerfileBase +++ /dev/null @@ -1,28 +0,0 @@ -# Main stage -FROM alpine:3.19.0 - -# JDK for app -RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ - apk add --no-cache \ - ca-certificates \ - tzdata \ - tini \ - bash \ - curl \ - openjdk17-jre \ -# Doc conversion - libreoffice@testing \ -# OCR MY PDF (unpaper for descew and other advanced featues) - ocrmypdf \ - tesseract-ocr-data-eng \ -# CV - py3-opencv \ -# python3/pip - python3 && \ - wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \ -# uno unoconv and HTML - pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \ - mv /usr/share/tessdata /usr/share/tessdata-original - diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java index 94e33982..798c5f44 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java @@ -4,6 +4,7 @@ import java.io.IOException; import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ModelAttribute; @@ -11,7 +12,6 @@ import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import 
org.springframework.web.multipart.MultipartFile; -import org.apache.pdfbox.text.PDFTextStripper; import io.github.pixee.security.Filenames; import io.swagger.v3.oas.annotations.Operation; @@ -65,16 +65,20 @@ public class ConvertPDFToOffice { throws IOException, InterruptedException { MultipartFile inputFile = request.getFileInput(); String outputFormat = request.getOutputFormat(); - if ("txt".equals(request.getOutputFormat())) { - try (PDDocument document = Loader.loadPDF(inputFile.getBytes())) { + if ("txt".equals(request.getOutputFormat())) { + try (PDDocument document = Loader.loadPDF(inputFile.getBytes())) { PDFTextStripper stripper = new PDFTextStripper(); - String text = stripper.getText(document); - return WebResponseUtils.bytesToWebResponse(text.getBytes(), Filenames.toSimpleFileName(inputFile.getOriginalFilename()).replaceFirst("[.][^.]+$", "") - + ".txt" , MediaType.TEXT_PLAIN); + String text = stripper.getText(document); + return WebResponseUtils.bytesToWebResponse( + text.getBytes(), + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + .replaceFirst("[.][^.]+$", "") + + ".txt", + MediaType.TEXT_PLAIN); } } else { - PDFToFile pdfToFile = new PDFToFile(); - return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); + PDFToFile pdfToFile = new PDFToFile(); + return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); } } diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java b/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java index e1355c30..a1f4f59c 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java @@ -68,7 +68,9 @@ public class ShowJavascript { if (script.isEmpty()) { script = - "PDF '" + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + "' does not contain Javascript"; + "PDF '" + + 
Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + + "' does not contain Javascript"; } return WebResponseUtils.bytesToWebResponse( diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java index cfde30c1..9e53292e 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java @@ -97,46 +97,44 @@ public class StampController { PDRectangle pageSize = page.getMediaBox(); float margin = marginFactor * (pageSize.getWidth() + pageSize.getHeight()) / 2; - PDPageContentStream contentStream = new PDPageContentStream( document, page, PDPageContentStream.AppendMode.APPEND, true, true); + PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState(); + graphicsState.setNonStrokingAlphaConstant(opacity); + contentStream.setGraphicsStateParameters(graphicsState); - PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState(); - graphicsState.setNonStrokingAlphaConstant(opacity); - contentStream.setGraphicsStateParameters(graphicsState); - - if ("text".equalsIgnoreCase(watermarkType)) { - addTextStamp( - contentStream, - watermarkText, - document, - page, - rotation, - position, - fontSize, - alphabet, - overrideX, - overrideY, - margin, - customColor); - } else if ("image".equalsIgnoreCase(watermarkType)) { - addImageStamp( - contentStream, - watermarkImage, - document, - page, - rotation, - position, - fontSize, - overrideX, - overrideY, - margin); - } - - contentStream.close(); - } + if ("text".equalsIgnoreCase(watermarkType)) { + addTextStamp( + contentStream, + watermarkText, + document, + page, + rotation, + position, + fontSize, + alphabet, + overrideX, + overrideY, + margin, + customColor); + } else if ("image".equalsIgnoreCase(watermarkType)) { + addImageStamp( + contentStream, + watermarkImage, + document, + page, + 
rotation, + position, + fontSize, + overrideX, + overrideY, + margin); + } + + contentStream.close(); + } } return WebResponseUtils.pdfDocToWebResponse( document,