diff --git a/Dockerfile b/Dockerfile index 63bbc6b0..0479b5f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,32 @@ -# Use the base image -FROM frooodle/stirling-pdf-base:version8 +# Main stage +FROM alpine:3.19.0 + +# JDK for app +RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ + echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ + echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ + apk add --no-cache \ + ca-certificates \ + tzdata \ + tini \ + bash \ + curl \ + openjdk17-jre \ +# Doc conversion + libreoffice@testing \ +# OCR MY PDF (unpaper for deskew and other advanced features) + ocrmypdf \ + tesseract-ocr-data-eng \ +# CV + py3-opencv \ +# python3/pip + python3 && \ + wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \ +# uno unoconv and HTML + pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \ + mv /usr/share/tessdata /usr/share/tessdata-original + + ARG VERSION_TAG @@ -24,7 +51,7 @@ COPY build/libs/*.jar app.jar ## useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \ ## mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME && \ # Set up necessary directories and permissions -RUN mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ +RUN mkdir -p /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ ##&& \ ## chown -R stirlingpdfuser:stirlingpdfgroup /scripts /usr/share/fonts/opentype/noto /usr/share/tesseract-ocr /configs /customFiles && \ ## chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/tesseract-ocr-original && \ diff --git a/Dockerfile-lite b/Dockerfile-lite index 979d97de..5de060e6 100644 --- a/Dockerfile-lite +++ b/Dockerfile-lite @@ -20,17 +20,19 @@ COPY src/main/resources/static/fonts/*.ttf 
/usr/share/fonts/opentype/noto COPY src/main/resources/static/fonts/*.otf /usr/share/fonts/opentype/noto COPY build/libs/*.jar app.jar -RUN apk add --no-cache \ +RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ + echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ + echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ + apk add --no-cache \ ca-certificates \ tzdata \ tini \ bash \ curl \ - openjdk17-jre && \ + openjdk17-jre \ # Doc conversion - apk add --no-cache libreoffice --repository http://dl-cdn.alpinelinux.org/alpine/edge/community && \ + libreoffice@testing \ # python and pip - apk add --no-cache \ python3 && \ wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \ # uno unoconv and HTML @@ -40,14 +42,11 @@ RUN apk add --no-cache \ # useradd -u $PUID -g stirlingpdfgroup -s /bin/sh stirlingpdfuser && \ # mkdir -p $HOME && chown stirlingpdfuser:stirlingpdfgroup $HOME # Set up necessary directories and permissions - mkdir /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ + mkdir -p /configs /logs /customFiles /pipeline/watchedFolders /pipeline/finishedFolders && \ # chown -R stirlingpdfuser:stirlingpdfgroup /usr/share/fonts/opentype/noto /configs /customFiles # Set font cache and permissions fc-cache -f -v && \ - chmod +x /scripts/*.sh && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories + chmod +x /scripts/*.sh # chown stirlingpdfuser:stirlingpdfgroup /app.jar # Set environment variables diff --git a/DockerfileBase b/DockerfileBase deleted file mode 100644 
index 44e1a582..00000000 --- a/DockerfileBase +++ /dev/null @@ -1,28 +0,0 @@ -# Main stage -FROM alpine:3.19.0 - -# JDK for app -RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \ - echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \ - apk add --no-cache \ - ca-certificates \ - tzdata \ - tini \ - bash \ - curl \ - openjdk17-jre \ -# Doc conversion - libreoffice@testing \ -# OCR MY PDF (unpaper for descew and other advanced featues) - ocrmypdf \ - tesseract-ocr-data-eng \ -# CV - py3-opencv \ -# python3/pip - python3 && \ - wget https://bootstrap.pypa.io/get-pip.py -qO - | python3 - --break-system-packages --no-cache-dir --upgrade && \ -# uno unoconv and HTML - pip install --break-system-packages --no-cache-dir --upgrade unoconv WeasyPrint && \ - mv /usr/share/tessdata /usr/share/tessdata-original - diff --git a/src/main/java/stirling/software/SPDF/config/AppConfig.java b/src/main/java/stirling/software/SPDF/config/AppConfig.java index c1a646ad..65e98b7f 100644 --- a/src/main/java/stirling/software/SPDF/config/AppConfig.java +++ b/src/main/java/stirling/software/SPDF/config/AppConfig.java @@ -77,16 +77,11 @@ public class AppConfig { return Files.exists(Paths.get("/.dockerenv")); } - @Bean(name = "bookFormatsInstalled") - public boolean bookFormatsInstalled() { - return applicationProperties.getSystem().getCustomApplications().isInstallBookFormats(); - } - - @Bean(name = "htmlFormatsInstalled") - public boolean htmlFormatsInstalled() { + @Bean(name = "bookAndHtmlFormatsInstalled") + public boolean bookAndHtmlFormatsInstalled() { return applicationProperties .getSystem() .getCustomApplications() - .isInstallAdvancedHtmlToPDF(); + .isInstallBookAndHtmlFormats(); } } diff --git a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java 
b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java index f1e328f9..f82c189f 100644 --- a/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java +++ b/src/main/java/stirling/software/SPDF/config/EndpointConfiguration.java @@ -16,7 +16,7 @@ import org.springframework.stereotype.Service; import stirling.software.SPDF.model.ApplicationProperties; @Service -@DependsOn({"bookFormatsInstalled"}) +@DependsOn({"bookAndHtmlFormatsInstalled"}) public class EndpointConfiguration { private static final Logger logger = LoggerFactory.getLogger(EndpointConfiguration.class); private Map endpointStatuses = new ConcurrentHashMap<>(); @@ -24,14 +24,14 @@ public class EndpointConfiguration { private final ApplicationProperties applicationProperties; - private boolean bookFormatsInstalled; + private boolean bookAndHtmlFormatsInstalled; @Autowired public EndpointConfiguration( ApplicationProperties applicationProperties, - @Qualifier("bookFormatsInstalled") boolean bookFormatsInstalled) { + @Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) { this.applicationProperties = applicationProperties; - this.bookFormatsInstalled = bookFormatsInstalled; + this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled; init(); processEnvironmentConfigs(); } @@ -229,7 +229,7 @@ public class EndpointConfiguration { private void processEnvironmentConfigs() { List endpointsToRemove = applicationProperties.getEndpoints().getToRemove(); List groupsToRemove = applicationProperties.getEndpoints().getGroupsToRemove(); - if (!bookFormatsInstalled) { + if (!bookAndHtmlFormatsInstalled) { groupsToRemove.add("Calibre"); } if (endpointsToRemove != null) { diff --git a/src/main/java/stirling/software/SPDF/config/PostStartupProcesses.java b/src/main/java/stirling/software/SPDF/config/PostStartupProcesses.java index 862e5f9e..2041cf95 100644 --- a/src/main/java/stirling/software/SPDF/config/PostStartupProcesses.java +++ 
b/src/main/java/stirling/software/SPDF/config/PostStartupProcesses.java @@ -26,12 +26,8 @@ public class PostStartupProcesses { private boolean runningInDocker; @Autowired - @Qualifier("bookFormatsInstalled") - private boolean bookFormatsInstalled; - - @Autowired - @Qualifier("htmlFormatsInstalled") - private boolean htmlFormatsInstalled; + @Qualifier("bookAndHtmlFormatsInstalled") + private boolean bookAndHtmlFormatsInstalled; private static final Logger logger = LoggerFactory.getLogger(PostStartupProcesses.class); @@ -39,34 +35,11 @@ public class PostStartupProcesses { public void runInstallCommandBasedOnEnvironment() throws IOException, InterruptedException { List> commands = new ArrayList<>(); // Checking for DOCKER_INSTALL_BOOK_FORMATS environment variable - if (bookFormatsInstalled) { + if (bookAndHtmlFormatsInstalled) { List tmpList = new ArrayList<>(); - // Set up the timezone configuration commands - tmpList.addAll( - Arrays.asList( - "sh", - "-c", - "echo 'tzdata tzdata/Areas select Europe' | debconf-set-selections; " - + "echo 'tzdata tzdata/Zones/Europe select Berlin' | debconf-set-selections")); - commands.add(tmpList); - // Install calibre with DEBIAN_FRONTEND set to noninteractive tmpList = new ArrayList<>(); - tmpList.addAll( - Arrays.asList( - "sh", - "-c", - "DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends calibre")); - commands.add(tmpList); - } - - // Checking for DOCKER_INSTALL_HTML_FORMATS environment variable - if (htmlFormatsInstalled) { - List tmpList = new ArrayList<>(); - // Add -y flag for automatic yes to prompts and --no-install-recommends to reduce size - tmpList.addAll( - Arrays.asList( - "apt-get", "install", "wkhtmltopdf", "-y", "--no-install-recommends")); + tmpList.addAll(Arrays.asList("apk", "add", "--no-cache", "calibre")); /* one argv token per element, like the removed apt-get commands; a single joined string would be taken as the executable name */ commands.add(tmpList); } @@ -74,8 +47,6 @@ public class PostStartupProcesses { // Run the command if (runningInDocker) { List tmpList = new ArrayList<>(); - 
tmpList.addAll(Arrays.asList("apt-get", "update")); - commands.add(0, tmpList); for (List list : commands) { ProcessExecutorResult returnCode = diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertBookToPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertBookToPDFController.java index 3cbc783c..05784a15 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertBookToPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertBookToPDFController.java @@ -23,21 +23,21 @@ import stirling.software.SPDF.utils.WebResponseUtils; public class ConvertBookToPDFController { @Autowired - @Qualifier("bookFormatsInstalled") - private boolean bookFormatsInstalled; + @Qualifier("bookAndHtmlFormatsInstalled") + private boolean bookAndHtmlFormatsInstalled; @PostMapping(consumes = "multipart/form-data", value = "/book/pdf") @Operation( summary = "Convert a BOOK/comic (*.epub | *.mobi | *.azw3 | *.fb2 | *.txt | *.docx) to PDF", description = - "(Requires bookFormatsInstalled flag and Calibre installed) This endpoint takes an BOOK/comic (*.epub | *.mobi | *.azw3 | *.fb2 | *.txt | *.docx) input and converts it to PDF format.") + "(Requires bookAndHtmlFormatsInstalled flag and Calibre installed) This endpoint takes an BOOK/comic (*.epub | *.mobi | *.azw3 | *.fb2 | *.txt | *.docx) input and converts it to PDF format.") public ResponseEntity HtmlToPdf(@ModelAttribute GeneralFile request) throws Exception { MultipartFile fileInput = request.getFileInput(); - if (!bookFormatsInstalled) { + if (!bookAndHtmlFormatsInstalled) { throw new IllegalArgumentException( - "bookFormatsInstalled flag is False, this functionality is not avaiable"); + "bookAndHtmlFormatsInstalled flag is False, this functionality is not avaiable"); } if (fileInput == null) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java 
b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java index 189eebdf..c7cfc196 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertHtmlToPDF.java @@ -23,8 +23,8 @@ import stirling.software.SPDF.utils.WebResponseUtils; public class ConvertHtmlToPDF { @Autowired - @Qualifier("htmlFormatsInstalled") - private boolean htmlFormatsInstalled; + @Qualifier("bookAndHtmlFormatsInstalled") + private boolean bookAndHtmlFormatsInstalled; @PostMapping(consumes = "multipart/form-data", value = "/html/pdf") @Operation( @@ -47,7 +47,10 @@ public class ConvertHtmlToPDF { } byte[] pdfBytes = FileToPdf.convertHtmlToPdf( - request, fileInput.getBytes(), originalFilename, htmlFormatsInstalled); + request, + fileInput.getBytes(), + originalFilename, + bookAndHtmlFormatsInstalled); String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java index 050dd0fe..9cefe1ff 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertMarkdownToPdf.java @@ -33,8 +33,8 @@ import stirling.software.SPDF.utils.WebResponseUtils; public class ConvertMarkdownToPdf { @Autowired - @Qualifier("htmlFormatsInstalled") - private boolean htmlFormatsInstalled; + @Qualifier("bookAndHtmlFormatsInstalled") + private boolean bookAndHtmlFormatsInstalled; @PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf") @Operation( @@ -69,7 +69,10 @@ public class ConvertMarkdownToPdf { byte[] pdfBytes = FileToPdf.convertHtmlToPdf( - null, htmlContent.getBytes(), "converted.html", htmlFormatsInstalled); + null, + htmlContent.getBytes(), + 
"converted.html", + bookAndHtmlFormatsInstalled); String outputFilename = originalFilename.replaceFirst("[.][^.]+$", "") diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToBookController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToBookController.java index c9171305..28793d48 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToBookController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToBookController.java @@ -30,22 +30,22 @@ import stirling.software.SPDF.utils.WebResponseUtils; public class ConvertPDFToBookController { @Autowired - @Qualifier("bookFormatsInstalled") - private boolean bookFormatsInstalled; + @Qualifier("bookAndHtmlFormatsInstalled") + private boolean bookAndHtmlFormatsInstalled; @PostMapping(consumes = "multipart/form-data", value = "/pdf/book") @Operation( summary = "Convert a PDF to a Book/comic (*.epub | *.mobi | *.azw3 | *.fb2 | *.txt | *.docx .. (others to include by chatgpt) to PDF", description = - "(Requires bookFormatsInstalled flag and Calibre installed) This endpoint Convert a PDF to a Book/comic (*.epub | *.mobi | *.azw3 | *.fb2 | *.txt | *.docx .. (others to include by chatgpt) to PDF") + "(Requires bookAndHtmlFormatsInstalled flag and Calibre installed) This endpoint Convert a PDF to a Book/comic (*.epub | *.mobi | *.azw3 | *.fb2 | *.txt | *.docx .. 
(others to include by chatgpt) to PDF") public ResponseEntity HtmlToPdf(@ModelAttribute PdfToBookRequest request) throws Exception { MultipartFile fileInput = request.getFileInput(); - if (!bookFormatsInstalled) { + if (!bookAndHtmlFormatsInstalled) { throw new IllegalArgumentException( - "bookFormatsInstalled flag is False, this functionality is not avaiable"); + "bookAndHtmlFormatsInstalled flag is False, this functionality is not avaiable"); } if (fileInput == null) { diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java index 74b292b5..798c5f44 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java @@ -2,6 +2,10 @@ package stirling.software.SPDF.controller.api.converters; import java.io.IOException; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; +import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ModelAttribute; import org.springframework.web.bind.annotation.PostMapping; @@ -9,6 +13,7 @@ import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; +import io.github.pixee.security.Filenames; import io.swagger.v3.oas.annotations.Operation; import io.swagger.v3.oas.annotations.tags.Tag; @@ -17,6 +22,7 @@ import stirling.software.SPDF.model.api.converters.PdfToPresentationRequest; import stirling.software.SPDF.model.api.converters.PdfToTextOrRTFRequest; import stirling.software.SPDF.model.api.converters.PdfToWordRequest; import stirling.software.SPDF.utils.PDFToFile; +import 
stirling.software.SPDF.utils.WebResponseUtils; @RestController @RequestMapping("/api/v1/convert") @@ -59,9 +65,21 @@ public class ConvertPDFToOffice { throws IOException, InterruptedException { MultipartFile inputFile = request.getFileInput(); String outputFormat = request.getOutputFormat(); - - PDFToFile pdfToFile = new PDFToFile(); - return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); + if ("txt".equals(request.getOutputFormat())) { + try (PDDocument document = Loader.loadPDF(inputFile.getBytes())) { + PDFTextStripper stripper = new PDFTextStripper(); + String text = stripper.getText(document); + return WebResponseUtils.bytesToWebResponse( + text.getBytes(), + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + .replaceFirst("[.][^.]+$", "") + + ".txt", + MediaType.TEXT_PLAIN); + } + } else { + PDFToFile pdfToFile = new PDFToFile(); + return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); + } } @PostMapping(consumes = "multipart/form-data", value = "/pdf/word") diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java index a6cd439b..c2d25973 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertWebsiteToPDF.java @@ -29,8 +29,8 @@ import stirling.software.SPDF.utils.WebResponseUtils; public class ConvertWebsiteToPDF { @Autowired - @Qualifier("htmlFormatsInstalled") - private boolean htmlFormatsInstalled; + @Qualifier("bookAndHtmlFormatsInstalled") + private boolean bookAndHtmlFormatsInstalled; @PostMapping(consumes = "multipart/form-data", value = "/url/pdf") @Operation( @@ -53,7 +53,7 @@ public class ConvertWebsiteToPDF { // Prepare the OCRmyPDF command List command = new ArrayList<>(); - if (!htmlFormatsInstalled) { + if 
(!bookAndHtmlFormatsInstalled) { command.add("weasyprint"); } else { command.add("wkhtmltopdf"); diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/BlankPageController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/BlankPageController.java index b414a35b..a813ba79 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/BlankPageController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/BlankPageController.java @@ -2,23 +2,20 @@ package stirling.software.SPDF.controller.api.misc; import java.awt.image.BufferedImage; import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; import java.util.stream.IntStream; -import javax.imageio.ImageIO; - import org.apache.pdfbox.Loader; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageTree; import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.text.PDFTextStripper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.ModelAttribute; @@ -33,7 +30,6 @@ import io.swagger.v3.oas.annotations.tags.Tag; import stirling.software.SPDF.model.api.misc.RemoveBlankPagesRequest; import stirling.software.SPDF.utils.PdfUtils; -import stirling.software.SPDF.utils.ProcessExecutor; import stirling.software.SPDF.utils.WebResponseUtils; @RestController @@ -41,6 +37,8 @@ import stirling.software.SPDF.utils.WebResponseUtils; @Tag(name = "Misc", description = "Miscellaneous APIs") public class BlankPageController { + private static final Logger logger = LoggerFactory.getLogger(BlankPageController.class); + @PostMapping(consumes = "multipart/form-data", value = "/remove-blanks") 
@Operation( summary = "Remove blank pages from a PDF file", @@ -63,63 +61,35 @@ public class BlankPageController { PDFRenderer pdfRenderer = new PDFRenderer(document); for (PDPage page : pages) { - System.out.println("checking page " + pageIndex); + logger.info("checking page " + pageIndex); textStripper.setStartPage(pageIndex + 1); textStripper.setEndPage(pageIndex + 1); String pageText = textStripper.getText(document); boolean hasText = !pageText.trim().isEmpty(); + + boolean blank = true; /* a page with neither text nor images is blank, as before this change; the text and image branches below overwrite this default */ if (hasText) { - pagesToKeepIndex.add(pageIndex); - System.out.println("page " + pageIndex + " has text"); + logger.info("page " + pageIndex + " has text, not blank"); + blank = false; } else { boolean hasImages = PdfUtils.hasImagesOnPage(page); if (hasImages) { - System.out.println("page " + pageIndex + " has image"); - - Path tempFile = Files.createTempFile("image_", ".png"); - + logger.info("page " + pageIndex + " has image, running blank detection"); // Render image and save as temp file - BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, 300); - ImageIO.write(image, "png", tempFile.toFile()); - - List command = - new ArrayList<>( - Arrays.asList( - "python", - System.getProperty("user.dir") - + "/scripts/detect-blank-pages.py", - tempFile.toString(), - "--threshold", - String.valueOf(threshold), - "--white_percent", - String.valueOf(whitePercent))); - - Boolean blank = false; - // Run CLI command - try { - ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV) - .runCommandWithOutputHandling(command); - } catch (IOException e) { - // From detect-blank-pages.py - // Return code 1: The image is considered blank. - // Return code 0: The image is not considered blank. - // Since the process returned with a failure code, it should be blank. 
- blank = true; - } - - if (blank) { - System.out.println("Skipping, Image was blank for page #" + pageIndex); - } else { - System.out.println( - "page " + pageIndex + " has image which is not blank"); - pagesToKeepIndex.add(pageIndex); - } + BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, 30); + blank = isBlankImage(image, threshold, whitePercent, threshold); } } + + if (blank) { + logger.info("Skipping, Image was blank for page #" + pageIndex); + } else { + logger.info("page " + pageIndex + " has image which is not blank"); + pagesToKeepIndex.add(pageIndex); + } + pageIndex++; } - System.out.print("pagesToKeep=" + pagesToKeepIndex.size()); - // Remove pages not present in pagesToKeepIndex List pageIndices = IntStream.range(0, pages.getCount()).boxed().collect(Collectors.toList()); @@ -142,4 +112,30 @@ public class BlankPageController { if (document != null) document.close(); } } + + public static boolean isBlankImage( + BufferedImage image, int threshold, double whitePercent, int blurSize) { + if (image == null) { + logger.info("Error: Image is null"); + return false; + } + + // Convert to binary image based on the threshold + int whitePixels = 0; + int totalPixels = image.getWidth() * image.getHeight(); + + for (int i = 0; i < image.getHeight(); i++) { + for (int j = 0; j < image.getWidth(); j++) { + int color = image.getRGB(j, i) & 0xFF; + if (color >= 255 - threshold) { + whitePixels++; + } + } + } + + double whitePixelPercentage = (whitePixels / (double) totalPixels) * 100; + logger.info(String.format("Page has white pixel percent of %.2f%%", whitePixelPercentage)); + + return whitePixelPercentage >= whitePercent; + } } diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java b/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java index e1355c30..0a93bf1d 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java +++ 
b/src/main/java/stirling/software/SPDF/controller/api/misc/ShowJavascript.java @@ -68,12 +68,14 @@ public class ShowJavascript { if (script.isEmpty()) { script = - "PDF '" + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + "' does not contain Javascript"; + "PDF '" + + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + + "' does not contain Javascript"; } return WebResponseUtils.bytesToWebResponse( script.getBytes(StandardCharsets.UTF_8), - inputFile.getOriginalFilename() + ".js"); + Filenames.toSimpleFileName(inputFile.getOriginalFilename()) + ".js"); } } } diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java index 99a5e2df..9e53292e 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/StampController.java @@ -7,6 +7,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; +import java.util.List; import javax.imageio.ImageIO; @@ -87,49 +88,54 @@ public class StampController { // Load the input PDF PDDocument document = Loader.loadPDF(pdfFile.getBytes()); - for (PDPage page : document.getPages()) { - PDRectangle pageSize = page.getMediaBox(); - float margin = marginFactor * (pageSize.getWidth() + pageSize.getHeight()) / 2; + List pageNumbers = request.getPageNumbersList(); - PDPageContentStream contentStream = - new PDPageContentStream( - document, page, PDPageContentStream.AppendMode.APPEND, true, true); + for (int pageIndex : pageNumbers) { + int zeroBasedIndex = pageIndex - 1; + if (zeroBasedIndex >= 0 && zeroBasedIndex < document.getNumberOfPages()) { + PDPage page = document.getPage(zeroBasedIndex); + PDRectangle pageSize = page.getMediaBox(); + float margin = marginFactor * (pageSize.getWidth() + pageSize.getHeight()) / 2; - PDExtendedGraphicsState 
graphicsState = new PDExtendedGraphicsState(); - graphicsState.setNonStrokingAlphaConstant(opacity); - contentStream.setGraphicsStateParameters(graphicsState); + PDPageContentStream contentStream = + new PDPageContentStream( + document, page, PDPageContentStream.AppendMode.APPEND, true, true); - if ("text".equalsIgnoreCase(watermarkType)) { - addTextStamp( - contentStream, - watermarkText, - document, - page, - rotation, - position, - fontSize, - alphabet, - overrideX, - overrideY, - margin, - customColor); - } else if ("image".equalsIgnoreCase(watermarkType)) { - addImageStamp( - contentStream, - watermarkImage, - document, - page, - rotation, - position, - fontSize, - overrideX, - overrideY, - margin); + PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState(); + graphicsState.setNonStrokingAlphaConstant(opacity); + contentStream.setGraphicsStateParameters(graphicsState); + + if ("text".equalsIgnoreCase(watermarkType)) { + addTextStamp( + contentStream, + watermarkText, + document, + page, + rotation, + position, + fontSize, + alphabet, + overrideX, + overrideY, + margin, + customColor); + } else if ("image".equalsIgnoreCase(watermarkType)) { + addImageStamp( + contentStream, + watermarkImage, + document, + page, + rotation, + position, + fontSize, + overrideX, + overrideY, + margin); + } + + contentStream.close(); } - - contentStream.close(); } - return WebResponseUtils.pdfDocToWebResponse( document, Filenames.toSimpleFileName(pdfFile.getOriginalFilename()) diff --git a/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java b/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java index 55ebcb91..23270df5 100644 --- a/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java +++ b/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java @@ -13,7 +13,7 @@ import io.swagger.v3.oas.annotations.tags.Tag; @Tag(name = "Convert", description = "Convert APIs") 
public class ConverterWebController { - @ConditionalOnExpression("#{bookFormatsInstalled}") + @ConditionalOnExpression("#{bookAndHtmlFormatsInstalled}") @GetMapping("/book-to-pdf") @Hidden public String convertBookToPdfForm(Model model) { @@ -21,7 +21,7 @@ public class ConverterWebController { return "convert/book-to-pdf"; } - @ConditionalOnExpression("#{bookFormatsInstalled}") + @ConditionalOnExpression("#{bookAndHtmlFormatsInstalled}") @GetMapping("/pdf-to-book") @Hidden public String convertPdfToBookForm(Model model) { diff --git a/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java b/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java index 3258d8b1..34c60bbe 100644 --- a/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java +++ b/src/main/java/stirling/software/SPDF/model/ApplicationProperties.java @@ -290,31 +290,20 @@ public class ApplicationProperties { } public static class CustomApplications { - private boolean installBookFormats; - private boolean installAdvancedHtmlToPDF; + private boolean installBookAndHtmlFormats; - public boolean isInstallBookFormats() { - return installBookFormats; + public boolean isInstallBookAndHtmlFormats() { + return installBookAndHtmlFormats; } - public void setInstallBookFormats(boolean installBookFormats) { - this.installBookFormats = installBookFormats; - } - - public boolean isInstallAdvancedHtmlToPDF() { - return installAdvancedHtmlToPDF; - } - - public void setInstallAdvancedHtmlToPDF(boolean installAdvancedHtmlToPDF) { - this.installAdvancedHtmlToPDF = installAdvancedHtmlToPDF; + public void setInstallBookAndHtmlFormats(boolean installBookAndHtmlFormats) { + this.installBookAndHtmlFormats = installBookAndHtmlFormats; } @Override public String toString() { - return "CustomApplications [installBookFormats=" - + installBookFormats - + ", installAdvancedHtmlToPDF=" - + installAdvancedHtmlToPDF + return "CustomApplications [installBookAndHtmlFormats=" + + 
installBookAndHtmlFormats + "]"; } } diff --git a/src/main/java/stirling/software/SPDF/model/api/converters/HTMLToPdfRequest.java b/src/main/java/stirling/software/SPDF/model/api/converters/HTMLToPdfRequest.java index c778c36f..5f5ae051 100644 --- a/src/main/java/stirling/software/SPDF/model/api/converters/HTMLToPdfRequest.java +++ b/src/main/java/stirling/software/SPDF/model/api/converters/HTMLToPdfRequest.java @@ -14,39 +14,4 @@ public class HTMLToPdfRequest extends PDFFile { description = "Zoom level for displaying the website. Default is '1'.", defaultValue = "1") private float zoom; - - @Schema(description = "Width of the page in centimeters.") - private Float pageWidth; - - @Schema(description = "Height of the page in centimeters.") - private Float pageHeight; - - @Schema(description = "Top margin of the page in millimeters.") - private Float marginTop; - - @Schema(description = "Bottom margin of the page in millimeters.") - private Float marginBottom; - - @Schema(description = "Left margin of the page in millimeters.") - private Float marginLeft; - - @Schema(description = "Right margin of the page in millimeters.") - private Float marginRight; - - @Schema( - description = "Enable or disable rendering of website background.", - allowableValues = {"Yes", "No"}) - private String printBackground; - - @Schema( - description = - "Enable or disable the default header. The default header includes the name of the page on the left and the page number on the right.", - allowableValues = {"Yes", "No"}) - private String defaultHeader; - - @Schema( - description = "Change the CSS media type of the page. 
Defaults to 'print'.", - allowableValues = {"none", "print", "screen"}, - defaultValue = "print") - private String cssMediaType; } diff --git a/src/main/java/stirling/software/SPDF/model/api/misc/AddStampRequest.java b/src/main/java/stirling/software/SPDF/model/api/misc/AddStampRequest.java index 82296acc..f4c449b0 100644 --- a/src/main/java/stirling/software/SPDF/model/api/misc/AddStampRequest.java +++ b/src/main/java/stirling/software/SPDF/model/api/misc/AddStampRequest.java @@ -6,11 +6,11 @@ import io.swagger.v3.oas.annotations.media.Schema; import lombok.Data; import lombok.EqualsAndHashCode; -import stirling.software.SPDF.model.api.PDFFile; +import stirling.software.SPDF.model.api.PDFWithPageNums; @Data @EqualsAndHashCode(callSuper = true) -public class AddStampRequest extends PDFFile { +public class AddStampRequest extends PDFWithPageNums { @Schema( description = "The stamp type (text or image)", diff --git a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java index 90f7dbc3..da408a23 100644 --- a/src/main/java/stirling/software/SPDF/utils/FileToPdf.java +++ b/src/main/java/stirling/software/SPDF/utils/FileToPdf.java @@ -1,8 +1,9 @@ package stirling.software.SPDF.utils; import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -34,95 +35,40 @@ public class FileToPdf { tempInputFile = Files.createTempFile("input_", ".html"); Files.write(tempInputFile, fileBytes); } else { - tempInputFile = unzipAndGetMainHtml(fileBytes); + tempInputFile = Files.createTempFile("input_", ".zip"); + Files.write(tempInputFile, fileBytes); } List command = new ArrayList<>(); if (!htmlFormatsInstalled) { command.add("weasyprint"); - } else { - command.add("wkhtmltopdf"); - command.add("--enable-local-file-access"); - 
command.add("--load-error-handling"); - command.add("ignore"); - command.add("--load-media-error-handling"); - command.add("ignore"); - command.add("--zoom"); - command.add(String.valueOf(request.getZoom())); + command.add(tempInputFile.toString()); + command.add(tempOutputFile.toString()); + + } else { + command.add("ebook-convert"); + command.add(tempInputFile.toString()); + command.add(tempOutputFile.toString()); + command.add("--paper-size"); + command.add("a4"); - // if custom zoom add zoom style direct to html - // https://github.com/wkhtmltopdf/wkhtmltopdf/issues/4900 if (request.getZoom() != 1.0) { - String htmlContent = new String(Files.readAllBytes(tempInputFile)); - - String zoomStyle = ""; - // Check for tag, add style tag to associated tag - if (htmlContent.contains("")) { - htmlContent = htmlContent.replace("", "" + zoomStyle); - } else if (htmlContent.contains("")) { - // If no tag, but tag exists - htmlContent = htmlContent.replace("", "" + zoomStyle); - } else { - // If neither nor tags exist - htmlContent = zoomStyle + htmlContent; + // Create a temporary CSS file + File tempCssFile = Files.createTempFile("customStyle", ".css").toFile(); + try (FileWriter writer = new FileWriter(tempCssFile)) { + // Write the CSS rule to the file + writer.write("body { zoom: " + request.getZoom() + "; }"); } - // rewrite new html to file - Files.write(tempInputFile, htmlContent.getBytes(StandardCharsets.UTF_8)); - } - - if (request.getPageWidth() != null) { - command.add("--page-width"); - command.add(request.getPageWidth() + "cm"); - } - - if (request.getPageHeight() != null) { - command.add("--page-height"); - command.add(request.getPageHeight() + "cm"); - } - - if (request.getMarginTop() != null) { - command.add("--margin-top"); - command.add(request.getMarginTop() + "mm"); - } - - // Repeat similar pattern for marginBottom, marginLeft, marginRight - - if ("Yes".equalsIgnoreCase(request.getPrintBackground())) { - command.add("--background"); - } else { - 
command.add("--no-background"); - } - - if ("Yes".equalsIgnoreCase(request.getDefaultHeader())) { - command.add("--default-header"); - } - - if ("print".equalsIgnoreCase(request.getCssMediaType())) { - command.add("--print-media-type"); - } else if ("screen".equalsIgnoreCase(request.getCssMediaType())) { - command.add("--no-print-media-type"); + command.add("--extra-css"); + command.add(tempCssFile.getAbsolutePath()); } } - command.add(tempInputFile.toString()); - command.add(tempOutputFile.toString()); ProcessExecutorResult returnCode; - if (fileName.endsWith(".zip")) { - if (htmlFormatsInstalled) { - // command.add(1, "--allow"); - // command.add(2, tempInputFile.getParent().toString()); - } - returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) - .runCommandWithOutputHandling( - command, tempInputFile.getParent().toFile()); - } else { - - returnCode = - ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) - .runCommandWithOutputHandling(command); - } + returnCode = + ProcessExecutor.getInstance(ProcessExecutor.Processes.WEASYPRINT) + .runCommandWithOutputHandling(command); pdfBytes = Files.readAllBytes(tempOutputFile); } catch (IOException e) { @@ -135,10 +81,6 @@ public class FileToPdf { // Clean up temporary files Files.delete(tempOutputFile); Files.delete(tempInputFile); - - if (fileName.endsWith(".zip")) { - GeneralUtils.deleteDirectory(tempInputFile.getParent()); - } } return pdfBytes; diff --git a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java index 7b1082b3..4ef5964c 100644 --- a/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/GeneralUtils.java @@ -12,6 +12,7 @@ import java.nio.file.Paths; import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import 
org.springframework.web.multipart.MultipartFile; @@ -115,6 +116,13 @@ public class GeneralUtils { } public static List parsePageString(String pageOrder, int totalPages) { + if (pageOrder == null || pageOrder.isEmpty()) { + return Collections.singletonList(1); + } + if (pageOrder.matches("\\d+")) { + // Convert the single number string to an integer and return it in a list + return Collections.singletonList(Integer.parseInt(pageOrder)); + } return parsePageList(pageOrder.split(","), totalPages); } diff --git a/src/main/resources/settings.yml.template b/src/main/resources/settings.yml.template index 00c5998e..52a84bf4 100644 --- a/src/main/resources/settings.yml.template +++ b/src/main/resources/settings.yml.template @@ -14,8 +14,7 @@ system: googlevisibility: false # 'true' to allow Google visibility (via robots.txt), 'false' to disallow enableAlphaFunctionality: false # Set to enable functionality which might need more testing before it fully goes live (This feature might make no changes) customApplications: - installBookFormats: false # Installs Calibre for book format conversion (For non docker it must be manually downloaded but will need to be true to show in UI) - installAdvancedHtmlToPDF: false # DO NOT USE EXTERNALLY, NOT SAFE! 
Install wkHtmlToPDF (For non docker it must be manually downloaded but will need to be true to show in UI) + bookAndHtmlFormatsInstalled: false # Installs Calibre for book format conversion (For non docker it must be manually downloaded but will need to be true to show in UI) #ui: # appName: exampleAppName # Application's visible name diff --git a/src/main/resources/static/js/game.js b/src/main/resources/static/js/game.js index ffad304b..9067cf97 100644 --- a/src/main/resources/static/js/game.js +++ b/src/main/resources/static/js/game.js @@ -14,13 +14,16 @@ function initializeGame() { const highScoreElement = document.getElementById('high-score'); let pdfSize = gameContainer.clientWidth * 0.0625; // 5% of container width - let projectileWidth = gameContainer.clientWidth * 0.00625; // 0.5% of container width + let projectileWidth = gameContainer.clientWidth * 0.00625;// 0.00625; // 0.5% of container width let projectileHeight = gameContainer.clientHeight * 0.01667; // 1% of container height let paused = false; + const fireRate = 200; // Time between shots in milliseconds let lastProjectileTime = 0; let lives = 3; + + let highScore = localStorage.getItem('highScore') ? 
parseInt(localStorage.getItem('highScore')) : 0; updateHighScore(); @@ -31,7 +34,7 @@ function initializeGame() { const projectiles = []; let score = 0; let level = 1; - let pdfSpeed = 1; + let pdfSpeed = 0.5; let gameOver = false; function handleKeys() { @@ -119,7 +122,7 @@ function initializeGame() { for (let pdfIndex = 0; pdfIndex < pdfs.length; pdfIndex++) { const pdf = pdfs[pdfIndex]; - const pdfY = parseInt(pdf.style.top) + pdfSpeed; + const pdfY = parseFloat(pdf.style.top) + pdfSpeed; if (pdfY + 50 > gameContainer.clientHeight) { gameContainer.removeChild(pdf); pdfs.splice(pdfIndex, 1); @@ -218,7 +221,7 @@ function initializeGame() { if (newLevel > level) { level = newLevel; levelElement.textContent = 'Level: ' + level; - pdfSpeed += 1; + pdfSpeed += 0.2; } } @@ -249,6 +252,10 @@ function initializeGame() { let spawnPdfTimeout; + const BASE_SPAWN_INTERVAL_MS = 1250; // milliseconds before a new enemy spawns + const LEVEL_INCREASE_FACTOR_MS = 0; // milliseconds to decrease the spawn interval per level + const MAX_SPAWN_RATE_REDUCTION_MS = 800; // Max milliseconds from the base spawn interval + function spawnPdfInterval() { console.log("spawnPdfInterval"); if (gameOver || paused) { @@ -258,7 +265,9 @@ function initializeGame() { } console.log("spawnPdfInterval 3"); spawnPdf(); - spawnPdfTimeout = setTimeout(spawnPdfInterval, 1000 - level * 50); + let spawnRateReduction = Math.min(level * LEVEL_INCREASE_FACTOR_MS, MAX_SPAWN_RATE_REDUCTION_MS); + let spawnRate = BASE_SPAWN_INTERVAL_MS - spawnRateReduction; + spawnPdfTimeout = setTimeout(spawnPdfInterval, spawnRate); } updatePlayerPosition(); diff --git a/src/main/resources/templates/convert/html-to-pdf.html b/src/main/resources/templates/convert/html-to-pdf.html index cbb22b99..534bed5f 100644 --- a/src/main/resources/templates/convert/html-to-pdf.html +++ b/src/main/resources/templates/convert/html-to-pdf.html @@ -19,64 +19,7 @@ - -
- - -
- -
- - -
- -
- - -
- -
- - -
- -
- - -
- -
- - -
- -
- - -
- -
- - -
- - -
- - -
- - +
diff --git a/src/main/resources/templates/convert/pdf-to-text.html b/src/main/resources/templates/convert/pdf-to-text.html index 75b71d11..de649025 100644 --- a/src/main/resources/templates/convert/pdf-to-text.html +++ b/src/main/resources/templates/convert/pdf-to-text.html @@ -19,6 +19,7 @@