From 862086eae525e9e7e33c7d11b131c950bc875870 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Sat, 2 Sep 2023 19:12:08 +0100 Subject: [PATCH] itext removal fixes --- .../api/ToSinglePageController.java | 66 ++++++++++++------- .../api/other/PageNumbersController.java | 14 +++- .../controller/api/other/ShowJavascript.java | 30 +++++---- .../controller/api/security/GetInfoOnPDF.java | 42 ++++++------ 4 files changed, 91 insertions(+), 61 deletions(-) diff --git a/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java b/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java index 4e4ea33f..2b3c88d4 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/ToSinglePageController.java @@ -1,12 +1,15 @@ package stirling.software.SPDF.controller.api; import java.awt.geom.AffineTransform; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.InputStream; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.slf4j.Logger; @@ -49,37 +52,50 @@ public class ToSinglePageController { @Parameter(description = "The input multi-page PDF file to be converted into a single page", required = true) MultipartFile file) throws IOException { + // Load the source document PDDocument sourceDocument = PDDocument.load(file.getInputStream()); - float totalHeight = 0; - float width = 0; - for (PDPage page : sourceDocument.getPages()) { - PDRectangle pageSize = page.getMediaBox(); - totalHeight += pageSize.getHeight(); - if(width < pageSize.getWidth()) - width = pageSize.getWidth(); - } + // Calculate total height and max width + float totalHeight = 0; + float maxWidth = 0; + for (PDPage page : sourceDocument.getPages()) { + PDRectangle pageSize = page.getMediaBox(); + totalHeight += pageSize.getHeight(); + maxWidth = Math.max(maxWidth, pageSize.getWidth()); + } - PDDocument newDocument = new PDDocument(); - PDPage newPage = new PDPage(new PDRectangle(width, totalHeight)); - newDocument.addPage(newPage); + // Create new document and page with calculated dimensions + PDDocument newDocument = new PDDocument(); + PDPage newPage = new PDPage(new PDRectangle(maxWidth, totalHeight)); + newDocument.addPage(newPage); - LayerUtility layerUtility = new LayerUtility(newDocument); - float yOffset = totalHeight; + // Initialize the content stream of the new page + PDPageContentStream contentStream = new PDPageContentStream(newDocument, newPage); + contentStream.close(); + + LayerUtility layerUtility = new LayerUtility(newDocument); + float yOffset = totalHeight; - for (PDPage page : sourceDocument.getPages()) { - PDFormXObject form = layerUtility.importPageAsForm(sourceDocument, sourceDocument.getPages().indexOf(page)); - AffineTransform af = AffineTransform.getTranslateInstance(0, yOffset - page.getMediaBox().getHeight()); - layerUtility.appendFormAsLayer(newDocument.getPage(0), form, af, page.getResources().getCOSObject().toString()); - yOffset -= page.getMediaBox().getHeight(); - } + // For each page, copy its content to the new page at the correct offset + for (PDPage page : sourceDocument.getPages()) { + PDFormXObject form = layerUtility.importPageAsForm(sourceDocument, sourceDocument.getPages().indexOf(page)); + AffineTransform af = AffineTransform.getTranslateInstance(0, yOffset - page.getMediaBox().getHeight()); + layerUtility.wrapInSaveRestore(newPage); + String defaultLayerName = "Layer" + sourceDocument.getPages().indexOf(page); + layerUtility.appendFormAsLayer(newPage, form, af, defaultLayerName); + yOffset -= page.getMediaBox().getHeight(); + } - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - newDocument.save(baos); - newDocument.close(); - sourceDocument.close(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + newDocument.save(baos); + newDocument.close(); + sourceDocument.close(); - byte[] result = baos.toByteArray(); - return WebResponseUtils.bytesToWebResponse(result, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_singlePage.pdf"); + byte[] result = baos.toByteArray(); + return WebResponseUtils.bytesToWebResponse(result, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_singlePage.pdf"); + + + + } } \ No newline at end of file diff --git a/src/main/java/stirling/software/SPDF/controller/api/other/PageNumbersController.java b/src/main/java/stirling/software/SPDF/controller/api/other/PageNumbersController.java index c48a77e9..d1a469d0 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/other/PageNumbersController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/PageNumbersController.java @@ -60,6 +60,11 @@ public class PageNumbersController { case "large": marginFactor = 0.05f; break; + case "x-large": + marginFactor = 0.075f; + break; + + default: marginFactor = 0.035f; break; @@ -67,7 +72,12 @@ public class PageNumbersController { float fontSize = 12.0f; PDType1Font font = PDType1Font.HELVETICA; - + if(pagesToNumber == null || pagesToNumber.length() == 0) { + pagesToNumber = "all"; + } + if(customText == null || customText.length() == 0) { + customText = "{n}"; + } List pagesToNumberList = GeneralUtils.parsePageList(pagesToNumber.split(","), document.getNumberOfPages()); for (int i : pagesToNumberList) { @@ -120,7 +130,7 @@ public class PageNumbersController { document.save(baos); document.close(); - return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), URLEncoder.encode(file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf", "UTF-8"), MediaType.APPLICATION_PDF); + return WebResponseUtils.bytesToWebResponse(baos.toByteArray(), file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_numbersAdded.pdf", MediaType.APPLICATION_PDF); } diff --git a/src/main/java/stirling/software/SPDF/controller/api/other/ShowJavascript.java b/src/main/java/stirling/software/SPDF/controller/api/other/ShowJavascript.java index e8f33144..d812f5c8 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/other/ShowJavascript.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/ShowJavascript.java @@ -28,20 +28,22 @@ public class ShowJavascript { String script = ""; try (PDDocument document = PDDocument.load(inputFile.getInputStream())) { - - PDNameTreeNode jsTree = document.getDocumentCatalog().getNames().getJavaScript(); - - if (jsTree != null) { - Map jsEntries = jsTree.getNames(); - - for (Map.Entry entry : jsEntries.entrySet()) { - String name = entry.getKey(); - PDActionJavaScript jsAction = entry.getValue(); - String jsCodeStr = jsAction.getAction(); - - script += "// File: " + inputFile.getOriginalFilename() + ", Script: " + name + "\n" + jsCodeStr + "\n"; - } - } + + if(document.getDocumentCatalog() != null && document.getDocumentCatalog().getNames() != null) { + PDNameTreeNode jsTree = document.getDocumentCatalog().getNames().getJavaScript(); + + if (jsTree != null) { + Map jsEntries = jsTree.getNames(); + + for (Map.Entry entry : jsEntries.entrySet()) { + String name = entry.getKey(); + PDActionJavaScript jsAction = entry.getValue(); + String jsCodeStr = jsAction.getAction(); + + script += "// File: " + inputFile.getOriginalFilename() + ", Script: " + name + "\n" + jsCodeStr + "\n"; + } + } + } if (script.isEmpty()) { script = "PDF '" + inputFile.getOriginalFilename() + "' does not contain Javascript"; diff --git a/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java b/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java index ef4187f7..e75fe502 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java +++ b/src/main/java/stirling/software/SPDF/controller/api/security/GetInfoOnPDF.java @@ -185,24 +185,26 @@ public class GetInfoOnPDF { //embeed files TODO size - PDEmbeddedFilesNameTreeNode efTree = catalog.getNames().getEmbeddedFiles(); - - ArrayNode embeddedFilesArray = objectMapper.createArrayNode(); - if (efTree != null) { - Map efMap = efTree.getNames(); - if (efMap != null) { - for (Map.Entry entry : efMap.entrySet()) { - ObjectNode embeddedFileNode = objectMapper.createObjectNode(); - embeddedFileNode.put("Name", entry.getKey()); - PDEmbeddedFile embeddedFile = entry.getValue().getEmbeddedFile(); - if (embeddedFile != null) { - embeddedFileNode.put("FileSize", embeddedFile.getLength()); // size in bytes - } - embeddedFilesArray.add(embeddedFileNode); - } - } + if(catalog.getNames() != null) { + PDEmbeddedFilesNameTreeNode efTree = catalog.getNames().getEmbeddedFiles(); + + ArrayNode embeddedFilesArray = objectMapper.createArrayNode(); + if (efTree != null) { + Map efMap = efTree.getNames(); + if (efMap != null) { + for (Map.Entry entry : efMap.entrySet()) { + ObjectNode embeddedFileNode = objectMapper.createObjectNode(); + embeddedFileNode.put("Name", entry.getKey()); + PDEmbeddedFile embeddedFile = entry.getValue().getEmbeddedFile(); + if (embeddedFile != null) { + embeddedFileNode.put("FileSize", embeddedFile.getLength()); // size in bytes + } + embeddedFilesArray.add(embeddedFileNode); + } + } + } + other.set("EmbeddedFiles", embeddedFilesArray); } - other.set("EmbeddedFiles", embeddedFilesArray); @@ -374,7 +376,7 @@ public class GetInfoOnPDF { ObjectNode pageInfoParent = objectMapper.createObjectNode(); - for (int pageNum = 1; pageNum <= pdfBoxDoc.getNumberOfPages(); pageNum++) { + for (int pageNum = 0; pageNum < pdfBoxDoc.getNumberOfPages(); pageNum++) { ObjectNode pageInfo = objectMapper.createObjectNode(); // Retrieve the page @@ -411,8 +413,8 @@ public class GetInfoOnPDF { // Content Extraction PDFTextStripper textStripper = new PDFTextStripper(); - textStripper.setStartPage(pageNum -1); - textStripper.setEndPage(pageNum - 1); + textStripper.setStartPage(pageNum + 1); + textStripper.setEndPage(pageNum +1); String pageText = textStripper.getText(pdfBoxDoc); pageInfo.put("Text Characters Count", pageText.length()); //