From 53afb865c53b0a000afb50a9932f28e2563b5142 Mon Sep 17 00:00:00 2001 From: sbplat <71648843+sbplat@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:23:58 -0500 Subject: [PATCH] refactor: replace ImageFinder with getAllImages using strategy behind ExtractImagesController --- .../api/misc/AutoSplitPdfController.java | 4 +- .../api/pipeline/ApiDocService.java | 5 +- .../software/SPDF/pdf/ImageFinder.java | 131 ------------------ .../software/SPDF/utils/PdfUtils.java | 29 +++- 4 files changed, 28 insertions(+), 141 deletions(-) delete mode 100644 src/main/java/stirling/software/SPDF/pdf/ImageFinder.java diff --git a/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java b/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java index 4ca45762..8b095358 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/misc/AutoSplitPdfController.java @@ -68,7 +68,9 @@ public class AutoSplitPdfController { splitDocuments.add(new PDDocument()); } - if (!splitDocuments.isEmpty() && !QR_CONTENT.equals(result) && !QR_CONTENT_OLD.equals(result)) { + if (!splitDocuments.isEmpty() + && !QR_CONTENT.equals(result) + && !QR_CONTENT_OLD.equals(result)) { splitDocuments.get(splitDocuments.size() - 1).addPage(document.getPage(page)); } else if (page == 0) { PDDocument firstDocument = new PDDocument(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/pipeline/ApiDocService.java b/src/main/java/stirling/software/SPDF/controller/api/pipeline/ApiDocService.java index 27240b6f..02c37e5c 100644 --- a/src/main/java/stirling/software/SPDF/controller/api/pipeline/ApiDocService.java +++ b/src/main/java/stirling/software/SPDF/controller/api/pipeline/ApiDocService.java @@ -63,10 +63,7 @@ public class ApiDocService { outputToFileTypes.put("PPT", Arrays.asList("ppt", "pptx", "odp")); outputToFileTypes.put("XML", Arrays.asList("xml", "xsd", "xsl")); outputToFileTypes.put( - "BOOK", - Arrays.asList( - "epub", "mobi", "azw3", "fb2", "txt", - "docx")); + "BOOK", Arrays.asList("epub", "mobi", "azw3", "fb2", "txt", "docx")); // type. } diff --git a/src/main/java/stirling/software/SPDF/pdf/ImageFinder.java b/src/main/java/stirling/software/SPDF/pdf/ImageFinder.java deleted file mode 100644 index a710dbd5..00000000 --- a/src/main/java/stirling/software/SPDF/pdf/ImageFinder.java +++ /dev/null @@ -1,131 +0,0 @@ -package stirling.software.SPDF.pdf; - -import java.awt.geom.Point2D; -import java.io.IOException; -import java.util.List; - -import org.apache.pdfbox.contentstream.operator.Operator; -import org.apache.pdfbox.contentstream.operator.OperatorName; -import org.apache.pdfbox.cos.COSBase; -import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.graphics.PDXObject; -import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; -import org.apache.pdfbox.pdmodel.graphics.image.PDImage; -import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; - -public class ImageFinder extends org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine { - private boolean hasImages = false; - - public ImageFinder(PDPage page) { - super(page); - } - - public boolean hasImages() { - return hasImages; - } - - @Override - protected void processOperator(Operator operator, List operands) throws IOException { - String operation = operator.getName(); - if (operation.equals(OperatorName.DRAW_OBJECT)) { - COSBase base = operands.get(0); - if (base instanceof COSName) { - COSName objectName = (COSName) base; - PDXObject xobject = getResources().getXObject(objectName); - if (xobject instanceof PDImageXObject) { - hasImages = true; - } else if (xobject instanceof PDFormXObject) { - PDFormXObject form = (PDFormXObject) xobject; - ImageFinder innerFinder = new ImageFinder(getPage()); - innerFinder.processPage(getPage()); - if (innerFinder.hasImages()) { - hasImages = true; - } - } - } - } - super.processOperator(operator, operands); - } - - @Override - public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void drawImage(PDImage pdImage) throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void clip(int windingRule) throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void moveTo(float x, float y) throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void lineTo(float x, float y) throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3) - throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public Point2D getCurrentPoint() throws IOException { - // TODO Auto-generated method stub - return null; - } - - @Override - public void closePath() throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void endPath() throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void strokePath() throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void fillPath(int windingRule) throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void fillAndStrokePath(int windingRule) throws IOException { - // TODO Auto-generated method stub - - } - - @Override - public void shadingFill(COSName shadingName) throws IOException { - // TODO Auto-generated method stub - - } - - // ... rest of the overridden methods -} diff --git a/src/main/java/stirling/software/SPDF/utils/PdfUtils.java b/src/main/java/stirling/software/SPDF/utils/PdfUtils.java index 426fc78d..1676ce85 100644 --- a/src/main/java/stirling/software/SPDF/utils/PdfUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/PdfUtils.java @@ -2,8 +2,10 @@ package stirling.software.SPDF.utils; import java.awt.Graphics; import java.awt.image.BufferedImage; +import java.awt.image.RenderedImage; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; @@ -16,11 +18,15 @@ import javax.imageio.ImageWriter; import javax.imageio.stream.ImageOutputStream; import org.apache.pdfbox.Loader; +import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode; +import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory; import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; @@ -31,8 +37,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.web.multipart.MultipartFile; -import stirling.software.SPDF.pdf.ImageFinder; - public class PdfUtils { private static final Logger logger = LoggerFactory.getLogger(PdfUtils.class); @@ -62,6 +66,23 @@ public class PdfUtils { } } + public static List getAllImages(PDResources resources) throws IOException { + List images = new ArrayList<>(); + + for (COSName name : resources.getXObjectNames()) { + PDXObject object = resources.getXObject(name); + + if (object instanceof PDImageXObject) { + images.add(((PDImageXObject) object).getImage()); + + } else if (object instanceof PDFormXObject) { + images.addAll(getAllImages(((PDFormXObject) object).getResources())); + } + } + + return images; + } + public static boolean hasImages(PDDocument document, String pagesToCheck) throws IOException { String[] pageOrderArr = pagesToCheck.split(","); List pageList = @@ -94,9 +115,7 @@ public class PdfUtils { } public static boolean hasImagesOnPage(PDPage page) throws IOException { - ImageFinder imageFinder = new ImageFinder(page); - imageFinder.processPage(page); - return imageFinder.hasImages(); + return getAllImages(page.getResources()).size() > 0; } public static boolean hasTextOnPage(PDPage page, String phrase) throws IOException {