1
0
mirror of https://github.com/Stirling-Tools/Stirling-PDF.git synced 2024-06-03 06:10:11 +02:00
Stirling-PDF/src/main/java/stirling/software/SPDF/controller/api/misc/ExtractImagesController.java

140 lines
5.9 KiB
Java

package stirling.software.SPDF.controller.api.misc;
import io.github.pixee.security.Filenames;
import java.awt.Graphics2D;
import java.awt.Image;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.zip.Deflater;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import javax.imageio.ImageIO;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ModelAttribute;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import stirling.software.SPDF.model.api.PDFWithImageFormatRequest;
import stirling.software.SPDF.utils.WebResponseUtils;
@RestController
@RequestMapping("/api/v1/misc")
@Tag(name = "Misc", description = "Miscellaneous APIs")
public class ExtractImagesController {
private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class);
@PostMapping(consumes = "multipart/form-data", value = "/extract-images")
@Operation(
summary = "Extract images from a PDF file",
description =
"This endpoint extracts images from a given PDF file and returns them in a zip file. Users can specify the output image format. Input:PDF Output:IMAGE/ZIP Type:SIMO")
public ResponseEntity<byte[]> extractImages(@ModelAttribute PDFWithImageFormatRequest request)
throws IOException {
MultipartFile file = request.getFileInput();
String format = request.getFormat();
System.out.println(
System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format);
PDDocument document = Loader.loadPDF(file.getBytes());
// Create ByteArrayOutputStream to write zip file to byte array
ByteArrayOutputStream baos = new ByteArrayOutputStream();
// Create ZipOutputStream to create zip file
ZipOutputStream zos = new ZipOutputStream(baos);
// Set compression level
zos.setLevel(Deflater.BEST_COMPRESSION);
int imageIndex = 1;
String filename = Filenames.toSimpleFileName(file.getOriginalFilename()).replaceFirst("[.][^.]+$", "");
int pageNum = 0;
Set<Integer> processedImages = new HashSet<>();
// Iterate over each page
for (PDPage page : document.getPages()) {
++pageNum;
// Extract images from page
for (COSName name : page.getResources().getXObjectNames()) {
if (page.getResources().isImageXObject(name)) {
PDImageXObject image = (PDImageXObject) page.getResources().getXObject(name);
int imageHash = image.hashCode();
if (processedImages.contains(imageHash)) {
continue; // Skip already processed images
}
processedImages.add(imageHash);
// Convert image to desired format
RenderedImage renderedImage = image.getImage();
BufferedImage bufferedImage = null;
if ("png".equalsIgnoreCase(format)) {
bufferedImage =
new BufferedImage(
renderedImage.getWidth(),
renderedImage.getHeight(),
BufferedImage.TYPE_INT_ARGB);
} else if ("jpeg".equalsIgnoreCase(format) || "jpg".equalsIgnoreCase(format)) {
bufferedImage =
new BufferedImage(
renderedImage.getWidth(),
renderedImage.getHeight(),
BufferedImage.TYPE_INT_RGB);
} else if ("gif".equalsIgnoreCase(format)) {
bufferedImage =
new BufferedImage(
renderedImage.getWidth(),
renderedImage.getHeight(),
BufferedImage.TYPE_BYTE_INDEXED);
}
// Write image to zip file
String imageName =
filename + "_" + imageIndex + " (Page " + pageNum + ")." + format;
ZipEntry zipEntry = new ZipEntry(imageName);
zos.putNextEntry(zipEntry);
Graphics2D g = bufferedImage.createGraphics();
g.drawImage((Image) renderedImage, 0, 0, null);
g.dispose();
// Write image bytes to zip file
ByteArrayOutputStream imageBaos = new ByteArrayOutputStream();
ImageIO.write(bufferedImage, format, imageBaos);
zos.write(imageBaos.toByteArray());
zos.closeEntry();
imageIndex++;
}
}
}
// Close ZipOutputStream and PDDocument
zos.close();
document.close();
// Create ByteArrayResource from byte array
byte[] zipContents = baos.toByteArray();
return WebResponseUtils.boasToWebResponse(
baos, filename + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM);
}
}