diff --git a/Dockerfile b/Dockerfile index f4ecd3bd..f2c478eb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,9 @@ # Build jbig2enc in a separate stage -FROM frooodle/stirling-pdf-base:latest +FROM frooodle/stirling-pdf-base:beta + +# Create pythonScripts folder and copy local scripts +RUN mkdir /pythonScripts +COPY ./pythonScripts/* /pythonScripts/ # Copy the application JAR file COPY build/libs/*.jar app.jar diff --git a/DockerfileBase b/DockerfileBase index 1aff24e0..668b81f6 100644 --- a/DockerfileBase +++ b/DockerfileBase @@ -11,8 +11,8 @@ RUN apt-get update && \ pkg-config \ ca-certificates \ zlib1g-dev \ - make \ - g++ + make \ + g++ RUN git clone https://github.com/agl/jbig2enc && \ cd jbig2enc && \ @@ -33,12 +33,57 @@ RUN apt-get update && \ libreoffice-calc \ libreoffice-impress \ python3-uno \ - python3-pip \ + python3-pip \ unoconv \ - pngquant \ - unpaper \ + pngquant \ + unpaper \ ocrmypdf && \ - pip install --user --upgrade ocrmypdf + pip install --user --upgrade ocrmypdf && \ + pip3 install opencv-python-headless + +# Copy the jbig2enc binary from the builder stage +COPY --from=jbig2enc_builder /usr/local/bin/jbig2 /usr/local/bin/jbig2# Build jbig2enc in a separate stage +FROM debian:bullseye-slim as jbig2enc_builder + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + automake \ + autoconf \ + libtool \ + libleptonica-dev \ + pkg-config \ + ca-certificates \ + zlib1g-dev \ + make \ + g++ + +RUN git clone https://github.com/agl/jbig2enc && \ + cd jbig2enc && \ + ./autogen.sh && \ + ./configure && \ + make && \ + make install + +# Main stage +FROM openjdk:17-jdk-slim + +# Install necessary dependencies +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libreoffice-core \ + libreoffice-common \ + libreoffice-writer \ + libreoffice-calc \ + libreoffice-impress \ + python3-uno \ + python3-pip \ + unoconv \ + pngquant \ + unpaper \ + ocrmypdf && \ + pip install --user --upgrade ocrmypdf && \ + 
pip3 install opencv-python-headless # Copy the jbig2enc binary from the builder stage COPY --from=jbig2enc_builder /usr/local/bin/jbig2 /usr/local/bin/jbig2 \ No newline at end of file
diff --git a/pythonScripts/split_photos.py b/pythonScripts/split_photos.py
new file mode 100644
index 00000000..ac854e24
--- /dev/null
+++ b/pythonScripts/split_photos.py
@@ -0,0 +1,164 @@
+import sys
+import cv2
+import numpy as np
+import os
+
+def find_photo_boundaries(image, background_color, tolerance=30, min_area=10000, min_contour_area=500):
+    """Return (x, y, w, h) boxes of regions that differ from the background color.
+
+    Pixels within +/- tolerance of background_color count as background. The inverted mask is
+    dilated, its external contours are boxed, and a box is kept only when its area is at least
+    min_area and its contour area is at least min_contour_area.
+    """
+    mask = cv2.inRange(image, background_color - tolerance, background_color + tolerance)
+    mask = cv2.bitwise_not(mask)
+    kernel = np.ones((5,5),np.uint8)
+    mask = cv2.dilate(mask, kernel, iterations=2)
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    photo_boundaries = []
+    for contour in contours:
+        x, y, w, h = cv2.boundingRect(contour)
+        area = w * h
+        contour_area = cv2.contourArea(contour)
+        if area >= min_area and contour_area >= min_contour_area:
+            photo_boundaries.append((x, y, w, h))
+
+    return photo_boundaries
+
+def estimate_background_color(image, sample_points=5):
+    """Return the per-channel median of the four corner pixels and the center pixel.
+
+    sample_points is accepted but not used; exactly five fixed points are sampled.
+    """
+    h, w, _ = image.shape
+    points = [
+        (0, 0),
+        (w - 1, 0),
+        (w - 1, h - 1),
+        (0, h - 1),
+        (w // 2, h // 2),
+    ]
+
+    colors = []
+    for x, y in points:
+        colors.append(image[y, x])
+
+    return np.median(colors, axis=0)
+
+def auto_rotate(image, angle_threshold=10):
+    """Rotate image about its center when the estimated skew reaches angle_threshold degrees.
+
+    The input is returned unchanged when no contour is found, when the largest contour has a
+    zero area moment, or when the absolute estimated angle is below angle_threshold.
+    """
+    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    ret, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    if len(contours) == 0:
+        return image
+
+    largest_contour = max(contours, key=cv2.contourArea)
+    mu = cv2.moments(largest_contour)
+
+    if mu["m00"] == 0:
+        return image
+
+    x_centroid = int(mu["m10"] / mu["m00"])
+    y_centroid = int(mu["m01"] / mu["m00"])
+
+    coords = np.column_stack(np.where(binary > 0))
+    u, _, vt = np.linalg.svd(coords - np.array([[y_centroid, x_centroid]]), full_matrices=False)
+
+    # NOTE(review): u has one row per foreground pixel, so u[0, 0] and u[1, 0] belong to the first
+    # two pixels only; the principal axis is presumably vt[0] - confirm the intended angle formula.
+    angle = np.arctan2(u[1, 0], u[0, 0]) * 180 / np.pi
+
+    if angle < -45:
+        angle = -(90 + angle)
+    else:
+        angle = -angle
+
+    if abs(angle) < angle_threshold:
+        return image
+
+    (h, w) = image.shape[:2]
+    center = (w // 2, h // 2)
+    M = cv2.getRotationMatrix2D(center, angle, 1.0)
+    return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
+
+
+
+def crop_borders(image, border_color, tolerance=30):
+    """Crop image to the bounding box of the largest region matching border_color within tolerance."""
+    mask = cv2.inRange(image, border_color - tolerance, border_color + tolerance)
+
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if len(contours) == 0:
+        return image
+
+    largest_contour = max(contours, key=cv2.contourArea)
+    x, y, w, h = cv2.boundingRect(largest_contour)
+
+    return image[y:y+h, x:x+w]
+
+def split_photos(input_file, output_directory, tolerance=30, min_area=10000, min_contour_area=500, angle_threshold=10, border_size=10):
+    """Split a scanned image into individual photos and save each one as a PNG in output_directory.
+
+    Outputs are named <input basename>_<n>.png. Raises ValueError when input_file cannot be read.
+    """
+    image = cv2.imread(input_file)
+    if image is None:
+        # cv2.imread returns None instead of raising when the file is missing or unreadable
+        raise ValueError(f"Could not read image: {input_file}")
+    background_color = estimate_background_color(image)
+
+    # Add a constant border around the image
+    image = cv2.copyMakeBorder(image, border_size, border_size, border_size, border_size, cv2.BORDER_CONSTANT, value=background_color)
+
+    # Forward the area thresholds so the caller's values are honoured instead of the defaults
+    photo_boundaries = find_photo_boundaries(image, background_color, tolerance, min_area, min_contour_area)
+
+    os.makedirs(output_directory, exist_ok=True)
+
+    # Get the input file's base name without the extension
+    input_file_basename = os.path.splitext(os.path.basename(input_file))[0]
+
+    for idx, (x, y, w, h) in enumerate(photo_boundaries):
+        cropped_image = image[y:y+h, x:x+w]
+        cropped_image = auto_rotate(cropped_image, angle_threshold)
+
+        # Remove the added border (a [0:-0] slice would be empty, so skip when border_size is 0)
+        if border_size > 0:
+            cropped_image = cropped_image[border_size:-border_size, border_size:-border_size]
+
+        # Nothing is left when the region is smaller than twice the border; do not write it
+        if cropped_image.size == 0:
+            continue
+
+        output_path = os.path.join(output_directory, f"{input_file_basename}_{idx+1}.png")
+        cv2.imwrite(output_path, cropped_image)
+        print(f"Saved {output_path}")
+
+if __name__ == "__main__":
+    if len(sys.argv) < 3:
+        print("Usage: python3 split_photos.py <input_file> <output_directory> [tolerance] [min_area] [min_contour_area] [angle_threshold] [border_size]")
+        print("\nParameters:")
+        print("  <input_file>        - The input scanned image containing multiple photos.")
+        print("  <output_directory>  - The directory where the result images should be placed.")
+        print("  [tolerance]         - Optional. Determines the range of color variation around the estimated background color (default: 20).")
+        print("  [min_area]          - Optional. Sets the minimum area threshold for a photo (default: 8000).")
+        print("  [min_contour_area]  - Optional. Sets the minimum contour area threshold for a photo (default: 500).")
+        print("  [angle_threshold]   - Optional. Sets the minimum absolute angle required for the image to be rotated (default: 60).")
+        print("  [border_size]       - Optional. Sets the size of the border added and removed to prevent white borders in the output (default: 10).")
+        sys.exit(1)
+
+    input_file = sys.argv[1]
+    output_directory = sys.argv[2]
+    tolerance = int(sys.argv[3]) if len(sys.argv) > 3 else 20
+    min_area = int(sys.argv[4]) if len(sys.argv) > 4 else 8000
+    min_contour_area = int(sys.argv[5]) if len(sys.argv) > 5 else 500
+    angle_threshold = int(sys.argv[6]) if len(sys.argv) > 6 else 60
+    border_size = int(sys.argv[7]) if len(sys.argv) > 7 else 10
+    split_photos(input_file, output_directory, tolerance=tolerance, min_area=min_area, min_contour_area=min_contour_area, angle_threshold=angle_threshold, border_size=border_size)
diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java index 2a9af96d..89776989 100644 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java +++ b/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java @@ -6,8 +6,6 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; -import org.springframework.http.HttpHeaders; -import org.springframework.http.MediaType; import 
org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.ui.Model; @@ -16,6 +14,7 @@ import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.multipart.MultipartFile; +import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; @Controller @@ -52,10 +51,7 @@ public class ConvertPDFToPDFA { // Return the optimized PDF as a response String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_PDFA.pdf"; - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_PDF); - headers.setContentDispositionFormData("attachment", outputFilename); - return ResponseEntity.ok().headers(headers).body(pdfBytes); + return PdfUtils.bytesToWebResponse(pdfBytes, outputFilename); } @GetMapping("/pdf-to-pdfa") diff --git a/src/main/java/stirling/software/SPDF/controller/other/CompressController.java b/src/main/java/stirling/software/SPDF/controller/other/CompressController.java index 9f35ad57..1b557d85 100644 --- a/src/main/java/stirling/software/SPDF/controller/other/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/other/CompressController.java @@ -8,8 +8,6 @@ import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.http.HttpHeaders; -import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; import org.springframework.ui.Model; @@ -18,6 +16,7 @@ import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.multipart.MultipartFile; +import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; @Controller @@ -78,10 +77,7 @@ public class 
CompressController { // Return the optimized PDF as a response String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf"; - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_PDF); - headers.setContentDispositionFormData("attachment", outputFilename); - return ResponseEntity.ok().headers(headers).body(pdfBytes); + return PdfUtils.bytesToWebResponse(pdfBytes, outputFilename); } }
diff --git a/src/main/java/stirling/software/SPDF/controller/other/ExtractImageScansController.java b/src/main/java/stirling/software/SPDF/controller/other/ExtractImageScansController.java
new file mode 100644
index 00000000..e3562697
--- /dev/null
+++ b/src/main/java/stirling/software/SPDF/controller/other/ExtractImageScansController.java
@@ -0,0 +1,160 @@
+package stirling.software.SPDF.controller.other;
+
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import javax.imageio.ImageIO;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.http.MediaType;
+import org.springframework.http.ResponseEntity;
+import org.springframework.stereotype.Controller;
+import org.springframework.web.bind.annotation.GetMapping;
+import org.springframework.web.bind.annotation.PostMapping;
+import org.springframework.web.bind.annotation.RequestParam;
+import org.springframework.web.multipart.MultipartFile;
+import org.springframework.web.servlet.ModelAndView;
+
+import stirling.software.SPDF.utils.PdfUtils;
+import stirling.software.SPDF.utils.ProcessExecutor;
+
+/** Web endpoints that split scanned pages into individual photos via the split_photos.py script. */
+@Controller
+public class ExtractImageScansController {
+
+    private static final Logger logger = LoggerFactory.getLogger(ExtractImageScansController.class);
+
+    /** Serves the upload form for the extract-image-scans tool. */
+    @GetMapping("/extract-image-scans")
+    public ModelAndView extractImageScansForm() {
+        ModelAndView modelAndView = new ModelAndView("other/extract-image-scans");
+        modelAndView.addObject("currentPage", "extract-image-scans");
+        return modelAndView;
+    }
+
+    /**
+     * Runs /pythonScripts/split_photos.py on the uploaded image, or on every page of an uploaded
+     * PDF rendered at 300 DPI, and returns the result as one PNG or as a zip of PNGs.
+     *
+     * @throws IOException if no output image was produced or a file operation fails
+     */
+    @PostMapping("/extract-image-scans")
+    public ResponseEntity<byte[]> extractImageScans(@RequestParam("fileInput") MultipartFile inputFile,
+            @RequestParam(name = "angle_threshold", defaultValue = "5") int angleThreshold,
+            @RequestParam(name = "tolerance", defaultValue = "20") int tolerance,
+            @RequestParam(name = "min_area", defaultValue = "8000") int minArea,
+            @RequestParam(name = "min_contour_area", defaultValue = "500") int minContourArea) throws IOException, InterruptedException {
+
+        String fileName = inputFile.getOriginalFilename();
+        String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
+        String baseName = fileName.replaceFirst("[.][^.]+$", "");
+
+        // Temp input images handed to the script; always deleted in the finally block below
+        List<String> images = new ArrayList<>();
+        List<byte[]> processedImageBytes = new ArrayList<>();
+
+        try {
+            // Check if input file is a PDF
+            if (extension.equalsIgnoreCase("pdf")) {
+                // Load PDF document
+                try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputFile.getBytes()))) {
+                    PDFRenderer pdfRenderer = new PDFRenderer(document);
+                    int pageCount = document.getNumberOfPages();
+
+                    // Render every page to its own temp PNG
+                    for (int i = 0; i < pageCount; i++) {
+                        Path tempFile = Files.createTempFile("image_", ".png");
+                        // Register before rendering so the file is removed even if rendering fails
+                        images.add(tempFile.toString());
+
+                        BufferedImage image = pdfRenderer.renderImageWithDPI(i, 300);
+                        ImageIO.write(image, "png", tempFile.toFile());
+                    }
+                }
+            } else {
+                Path tempInputFile = Files.createTempFile("input_", "." + extension);
+                images.add(tempInputFile.toString());
+                Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
+            }
+
+            // Process each image
+            for (String imagePath : images) {
+                Path tempDir = Files.createTempDirectory("openCV_output");
+                try {
+                    // Positional order must match split_photos.py: tolerance, min_area, min_contour_area, angle_threshold
+                    List<String> command = new ArrayList<>(Arrays.asList("python3", "/pythonScripts/split_photos.py", imagePath, tempDir.toString(),
+                            String.valueOf(tolerance), String.valueOf(minArea), String.valueOf(minContourArea), String.valueOf(angleThreshold)));
+
+                    // Run CLI command
+                    int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command);
+                    if (returnCode != 0) {
+                        logger.warn("split_photos.py exited with code {} for {}", returnCode, imagePath);
+                    }
+
+                    // Read the output photos; the Files.list stream holds a directory handle until closed
+                    try (java.util.stream.Stream<Path> outputs = Files.list(tempDir)) {
+                        List<Path> tempOutputFiles = outputs.sorted().collect(Collectors.toList());
+                        for (Path tempOutputFile : tempOutputFiles) {
+                            processedImageBytes.add(Files.readAllBytes(tempOutputFile));
+                        }
+                    }
+                } finally {
+                    // Clean up the temporary directory
+                    FileUtils.deleteDirectory(tempDir.toFile());
+                }
+            }
+        } finally {
+            for (String imagePath : images) {
+                Files.deleteIfExists(Path.of(imagePath));
+            }
+        }
+
+        if (processedImageBytes.isEmpty()) {
+            // Fail with a clear message; get(0) below would otherwise throw IndexOutOfBoundsException
+            throw new IOException("No images were extracted from " + fileName);
+        }
+
+        // Create zip file if multiple images
+        if (processedImageBytes.size() > 1) {
+            String outputZipFilename = baseName + "_processed.zip";
+            Path tempZipFile = Files.createTempFile("output_", ".zip");
+            try {
+                try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) {
+                    // Add processed images to the zip
+                    for (int i = 0; i < processedImageBytes.size(); i++) {
+                        ZipEntry entry = new ZipEntry(baseName + "_" + (i + 1) + ".png");
+                        zipOut.putNextEntry(entry);
+                        zipOut.write(processedImageBytes.get(i));
+                        zipOut.closeEntry();
+                    }
+                }
+
+                byte[] zipBytes = Files.readAllBytes(tempZipFile);
+                return PdfUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM);
+            } finally {
+                // Clean up the temporary zip file
+                Files.deleteIfExists(tempZipFile);
+            }
+        } else {
+            // Return the processed image as a response
+            return PdfUtils.bytesToWebResponse(processedImageBytes.get(0), baseName + ".png", MediaType.IMAGE_PNG);
+        }
+    }
+
+}
diff --git a/src/main/java/stirling/software/SPDF/controller/other/ExtractImagesController.java b/src/main/java/stirling/software/SPDF/controller/other/ExtractImagesController.java index 10c2f1f5..9afe67c6 100644 --- a/src/main/java/stirling/software/SPDF/controller/other/ExtractImagesController.java +++ b/src/main/java/stirling/software/SPDF/controller/other/ExtractImagesController.java @@ -18,10 +18,6 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.core.io.ByteArrayResource; -import org.springframework.core.io.Resource; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; @@ -31,13 +27,15 @@ import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.multipart.MultipartFile; +import stirling.software.SPDF.utils.PdfUtils; + @Controller public class ExtractImagesController { private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class); @PostMapping("/extract-images") - public ResponseEntity extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException { + public ResponseEntity extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException { System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format); PDDocument document = 
PDDocument.load(file.getBytes()); @@ -98,18 +96,8 @@ public class ExtractImagesController { // Create ByteArrayResource from byte array byte[] zipContents = baos.toByteArray(); - ByteArrayResource resource = new ByteArrayResource(zipContents); - - // Set content disposition header to indicate that the response should be - // downloaded as a file - HttpHeaders headers = new HttpHeaders(); - headers.setContentLength(zipContents.length); - headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip"); - - // Return ResponseEntity with ByteArrayResource and headers - return ResponseEntity.status(HttpStatus.OK).headers(headers) - - .header("Cache-Control", "no-cache").contentType(MediaType.APPLICATION_OCTET_STREAM).body(resource); + + return PdfUtils.boasToWebResponse(baos, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM); } @GetMapping("/extract-images") diff --git a/src/main/java/stirling/software/SPDF/controller/other/OCRController.java b/src/main/java/stirling/software/SPDF/controller/other/OCRController.java index a18adc76..189b79d6 100644 --- a/src/main/java/stirling/software/SPDF/controller/other/OCRController.java +++ b/src/main/java/stirling/software/SPDF/controller/other/OCRController.java @@ -17,7 +17,6 @@ import java.util.zip.ZipOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.http.HttpHeaders; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.stereotype.Controller; @@ -27,6 +26,7 @@ import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.multipart.MultipartFile; import org.springframework.web.servlet.ModelAndView; +import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; @Controller @@ -123,8 +123,6 @@ 
public class OCRController { // Return the OCR processed PDF as a response String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf"; - HttpHeaders headers = new HttpHeaders(); - if (sidecar != null && sidecar) { // Create a zip file containing both the PDF and the text file String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip"; @@ -150,17 +148,13 @@ public class OCRController { Files.delete(tempZipFile); Files.delete(tempOutputFile); Files.delete(sidecarTextPath); - + // Return the zip file containing both the PDF and the text file - headers.setContentType(MediaType.APPLICATION_OCTET_STREAM); - headers.setContentDispositionFormData("attachment", outputZipFilename); - return ResponseEntity.ok().headers(headers).body(zipBytes); + return PdfUtils.bytesToWebResponse(zipBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM); } else { // Return the OCR processed PDF as a response Files.delete(tempOutputFile); - headers.setContentType(MediaType.APPLICATION_PDF); - headers.setContentDispositionFormData("attachment", outputFilename); - return ResponseEntity.ok().headers(headers).body(pdfBytes); + return PdfUtils.bytesToWebResponse(pdfBytes, outputFilename); } } diff --git a/src/main/java/stirling/software/SPDF/utils/PDFToFile.java b/src/main/java/stirling/software/SPDF/utils/PDFToFile.java index 450f8192..4f52c97f 100644 --- a/src/main/java/stirling/software/SPDF/utils/PDFToFile.java +++ b/src/main/java/stirling/software/SPDF/utils/PDFToFile.java @@ -15,7 +15,6 @@ import java.util.zip.ZipOutputStream; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.springframework.http.HttpHeaders; import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; @@ -41,8 +40,7 @@ public class PDFToFile { Path tempInputFile = null; Path tempOutputDir = null; byte[] fileBytes; - // 
Prepare response - HttpHeaders headers = new HttpHeaders(); + String fileName = "temp.file"; try { // Save the uploaded file to a temporary location @@ -60,19 +58,18 @@ public class PDFToFile { // Get output files List outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles()); + if (outputFiles.size() == 1) { // Return single output file File outputFile = outputFiles.get(0); - headers.setContentType(MediaType.APPLICATION_OCTET_STREAM); if (outputFormat.equals("txt:Text")) { outputFormat = "txt"; } - headers.setContentDispositionFormData("attachment", pdfBaseName + "." + outputFormat); + fileName = pdfBaseName + "." + outputFormat; fileBytes = FileUtils.readFileToByteArray(outputFile); } else { // Return output files in a ZIP archive - headers.setContentType(MediaType.APPLICATION_OCTET_STREAM); - headers.setContentDispositionFormData("attachment", pdfBaseName + "To" + outputFormat + ".zip"); + fileName = pdfBaseName + "To" + outputFormat + ".zip"; ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream); @@ -96,6 +93,6 @@ public class PDFToFile { if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile()); } - return new ResponseEntity<>(fileBytes, headers, HttpStatus.OK); + return PdfUtils.bytesToWebResponse(fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM); } } diff --git a/src/main/java/stirling/software/SPDF/utils/PdfUtils.java b/src/main/java/stirling/software/SPDF/utils/PdfUtils.java index d083121d..446e73dd 100644 --- a/src/main/java/stirling/software/SPDF/utils/PdfUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/PdfUtils.java @@ -8,6 +8,8 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.security.KeyPair; import java.security.KeyStore; @@ -43,18 +45,26 
@@ public class PdfUtils { public static ResponseEntity boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException { return PdfUtils.bytesToWebResponse(baos.toByteArray(), docName); - } - public static ResponseEntity bytesToWebResponse(byte[] bytes, String docName) throws IOException { + public static ResponseEntity boasToWebResponse(ByteArrayOutputStream baos, String docName, MediaType mediaType) throws IOException { + return PdfUtils.bytesToWebResponse(baos.toByteArray(), docName, mediaType ); + } + + public static ResponseEntity bytesToWebResponse(byte[] bytes, String docName, MediaType mediaType ) throws IOException { // Return the PDF as a response HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_PDF); + headers.setContentType(mediaType); headers.setContentLength(bytes.length); - headers.setContentDispositionFormData("attachment", docName); + String encodedDocName = URLEncoder.encode(docName, StandardCharsets.UTF_8.toString()).replaceAll("\\+", "%20"); + headers.setContentDispositionFormData("attachment", encodedDocName); return new ResponseEntity<>(bytes, headers, HttpStatus.OK); } + + public static ResponseEntity bytesToWebResponse(byte[] bytes, String docName) throws IOException { + return bytesToWebResponse(bytes, docName, MediaType.APPLICATION_PDF); + } public static byte[] convertFromPdf(byte[] inputStream, String imageType, ImageType colorType, boolean singleImage, int DPI) throws IOException, Exception { try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputStream))) { diff --git a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java index c8744c52..34cfdb6b 100644 --- a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java +++ b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java @@ -13,7 +13,7 @@ import java.util.concurrent.Semaphore; public class ProcessExecutor { public 
enum Processes { - LIBRE_OFFICE, OCR_MY_PDF + LIBRE_OFFICE, OCR_MY_PDF, PYTHON_OPENCV } private static final Map instances = new ConcurrentHashMap<>(); @@ -23,6 +23,7 @@ public class ProcessExecutor { int semaphoreLimit = switch (key) { case LIBRE_OFFICE -> 1; case OCR_MY_PDF -> 2; + case PYTHON_OPENCV -> 8; }; return new ProcessExecutor(semaphoreLimit); }); diff --git a/src/main/resources/templates/fragments/common.html b/src/main/resources/templates/fragments/common.html index 33154eff..85d5c5a7 100644 --- a/src/main/resources/templates/fragments/common.html +++ b/src/main/resources/templates/fragments/common.html @@ -136,7 +136,7 @@ document.addEventListener("DOMContentLoaded", function () { const contentDispositionHeader = response.headers.get('Content-Disposition'); console.log(contentDispositionHeader) if (contentDispositionHeader && contentDispositionHeader.indexOf('attachment') !== -1) { - filename = contentDispositionHeader.split('filename=')[1].replace(/"/g, ''); + filename = decodeURIComponent(contentDispositionHeader.split('filename=')[1].replace(/"/g, '')); } else { // If the Content-Disposition header is not present or does not contain the filename, use a default filename filename = 'download'; diff --git a/src/main/resources/templates/other/extract-image-scans.html b/src/main/resources/templates/other/extract-image-scans.html new file mode 100644 index 00000000..fbcfe839 --- /dev/null +++ b/src/main/resources/templates/other/extract-image-scans.html @@ -0,0 +1,28 @@ + + + + + + + +
+
+
+

+
+
+
+

+ +
+
+ +
+
+
+
+
+
+
+ + \ No newline at end of file