From 5bee71443786dfb7579d1cf8a2e1880a9f0a20f9 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Mon, 1 May 2023 21:57:48 +0100 Subject: [PATCH 1/4] utf8 bug fix and scan pages (#113) --- Dockerfile | 9 +- DockerfileBase | 34 +- HowToUseOCR.md | 2 + README.md | 4 + build.gradle | 8 +- scripts/init.sh | 9 + scripts/split_photos.py | 134 +++++ .../stirling/software/SPDF/config/Beans.java | 1 + .../SPDF/config/CleanUrlInterceptor.java | 45 ++ .../software/SPDF/config/OpenApiConfig.java | 20 + .../SPDF/controller/MultiToolController.java | 20 - .../SPDF/controller/PdfController.java | 25 - .../controller/{ => api}/MergeController.java | 134 ++--- .../RearrangePagesPDFController.java | 230 ++++----- .../{ => api}/RotationController.java | 89 ++-- .../{ => api}/SplitPDFController.java | 264 +++++----- .../converters/ConvertImgPDFController.java | 183 ++++--- .../converters/ConvertOfficeController.java | 21 +- .../api/converters/ConvertPDFToOffice.java | 52 ++ .../converters/ConvertPDFToPDFA.java | 28 +- .../{ => api}/other/CompressController.java | 163 +++--- .../other/ExtractImageScansController.java | 130 +++++ .../other/ExtractImagesController.java | 37 +- .../{ => api}/other/MetadataController.java | 18 +- .../{ => api}/other/OCRController.java | 75 +-- .../other/OverlayImageController.java | 79 ++- .../security/PasswordController.java | 147 +++--- .../security/WatermarkController.java | 290 ++++++----- .../converters/ConvertPDFToOffice.java | 86 ---- .../web/ConverterWebController.java | 86 ++++ .../controller/web/GeneralWebController.java | 69 +++ .../controller/web/OtherWebController.java | 84 ++++ .../controller/web/SecurityWebController.java | 46 ++ .../software/SPDF/utils/PDFToFile.java | 12 +- .../software/SPDF/utils/PdfUtils.java | 18 +- .../software/SPDF/utils/ProcessExecutor.java | 4 +- src/main/resources/messages_ar_AR.properties | 98 ++-- src/main/resources/messages_de_DE.properties | 16 + 
src/main/resources/messages_en_GB.properties | 17 + src/main/resources/messages_es_ES.properties | 15 + src/main/resources/messages_fr_FR.properties | 16 + .../resources/static/css/rainbow-mode.css | 35 ++ src/main/resources/static/images/scanner.svg | 13 + src/main/resources/static/js/game.js | 286 +++++++++++ src/main/resources/static/rainbow.svg | 3 + src/main/resources/templates/about.html | 22 + .../resources/templates/fragments/common.html | 464 +++++++++++------- .../resources/templates/fragments/navbar.html | 69 ++- src/main/resources/templates/home.html | 19 +- .../templates/other/adjust-contrast.html | 32 ++ .../templates/other/extract-image-scans.html | 54 ++ .../resources/templates/other/ocr-pdf.html | 19 +- 52 files changed, 2493 insertions(+), 1341 deletions(-) create mode 100644 scripts/init.sh create mode 100644 scripts/split_photos.py create mode 100644 src/main/java/stirling/software/SPDF/config/CleanUrlInterceptor.java create mode 100644 src/main/java/stirling/software/SPDF/config/OpenApiConfig.java delete mode 100644 src/main/java/stirling/software/SPDF/controller/MultiToolController.java delete mode 100644 src/main/java/stirling/software/SPDF/controller/PdfController.java rename src/main/java/stirling/software/SPDF/controller/{ => api}/MergeController.java (68%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/RearrangePagesPDFController.java (78%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/RotationController.java (63%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/SplitPDFController.java (87%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/converters/ConvertImgPDFController.java (73%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/converters/ConvertOfficeController.java (79%) create mode 100644 src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java rename src/main/java/stirling/software/SPDF/controller/{ => 
api}/converters/ConvertPDFToPDFA.java (61%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/other/CompressController.java (71%) create mode 100644 src/main/java/stirling/software/SPDF/controller/api/other/ExtractImageScansController.java rename src/main/java/stirling/software/SPDF/controller/{ => api}/other/ExtractImagesController.java (71%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/other/MetadataController.java (91%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/other/OCRController.java (66%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/other/OverlayImageController.java (59%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/security/PasswordController.java (63%) rename src/main/java/stirling/software/SPDF/controller/{ => api}/security/WatermarkController.java (83%) delete mode 100644 src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToOffice.java create mode 100644 src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java create mode 100644 src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java create mode 100644 src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java create mode 100644 src/main/java/stirling/software/SPDF/controller/web/SecurityWebController.java create mode 100644 src/main/resources/static/css/rainbow-mode.css create mode 100644 src/main/resources/static/images/scanner.svg create mode 100644 src/main/resources/static/js/game.js create mode 100644 src/main/resources/static/rainbow.svg create mode 100644 src/main/resources/templates/about.html create mode 100644 src/main/resources/templates/other/adjust-contrast.html create mode 100644 src/main/resources/templates/other/extract-image-scans.html diff --git a/Dockerfile b/Dockerfile index f4ecd3bd..50bbc809 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,9 @@ # Build jbig2enc in a separate stage -FROM 
frooodle/stirling-pdf-base:latest +FROM frooodle/stirling-pdf-base:beta2 + +# Create scripts folder and copy local scripts +RUN mkdir /scripts +COPY ./scripts/* /scripts/ # Copy the application JAR file COPY build/libs/*.jar app.jar @@ -13,7 +17,8 @@ ENV APP_HOME_NAME="Stirling PDF" #ENV APP_NAVBAR_NAME="Stirling PDF" # Run the application -ENTRYPOINT java -jar /app.jar +ENTRYPOINT ["/scripts/init.sh"] +CMD ["java", "-jar", "/app.jar"] diff --git a/DockerfileBase b/DockerfileBase index 1aff24e0..fc5399af 100644 --- a/DockerfileBase +++ b/DockerfileBase @@ -21,10 +21,9 @@ RUN git clone https://github.com/agl/jbig2enc && \ make && \ make install -# Main stage -FROM openjdk:17-jdk-slim -# Install necessary dependencies +# Main stage +FROM openjdk:17-jdk-slim AS base RUN apt-get update && \ apt-get install -y --no-install-recommends \ libreoffice-core \ @@ -33,12 +32,31 @@ RUN apt-get update && \ libreoffice-calc \ libreoffice-impress \ python3-uno \ - python3-pip \ + python3-pip \ unoconv \ - pngquant \ - unpaper \ + pngquant \ + unpaper \ ocrmypdf && \ - pip install --user --upgrade ocrmypdf + rm -rf /var/lib/apt/lists/* && \ + mkdir /usr/share/tesseract-ocr-original && \ + cp -r /usr/share/tesseract-ocr/* /usr/share/tesseract-ocr-original && \ + rm -rf /usr/share/tesseract-ocr -# Copy the jbig2enc binary from the builder stage +# Python packages stage +FROM base AS python-packages +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + libffi-dev \ + libssl-dev \ + zlib1g-dev \ + libjpeg-dev && \ + pip install --upgrade pip && \ + pip install --no-cache-dir \ + opencv-python-headless && \ + rm -rf /var/lib/apt/lists/* + +# Final stage: Copy necessary files from the previous stage +FROM base +COPY --from=python-packages /usr/local /usr/local COPY --from=jbig2enc_builder /usr/local/bin/jbig2 /usr/local/bin/jbig2 \ No newline at end of file diff --git a/HowToUseOCR.md b/HowToUseOCR.md index 1b7d75f9..9a867cbb 100644 --- 
a/HowToUseOCR.md +++ b/HowToUseOCR.md @@ -20,6 +20,8 @@ Depending on your requirements, you can choose the appropriate language pack for 1. Download the desired language pack(s) by selecting the `.traineddata` file(s) for the language(s) you need. 2. Place the `.traineddata` files in the Tesseract tessdata directory: `/usr/share/tesseract-ocr/4.00/tessdata` +# DO NOT REMOVE THE EXISTING ENG.TRAINEDDATA, IT IS REQUIRED. + #### Docker If you are using Docker, you need to expose the Tesseract tessdata directory as a volume in order to use the additional language packs. diff --git a/README.md b/README.md index eba3525f..b5264c07 100644 --- a/README.md +++ b/README.md @@ -99,3 +99,7 @@ Simply use environment variables APP_HOME_NAME, APP_HOME_DESCRIPTION and APP_NAV If running Java directly, you can also pass these as properties using -D arguments. Using the same method you can also change the default language by providing APP_LOCALE with values like de-DE fr-FR or ar-AR to select your default language (Will always default to English on invalid locale) + +## API +For those wanting to use Stirling-PDF's backend API to link with their own custom scripting to edit PDFs, you can view all existing API documentation +[here](https://app.swaggerhub.com/apis-docs/Frooodle/Stirling-PDF/1.0.0) or navigate to /swagger-ui/index.html of your stirling-pdf instance for your version's documentation \ No newline at end of file diff --git a/build.gradle b/build.gradle index 1351ece7..76296dbc 100644 --- a/build.gradle +++ b/build.gradle @@ -5,7 +5,7 @@ plugins { } group = 'stirling.software' -version = '0.6.0' +version = '0.7.0' sourceCompatibility = '17' repositories { @@ -19,11 +19,11 @@ dependencies { // https://mvnrepository.com/artifact/org.apache.pdfbox/jbig2-imageio implementation group: 'org.apache.pdfbox', name: 'jbig2-imageio', version: '3.0.4' implementation 'commons-io:commons-io:2.11.0' - + + implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.1.0' + //general PDF
implementation 'org.apache.pdfbox:pdfbox:2.0.28' - - implementation 'com.itextpdf:itextpdf:5.5.13.3' developmentOnly("org.springframework.boot:spring-boot-devtools") } diff --git a/scripts/init.sh b/scripts/init.sh new file mode 100644 index 00000000..b45bf45f --- /dev/null +++ b/scripts/init.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# Copy the original tesseract-ocr files to the volume directory without overwriting existing files +echo "Copying original files without overwriting existing files" +mkdir -p /usr/share/tesseract-ocr +cp -rn /usr/share/tesseract-ocr-original/* /usr/share/tesseract-ocr + +# Run the main command +exec "$@" \ No newline at end of file diff --git a/scripts/split_photos.py b/scripts/split_photos.py new file mode 100644 index 00000000..13f44043 --- /dev/null +++ b/scripts/split_photos.py @@ -0,0 +1,134 @@ +import sys +import cv2 +import numpy as np +import os + +def find_photo_boundaries(image, background_color, tolerance=30, min_area=10000, min_contour_area=500): + mask = cv2.inRange(image, background_color - tolerance, background_color + tolerance) + mask = cv2.bitwise_not(mask) + kernel = np.ones((5,5),np.uint8) + mask = cv2.dilate(mask, kernel, iterations=2) + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + photo_boundaries = [] + for contour in contours: + x, y, w, h = cv2.boundingRect(contour) + area = w * h + contour_area = cv2.contourArea(contour) + if area >= min_area and contour_area >= min_contour_area: + photo_boundaries.append((x, y, w, h)) + + return photo_boundaries + +def estimate_background_color(image, sample_points=5): + h, w, _ = image.shape + points = [ + (0, 0), + (w - 1, 0), + (w - 1, h - 1), + (0, h - 1), + (w // 2, h // 2), + ] + + colors = [] + for x, y in points: + colors.append(image[y, x]) + + return np.median(colors, axis=0) + +def auto_rotate(image, angle_threshold=10): + gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + ret, binary = cv2.threshold(gray, 0, 255, 
cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) + contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + + if len(contours) == 0: + return image + + largest_contour = max(contours, key=cv2.contourArea) + mu = cv2.moments(largest_contour) + + if mu["m00"] == 0: + return image + + x_centroid = int(mu["m10"] / mu["m00"]) + y_centroid = int(mu["m01"] / mu["m00"]) + + coords = np.column_stack(np.where(binary > 0)) + u, _, vt = np.linalg.svd(coords - np.array([[y_centroid, x_centroid]]), full_matrices=False) + + angle = np.arctan2(u[1, 0], u[0, 0]) * 180 / np.pi + + if angle < -45: + angle = -(90 + angle) + else: + angle = -angle + + if abs(angle) < angle_threshold: + return image + + (h, w) = image.shape[:2] + center = (w // 2, h // 2) + M = cv2.getRotationMatrix2D(center, angle, 1.0) + return cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE) + + + +def crop_borders(image, border_color, tolerance=30): + mask = cv2.inRange(image, border_color - tolerance, border_color + tolerance) + + contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + if len(contours) == 0: + return image + + largest_contour = max(contours, key=cv2.contourArea) + x, y, w, h = cv2.boundingRect(largest_contour) + + return image[y:y+h, x:x+w] + +def split_photos(input_file, output_directory, tolerance=30, min_area=10000, min_contour_area=500, angle_threshold=10, border_size=0): + image = cv2.imread(input_file) + background_color = estimate_background_color(image) + + # Add a constant border around the image + image = cv2.copyMakeBorder(image, border_size, border_size, border_size, border_size, cv2.BORDER_CONSTANT, value=background_color) + + photo_boundaries = find_photo_boundaries(image, background_color, tolerance) + + if not os.path.exists(output_directory): + os.makedirs(output_directory) + + # Get the input file's base name without the extension + input_file_basename = 
os.path.splitext(os.path.basename(input_file))[0] + + for idx, (x, y, w, h) in enumerate(photo_boundaries): + cropped_image = image[y:y+h, x:x+w] + cropped_image = auto_rotate(cropped_image, angle_threshold) + + # Remove the added border + cropped_image = cropped_image[border_size:-border_size, border_size:-border_size] + + output_path = os.path.join(output_directory, f"{input_file_basename}_{idx+1}.png") + cv2.imwrite(output_path, cropped_image) + print(f"Saved {output_path}") + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python3 split_photos.py [tolerance] [min_area] [min_contour_area] [angle_threshold] [border_size]") + print("\nParameters:") + print(" - The input scanned image containing multiple photos.") + print(" - The directory where the result images should be placed.") + print(" [tolerance] - Optional. Determines the range of color variation around the estimated background color (default: 30).") + print(" [min_area] - Optional. Sets the minimum area threshold for a photo (default: 10000).") + print(" [min_contour_area] - Optional. Sets the minimum contour area threshold for a photo (default: 500).") + print(" [angle_threshold] - Optional. Sets the minimum absolute angle required for the image to be rotated (default: 10).") + print(" [border_size] - Optional. 
Sets the size of the border added and removed to prevent white borders in the output (default: 0).") + sys.exit(1) + + input_file = sys.argv[1] + output_directory = sys.argv[2] + tolerance = int(sys.argv[3]) if len(sys.argv) > 3 else 20 + min_area = int(sys.argv[4]) if len(sys.argv) > 4 else 8000 + min_contour_area = int(sys.argv[5]) if len(sys.argv) > 5 else 500 + angle_threshold = int(sys.argv[6]) if len(sys.argv) > 6 else 60 + border_size = int(sys.argv[7]) if len(sys.argv) > 7 else 0 + split_photos(input_file, output_directory, tolerance=tolerance, min_area=min_area, min_contour_area=min_contour_area, angle_threshold=angle_threshold, border_size=border_size) diff --git a/src/main/java/stirling/software/SPDF/config/Beans.java b/src/main/java/stirling/software/SPDF/config/Beans.java index a35796ea..a65879d3 100644 --- a/src/main/java/stirling/software/SPDF/config/Beans.java +++ b/src/main/java/stirling/software/SPDF/config/Beans.java @@ -16,6 +16,7 @@ public class Beans implements WebMvcConfigurer { @Override public void addInterceptors(InterceptorRegistry registry) { registry.addInterceptor(localeChangeInterceptor()); + registry.addInterceptor(new CleanUrlInterceptor()); } @Bean diff --git a/src/main/java/stirling/software/SPDF/config/CleanUrlInterceptor.java b/src/main/java/stirling/software/SPDF/config/CleanUrlInterceptor.java new file mode 100644 index 00000000..81cbc43f --- /dev/null +++ b/src/main/java/stirling/software/SPDF/config/CleanUrlInterceptor.java @@ -0,0 +1,45 @@ +package stirling.software.SPDF.config; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.springframework.web.servlet.HandlerInterceptor; +import org.springframework.web.servlet.ModelAndView; + +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; + +public class CleanUrlInterceptor implements HandlerInterceptor { + + private static final Pattern LANG_PATTERN = Pattern.compile("&?lang=([^&]+)"); + + @Override + 
public boolean preHandle(HttpServletRequest request, HttpServletResponse response, Object handler) throws Exception { + String queryString = request.getQueryString(); + if (queryString != null && !queryString.isEmpty()) { + String requestURI = request.getRequestURI(); + + // Keep the lang parameter if it exists + Matcher langMatcher = LANG_PATTERN.matcher(queryString); + String langQueryString = langMatcher.find() ? "lang=" + langMatcher.group(1) : ""; + + // Check if there are any other query parameters besides the lang parameter + String remainingQueryString = queryString.replaceAll(LANG_PATTERN.pattern(), "").replaceAll("&+", "&").replaceAll("^&|&$", ""); + + if (!remainingQueryString.isEmpty()) { + // Redirect to the URL without other query parameters + String redirectUrl = requestURI + (langQueryString.isEmpty() ? "" : "?" + langQueryString); + response.sendRedirect(redirectUrl); + return false; + } + } + return true; + } + + @Override + public void postHandle(HttpServletRequest request, HttpServletResponse response, Object handler, ModelAndView modelAndView) { + } + + @Override + public void afterCompletion(HttpServletRequest request, HttpServletResponse response, Object handler, Exception ex) { + } +} diff --git a/src/main/java/stirling/software/SPDF/config/OpenApiConfig.java b/src/main/java/stirling/software/SPDF/config/OpenApiConfig.java new file mode 100644 index 00000000..6935f60f --- /dev/null +++ b/src/main/java/stirling/software/SPDF/config/OpenApiConfig.java @@ -0,0 +1,20 @@ +package stirling.software.SPDF.config; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import io.swagger.v3.oas.models.Components; +import io.swagger.v3.oas.models.OpenAPI; +import io.swagger.v3.oas.models.info.Info; +import io.swagger.v3.oas.models.info.License; + +@Configuration +public class OpenApiConfig { + + @Bean + public OpenAPI customOpenAPI() { + return new OpenAPI().components(new 
Components()).info( + new Info().title("Your API Title").version("1.0.0").description("Your API Description").license(new License().name("Your License Name").url("Your License URL"))); + } + +} diff --git a/src/main/java/stirling/software/SPDF/controller/MultiToolController.java b/src/main/java/stirling/software/SPDF/controller/MultiToolController.java deleted file mode 100644 index cfe0ba72..00000000 --- a/src/main/java/stirling/software/SPDF/controller/MultiToolController.java +++ /dev/null @@ -1,20 +0,0 @@ -package stirling.software.SPDF.controller; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; - -@Controller -public class MultiToolController { - - private static final Logger logger = LoggerFactory.getLogger(MultiToolController.class); - - @GetMapping("/multi-tool") - public String multiToolForm(Model model) { - model.addAttribute("currentPage", "multi-tool"); - return "multi-tool"; - } - -} \ No newline at end of file diff --git a/src/main/java/stirling/software/SPDF/controller/PdfController.java b/src/main/java/stirling/software/SPDF/controller/PdfController.java deleted file mode 100644 index 90dc8229..00000000 --- a/src/main/java/stirling/software/SPDF/controller/PdfController.java +++ /dev/null @@ -1,25 +0,0 @@ -package stirling.software.SPDF.controller; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; - -@Controller -public class PdfController { - - private static final Logger logger = LoggerFactory.getLogger(PdfController.class); - - @GetMapping("/") - public String home(Model model) { - model.addAttribute("currentPage", "home"); - return "home"; - } - - @GetMapping("/home") - public String root(Model model) { - return "redirect:/"; - } 
- -} \ No newline at end of file diff --git a/src/main/java/stirling/software/SPDF/controller/MergeController.java b/src/main/java/stirling/software/SPDF/controller/api/MergeController.java similarity index 68% rename from src/main/java/stirling/software/SPDF/controller/MergeController.java rename to src/main/java/stirling/software/SPDF/controller/api/MergeController.java index c261c8e5..4a9cb2fb 100644 --- a/src/main/java/stirling/software/SPDF/controller/MergeController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/MergeController.java @@ -1,67 +1,69 @@ -package stirling.software.SPDF.controller; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.PDPageTree; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class MergeController { - - private static final Logger logger = LoggerFactory.getLogger(MergeController.class); - - @GetMapping("/merge-pdfs") - public String hello(Model model) { - model.addAttribute("currentPage", "merge-pdfs"); - return "merge-pdfs"; - } - - private PDDocument mergeDocuments(List documents) throws IOException { - // Create a new empty document - PDDocument mergedDoc = new PDDocument(); - - // Iterate over the list of documents and add their pages to the merged document - for (PDDocument doc : documents) { - // Get all pages from the current document - PDPageTree pages = doc.getPages(); - // Iterate over the pages and 
add them to the merged document - for (PDPage page : pages) { - mergedDoc.addPage(page); - } - } - - // Return the merged document - return mergedDoc; - } - - @PostMapping("/merge-pdfs") - public ResponseEntity mergePdfs(@RequestParam("fileInput") MultipartFile[] files) throws IOException { - // Read the input PDF files into PDDocument objects - List documents = new ArrayList<>(); - - // Loop through the files array and read each file into a PDDocument - for (MultipartFile file : files) { - documents.add(PDDocument.load(file.getInputStream())); - } - - PDDocument mergedDoc = mergeDocuments(documents); - - // Return the merged PDF as a response - return PdfUtils.pdfDocToWebResponse(mergedDoc, files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_merged.pdf"); - } - +package stirling.software.SPDF.controller.api; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageTree; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; + +@RestController +public class MergeController { + + private static final Logger logger = LoggerFactory.getLogger(MergeController.class); + + private PDDocument mergeDocuments(List documents) throws IOException { + // Create a new empty document + PDDocument mergedDoc = new PDDocument(); + + // Iterate over the list of documents and add their pages to the merged document + for (PDDocument doc : documents) { + // Get all pages from the current document + PDPageTree pages = doc.getPages(); + // Iterate over the pages and add them to the 
merged document + for (PDPage page : pages) { + mergedDoc.addPage(page); + } + + + } + + // Return the merged document + return mergedDoc; + } + + @PostMapping(consumes = "multipart/form-data", value = "/merge-pdfs") + public ResponseEntity mergePdfs(@RequestPart(required = true, value = "fileInput") MultipartFile[] files) throws IOException { + // Read the input PDF files into PDDocument objects + List documents = new ArrayList<>(); + + // Loop through the files array and read each file into a PDDocument + for (MultipartFile file : files) { + documents.add(PDDocument.load(file.getInputStream())); + } + + PDDocument mergedDoc = mergeDocuments(documents); + + + // Return the merged PDF as a response + ResponseEntity response = PdfUtils.pdfDocToWebResponse(mergedDoc, files[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_merged.pdf"); + + for (PDDocument doc : documents) { + // Close the document after processing + doc.close(); + } + + return response; + } + } \ No newline at end of file diff --git a/src/main/java/stirling/software/SPDF/controller/RearrangePagesPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java similarity index 78% rename from src/main/java/stirling/software/SPDF/controller/RearrangePagesPDFController.java rename to src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java index 053f3e26..4ed72e92 100644 --- a/src/main/java/stirling/software/SPDF/controller/RearrangePagesPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/RearrangePagesPDFController.java @@ -1,121 +1,109 @@ -package stirling.software.SPDF.controller; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.http.ResponseEntity; -import 
org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class RearrangePagesPDFController { - - private static final Logger logger = LoggerFactory.getLogger(RearrangePagesPDFController.class); - - @PostMapping("/remove-pages") - public ResponseEntity deletePages(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("pagesToDelete") String pagesToDelete) throws IOException { - - PDDocument document = PDDocument.load(pdfFile.getBytes()); - - // Split the page order string into an array of page numbers or range of numbers - String[] pageOrderArr = pagesToDelete.split(","); - - List pagesToRemove = pageOrderToString(pageOrderArr, document.getNumberOfPages()); - - for (int i = pagesToRemove.size() - 1; i >= 0; i--) { - int pageIndex = pagesToRemove.get(i); - document.removePage(pageIndex); - } - return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_removed_pages.pdf"); - - } - - @GetMapping("/remove-pages") - public String pageDeleter(Model model) { - model.addAttribute("currentPage", "remove-pages"); - return "remove-pages"; - } - - private List pageOrderToString(String[] pageOrderArr, int totalPages) { - List newPageOrder = new ArrayList<>(); - // loop through the page order array - for (String element : pageOrderArr) { - // check if the element contains a range of pages - if (element.contains("-")) { - // split the range into start and end page - String[] range = element.split("-"); - int start = Integer.parseInt(range[0]); - int end = Integer.parseInt(range[1]); - // check if the end page is greater than total pages - if (end > totalPages) { - end = totalPages; - } - // 
loop through the range of pages - for (int j = start; j <= end; j++) { - // print the current index - newPageOrder.add(j - 1); - } - } else { - // if the element is a single page - newPageOrder.add(Integer.parseInt(element) - 1); - } - } - - return newPageOrder; - } - - @GetMapping("/pdf-organizer") - public String pageOrganizer(Model model) { - model.addAttribute("currentPage", "pdf-organizer"); - return "pdf-organizer"; - } - - @PostMapping("/rearrange-pages") - public ResponseEntity rearrangePages(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("pageOrder") String pageOrder) { - try { - // Load the input PDF - PDDocument document = PDDocument.load(pdfFile.getInputStream()); - - // Split the page order string into an array of page numbers or range of numbers - String[] pageOrderArr = pageOrder.split(","); - // int[] newPageOrder = new int[pageOrderArr.length]; - int totalPages = document.getNumberOfPages(); - - List newPageOrder = pageOrderToString(pageOrderArr, totalPages); - - // Create a new list to hold the pages in the new order - List newPages = new ArrayList<>(); - for (int i = 0; i < newPageOrder.size(); i++) { - newPages.add(document.getPage(newPageOrder.get(i))); - } - - // Remove all the pages from the original document - for (int i = document.getNumberOfPages() - 1; i >= 0; i--) { - document.removePage(i); - } - - // Add the pages in the new order - for (PDPage page : newPages) { - document.addPage(page); - } - - return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_rearranged.pdf"); - } catch (IOException e) { - - logger.error("Failed rearranging documents", e); - return null; - } - } - -} +package stirling.software.SPDF.controller.api; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import 
org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; + +@RestController +public class RearrangePagesPDFController { + + private static final Logger logger = LoggerFactory.getLogger(RearrangePagesPDFController.class); + + @PostMapping(consumes = "multipart/form-data", value = "/remove-pages") + public ResponseEntity deletePages(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile, @RequestParam("pagesToDelete") String pagesToDelete) + throws IOException { + + PDDocument document = PDDocument.load(pdfFile.getBytes()); + + // Split the page order string into an array of page numbers or range of numbers + String[] pageOrderArr = pagesToDelete.split(","); + + List pagesToRemove = pageOrderToString(pageOrderArr, document.getNumberOfPages()); + + for (int i = pagesToRemove.size() - 1; i >= 0; i--) { + int pageIndex = pagesToRemove.get(i); + document.removePage(pageIndex); + } + return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_removed_pages.pdf"); + + } + + private List pageOrderToString(String[] pageOrderArr, int totalPages) { + List newPageOrder = new ArrayList<>(); + // loop through the page order array + for (String element : pageOrderArr) { + // check if the element contains a range of pages + if (element.contains("-")) { + // split the range into start and end page + String[] range = element.split("-"); + int start = Integer.parseInt(range[0]); + int end = Integer.parseInt(range[1]); + // check if the end page is greater than total pages + if (end > totalPages) { + end = totalPages; + } + // loop through the range of pages + for (int j = start; j <= 
end; j++) { + // print the current index + newPageOrder.add(j - 1); + } + } else { + // if the element is a single page + newPageOrder.add(Integer.parseInt(element) - 1); + } + } + + return newPageOrder; + } + + @PostMapping(consumes = "multipart/form-data", value = "/rearrange-pages") + public ResponseEntity rearrangePages(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile, @RequestParam("pageOrder") String pageOrder) { + try { + // Load the input PDF + PDDocument document = PDDocument.load(pdfFile.getInputStream()); + + // Split the page order string into an array of page numbers or range of numbers + String[] pageOrderArr = pageOrder.split(","); + // int[] newPageOrder = new int[pageOrderArr.length]; + int totalPages = document.getNumberOfPages(); + + List newPageOrder = pageOrderToString(pageOrderArr, totalPages); + + // Create a new list to hold the pages in the new order + List newPages = new ArrayList<>(); + for (int i = 0; i < newPageOrder.size(); i++) { + newPages.add(document.getPage(newPageOrder.get(i))); + } + + // Remove all the pages from the original document + for (int i = document.getNumberOfPages() - 1; i >= 0; i--) { + document.removePage(i); + } + + // Add the pages in the new order + for (PDPage page : newPages) { + document.addPage(page); + } + + return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_rearranged.pdf"); + } catch (IOException e) { + + logger.error("Failed rearranging documents", e); + return null; + } + } + +} diff --git a/src/main/java/stirling/software/SPDF/controller/RotationController.java b/src/main/java/stirling/software/SPDF/controller/api/RotationController.java similarity index 63% rename from src/main/java/stirling/software/SPDF/controller/RotationController.java rename to src/main/java/stirling/software/SPDF/controller/api/RotationController.java index f61fe40f..e9d73a43 100644 --- 
a/src/main/java/stirling/software/SPDF/controller/RotationController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/RotationController.java @@ -1,48 +1,41 @@ -package stirling.software.SPDF.controller; - -import java.io.IOException; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.PDPageTree; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class RotationController { - - private static final Logger logger = LoggerFactory.getLogger(RotationController.class); - - @PostMapping("/rotate-pdf") - public ResponseEntity rotatePDF(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("angle") Integer angle) throws IOException { - - // Load the PDF document - PDDocument document = PDDocument.load(pdfFile.getBytes()); - - // Get the list of pages in the document - PDPageTree pages = document.getPages(); - - for (PDPage page : pages) { - page.setRotation(page.getRotation() + angle); - } - - return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_rotated.pdf"); - - } - - @GetMapping("/rotate-pdf") - public String rotatePdfForm(Model model) { - model.addAttribute("currentPage", "rotate-pdf"); - return "rotate-pdf"; - } - -} +package stirling.software.SPDF.controller.api; + +import java.io.IOException; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageTree; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; + +@RestController +public class RotationController { + + private static final Logger logger = LoggerFactory.getLogger(RotationController.class); + + @PostMapping(consumes = "multipart/form-data", value = "/rotate-pdf") + public ResponseEntity rotatePDF(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile, @RequestParam("angle") Integer angle) throws IOException { + + // Load the PDF document + PDDocument document = PDDocument.load(pdfFile.getBytes()); + + // Get the list of pages in the document + PDPageTree pages = document.getPages(); + + for (PDPage page : pages) { + page.setRotation(page.getRotation() + angle); + } + + return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_rotated.pdf"); + + } + +} diff --git a/src/main/java/stirling/software/SPDF/controller/SplitPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java similarity index 87% rename from src/main/java/stirling/software/SPDF/controller/SplitPDFController.java rename to src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java index 37d49d63..f6deb80c 100644 --- a/src/main/java/stirling/software/SPDF/controller/SplitPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/SplitPDFController.java @@ -1,135 +1,129 @@ -package stirling.software.SPDF.controller; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import 
java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; -import java.util.zip.ZipEntry; -import java.util.zip.ZipOutputStream; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDPage; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.core.io.ByteArrayResource; -import org.springframework.core.io.Resource; -import org.springframework.http.HttpHeaders; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -@Controller -public class SplitPDFController { - - private static final Logger logger = LoggerFactory.getLogger(SplitPDFController.class); - - @PostMapping("/split-pages") - public ResponseEntity splitPdf(@RequestParam("fileInput") MultipartFile file, @RequestParam("pages") String pages) throws IOException { - // parse user input - - // open the pdf document - InputStream inputStream = file.getInputStream(); - PDDocument document = PDDocument.load(inputStream); - - List pageNumbers = new ArrayList<>(); - pages = pages.replaceAll("\\s+", ""); // remove whitespaces - if (pages.toLowerCase().equals("all")) { - for (int i = 0; i < document.getNumberOfPages(); i++) { - pageNumbers.add(i); - } - } else { - List pageNumbersStr = new ArrayList<>(Arrays.asList(pages.split(","))); - if (!pageNumbersStr.contains(String.valueOf(document.getNumberOfPages()))) { - String lastpage = String.valueOf(document.getNumberOfPages()); - pageNumbersStr.add(lastpage); - } - for (String page : pageNumbersStr) { - if (page.contains("-")) { - String[] range = page.split("-"); - int start = 
Integer.parseInt(range[0]); - int end = Integer.parseInt(range[1]); - for (int i = start; i <= end; i++) { - pageNumbers.add(i); - } - } else { - pageNumbers.add(Integer.parseInt(page)); - } - } - } - - logger.info("Splitting PDF into pages: {}", pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(","))); - - // split the document - List splitDocumentsBoas = new ArrayList<>(); - int currentPage = 0; - for (int pageNumber : pageNumbers) { - try (PDDocument splitDocument = new PDDocument()) { - for (int i = currentPage; i < pageNumber; i++) { - PDPage page = document.getPage(i); - splitDocument.addPage(page); - logger.debug("Adding page {} to split document", i); - } - currentPage = pageNumber; - logger.debug("Setting current page to {}", currentPage); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - splitDocument.save(baos); - - splitDocumentsBoas.add(baos); - } catch (Exception e) { - logger.error("Failed splitting documents and saving them", e); - throw e; - } - } - - // closing the original document - document.close(); - - Path zipFile = Files.createTempFile("split_documents", ".zip"); - - try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) { - // loop through the split documents and write them to the zip file - for (int i = 0; i < splitDocumentsBoas.size(); i++) { - String fileName = "split_document_" + (i + 1) + ".pdf"; - ByteArrayOutputStream baos = splitDocumentsBoas.get(i); - byte[] pdf = baos.toByteArray(); - - // Add PDF file to the zip - ZipEntry pdfEntry = new ZipEntry(fileName); - zipOut.putNextEntry(pdfEntry); - zipOut.write(pdf); - zipOut.closeEntry(); - - logger.info("Wrote split document {} to zip file", fileName); - } - } catch (Exception e) { - logger.error("Failed writing to zip", e); - throw e; - } - - logger.info("Successfully created zip file with split documents: {}", zipFile.toString()); - byte[] data = Files.readAllBytes(zipFile); - ByteArrayResource resource = new 
ByteArrayResource(data); - Files.delete(zipFile); - - // return the Resource in the response - return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip") - .contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource); - } - - @GetMapping("/split-pdfs") - public String splitPdfForm(Model model) { - model.addAttribute("currentPage", "split-pdfs"); - return "split-pdfs"; - } -} +package stirling.software.SPDF.controller.api; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.core.io.ByteArrayResource; +import org.springframework.core.io.Resource; +import org.springframework.http.HttpHeaders; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +@RestController +public class SplitPDFController { + + private static final Logger logger = LoggerFactory.getLogger(SplitPDFController.class); + + @PostMapping(consumes = "multipart/form-data", value = "/split-pages") + public ResponseEntity splitPdf(@RequestPart(required = true, value = "fileInput") MultipartFile file, @RequestParam("pages") String pages) throws IOException { + 
// parse user input + + // open the pdf document + InputStream inputStream = file.getInputStream(); + PDDocument document = PDDocument.load(inputStream); + + List pageNumbers = new ArrayList<>(); + pages = pages.replaceAll("\\s+", ""); // remove whitespaces + if (pages.toLowerCase().equals("all")) { + for (int i = 0; i < document.getNumberOfPages(); i++) { + pageNumbers.add(i); + } + } else { + List pageNumbersStr = new ArrayList<>(Arrays.asList(pages.split(","))); + if (!pageNumbersStr.contains(String.valueOf(document.getNumberOfPages()))) { + String lastpage = String.valueOf(document.getNumberOfPages()); + pageNumbersStr.add(lastpage); + } + for (String page : pageNumbersStr) { + if (page.contains("-")) { + String[] range = page.split("-"); + int start = Integer.parseInt(range[0]); + int end = Integer.parseInt(range[1]); + for (int i = start; i <= end; i++) { + pageNumbers.add(i); + } + } else { + pageNumbers.add(Integer.parseInt(page)); + } + } + } + + logger.info("Splitting PDF into pages: {}", pageNumbers.stream().map(String::valueOf).collect(Collectors.joining(","))); + + // split the document + List splitDocumentsBoas = new ArrayList<>(); + int currentPage = 0; + for (int pageNumber : pageNumbers) { + try (PDDocument splitDocument = new PDDocument()) { + for (int i = currentPage; i < pageNumber; i++) { + PDPage page = document.getPage(i); + splitDocument.addPage(page); + logger.debug("Adding page {} to split document", i); + } + currentPage = pageNumber; + logger.debug("Setting current page to {}", currentPage); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + splitDocument.save(baos); + + splitDocumentsBoas.add(baos); + } catch (Exception e) { + logger.error("Failed splitting documents and saving them", e); + throw e; + } + } + + // closing the original document + document.close(); + + Path zipFile = Files.createTempFile("split_documents", ".zip"); + + try (ZipOutputStream zipOut = new ZipOutputStream(Files.newOutputStream(zipFile))) { + // 
loop through the split documents and write them to the zip file + for (int i = 0; i < splitDocumentsBoas.size(); i++) { + String fileName = "split_document_" + (i + 1) + ".pdf"; + ByteArrayOutputStream baos = splitDocumentsBoas.get(i); + byte[] pdf = baos.toByteArray(); + + // Add PDF file to the zip + ZipEntry pdfEntry = new ZipEntry(fileName); + zipOut.putNextEntry(pdfEntry); + zipOut.write(pdf); + zipOut.closeEntry(); + + logger.info("Wrote split document {} to zip file", fileName); + } + } catch (Exception e) { + logger.error("Failed writing to zip", e); + throw e; + } + + logger.info("Successfully created zip file with split documents: {}", zipFile.toString()); + byte[] data = Files.readAllBytes(zipFile); + ByteArrayResource resource = new ByteArrayResource(data); + Files.delete(zipFile); + + // return the Resource in the response + return ResponseEntity.ok().header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_split.zip") + .contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource); + } + +} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertImgPDFController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java similarity index 73% rename from src/main/java/stirling/software/SPDF/controller/converters/ConvertImgPDFController.java rename to src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java index f473b60d..12172127 100644 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertImgPDFController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertImgPDFController.java @@ -1,97 +1,86 @@ -package stirling.software.SPDF.controller.converters; - -import java.io.IOException; - -import org.apache.pdfbox.rendering.ImageType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import 
org.springframework.core.io.ByteArrayResource; -import org.springframework.core.io.Resource; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpStatus; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class ConvertImgPDFController { - - private static final Logger logger = LoggerFactory.getLogger(ConvertImgPDFController.class); - - @PostMapping("/pdf-to-img") - public ResponseEntity convertToImage(@RequestParam("fileInput") MultipartFile file, @RequestParam("imageFormat") String imageFormat, - @RequestParam("singleOrMultiple") String singleOrMultiple, @RequestParam("colorType") String colorType, @RequestParam("dpi") String dpi) throws IOException { - - byte[] pdfBytes = file.getBytes(); - ImageType colorTypeResult = ImageType.RGB; - if ("greyscale".equals(colorType)) { - colorTypeResult = ImageType.GRAY; - } else if ("blackwhite".equals(colorType)) { - colorTypeResult = ImageType.BINARY; - } - // returns bytes for image - boolean singleImage = singleOrMultiple.equals("single"); - byte[] result = null; - try { - result = PdfUtils.convertFromPdf(pdfBytes, imageFormat.toUpperCase(), colorTypeResult, singleImage, Integer.valueOf(dpi)); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (Exception e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - if (singleImage) { - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.parseMediaType(getMediaType(imageFormat))); - ResponseEntity response = new 
ResponseEntity<>(new ByteArrayResource(result), headers, HttpStatus.OK); - return response; - } else { - ByteArrayResource resource = new ByteArrayResource(result); - // return the Resource in the response - return ResponseEntity.ok() - .header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip") - .contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource); - } - } - - @PostMapping("/img-to-pdf") - public ResponseEntity convertToPdf(@RequestParam("fileInput") MultipartFile[] file, @RequestParam(defaultValue = "false", name = "stretchToFit") boolean stretchToFit, - @RequestParam(defaultValue = "true", name = "autoRotate") boolean autoRotate) throws IOException { - // Convert the file to PDF and get the resulting bytes - System.out.println(stretchToFit); - byte[] bytes = PdfUtils.imageToPdf(file, stretchToFit, autoRotate); - return PdfUtils.bytesToWebResponse(bytes, file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_coverted.pdf"); - } - - @GetMapping("/img-to-pdf") - public String convertToPdfForm(Model model) { - model.addAttribute("currentPage", "img-to-pdf"); - return "convert/img-to-pdf"; - } - - private String getMediaType(String imageFormat) { - if (imageFormat.equalsIgnoreCase("PNG")) - return "image/png"; - else if (imageFormat.equalsIgnoreCase("JPEG") || imageFormat.equalsIgnoreCase("JPG")) - return "image/jpeg"; - else if (imageFormat.equalsIgnoreCase("GIF")) - return "image/gif"; - else - return "application/octet-stream"; - } - - @GetMapping("/pdf-to-img") - public String pdfToimgForm(Model model) { - model.addAttribute("currentPage", "pdf-to-img"); - return "convert/pdf-to-img"; - } - -} +package stirling.software.SPDF.controller.api.converters; + +import java.io.IOException; + +import org.apache.pdfbox.rendering.ImageType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import 
org.springframework.core.io.ByteArrayResource; +import org.springframework.core.io.Resource; +import org.springframework.http.HttpHeaders; +import org.springframework.http.HttpStatus; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; + +@RestController +public class ConvertImgPDFController { + + private static final Logger logger = LoggerFactory.getLogger(ConvertImgPDFController.class); + + @PostMapping(consumes = "multipart/form-data", value = "/pdf-to-img") + public ResponseEntity convertToImage(@RequestPart(required = true, value = "fileInput") MultipartFile file, @RequestParam("imageFormat") String imageFormat, + @RequestParam("singleOrMultiple") String singleOrMultiple, @RequestParam("colorType") String colorType, @RequestParam("dpi") String dpi) throws IOException { + + byte[] pdfBytes = file.getBytes(); + ImageType colorTypeResult = ImageType.RGB; + if ("greyscale".equals(colorType)) { + colorTypeResult = ImageType.GRAY; + } else if ("blackwhite".equals(colorType)) { + colorTypeResult = ImageType.BINARY; + } + // returns bytes for image + boolean singleImage = singleOrMultiple.equals("single"); + byte[] result = null; + try { + result = PdfUtils.convertFromPdf(pdfBytes, imageFormat.toUpperCase(), colorTypeResult, singleImage, Integer.valueOf(dpi)); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (Exception e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + if (singleImage) { + HttpHeaders headers = new HttpHeaders(); + headers.setContentType(MediaType.parseMediaType(getMediaType(imageFormat))); + 
ResponseEntity response = new ResponseEntity<>(new ByteArrayResource(result), headers, HttpStatus.OK); + return response; + } else { + ByteArrayResource resource = new ByteArrayResource(result); + // return the Resource in the response + return ResponseEntity.ok() + .header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_convertedToImages.zip") + .contentType(MediaType.APPLICATION_OCTET_STREAM).contentLength(resource.contentLength()).body(resource); + } + } + + @PostMapping(consumes = "multipart/form-data", value = "/img-to-pdf") + public ResponseEntity convertToPdf(@RequestPart(required = true, value = "fileInput") MultipartFile[] file, + @RequestParam(defaultValue = "false", name = "stretchToFit") boolean stretchToFit, @RequestParam(defaultValue = "true", name = "autoRotate") boolean autoRotate) + throws IOException { + // Convert the file to PDF and get the resulting bytes + System.out.println(stretchToFit); + byte[] bytes = PdfUtils.imageToPdf(file, stretchToFit, autoRotate); + return PdfUtils.bytesToWebResponse(bytes, file[0].getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_coverted.pdf"); + } + + private String getMediaType(String imageFormat) { + if (imageFormat.equalsIgnoreCase("PNG")) + return "image/png"; + else if (imageFormat.equalsIgnoreCase("JPEG") || imageFormat.equalsIgnoreCase("JPG")) + return "image/jpeg"; + else if (imageFormat.equalsIgnoreCase("GIF")) + return "image/gif"; + else + return "application/octet-stream"; + } + + +} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java similarity index 79% rename from src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java rename to src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java index 03f04acd..9cec9a46 100644 
--- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertOfficeController.java @@ -1,4 +1,4 @@ -package stirling.software.SPDF.controller.converters; +package stirling.software.SPDF.controller.api.converters; import java.io.IOException; import java.nio.file.Files; @@ -10,17 +10,15 @@ import java.util.List; import org.apache.commons.io.FilenameUtils; import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; -@Controller +@RestController public class ConvertOfficeController { public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException { @@ -50,20 +48,13 @@ public class ConvertOfficeController { return pdfBytes; } - - @GetMapping("/file-to-pdf") - public String convertToPdfForm(Model model) { - model.addAttribute("currentPage", "file-to-pdf"); - return "convert/file-to-pdf"; - } - private boolean isValidFileExtension(String fileExtension) { String extensionPattern = "^(?i)[a-z0-9]{2,4}$"; return fileExtension.matches(extensionPattern); } - @PostMapping("/file-to-pdf") - public ResponseEntity processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException { + @PostMapping(consumes = "multipart/form-data", value = "/file-to-pdf") + public ResponseEntity processPdfWithOCR(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile) 
throws IOException, InterruptedException { // unused but can start server instance if startup time is to long // LibreOfficeListener.getInstance().start(); diff --git a/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java new file mode 100644 index 00000000..29d16495 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToOffice.java @@ -0,0 +1,52 @@ +package stirling.software.SPDF.controller.api.converters; + +import java.io.IOException; + +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PDFToFile; + +@RestController +public class ConvertPDFToOffice { + + + + @PostMapping(consumes = "multipart/form-data", value = "/pdf-to-html") + public ResponseEntity processPdfToHTML(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile) throws IOException, InterruptedException { + PDFToFile pdfToFile = new PDFToFile(); + return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import"); + } + + @PostMapping(consumes = "multipart/form-data", value = "/pdf-to-presentation") + public ResponseEntity processPdfToPresentation(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile, + @RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException { + PDFToFile pdfToFile = new PDFToFile(); + return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "impress_pdf_import"); + } + + @PostMapping(consumes = "multipart/form-data", value = "/pdf-to-text") + public ResponseEntity 
processPdfToRTForTXT(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile, + @RequestParam("outputFormat") String outputFormat) throws IOException, InterruptedException { + PDFToFile pdfToFile = new PDFToFile(); + return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); + } + + @PostMapping(consumes = "multipart/form-data", value = "/pdf-to-word") + public ResponseEntity processPdfToWord(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat) + throws IOException, InterruptedException { + PDFToFile pdfToFile = new PDFToFile(); + return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); + } + + @PostMapping(consumes = "multipart/form-data", value = "/pdf-to-xml") + public ResponseEntity processPdfToXML(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile) throws IOException, InterruptedException { + PDFToFile pdfToFile = new PDFToFile(); + return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import"); + } + +} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java similarity index 61% rename from src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java rename to src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java index 2a9af96d..e36b15c6 100644 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToPDFA.java +++ b/src/main/java/stirling/software/SPDF/controller/api/converters/ConvertPDFToPDFA.java @@ -1,4 +1,4 @@ -package stirling.software.SPDF.controller.converters; +package stirling.software.SPDF.controller.api.converters; import java.io.IOException; import java.nio.file.Files; @@ -6,23 +6,20 @@ import java.nio.file.Path; import java.util.ArrayList; import 
java.util.List; -import org.springframework.http.HttpHeaders; -import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; +import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; -@Controller +@RestController public class ConvertPDFToPDFA { - @PostMapping("/pdf-to-pdfa") - public ResponseEntity pdfToPdfA(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException { + @PostMapping(consumes = "multipart/form-data", value = "/pdf-to-pdfa") + public ResponseEntity pdfToPdfA(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile) throws IOException, InterruptedException { // Save the uploaded file to a temporary location Path tempInputFile = Files.createTempFile("input_", ".pdf"); @@ -52,16 +49,7 @@ public class ConvertPDFToPDFA { // Return the optimized PDF as a response String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_PDFA.pdf"; - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_PDF); - headers.setContentDispositionFormData("attachment", outputFilename); - return ResponseEntity.ok().headers(headers).body(pdfBytes); - } - - @GetMapping("/pdf-to-pdfa") - public String pdfToPdfAForm(Model model) { - model.addAttribute("currentPage", "pdf-to-pdfa"); - return "convert/pdf-to-pdfa"; + return PdfUtils.bytesToWebResponse(pdfBytes, outputFilename); } } diff --git 
a/src/main/java/stirling/software/SPDF/controller/other/CompressController.java b/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java similarity index 71% rename from src/main/java/stirling/software/SPDF/controller/other/CompressController.java rename to src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java index 9f35ad57..6a9a0d8d 100644 --- a/src/main/java/stirling/software/SPDF/controller/other/CompressController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/CompressController.java @@ -1,87 +1,76 @@ -package stirling.software.SPDF.controller.other; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.List; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.http.HttpHeaders; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.ProcessExecutor; - -@Controller -public class CompressController { - - private static final Logger logger = LoggerFactory.getLogger(CompressController.class); - - @GetMapping("/compress-pdf") - public String compressPdfForm(Model model) { - model.addAttribute("currentPage", "compress-pdf"); - return "other/compress-pdf"; - } - - @PostMapping("/compress-pdf") - public ResponseEntity optimizePdf(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("optimizeLevel") int optimizeLevel, - @RequestParam(name = "fastWebView", required = false) Boolean fastWebView, @RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) - 
throws IOException, InterruptedException { - - // Save the uploaded file to a temporary location - Path tempInputFile = Files.createTempFile("input_", ".pdf"); - inputFile.transferTo(tempInputFile.toFile()); - - // Prepare the output file path - Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - - // Prepare the OCRmyPDF command - List command = new ArrayList<>(); - command.add("ocrmypdf"); - command.add("--skip-text"); - command.add("--tesseract-timeout=0"); - command.add("--optimize"); - command.add(String.valueOf(optimizeLevel)); - command.add("--output-type"); - command.add("pdf"); - - if (fastWebView != null && fastWebView) { - long fileSize = inputFile.getSize(); - long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size - command.add("--fast-web-view"); - command.add(String.valueOf(fastWebViewSize)); - } - - if (jbig2Lossy != null && jbig2Lossy) { - command.add("--jbig2-lossy"); - } - - command.add(tempInputFile.toString()); - command.add(tempOutputFile.toString()); - - int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); - - // Read the optimized PDF file - byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - - // Clean up the temporary files - Files.delete(tempInputFile); - Files.delete(tempOutputFile); - - // Return the optimized PDF as a response - String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf"; - HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_PDF); - headers.setContentDispositionFormData("attachment", outputFilename); - return ResponseEntity.ok().headers(headers).body(pdfBytes); - } - -} +package stirling.software.SPDF.controller.api.other; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import 
org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; +import stirling.software.SPDF.utils.ProcessExecutor; + +@RestController +public class CompressController { + + private static final Logger logger = LoggerFactory.getLogger(CompressController.class); + + @PostMapping(consumes = "multipart/form-data", value = "/compress-pdf") + public ResponseEntity optimizePdf(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile, @RequestParam("optimizeLevel") int optimizeLevel, + @RequestParam(name = "fastWebView", required = false) Boolean fastWebView, @RequestParam(name = "jbig2Lossy", required = false) Boolean jbig2Lossy) + throws IOException, InterruptedException { + + // Save the uploaded file to a temporary location + Path tempInputFile = Files.createTempFile("input_", ".pdf"); + inputFile.transferTo(tempInputFile.toFile()); + + // Prepare the output file path + Path tempOutputFile = Files.createTempFile("output_", ".pdf"); + + // Prepare the OCRmyPDF command + List command = new ArrayList<>(); + command.add("ocrmypdf"); + command.add("--skip-text"); + command.add("--tesseract-timeout=0"); + command.add("--optimize"); + command.add(String.valueOf(optimizeLevel)); + command.add("--output-type"); + command.add("pdf"); + + if (fastWebView != null && fastWebView) { + long fileSize = inputFile.getSize(); + long fastWebViewSize = (long) (fileSize * 1.25); // 25% higher than file size + command.add("--fast-web-view"); + command.add(String.valueOf(fastWebViewSize)); + } + + if (jbig2Lossy != null && jbig2Lossy) { + command.add("--jbig2-lossy"); + } + + command.add(tempInputFile.toString()); + 
command.add(tempOutputFile.toString()); + + int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); + + // Read the optimized PDF file + byte[] pdfBytes = Files.readAllBytes(tempOutputFile); + + // Clean up the temporary files + Files.delete(tempInputFile); + Files.delete(tempOutputFile); + + // Return the optimized PDF as a response + String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_Optimized.pdf"; + return PdfUtils.bytesToWebResponse(pdfBytes, outputFilename); + } + +} diff --git a/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImageScansController.java b/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImageScansController.java new file mode 100644 index 00000000..987970ba --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImageScansController.java @@ -0,0 +1,130 @@ +package stirling.software.SPDF.controller.api.other; + +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import javax.imageio.ImageIO; + +import org.apache.commons.io.FileUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.MediaType; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import 
org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; +import stirling.software.SPDF.utils.ProcessExecutor; + +@RestController +public class ExtractImageScansController { + + private static final Logger logger = LoggerFactory.getLogger(ExtractImageScansController.class); + + @PostMapping(consumes = "multipart/form-data", value = "/extract-image-scans") + public ResponseEntity extractImageScans(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile, + @RequestParam(name = "angle_threshold", defaultValue = "5") int angleThreshold, @RequestParam(name = "tolerance", defaultValue = "20") int tolerance, + @RequestParam(name = "min_area", defaultValue = "8000") int minArea, @RequestParam(name = "min_contour_area", defaultValue = "500") int minContourArea, + @RequestParam(name = "border_size", defaultValue = "1") int borderSize) throws IOException, InterruptedException { + + String fileName = inputFile.getOriginalFilename(); + String extension = fileName.substring(fileName.lastIndexOf(".") + 1); + + List images = new ArrayList<>(); + + // Check if input file is a PDF + if (extension.equalsIgnoreCase("pdf")) { + // Load PDF document + try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputFile.getBytes()))) { + PDFRenderer pdfRenderer = new PDFRenderer(document); + int pageCount = document.getNumberOfPages(); + images = new ArrayList<>(); + + // Create images of all pages + for (int i = 0; i < pageCount; i++) { + // Create temp file to save the image + Path tempFile = Files.createTempFile("image_", ".png"); + + // Render image and save as temp file + BufferedImage image = pdfRenderer.renderImageWithDPI(i, 300); + ImageIO.write(image, "png", tempFile.toFile()); + + // Add temp file path to images list + images.add(tempFile.toString()); + } + } + } else { + Path tempInputFile = Files.createTempFile("input_", "." 
+ extension); + Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING); + // Add input file path to images list + images.add(tempInputFile.toString()); + } + + List processedImageBytes = new ArrayList<>(); + + // Process each image + for (int i = 0; i < images.size(); i++) { + + Path tempDir = Files.createTempDirectory("openCV_output"); + List command = new ArrayList<>(Arrays.asList("python3", "/scripts/split_photos.py", images.get(i), tempDir.toString(), String.valueOf(angleThreshold), + String.valueOf(tolerance), String.valueOf(minArea), String.valueOf(minContourArea), String.valueOf(borderSize))); + + // Run CLI command + int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.PYTHON_OPENCV).runCommandWithOutputHandling(command); + + // Read the output photos in temp directory + List tempOutputFiles = Files.list(tempDir).sorted().collect(Collectors.toList()); + for (Path tempOutputFile : tempOutputFiles) { + byte[] imageBytes = Files.readAllBytes(tempOutputFile); + processedImageBytes.add(imageBytes); + } + // Clean up the temporary directory + FileUtils.deleteDirectory(tempDir.toFile()); + } + + // Create zip file if multiple images + if (processedImageBytes.size() > 1) { + String outputZipFilename = fileName.replaceFirst("[.][^.]+$", "") + "_processed.zip"; + Path tempZipFile = Files.createTempFile("output_", ".zip"); + + try (ZipOutputStream zipOut = new ZipOutputStream(new FileOutputStream(tempZipFile.toFile()))) { + // Add processed images to the zip + for (int i = 0; i < processedImageBytes.size(); i++) { + ZipEntry entry = new ZipEntry(fileName.replaceFirst("[.][^.]+$", "") + "_" + (i + 1) + ".png"); + zipOut.putNextEntry(entry); + zipOut.write(processedImageBytes.get(i)); + zipOut.closeEntry(); + } + } + + byte[] zipBytes = Files.readAllBytes(tempZipFile); + + // Clean up the temporary zip file + Files.delete(tempZipFile); + + return PdfUtils.bytesToWebResponse(zipBytes, outputZipFilename, 
MediaType.APPLICATION_OCTET_STREAM); + } else { + // Return the processed image as a response + byte[] imageBytes = processedImageBytes.get(0); + return PdfUtils.bytesToWebResponse(imageBytes, fileName.replaceFirst("[.][^.]+$", "") + ".png", MediaType.IMAGE_PNG); + } + + } + +} diff --git a/src/main/java/stirling/software/SPDF/controller/other/ExtractImagesController.java b/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImagesController.java similarity index 71% rename from src/main/java/stirling/software/SPDF/controller/other/ExtractImagesController.java rename to src/main/java/stirling/software/SPDF/controller/api/other/ExtractImagesController.java index 10c2f1f5..d935bd39 100644 --- a/src/main/java/stirling/software/SPDF/controller/other/ExtractImagesController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/ExtractImagesController.java @@ -1,4 +1,4 @@ -package stirling.software.SPDF.controller.other; +package stirling.software.SPDF.controller.api.other; import java.awt.Graphics2D; import java.awt.Image; @@ -18,26 +18,23 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.core.io.ByteArrayResource; -import org.springframework.core.io.Resource; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; -@Controller +import 
stirling.software.SPDF.utils.PdfUtils; + +@RestController public class ExtractImagesController { private static final Logger logger = LoggerFactory.getLogger(ExtractImagesController.class); - @PostMapping("/extract-images") - public ResponseEntity extractImages(@RequestParam("fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException { + @PostMapping(consumes = "multipart/form-data", value = "/extract-images") + public ResponseEntity extractImages(@RequestPart(required = true, value = "fileInput") MultipartFile file, @RequestParam("format") String format) throws IOException { System.out.println(System.currentTimeMillis() + "file=" + file.getName() + ", format=" + format); PDDocument document = PDDocument.load(file.getBytes()); @@ -98,24 +95,8 @@ public class ExtractImagesController { // Create ByteArrayResource from byte array byte[] zipContents = baos.toByteArray(); - ByteArrayResource resource = new ByteArrayResource(zipContents); - // Set content disposition header to indicate that the response should be - // downloaded as a file - HttpHeaders headers = new HttpHeaders(); - headers.setContentLength(zipContents.length); - headers.add(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip"); - - // Return ResponseEntity with ByteArrayResource and headers - return ResponseEntity.status(HttpStatus.OK).headers(headers) - - .header("Cache-Control", "no-cache").contentType(MediaType.APPLICATION_OCTET_STREAM).body(resource); - } - - @GetMapping("/extract-images") - public String extractImagesForm(Model model) { - model.addAttribute("currentPage", "extract-images"); - return "other/extract-images"; + return PdfUtils.boasToWebResponse(baos, file.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_extracted-images.zip", MediaType.APPLICATION_OCTET_STREAM); } } diff --git a/src/main/java/stirling/software/SPDF/controller/other/MetadataController.java 
b/src/main/java/stirling/software/SPDF/controller/api/other/MetadataController.java similarity index 91% rename from src/main/java/stirling/software/SPDF/controller/other/MetadataController.java rename to src/main/java/stirling/software/SPDF/controller/api/other/MetadataController.java index c139b885..42309c39 100644 --- a/src/main/java/stirling/software/SPDF/controller/other/MetadataController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/MetadataController.java @@ -1,4 +1,4 @@ -package stirling.software.SPDF.controller.other; +package stirling.software.SPDF.controller.api.other; import java.io.IOException; import java.text.ParseException; @@ -11,23 +11,17 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentInformation; import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; import stirling.software.SPDF.utils.PdfUtils; -@Controller +@RestController public class MetadataController { - @GetMapping("/change-metadata") - public String addWatermarkForm(Model model) { - model.addAttribute("currentPage", "change-metadata"); - return "other/change-metadata"; - } private String checkUndefined(String entry) { // Check if the string is "undefined" @@ -40,8 +34,8 @@ public class MetadataController { } - @PostMapping("/update-metadata") - public ResponseEntity metadata(@RequestParam("fileInput") MultipartFile pdfFile, + @PostMapping(consumes = "multipart/form-data", value = "/update-metadata") + public ResponseEntity 
metadata(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile, @RequestParam(value = "deleteAll", required = false, defaultValue = "false") Boolean deleteAll, @RequestParam(value = "author", required = false) String author, @RequestParam(value = "creationDate", required = false) String creationDate, @RequestParam(value = "creator", required = false) String creator, @RequestParam(value = "keywords", required = false) String keywords, @RequestParam(value = "modificationDate", required = false) String modificationDate, diff --git a/src/main/java/stirling/software/SPDF/controller/other/OCRController.java b/src/main/java/stirling/software/SPDF/controller/api/other/OCRController.java similarity index 66% rename from src/main/java/stirling/software/SPDF/controller/other/OCRController.java rename to src/main/java/stirling/software/SPDF/controller/api/other/OCRController.java index a18adc76..5c7f2553 100644 --- a/src/main/java/stirling/software/SPDF/controller/other/OCRController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/OCRController.java @@ -1,4 +1,4 @@ -package stirling.software.SPDF.controller.other; +package stirling.software.SPDF.controller.api.other; import java.io.File; import java.io.FileOutputStream; @@ -10,26 +10,24 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.http.HttpHeaders; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestParam; +import 
org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; import org.springframework.web.multipart.MultipartFile; -import org.springframework.web.servlet.ModelAndView; +import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; -@Controller +@RestController public class OCRController { private static final Logger logger = LoggerFactory.getLogger(OCRController.class); @@ -44,28 +42,29 @@ public class OCRController { .filter(lang -> !lang.equalsIgnoreCase("osd")).collect(Collectors.toList()); } - @GetMapping("/ocr-pdf") - public ModelAndView ocrPdfPage() { - ModelAndView modelAndView = new ModelAndView("other/ocr-pdf"); - modelAndView.addObject("languages", getAvailableTesseractLanguages()); - modelAndView.addObject("currentPage", "ocr-pdf"); - return modelAndView; - } - - @PostMapping("/ocr-pdf") - public ResponseEntity processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("languages") List selectedLanguages, - @RequestParam(name = "sidecar", required = false) Boolean sidecar, @RequestParam(name = "deskew", required = false) Boolean deskew, - @RequestParam(name = "clean", required = false) Boolean clean, @RequestParam(name = "clean-final", required = false) Boolean cleanFinal, - @RequestParam(name = "ocrType", required = false) String ocrType) throws IOException, InterruptedException { + @PostMapping(consumes = "multipart/form-data", value = "/ocr-pdf") + public ResponseEntity processPdfWithOCR(@RequestPart(required = true, value = "fileInput") MultipartFile inputFile, + @RequestParam("languages") List selectedLanguages, @RequestParam(name = "sidecar", required = false) Boolean sidecar, + @RequestParam(name = "deskew", required = false) Boolean deskew, @RequestParam(name = "clean", required = false) Boolean clean, + @RequestParam(name = "clean-final", required = false) Boolean cleanFinal, @RequestParam(name = "ocrType", required = false) 
String ocrType, + @RequestParam(name = "ocrRenderType", required = false, defaultValue = "hocr") String ocrRenderType, + @RequestParam(name = "removeImagesAfter", required = false) Boolean removeImagesAfter) + throws IOException, InterruptedException { // --output-type pdfa - if (selectedLanguages == null || selectedLanguages.size() < 1) { + if (selectedLanguages == null || selectedLanguages.isEmpty()) { throw new IOException("Please select at least one language."); } + + if(!ocrRenderType.equals("hocr") && !ocrRenderType.equals("sandwich")) { + throw new IOException("ocrRenderType wrong"); + } + + // Get available Tesseract languages + List availableLanguages = getAvailableTesseractLanguages(); - // Validate and sanitize selected languages using regex - String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes - selectedLanguages = selectedLanguages.stream().filter(lang -> Pattern.matches(languagePattern, lang)).collect(Collectors.toList()); + // Validate selected languages + selectedLanguages = selectedLanguages.stream().filter(availableLanguages::contains).toList(); if (selectedLanguages.isEmpty()) { throw new IOException("None of the selected languages are valid."); @@ -83,7 +82,8 @@ public class OCRController { // Run OCR Command String languageOption = String.join("+", selectedLanguages); - List command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf")); + + List command = new ArrayList<>(Arrays.asList("ocrmypdf", "--verbose", "2", "--output-type", "pdf", "--pdf-renderer" , ocrRenderType)); if (sidecar != null && sidecar) { sidecarTextPath = Files.createTempFile("sidecar", ".txt"); @@ -115,16 +115,27 @@ public class OCRController { // Run CLI command int returnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.OCR_MY_PDF).runCommandWithOutputHandling(command); + + + + + // Remove images from the OCR processed PDF if the flag is set to true + if (removeImagesAfter != null && 
removeImagesAfter) { + Path tempPdfWithoutImages = Files.createTempFile("output_", "_no_images.pdf"); + + List gsCommand = Arrays.asList("gs", "-sDEVICE=pdfwrite", "-dFILTERIMAGE", "-o", tempPdfWithoutImages.toString(), tempOutputFile.toString()); + + int gsReturnCode = ProcessExecutor.getInstance(ProcessExecutor.Processes.GHOSTSCRIPT).runCommandWithOutputHandling(gsCommand); + tempOutputFile = tempPdfWithoutImages; + } // Read the OCR processed PDF file byte[] pdfBytes = Files.readAllBytes(tempOutputFile); - // Clean up the temporary files Files.delete(tempInputFile); + // Return the OCR processed PDF as a response String outputFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.pdf"; - HttpHeaders headers = new HttpHeaders(); - if (sidecar != null && sidecar) { // Create a zip file containing both the PDF and the text file String outputZipFilename = inputFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_OCR.zip"; @@ -152,15 +163,11 @@ public class OCRController { Files.delete(sidecarTextPath); // Return the zip file containing both the PDF and the text file - headers.setContentType(MediaType.APPLICATION_OCTET_STREAM); - headers.setContentDispositionFormData("attachment", outputZipFilename); - return ResponseEntity.ok().headers(headers).body(zipBytes); + return PdfUtils.bytesToWebResponse(pdfBytes, outputZipFilename, MediaType.APPLICATION_OCTET_STREAM); } else { // Return the OCR processed PDF as a response Files.delete(tempOutputFile); - headers.setContentType(MediaType.APPLICATION_PDF); - headers.setContentDispositionFormData("attachment", outputFilename); - return ResponseEntity.ok().headers(headers).body(pdfBytes); + return PdfUtils.bytesToWebResponse(pdfBytes, outputFilename); } } diff --git a/src/main/java/stirling/software/SPDF/controller/other/OverlayImageController.java b/src/main/java/stirling/software/SPDF/controller/api/other/OverlayImageController.java similarity index 59% rename from 
src/main/java/stirling/software/SPDF/controller/other/OverlayImageController.java rename to src/main/java/stirling/software/SPDF/controller/api/other/OverlayImageController.java index bf0f2283..4b0d4017 100644 --- a/src/main/java/stirling/software/SPDF/controller/other/OverlayImageController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/other/OverlayImageController.java @@ -1,43 +1,36 @@ -package stirling.software.SPDF.controller.other; - -import java.io.IOException; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.http.HttpStatus; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class OverlayImageController { - - private static final Logger logger = LoggerFactory.getLogger(OverlayImageController.class); - - @GetMapping("/add-image") - public String overlayImage(Model model) { - model.addAttribute("currentPage", "add-image"); - return "other/add-image"; - } - - @PostMapping("/add-image") - public ResponseEntity overlayImage(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("fileInput2") MultipartFile imageFile, @RequestParam("x") float x, - @RequestParam("y") float y, @RequestParam("everyPage") boolean everyPage) { - try { - byte[] pdfBytes = pdfFile.getBytes(); - byte[] imageBytes = imageFile.getBytes(); - byte[] result = PdfUtils.overlayImage(pdfBytes, imageBytes, x, y, everyPage); - - return PdfUtils.bytesToWebResponse(result, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_overlayed.pdf"); - } catch (IOException e) { - logger.error("Failed to add image to PDF", e); - return new 
ResponseEntity<>(HttpStatus.BAD_REQUEST); - } - } -} +package stirling.software.SPDF.controller.api.other; + +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.HttpStatus; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; + +@RestController +public class OverlayImageController { + + private static final Logger logger = LoggerFactory.getLogger(OverlayImageController.class); + + @PostMapping(consumes = "multipart/form-data", value = "/add-image") + public ResponseEntity overlayImage(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile, @RequestParam("fileInput2") MultipartFile imageFile, + @RequestParam("x") float x, @RequestParam("y") float y, @RequestParam("everyPage") boolean everyPage) { + try { + byte[] pdfBytes = pdfFile.getBytes(); + byte[] imageBytes = imageFile.getBytes(); + byte[] result = PdfUtils.overlayImage(pdfBytes, imageBytes, x, y, everyPage); + + return PdfUtils.bytesToWebResponse(result, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_overlayed.pdf"); + } catch (IOException e) { + logger.error("Failed to add image to PDF", e); + return new ResponseEntity<>(HttpStatus.BAD_REQUEST); + } + } +} diff --git a/src/main/java/stirling/software/SPDF/controller/security/PasswordController.java b/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java similarity index 63% rename from src/main/java/stirling/software/SPDF/controller/security/PasswordController.java rename to src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java index 
f86bcc06..3cbe9965 100644 --- a/src/main/java/stirling/software/SPDF/controller/security/PasswordController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/security/PasswordController.java @@ -1,81 +1,66 @@ -package stirling.software.SPDF.controller.security; - -import java.io.IOException; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.encryption.AccessPermission; -import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.PdfUtils; - -@Controller -public class PasswordController { - - private static final Logger logger = LoggerFactory.getLogger(PasswordController.class); - - @GetMapping("/add-password") - public String addPasswordForm(Model model) { - model.addAttribute("currentPage", "add-password"); - return "security/add-password"; - } - - @PostMapping("/remove-password") - public ResponseEntity compressPDF(@RequestParam("fileInput") MultipartFile fileInput, @RequestParam(name = "password") String password) throws IOException { - PDDocument document = PDDocument.load(fileInput.getBytes(), password); - document.setAllSecurityToBeRemoved(true); - return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_password_removed.pdf"); - } - - @PostMapping("/add-password") - public ResponseEntity compressPDF(@RequestParam("fileInput") MultipartFile fileInput, @RequestParam(defaultValue = "", name = "password") String password, - @RequestParam(defaultValue = "128", name = "keyLength") int 
keyLength, @RequestParam(defaultValue = "false", name = "canAssembleDocument") boolean canAssembleDocument, - @RequestParam(defaultValue = "false", name = "canExtractContent") boolean canExtractContent, - @RequestParam(defaultValue = "false", name = "canExtractForAccessibility") boolean canExtractForAccessibility, - @RequestParam(defaultValue = "false", name = "canFillInForm") boolean canFillInForm, @RequestParam(defaultValue = "false", name = "canModify") boolean canModify, - @RequestParam(defaultValue = "false", name = "canModifyAnnotations") boolean canModifyAnnotations, - @RequestParam(defaultValue = "false", name = "canPrint") boolean canPrint, @RequestParam(defaultValue = "false", name = "canPrintFaithful") boolean canPrintFaithful) - throws IOException { - - PDDocument document = PDDocument.load(fileInput.getBytes()); - AccessPermission ap = new AccessPermission(); - - ap.setCanAssembleDocument(!canAssembleDocument); - ap.setCanExtractContent(!canExtractContent); - ap.setCanExtractForAccessibility(!canExtractForAccessibility); - ap.setCanFillInForm(!canFillInForm); - ap.setCanModify(!canModify); - ap.setCanModifyAnnotations(!canModifyAnnotations); - ap.setCanPrint(!canPrint); - ap.setCanPrintFaithful(!canPrintFaithful); - StandardProtectionPolicy spp = new StandardProtectionPolicy(password, password, ap); - spp.setEncryptionKeyLength(keyLength); - - spp.setPermissions(ap); - - document.protect(spp); - - return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_passworded.pdf"); - } - - @GetMapping("/change-permissions") - public String permissionsForm(Model model) { - model.addAttribute("currentPage", "change-permissions"); - return "security/change-permissions"; - } - - @GetMapping("/remove-password") - public String removePasswordForm(Model model) { - model.addAttribute("currentPage", "remove-password"); - return "security/remove-password"; - } - -} +package 
stirling.software.SPDF.controller.api.security; + +import java.io.IOException; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.encryption.AccessPermission; +import org.apache.pdfbox.pdmodel.encryption.StandardProtectionPolicy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; + +@RestController +public class PasswordController { + + private static final Logger logger = LoggerFactory.getLogger(PasswordController.class); + + + @PostMapping(consumes = "multipart/form-data", value = "/remove-password") + public ResponseEntity compressPDF(@RequestPart(required = true, value = "fileInput") MultipartFile fileInput, @RequestParam(name = "password") String password) + throws IOException { + PDDocument document = PDDocument.load(fileInput.getBytes(), password); + document.setAllSecurityToBeRemoved(true); + return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_password_removed.pdf"); + } + + @PostMapping(consumes = "multipart/form-data", value = "/add-password") + public ResponseEntity compressPDF(@RequestPart(required = true, value = "fileInput") MultipartFile fileInput, + @RequestParam(defaultValue = "", name = "password") String password, @RequestParam(defaultValue = "128", name = "keyLength") int keyLength, + @RequestParam(defaultValue = "false", name = "canAssembleDocument") boolean canAssembleDocument, + @RequestParam(defaultValue = "false", name = "canExtractContent") boolean canExtractContent, + @RequestParam(defaultValue = "false", name = "canExtractForAccessibility") 
boolean canExtractForAccessibility, + @RequestParam(defaultValue = "false", name = "canFillInForm") boolean canFillInForm, @RequestParam(defaultValue = "false", name = "canModify") boolean canModify, + @RequestParam(defaultValue = "false", name = "canModifyAnnotations") boolean canModifyAnnotations, + @RequestParam(defaultValue = "false", name = "canPrint") boolean canPrint, @RequestParam(defaultValue = "false", name = "canPrintFaithful") boolean canPrintFaithful) + throws IOException { + + PDDocument document = PDDocument.load(fileInput.getBytes()); + AccessPermission ap = new AccessPermission(); + + ap.setCanAssembleDocument(!canAssembleDocument); + ap.setCanExtractContent(!canExtractContent); + ap.setCanExtractForAccessibility(!canExtractForAccessibility); + ap.setCanFillInForm(!canFillInForm); + ap.setCanModify(!canModify); + ap.setCanModifyAnnotations(!canModifyAnnotations); + ap.setCanPrint(!canPrint); + ap.setCanPrintFaithful(!canPrintFaithful); + StandardProtectionPolicy spp = new StandardProtectionPolicy(password, password, ap); + spp.setEncryptionKeyLength(keyLength); + + spp.setPermissions(ap); + + document.protect(spp); + + return PdfUtils.pdfDocToWebResponse(document, fileInput.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_passworded.pdf"); + } + + +} diff --git a/src/main/java/stirling/software/SPDF/controller/security/WatermarkController.java b/src/main/java/stirling/software/SPDF/controller/api/security/WatermarkController.java similarity index 83% rename from src/main/java/stirling/software/SPDF/controller/security/WatermarkController.java rename to src/main/java/stirling/software/SPDF/controller/api/security/WatermarkController.java index 74c952a9..8f9203b1 100644 --- a/src/main/java/stirling/software/SPDF/controller/security/WatermarkController.java +++ b/src/main/java/stirling/software/SPDF/controller/api/security/WatermarkController.java @@ -1,150 +1,140 @@ -package stirling.software.SPDF.controller.security; - -import java.awt.Color; 
-import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.pdfbox.pdmodel.PDDocument; -import org.apache.pdfbox.pdmodel.PDDocumentCatalog; -import org.apache.pdfbox.pdmodel.PDPage; -import org.apache.pdfbox.pdmodel.PDPageContentStream; -import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.font.PDType1Font; -import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; -import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; -import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; -import org.apache.pdfbox.pdmodel.interactive.form.PDField; -import org.apache.pdfbox.util.Matrix; -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.ui.Model; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import org.springframework.web.multipart.MultipartFile; - -import stirling.software.SPDF.utils.PdfUtils; -import stirling.software.SPDF.utils.WatermarkRemover; - -@Controller -public class WatermarkController { - - @PostMapping("/add-watermark") - public ResponseEntity addWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText, - @RequestParam(defaultValue = "30", name = "fontSize") float fontSize, @RequestParam(defaultValue = "0", name = "rotation") float rotation, - @RequestParam(defaultValue = "0.5", name = "opacity") float opacity, @RequestParam(defaultValue = "50", name = "widthSpacer") int widthSpacer, - @RequestParam(defaultValue = "50", name = "heightSpacer") int heightSpacer) throws IOException { - - // Load the input PDF - PDDocument document = PDDocument.load(pdfFile.getInputStream()); - - // Create a page in the document - for (PDPage 
page : document.getPages()) { - - // Get the page's content stream - PDPageContentStream contentStream = new PDPageContentStream(document, page, PDPageContentStream.AppendMode.APPEND, true); - - // Set transparency - PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState(); - graphicsState.setNonStrokingAlphaConstant(opacity); - contentStream.setGraphicsStateParameters(graphicsState); - - // Set font of watermark - PDFont font = PDType1Font.HELVETICA_BOLD; - contentStream.beginText(); - contentStream.setFont(font, fontSize); - contentStream.setNonStrokingColor(Color.LIGHT_GRAY); - - // Set size and location of watermark - float pageWidth = page.getMediaBox().getWidth(); - float pageHeight = page.getMediaBox().getHeight(); - float watermarkWidth = widthSpacer + font.getStringWidth(watermarkText) * fontSize / 1000; - float watermarkHeight = heightSpacer + fontSize; - int watermarkRows = (int) (pageHeight / watermarkHeight + 1); - int watermarkCols = (int) (pageWidth / watermarkWidth + 1); - - // Add the watermark text - for (int i = 0; i < watermarkRows; i++) { - for (int j = 0; j < watermarkCols; j++) { - contentStream.setTextMatrix(Matrix.getRotateInstance((float) Math.toRadians(rotation), j * watermarkWidth, i * watermarkHeight)); - contentStream.showTextWithPositioning(new Object[] { watermarkText }); - } - } - - contentStream.endText(); - - // Close the content stream - contentStream.close(); - } - return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf"); - } - - @GetMapping("/add-watermark") - public String addWatermarkForm(Model model) { - model.addAttribute("currentPage", "add-watermark"); - return "security/add-watermark"; - } - - @PostMapping("/remove-watermark") - public ResponseEntity removeWatermark(@RequestParam("fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText) throws Exception { - - // Load the input PDF - PDDocument document = 
PDDocument.load(pdfFile.getInputStream()); - - // Create a new PDF document for the output - PDDocument outputDocument = new PDDocument(); - - // Loop through the pages - int numPages = document.getNumberOfPages(); - for (int i = 0; i < numPages; i++) { - PDPage page = document.getPage(i); - - // Process the content stream to remove the watermark text - WatermarkRemover editor = new WatermarkRemover(watermarkText) { - }; - editor.processPage(page); - editor.processPage(page); - // Add the page to the output document - outputDocument.addPage(page); - } - - for (PDPage page : outputDocument.getPages()) { - List annotations = page.getAnnotations(); - List annotationsToRemove = new ArrayList<>(); - - for (PDAnnotation annotation : annotations) { - if (annotation instanceof PDAnnotationMarkup) { - PDAnnotationMarkup markup = (PDAnnotationMarkup) annotation; - String contents = markup.getContents(); - if (contents != null && contents.contains(watermarkText)) { - annotationsToRemove.add(markup); - } - } - } - - annotations.removeAll(annotationsToRemove); - } - PDDocumentCatalog catalog = outputDocument.getDocumentCatalog(); - PDAcroForm acroForm = catalog.getAcroForm(); - if (acroForm != null) { - List fields = acroForm.getFields(); - for (PDField field : fields) { - String fieldValue = field.getValueAsString(); - if (fieldValue.contains(watermarkText)) { - field.setValue(fieldValue.replace(watermarkText, "")); - } - } - } - - return PdfUtils.pdfDocToWebResponse(outputDocument, "removed.pdf"); - } - - @GetMapping("/remove-watermark") - public String removeWatermarkForm(Model model) { - model.addAttribute("currentPage", "remove-watermark"); - return "security/remove-watermark"; - } - -} +package stirling.software.SPDF.controller.api.security; + +import java.awt.Color; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentCatalog; +import 
org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation; +import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationMarkup; +import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm; +import org.apache.pdfbox.pdmodel.interactive.form.PDField; +import org.apache.pdfbox.util.Matrix; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.PostMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RequestPart; +import org.springframework.web.bind.annotation.RestController; +import org.springframework.web.multipart.MultipartFile; + +import stirling.software.SPDF.utils.PdfUtils; +import stirling.software.SPDF.utils.WatermarkRemover; + +@RestController +public class WatermarkController { + + @PostMapping(consumes = "multipart/form-data", value = "/add-watermark") + public ResponseEntity addWatermark(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText, + @RequestParam(defaultValue = "30", name = "fontSize") float fontSize, @RequestParam(defaultValue = "0", name = "rotation") float rotation, + @RequestParam(defaultValue = "0.5", name = "opacity") float opacity, @RequestParam(defaultValue = "50", name = "widthSpacer") int widthSpacer, + @RequestParam(defaultValue = "50", name = "heightSpacer") int heightSpacer) throws IOException { + + // Load the input PDF + PDDocument document = PDDocument.load(pdfFile.getInputStream()); + + // Create a page in the document + for (PDPage page : document.getPages()) { + + // Get the page's content stream + PDPageContentStream contentStream = new PDPageContentStream(document, page, 
PDPageContentStream.AppendMode.APPEND, true); + + // Set transparency + PDExtendedGraphicsState graphicsState = new PDExtendedGraphicsState(); + graphicsState.setNonStrokingAlphaConstant(opacity); + contentStream.setGraphicsStateParameters(graphicsState); + + // Set font of watermark + PDFont font = PDType1Font.HELVETICA_BOLD; + contentStream.beginText(); + contentStream.setFont(font, fontSize); + contentStream.setNonStrokingColor(Color.LIGHT_GRAY); + + // Set size and location of watermark + float pageWidth = page.getMediaBox().getWidth(); + float pageHeight = page.getMediaBox().getHeight(); + float watermarkWidth = widthSpacer + font.getStringWidth(watermarkText) * fontSize / 1000; + float watermarkHeight = heightSpacer + fontSize; + int watermarkRows = (int) (pageHeight / watermarkHeight + 1); + int watermarkCols = (int) (pageWidth / watermarkWidth + 1); + + // Add the watermark text + for (int i = 0; i < watermarkRows; i++) { + for (int j = 0; j < watermarkCols; j++) { + contentStream.setTextMatrix(Matrix.getRotateInstance((float) Math.toRadians(rotation), j * watermarkWidth, i * watermarkHeight)); + contentStream.showTextWithPositioning(new Object[] { watermarkText }); + } + } + + contentStream.endText(); + + // Close the content stream + contentStream.close(); + } + return PdfUtils.pdfDocToWebResponse(document, pdfFile.getOriginalFilename().replaceFirst("[.][^.]+$", "") + "_watermarked.pdf"); + } + + + @PostMapping(consumes = "multipart/form-data", value = "/remove-watermark") + public ResponseEntity removeWatermark(@RequestPart(required = true, value = "fileInput") MultipartFile pdfFile, @RequestParam("watermarkText") String watermarkText) + throws Exception { + + // Load the input PDF + PDDocument document = PDDocument.load(pdfFile.getInputStream()); + + // Create a new PDF document for the output + PDDocument outputDocument = new PDDocument(); + + // Loop through the pages + int numPages = document.getNumberOfPages(); + for (int i = 0; i < numPages; i++) { 
+ PDPage page = document.getPage(i); + + // Process the content stream to remove the watermark text + WatermarkRemover editor = new WatermarkRemover(watermarkText) { + }; + editor.processPage(page); + editor.processPage(page); + // Add the page to the output document + outputDocument.addPage(page); + } + + for (PDPage page : outputDocument.getPages()) { + List annotations = page.getAnnotations(); + List annotationsToRemove = new ArrayList<>(); + + for (PDAnnotation annotation : annotations) { + if (annotation instanceof PDAnnotationMarkup) { + PDAnnotationMarkup markup = (PDAnnotationMarkup) annotation; + String contents = markup.getContents(); + if (contents != null && contents.contains(watermarkText)) { + annotationsToRemove.add(markup); + } + } + } + + annotations.removeAll(annotationsToRemove); + } + PDDocumentCatalog catalog = outputDocument.getDocumentCatalog(); + PDAcroForm acroForm = catalog.getAcroForm(); + if (acroForm != null) { + List fields = acroForm.getFields(); + for (PDField field : fields) { + String fieldValue = field.getValueAsString(); + if (fieldValue.contains(watermarkText)) { + field.setValue(fieldValue.replace(watermarkText, "")); + } + } + } + + return PdfUtils.pdfDocToWebResponse(outputDocument, "removed.pdf"); + } + + +} diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToOffice.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToOffice.java deleted file mode 100644 index dd0b9d5f..00000000 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertPDFToOffice.java +++ /dev/null @@ -1,86 +0,0 @@ -package stirling.software.SPDF.controller.converters; - -import java.io.IOException; - -import org.springframework.http.ResponseEntity; -import org.springframework.stereotype.Controller; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.PostMapping; -import org.springframework.web.bind.annotation.RequestParam; -import 
org.springframework.web.multipart.MultipartFile; -import org.springframework.web.servlet.ModelAndView; - -import stirling.software.SPDF.utils.PDFToFile; - -@Controller -public class ConvertPDFToOffice { - - @GetMapping("/pdf-to-html") - public ModelAndView pdfToHTML() { - ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html"); - modelAndView.addObject("currentPage", "pdf-to-html"); - return modelAndView; - } - - @GetMapping("/pdf-to-presentation") - public ModelAndView pdfToPresentation() { - ModelAndView modelAndView = new ModelAndView("convert/pdf-to-presentation"); - modelAndView.addObject("currentPage", "pdf-to-presentation"); - return modelAndView; - } - - @GetMapping("/pdf-to-text") - public ModelAndView pdfToText() { - ModelAndView modelAndView = new ModelAndView("convert/pdf-to-text"); - modelAndView.addObject("currentPage", "pdf-to-text"); - return modelAndView; - } - - @GetMapping("/pdf-to-word") - public ModelAndView pdfToWord() { - ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word"); - modelAndView.addObject("currentPage", "pdf-to-word"); - return modelAndView; - } - - @GetMapping("/pdf-to-xml") - public ModelAndView pdfToXML() { - ModelAndView modelAndView = new ModelAndView("convert/pdf-to-xml"); - modelAndView.addObject("currentPage", "pdf-to-xml"); - return modelAndView; - } - - @PostMapping("/pdf-to-html") - public ResponseEntity processPdfToHTML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException { - PDFToFile pdfToFile = new PDFToFile(); - return pdfToFile.processPdfToOfficeFormat(inputFile, "html", "writer_pdf_import"); - } - - @PostMapping("/pdf-to-presentation") - public ResponseEntity processPdfToPresentation(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat) - throws IOException, InterruptedException { - PDFToFile pdfToFile = new PDFToFile(); - return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, 
"impress_pdf_import"); - } - - @PostMapping("/pdf-to-text") - public ResponseEntity processPdfToRTForTXT(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat) - throws IOException, InterruptedException { - PDFToFile pdfToFile = new PDFToFile(); - return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); - } - - @PostMapping("/pdf-to-word") - public ResponseEntity processPdfToWord(@RequestParam("fileInput") MultipartFile inputFile, @RequestParam("outputFormat") String outputFormat) - throws IOException, InterruptedException { - PDFToFile pdfToFile = new PDFToFile(); - return pdfToFile.processPdfToOfficeFormat(inputFile, outputFormat, "writer_pdf_import"); - } - - @PostMapping("/pdf-to-xml") - public ResponseEntity processPdfToXML(@RequestParam("fileInput") MultipartFile inputFile) throws IOException, InterruptedException { - PDFToFile pdfToFile = new PDFToFile(); - return pdfToFile.processPdfToOfficeFormat(inputFile, "xml", "writer_pdf_import"); - } - -} diff --git a/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java b/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java new file mode 100644 index 00000000..f110cdc8 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/web/ConverterWebController.java @@ -0,0 +1,86 @@ +package stirling.software.SPDF.controller.web; + +import org.springframework.stereotype.Controller; +import org.springframework.ui.Model; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.servlet.ModelAndView; + +import io.swagger.v3.oas.annotations.Hidden; + +@Controller +public class ConverterWebController { + + @GetMapping("/img-to-pdf") + @Hidden + public String convertImgToPdfForm(Model model) { + model.addAttribute("currentPage", "img-to-pdf"); + return "convert/img-to-pdf"; + } + + + @GetMapping("/pdf-to-img") + @Hidden + public String pdfToimgForm(Model 
model) { + model.addAttribute("currentPage", "pdf-to-img"); + return "convert/pdf-to-img"; + } + + @GetMapping("/file-to-pdf") + @Hidden + public String convertToPdfForm(Model model) { + model.addAttribute("currentPage", "file-to-pdf"); + return "convert/file-to-pdf"; + } + + + + //PDF TO...... + + @GetMapping("/pdf-to-html") + @Hidden + public ModelAndView pdfToHTML() { + ModelAndView modelAndView = new ModelAndView("convert/pdf-to-html"); + modelAndView.addObject("currentPage", "pdf-to-html"); + return modelAndView; + } + + @GetMapping("/pdf-to-presentation") + @Hidden + public ModelAndView pdfToPresentation() { + ModelAndView modelAndView = new ModelAndView("convert/pdf-to-presentation"); + modelAndView.addObject("currentPage", "pdf-to-presentation"); + return modelAndView; + } + + @GetMapping("/pdf-to-text") + @Hidden + public ModelAndView pdfToText() { + ModelAndView modelAndView = new ModelAndView("convert/pdf-to-text"); + modelAndView.addObject("currentPage", "pdf-to-text"); + return modelAndView; + } + + @GetMapping("/pdf-to-word") + @Hidden + public ModelAndView pdfToWord() { + ModelAndView modelAndView = new ModelAndView("convert/pdf-to-word"); + modelAndView.addObject("currentPage", "pdf-to-word"); + return modelAndView; + } + + @GetMapping("/pdf-to-xml") + @Hidden + public ModelAndView pdfToXML() { + ModelAndView modelAndView = new ModelAndView("convert/pdf-to-xml"); + modelAndView.addObject("currentPage", "pdf-to-xml"); + return modelAndView; + } + + + @GetMapping("/pdf-to-pdfa") + @Hidden + public String pdfToPdfAForm(Model model) { + model.addAttribute("currentPage", "pdf-to-pdfa"); + return "convert/pdf-to-pdfa"; + } +} diff --git a/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java b/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java new file mode 100644 index 00000000..14ec0be6 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/web/GeneralWebController.java @@ -0,0 +1,69 @@ 
+package stirling.software.SPDF.controller.web; + +import org.springframework.stereotype.Controller; +import org.springframework.ui.Model; +import org.springframework.web.bind.annotation.GetMapping; + +import io.swagger.v3.oas.annotations.Hidden; + +@Controller +public class GeneralWebController { + @GetMapping("/merge-pdfs") + @Hidden + public String mergePdfForm(Model model) { + model.addAttribute("currentPage", "merge-pdfs"); + return "merge-pdfs"; + } + @GetMapping("/about") + @Hidden + public String gameForm(Model model) { + model.addAttribute("currentPage", "about"); + return "about"; + } + + @GetMapping("/multi-tool") + @Hidden + public String multiToolForm(Model model) { + model.addAttribute("currentPage", "multi-tool"); + return "multi-tool"; + } + + @GetMapping("/") + public String home(Model model) { + model.addAttribute("currentPage", "home"); + return "home"; + } + + @GetMapping("/home") + public String root(Model model) { + return "redirect:/"; + } + + @GetMapping("/remove-pages") + @Hidden + public String pageDeleter(Model model) { + model.addAttribute("currentPage", "remove-pages"); + return "remove-pages"; + } + + @GetMapping("/pdf-organizer") + @Hidden + public String pageOrganizer(Model model) { + model.addAttribute("currentPage", "pdf-organizer"); + return "pdf-organizer"; + } + + @GetMapping("/rotate-pdf") + @Hidden + public String rotatePdfForm(Model model) { + model.addAttribute("currentPage", "rotate-pdf"); + return "rotate-pdf"; + } + + @GetMapping("/split-pdfs") + @Hidden + public String splitPdfForm(Model model) { + model.addAttribute("currentPage", "split-pdfs"); + return "split-pdfs"; + } +} diff --git a/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java b/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java new file mode 100644 index 00000000..a7d17d76 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/web/OtherWebController.java @@ -0,0 +1,84 @@ +package 
stirling.software.SPDF.controller.web; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; + +import org.springframework.stereotype.Controller; +import org.springframework.ui.Model; +import org.springframework.web.bind.annotation.GetMapping; +import org.springframework.web.servlet.ModelAndView; + +import io.swagger.v3.oas.annotations.Hidden; + +@Controller +public class OtherWebController { + @GetMapping("/compress-pdf") + @Hidden + public String compressPdfForm(Model model) { + model.addAttribute("currentPage", "compress-pdf"); + return "other/compress-pdf"; + } + + @GetMapping("/extract-image-scans") + @Hidden + public ModelAndView extractImageScansForm() { + ModelAndView modelAndView = new ModelAndView("other/extract-image-scans"); + modelAndView.addObject("currentPage", "extract-image-scans"); + return modelAndView; + } + + @GetMapping("/extract-images") + @Hidden + public String extractImagesForm(Model model) { + model.addAttribute("currentPage", "extract-images"); + return "other/extract-images"; + } + + + @GetMapping("/change-metadata") + @Hidden + public String addWatermarkForm(Model model) { + model.addAttribute("currentPage", "change-metadata"); + return "other/change-metadata"; + } + + + public List getAvailableTesseractLanguages() { + String tessdataDir = "/usr/share/tesseract-ocr/4.00/tessdata"; + File[] files = new File(tessdataDir).listFiles(); + if (files == null) { + return Collections.emptyList(); + } + return Arrays.stream(files).filter(file -> file.getName().endsWith(".traineddata")).map(file -> file.getName().replace(".traineddata", "")) + .filter(lang -> !lang.equalsIgnoreCase("osd")).collect(Collectors.toList()); + } + + @GetMapping("/ocr-pdf") + @Hidden + public ModelAndView ocrPdfPage() { + ModelAndView modelAndView = new ModelAndView("other/ocr-pdf"); + modelAndView.addObject("languages", getAvailableTesseractLanguages()); + 
modelAndView.addObject("currentPage", "ocr-pdf"); + return modelAndView; + } + + + @GetMapping("/add-image") + @Hidden + public String overlayImage(Model model) { + model.addAttribute("currentPage", "add-image"); + return "other/add-image"; + } + + @GetMapping("/adjust-contrast") + @Hidden + public String contrast(Model model) { + model.addAttribute("currentPage", "adjust-contrast"); + return "other/adjust-contrast"; + } + + +} diff --git a/src/main/java/stirling/software/SPDF/controller/web/SecurityWebController.java b/src/main/java/stirling/software/SPDF/controller/web/SecurityWebController.java new file mode 100644 index 00000000..31f16563 --- /dev/null +++ b/src/main/java/stirling/software/SPDF/controller/web/SecurityWebController.java @@ -0,0 +1,46 @@ +package stirling.software.SPDF.controller.web; + +import org.springframework.stereotype.Controller; +import org.springframework.ui.Model; +import org.springframework.web.bind.annotation.GetMapping; + +import io.swagger.v3.oas.annotations.Hidden; + +@Controller +public class SecurityWebController { + @GetMapping("/add-password") + @Hidden + public String addPasswordForm(Model model) { + model.addAttribute("currentPage", "add-password"); + return "security/add-password"; + } + @GetMapping("/change-permissions") + @Hidden + public String permissionsForm(Model model) { + model.addAttribute("currentPage", "change-permissions"); + return "security/change-permissions"; + } + + @GetMapping("/remove-password") + @Hidden + public String removePasswordForm(Model model) { + model.addAttribute("currentPage", "remove-password"); + return "security/remove-password"; + } + + @GetMapping("/add-watermark") + @Hidden + public String addWatermarkForm(Model model) { + model.addAttribute("currentPage", "add-watermark"); + return "security/add-watermark"; + } + + //WIP + @GetMapping("/remove-watermark") + @Hidden + public String removeWatermarkForm(Model model) { + model.addAttribute("currentPage", "remove-watermark"); + return 
"security/remove-watermark"; + } + +} diff --git a/src/main/java/stirling/software/SPDF/utils/PDFToFile.java b/src/main/java/stirling/software/SPDF/utils/PDFToFile.java index 450f8192..ea9308ea 100644 --- a/src/main/java/stirling/software/SPDF/utils/PDFToFile.java +++ b/src/main/java/stirling/software/SPDF/utils/PDFToFile.java @@ -15,7 +15,6 @@ import java.util.zip.ZipOutputStream; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.springframework.http.HttpHeaders; import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; @@ -41,8 +40,7 @@ public class PDFToFile { Path tempInputFile = null; Path tempOutputDir = null; byte[] fileBytes; - // Prepare response - HttpHeaders headers = new HttpHeaders(); + String fileName = "temp.file"; try { // Save the uploaded file to a temporary location @@ -63,16 +61,14 @@ public class PDFToFile { if (outputFiles.size() == 1) { // Return single output file File outputFile = outputFiles.get(0); - headers.setContentType(MediaType.APPLICATION_OCTET_STREAM); if (outputFormat.equals("txt:Text")) { outputFormat = "txt"; } - headers.setContentDispositionFormData("attachment", pdfBaseName + "." + outputFormat); + fileName = pdfBaseName + "." 
+ outputFormat; fileBytes = FileUtils.readFileToByteArray(outputFile); } else { // Return output files in a ZIP archive - headers.setContentType(MediaType.APPLICATION_OCTET_STREAM); - headers.setContentDispositionFormData("attachment", pdfBaseName + "To" + outputFormat + ".zip"); + fileName = pdfBaseName + "To" + outputFormat + ".zip"; ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream); @@ -96,6 +92,6 @@ public class PDFToFile { if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile()); } - return new ResponseEntity<>(fileBytes, headers, HttpStatus.OK); + return PdfUtils.bytesToWebResponse(fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM); } } diff --git a/src/main/java/stirling/software/SPDF/utils/PdfUtils.java b/src/main/java/stirling/software/SPDF/utils/PdfUtils.java index d083121d..e6a1a602 100644 --- a/src/main/java/stirling/software/SPDF/utils/PdfUtils.java +++ b/src/main/java/stirling/software/SPDF/utils/PdfUtils.java @@ -8,6 +8,8 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.security.KeyPair; import java.security.KeyStore; @@ -43,19 +45,27 @@ public class PdfUtils { public static ResponseEntity boasToWebResponse(ByteArrayOutputStream baos, String docName) throws IOException { return PdfUtils.bytesToWebResponse(baos.toByteArray(), docName); - } - public static ResponseEntity bytesToWebResponse(byte[] bytes, String docName) throws IOException { + public static ResponseEntity boasToWebResponse(ByteArrayOutputStream baos, String docName, MediaType mediaType) throws IOException { + return PdfUtils.bytesToWebResponse(baos.toByteArray(), docName, mediaType); + } + + public static ResponseEntity bytesToWebResponse(byte[] bytes, String docName, MediaType mediaType) 
throws IOException { // Return the PDF as a response HttpHeaders headers = new HttpHeaders(); - headers.setContentType(MediaType.APPLICATION_PDF); + headers.setContentType(mediaType); headers.setContentLength(bytes.length); - headers.setContentDispositionFormData("attachment", docName); + String encodedDocName = URLEncoder.encode(docName, StandardCharsets.UTF_8.toString()).replaceAll("\\+", "%20"); + headers.setContentDispositionFormData("attachment", encodedDocName); return new ResponseEntity<>(bytes, headers, HttpStatus.OK); } + public static ResponseEntity bytesToWebResponse(byte[] bytes, String docName) throws IOException { + return bytesToWebResponse(bytes, docName, MediaType.APPLICATION_PDF); + } + public static byte[] convertFromPdf(byte[] inputStream, String imageType, ImageType colorType, boolean singleImage, int DPI) throws IOException, Exception { try (PDDocument document = PDDocument.load(new ByteArrayInputStream(inputStream))) { PDFRenderer pdfRenderer = new PDFRenderer(document); diff --git a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java index c8744c52..33823507 100644 --- a/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java +++ b/src/main/java/stirling/software/SPDF/utils/ProcessExecutor.java @@ -13,7 +13,7 @@ import java.util.concurrent.Semaphore; public class ProcessExecutor { public enum Processes { - LIBRE_OFFICE, OCR_MY_PDF + LIBRE_OFFICE, OCR_MY_PDF, PYTHON_OPENCV, GHOSTSCRIPT } private static final Map instances = new ConcurrentHashMap<>(); @@ -23,6 +23,8 @@ public class ProcessExecutor { int semaphoreLimit = switch (key) { case LIBRE_OFFICE -> 1; case OCR_MY_PDF -> 2; + case PYTHON_OPENCV -> 8; + case GHOSTSCRIPT -> 16; }; return new ProcessExecutor(semaphoreLimit); }); diff --git a/src/main/resources/messages_ar_AR.properties b/src/main/resources/messages_ar_AR.properties index d2640f69..15a3ee2c 100644 --- 
a/src/main/resources/messages_ar_AR.properties +++ b/src/main/resources/messages_ar_AR.properties @@ -31,10 +31,10 @@ navbar.convert=تحويل navbar.security=الأمان navbar.other=أخرى navbar.darkmode=الوضع الداكن -navbar.pageOps = عمليات الصفحة +navbar.pageOps=عمليات الصفحة -home.multiTool.title = أداة متعددة PDF -home.multiTool.desc = دمج الصفحات وتدويرها وإعادة ترتيبها وإزالتها +home.multiTool.title=أداة متعددة PDF +home.multiTool.desc=دمج الصفحات وتدويرها وإعادة ترتيبها وإزالتها home.merge.title=دمج ملفات home.merge.desc=دمج ملفات PDF متعددة في ملف واحد بسهولة. @@ -91,24 +91,40 @@ home.ocr.desc=\u064A\u0642\u0648\u0645 \u0628\u0631\u0646\u0627\u0645\u062C \u06 home.extractImages.title=\u0627\u0633\u062A\u062E\u0631\u0627\u062C \u0627\u0644\u0635\u0648\u0631 home.extractImages.desc=\u064A\u0633\u062A\u062E\u0631\u062C \u062C\u0645\u064A\u0639 \u0627\u0644\u0635\u0648\u0631 \u0645\u0646 \u0645\u0644\u0641 PDF \u0648\u064A\u062D\u0641\u0638\u0647\u0627 \u0641\u064A \u0627\u0644\u0631\u0645\u0632 \u0627\u0644\u0628\u0631\u064A\u062F\u064A -home.pdfToPDFA.title = \u062A\u062D\u0648\u064A\u0644 \u0645\u0644\u0641\u0627\u062A PDF \u0625\u0644\u0649 PDF / A -home.pdfToPDFA.desc = \u062A\u062D\u0648\u064A\u0644 PDF \u0625\u0644\u0649 PDF / A \u0644\u0644\u062A\u062E\u0632\u064A\u0646 \u0637\u0648\u064A\u0644 \u0627\u0644\u0645\u062F\u0649 +home.pdfToPDFA.title=\u062A\u062D\u0648\u064A\u0644 \u0645\u0644\u0641\u0627\u062A PDF \u0625\u0644\u0649 PDF / A +home.pdfToPDFA.desc=\u062A\u062D\u0648\u064A\u0644 PDF \u0625\u0644\u0649 PDF / A \u0644\u0644\u062A\u062E\u0632\u064A\u0646 \u0637\u0648\u064A\u0644 \u0627\u0644\u0645\u062F\u0649 -home.PDFToWord.title = تحويل PDF إلى Word -home.PDFToWord.desc = تحويل PDF إلى تنسيقات Word (DOC و DOCX و ODT) +home.PDFToWord.title=تحويل PDF إلى Word +home.PDFToWord.desc=تحويل PDF إلى تنسيقات Word (DOC و DOCX و ODT) -home.PDFToPresentation.title = PDF للعرض التقديمي -home.PDFToPresentation.desc = تحويل PDF إلى تنسيقات عرض تقديمي (PPT و PPTX و 
ODP) +home.PDFToPresentation.title=PDF للعرض التقديمي +home.PDFToPresentation.desc=تحويل PDF إلى تنسيقات عرض تقديمي (PPT و PPTX و ODP) -home.PDFToText.title = تحويل PDF إلى نص / RTF -home.PDFToText.desc = تحويل PDF إلى تنسيق نص أو RTF +home.PDFToText.title=تحويل PDF إلى نص / RTF +home.PDFToText.desc=تحويل PDF إلى تنسيق نص أو RTF -home.PDFToHTML.title = تحويل PDF إلى HTML -home.PDFToHTML.desc = تحويل PDF إلى تنسيق HTML +home.PDFToHTML.title=تحويل PDF إلى HTML +home.PDFToHTML.desc=تحويل PDF إلى تنسيق HTML + +home.PDFToXML.title=تحويل PDF إلى XML +home.PDFToXML.desc=تحويل PDF إلى تنسيق XML + + +home.ScannerImageSplit.title=كشف / انقسام الصور الممسوحة ضوئيًا +home.ScannerImageSplit.desc=تقسيم عدة صور من داخل صورة / ملف PDF + +ScannerImageSplit.selectText.1=عتبة الزاوية: +ScannerImageSplit.selectText.2=تعيين الحد الأدنى للزاوية المطلقة المطلوبة لتدوير الصورة (افتراضي: 10). +ScannerImageSplit.selectText.3=التسامح: +ScannerImageSplit.selectText.4=يحدد نطاق تباين اللون حول لون الخلفية المقدر (الافتراضي: 30). +ScannerImageSplit.selectText.5=أدنى مساحة: +ScannerImageSplit.selectText.6=تعيين الحد الأدنى لمنطقة الصورة (الافتراضي: 10000). +ScannerImageSplit.selectText.7=الحد الأدنى لمنطقة المحيط: +ScannerImageSplit.selectText.8=تعيين الحد الأدنى لمنطقة المحيط للصورة +ScannerImageSplit.selectText.9=حجم الحدود: +ScannerImageSplit.selectText.10=يضبط حجم الحدود المضافة والمزالة لمنع الحدود البيضاء في الإخراج (الافتراضي: 1). 
-home.PDFToXML.title = تحويل PDF إلى XML -home.PDFToXML.desc = تحويل PDF إلى تنسيق XML navbar.settings=\u0625\u0639\u062F\u0627\u062F\u0627\u062A settings.title=\u0627\u0644\u0625\u0639\u062F\u0627\u062F\u0627\u062A @@ -133,6 +149,8 @@ ocr.selectText.7=\u0641\u0631\u0636 \u0627\u0644\u062A\u0639\u0631\u0641 \u0627\ ocr.selectText.8=\u0639\u0627\u062F\u064A (\u062E\u0637\u0623 \u0625\u0630\u0627 \u0643\u0627\u0646 PDF \u064A\u062D\u062A\u0648\u064A \u0639\u0644\u0649 \u0646\u0635) ocr.selectText.9=\u0625\u0639\u062F\u0627\u062F\u0627\u062A \u0625\u0636\u0627\u0641\u064A\u0629 ocr.selectText.10=\u0648\u0636\u0639 \u0627\u0644\u062A\u0639\u0631\u0641 \u0627\u0644\u0636\u0648\u0626\u064A \u0639\u0644\u0649 \u0627\u0644\u062D\u0631\u0648\u0641 +ocr.selectText.11 = إزالة الصور بعد التعرف الضوئي على الحروف (يزيل كل الصور ، يكون مفيدًا فقط إذا كان جزءًا من خطوة التحويل) +ocr.selectText.12 = نوع العرض (متقدم) ocr.help=\u064A\u0631\u062C\u0649 \u0642\u0631\u0627\u0621\u0629 \u0647\u0630\u0647 \u0627\u0644\u0648\u062B\u0627\u0626\u0642 \u062D\u0648\u0644 \u0643\u064A\u0641\u064A\u0629 \u0627\u0633\u062A\u062E\u062F\u0627\u0645 \u0647\u0630\u0627 \u0644\u0644\u063A\u0627\u062A \u0623\u062E\u0631\u0649 \u0648 / \u0623\u0648 \u0627\u0644\u0627\u0633\u062A\u062E\u062F\u0627\u0645 \u0644\u064A\u0633 \u0641\u064A \u0639\u0627\u0645\u0644 \u0627\u0644\u0625\u0631\u0633\u0627\u0621 ocr.credit=\u062A\u0633\u062A\u062E\u062F\u0645 \u0647\u0630\u0647 \u0627\u0644\u062E\u062F\u0645\u0629 OCRmyPDF \u0648 Tesseract \u0644 OCR. 
ocr.submit=\u0645\u0639\u0627\u0644\u062C\u0629 PDF \u0628\u0627\u0633\u062A\u062E\u062F\u0627\u0645 OCR @@ -182,8 +200,8 @@ pdfOrganiser.header=منظم صفحات PDF pdfOrganiser.submit=إعادة ترتيب الصفحات #multiTool -multiTool.title = أداة متعددة PDF -multiTool.header = أداة متعددة PDF +multiTool.title=أداة متعددة PDF +multiTool.header=أداة متعددة PDF #pageRemover pageRemover.title=مزيل الصفحة @@ -329,32 +347,32 @@ pdfToPDFA.credit=\u062A\u0633\u062A\u062E\u062F\u0645 \u0647\u0630\u0647 \u0627\ pdfToPDFA.submit=\u062A\u062D\u0648\u064A\u0644 -PDFToWord.title = تحويل PDF إلى Word -PDFToWord.header = تحويل PDF إلى Word -PDFToWord.selectText.1 = تنسيق ملف الإخراج -PDFToWord.credit = تستخدم هذه الخدمة LibreOffice لتحويل الملفات. -PDFToWord.submit = تحويل +PDFToWord.title=تحويل PDF إلى Word +PDFToWord.header=تحويل PDF إلى Word +PDFToWord.selectText.1=تنسيق ملف الإخراج +PDFToWord.credit=تستخدم هذه الخدمة LibreOffice لتحويل الملفات. +PDFToWord.submit=تحويل -PDFToPresentation.title = PDF للعرض التقديمي -PDFToPresentation.header = PDF للعرض التقديمي -PDFToPresentation.selectText.1 = تنسيق ملف الإخراج -PDFToPresentation.credit = تستخدم هذه الخدمة LibreOffice لتحويل الملف. -PDFToPresentation.submit = تحويل +PDFToPresentation.title=PDF للعرض التقديمي +PDFToPresentation.header=PDF للعرض التقديمي +PDFToPresentation.selectText.1=تنسيق ملف الإخراج +PDFToPresentation.credit=تستخدم هذه الخدمة LibreOffice لتحويل الملف. +PDFToPresentation.submit=تحويل -PDFToText.title = تحويل PDF إلى نص / RTF -PDFToText.header = تحويل PDF إلى نص / RTF -PDFToText.selectText.1 = تنسيق ملف الإخراج -PDFToText.credit = تستخدم هذه الخدمة LibreOffice لتحويل الملفات. -PDFToText.submit = تحويل +PDFToText.title=تحويل PDF إلى نص / RTF +PDFToText.header=تحويل PDF إلى نص / RTF +PDFToText.selectText.1=تنسيق ملف الإخراج +PDFToText.credit=تستخدم هذه الخدمة LibreOffice لتحويل الملفات. 
+PDFToText.submit=تحويل -PDFToHTML.title = PDF إلى HTML -PDFToHTML.header = PDF إلى HTML -PDFToHTML.credit = تستخدم هذه الخدمة LibreOffice لتحويل الملفات. -PDFToHTML.submit = تحويل +PDFToHTML.title=PDF إلى HTML +PDFToHTML.header=PDF إلى HTML +PDFToHTML.credit=تستخدم هذه الخدمة LibreOffice لتحويل الملفات. +PDFToHTML.submit=تحويل -PDFToXML.title = تحويل PDF إلى XML -PDFToXML.header = تحويل PDF إلى XML -PDFToXML.credit = تستخدم هذه الخدمة LibreOffice لتحويل الملفات. -PDFToXML.submit = تحويل +PDFToXML.title=تحويل PDF إلى XML +PDFToXML.header=تحويل PDF إلى XML +PDFToXML.credit=تستخدم هذه الخدمة LibreOffice لتحويل الملفات. +PDFToXML.submit=تحويل diff --git a/src/main/resources/messages_de_DE.properties b/src/main/resources/messages_de_DE.properties index a7cd44bb..57430b55 100644 --- a/src/main/resources/messages_de_DE.properties +++ b/src/main/resources/messages_de_DE.properties @@ -104,6 +104,20 @@ home.PDFToHTML.desc=PDF in HTML-Format konvertieren home.PDFToXML.title=PDF in XML home.PDFToXML.desc=PDF in XML-Format konvertieren +home.ScannerImageSplit.title=Gescannte Fotos erkennen/aufteilen +home.ScannerImageSplit.desc=Teilt mehrere Fotos innerhalb eines Fotos/PDF + +ScannerImageSplit.selectText.1=Winkelschwelle: +ScannerImageSplit.selectText.2=Legt den minimalen absoluten Winkel fest, der erforderlich ist, damit das Bild gedreht werden kann (Standard: 10). +ScannerImageSplit.selectText.3=Toleranz: +ScannerImageSplit.selectText.4=Bestimmt den Bereich der Farbvariation um die geschätzte Hintergrundfarbe herum (Standard: 30). +ScannerImageSplit.selectText.5=Mindestbereich: +ScannerImageSplit.selectText.6=Legt den minimalen Bereichsschwellenwert für ein Foto fest (Standard: 10000). 
+ScannerImageSplit.selectText.7=Minimaler Konturbereich: +ScannerImageSplit.selectText.8=Legt den minimalen Konturbereichsschwellenwert für ein Foto fest +ScannerImageSplit.selectText.9=Randgröße: +ScannerImageSplit.selectText.10=Legt die Größe des hinzugefügten und entfernten Randes fest, um weiße Ränder in der Ausgabe zu verhindern (Standard: 1). + navbar.settings=Einstellungen settings.title=Einstellungen @@ -128,6 +142,8 @@ ocr.selectText.7=OCR erzwingen, OCR wird jede Seite entfernen und alle ursprüng ocr.selectText.8=Normal (Fehler, wenn PDF Text enthält) ocr.selectText.9=Zusätzliche Einstellungen ocr.selectText.10=OCR-Modus +ocr.selectText.11=Bilder nach OCR entfernen (Entfernt ALLE Bilder, nur sinnvoll, wenn Teil des Konvertierungsschritts) +ocr.selectText.12=Rendertyp (Erweitert) ocr.help=Bitte lesen Sie diese Dokumentation, um zu erfahren, wie Sie dies für andere Sprachen verwenden und/oder nicht in Docker verwenden können ocr.credit=Dieser Dienst verwendet OCRmyPDF und Tesseract für OCR. ocr.submit=PDF mit OCR verarbeiten diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index e0e07a5b..26e958ae 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -105,8 +105,22 @@ home.PDFToHTML.desc=Convert PDF to HTML format home.PDFToXML.title=PDF to XML home.PDFToXML.desc=Convert PDF to XML format +home.ScannerImageSplit.title=Detect/Split Scanned photos +home.ScannerImageSplit.desc=Splits multiple photos from within a photo/PDF + +ScannerImageSplit.selectText.1=Angle Threshold: +ScannerImageSplit.selectText.2=Sets the minimum absolute angle required for the image to be rotated (default: 10). +ScannerImageSplit.selectText.3=Tolerance: +ScannerImageSplit.selectText.4=Determines the range of color variation around the estimated background color (default: 30). 
+ScannerImageSplit.selectText.5=Minimum Area: +ScannerImageSplit.selectText.6=Sets the minimum area threshold for a photo (default: 10000). +ScannerImageSplit.selectText.7=Minimum Contour Area: +ScannerImageSplit.selectText.8=Sets the minimum contour area threshold for a photo +ScannerImageSplit.selectText.9=Border Size: +ScannerImageSplit.selectText.10=Sets the size of the border added and removed to prevent white borders in the output (default: 1). + navbar.settings=Settings settings.title=Settings settings.update=Update available @@ -118,6 +132,7 @@ settings.downloadOption.3=Download file settings.zipThreshold=Zip files when the number of downloaded files exceeds + #OCR @@ -133,6 +148,8 @@ ocr.selectText.7=Force OCR, will OCR Every page removing all original text eleme ocr.selectText.8=Normal (Will error if PDF contains text) ocr.selectText.9=Additional Settings ocr.selectText.10=OCR Mode +ocr.selectText.11=Remove images after OCR (Removes ALL images, only useful if part of conversion step) +ocr.selectText.12=Render Type (Advanced) ocr.help=Please read this documentation on how to use this for other languages and/or use not in docker ocr.credit=This service uses OCRmyPDF and Tesseract for OCR. ocr.submit=Process PDF with OCR diff --git a/src/main/resources/messages_es_ES.properties b/src/main/resources/messages_es_ES.properties index 907eefbd..a8712edb 100644 --- a/src/main/resources/messages_es_ES.properties +++ b/src/main/resources/messages_es_ES.properties @@ -104,6 +104,19 @@ home.PDFToHTML.desc=Convertir PDF a formato HTML home.PDFToXML.title=PDF a XML home.PDFToXML.desc=Convertir PDF a formato XML +home.ScannerImageSplit.title=Detectar/Dividir fotos escaneadas +home.ScannerImageSplit.desc=Dividir varias fotos dentro de una foto/PDF + +ScannerImageSplit.selectText.1=Umbral de ángulo: +ScannerImageSplit.selectText.2=Establece el ángulo absoluto mínimo requerido para rotar la imagen (predeterminado: 10). 
+ScannerImageSplit.selectText.3=Tolerancia: +ScannerImageSplit.selectText.4=Determina el rango de variación de color alrededor del color de fondo estimado (predeterminado: 30). +ScannerImageSplit.selectText.5=Área mínima: +ScannerImageSplit.selectText.6=Establece el umbral mínimo de área para una foto (predeterminado: 10000). +ScannerImageSplit.selectText.7=Área de contorno mínima: +ScannerImageSplit.selectText.8=Establece el umbral mínimo del área de contorno para una foto +ScannerImageSplit.selectText.9=Tamaño del borde: +ScannerImageSplit.selectText.10=Establece el tamaño del borde agregado y eliminado para evitar bordes blancos en la salida (predeterminado: 1). navbar.settings=Ajustes settings.title=Ajustes @@ -131,6 +144,8 @@ ocr.selectText.7=Fuerza OCR, OCR eliminará en cada página todo el texto origin ocr.selectText.8=Normal (Se producirá un error si el PDF contiene texto) ocr.selectText.9=Ajustes Adicionales ocr.selectText.10=Modo OCR +ocr.selectText.11=Eliminar imágenes después de OCR (Elimina TODAS las imágenes, solo es útil si es parte del paso de conversión) +ocr.selectText.12=Tipo de procesamiento (avanzado) ocr.help=Lea esta documentación sobre cómo usar esto para otros idiomas y/o no usarlo en docker ocr.credit=Este servicio utiliza OCRmyPDF y Tesseract para OCR. 
ocr.submit=Procesa PDF con OCR diff --git a/src/main/resources/messages_fr_FR.properties b/src/main/resources/messages_fr_FR.properties index ece14e52..ddba2305 100644 --- a/src/main/resources/messages_fr_FR.properties +++ b/src/main/resources/messages_fr_FR.properties @@ -110,6 +110,20 @@ home.PDFToHTML.desc=Convertir le PDF au format HTML home.PDFToXML.title=PDF vers XML home.PDFToXML.desc=Convertir le PDF au format XML +home.ScannerImageSplit.title=Détecter/diviser les photos numérisées +home.ScannerImageSplit.desc=Divise plusieurs photos à partir d'une photo/PDF + +ScannerImageSplit.selectText.1=Seuil d'angle : +ScannerImageSplit.selectText.2=Définit l'angle absolu minimum requis pour la rotation de l'image (par défaut : 10). +ScannerImageSplit.selectText.3=Tolérance : +ScannerImageSplit.selectText.4=Détermine la plage de variation de couleur autour de la couleur d'arrière-plan estimée (par défaut : 30). +ScannerImageSplit.selectText.5=Zone minimale : +ScannerImageSplit.selectText.6=Définit le seuil de zone minimum pour une photo (par défaut : 10000). +ScannerImageSplit.selectText.7=Zone de contour minimale : +ScannerImageSplit.selectText.8=Définit le seuil de zone de contour minimum pour une photo +ScannerImageSplit.selectText.9=Taille de la bordure : +ScannerImageSplit.selectText.10=Définit la taille de la bordure ajoutée et supprimée pour éviter les bordures blanches dans la sortie (par défaut : 1). 
+ navbar.settings=Paramètres settings.title=Paramètres settings.update=Mise à jour disponible @@ -134,6 +148,8 @@ ocr.selectText.7=Forcer l'OCR, OCR chaque page supprimera tous les éléments de ocr.selectText.8=Normal (Erreur si le PDF contient du texte) ocr.selectText.9=Paramètres supplémentaires ocr.selectText.10=Mode ROC +ocr.selectText.11=Supprimer les images après l'OCR (Supprime TOUTES les images, utile uniquement si elles font partie de l'étape de conversion) +ocr.selectText.12=Type de rendu (avancé) ocr.help=Veuillez lire cette documentation pour savoir comment l'utiliser pour d'autres langues et/ou une utilisation non dans docker ocr.credit=Ce service utilise OCRmyPDF et Tesseract pour l'OCR. ocr.submit=Traiter PDF avec OCR diff --git a/src/main/resources/static/css/rainbow-mode.css b/src/main/resources/static/css/rainbow-mode.css new file mode 100644 index 00000000..bd82dba1 --- /dev/null +++ b/src/main/resources/static/css/rainbow-mode.css @@ -0,0 +1,35 @@ +/* Rainbow Mode Styles */ +body { + background: linear-gradient(90deg, rgba(255,0,0,1) 0%, rgba(255,154,0,1) 10%, rgba(208,222,33,1) 20%, rgba(79,220,74,1) 30%, rgba(63,218,216,1) 40%, rgba(47,201,226,1) 50%, rgba(28,127,238,1) 60%, rgba(95,21,242,1) 70%, rgba(186,12,248,1) 80%, rgba(251,7,217,1) 90%, rgba(255,0,0,1) 100%); + color: #fff !important; +} + +.dark-card { + background: linear-gradient(90deg, rgba(255,0,0,1) 0%, rgba(255,154,0,1) 10%, rgba(208,222,33,1) 20%, rgba(79,220,74,1) 30%, rgba(63,218,216,1) 40%, rgba(47,201,226,1) 50%, rgba(28,127,238,1) 60%, rgba(95,21,242,1) 70%, rgba(186,12,248,1) 80%, rgba(251,7,217,1) 90%, rgba(255,0,0,1) 100%) !important; + color: white !important; +} +.jumbotron { + background: linear-gradient(90deg, rgba(255,0,0,1) 0%, rgba(255,154,0,1) 10%, rgba(208,222,33,1) 20%, rgba(79,220,74,1) 30%, rgba(63,218,216,1) 40%, rgba(47,201,226,1) 50%, rgba(28,127,238,1) 60%, rgba(95,21,242,1) 70%, rgba(186,12,248,1) 80%, rgba(251,7,217,1) 90%, rgba(255,0,0,1) 100%); + 
color: #fff !important; +} + +.list-group { + background: linear-gradient(90deg, rgba(255,0,0,1) 0%, rgba(255,154,0,1) 10%, rgba(208,222,33,1) 20%, rgba(79,220,74,1) 30%, rgba(63,218,216,1) 40%, rgba(47,201,226,1) 50%, rgba(28,127,238,1) 60%, rgba(95,21,242,1) 70%, rgba(186,12,248,1) 80%, rgba(251,7,217,1) 90%, rgba(255,0,0,1) 100%) !important; + color: #fff !important; +} +.list-group-item { + background: linear-gradient(90deg, rgba(255,0,0,1) 0%, rgba(255,154,0,1) 10%, rgba(208,222,33,1) 20%, rgba(79,220,74,1) 30%, rgba(63,218,216,1) 40%, rgba(47,201,226,1) 50%, rgba(28,127,238,1) 60%, rgba(95,21,242,1) 70%, rgba(186,12,248,1) 80%, rgba(251,7,217,1) 90%, rgba(255,0,0,1) 100%) !important; + color: #fff !important; +} +#support-section { + background: linear-gradient(90deg, rgba(255,0,0,1) 0%, rgba(255,154,0,1) 10%, rgba(208,222,33,1) 20%, rgba(79,220,74,1) 30%, rgba(63,218,216,1) 40%, rgba(47,201,226,1) 50%, rgba(28,127,238,1) 60%, rgba(95,21,242,1) 70%, rgba(186,12,248,1) 80%, rgba(251,7,217,1) 90%, rgba(255,0,0,1) 100%) !important; +} + + +#pages-container-wrapper { + --background-color: rgba(255, 255, 255, 0.046) !important; + --scroll-bar-color: #4c4c4c !important; + --scroll-bar-thumb: #d3d3d3 !important; + --scroll-bar-thumb-hover: #ffffff !important; +} + diff --git a/src/main/resources/static/images/scanner.svg b/src/main/resources/static/images/scanner.svg new file mode 100644 index 00000000..491d428c --- /dev/null +++ b/src/main/resources/static/images/scanner.svg @@ -0,0 +1,13 @@ + + + + + + + + + \ No newline at end of file diff --git a/src/main/resources/static/js/game.js b/src/main/resources/static/js/game.js new file mode 100644 index 00000000..2d138f93 --- /dev/null +++ b/src/main/resources/static/js/game.js @@ -0,0 +1,286 @@ +function initializeGame() { + const gameContainer = document.getElementById('game-container'); + const player = document.getElementById('player'); + + let playerSize = gameContainer.clientWidth * 0.0625; // 6.25% of container
width + player.style.width = playerSize + 'px'; + player.style.height = playerSize + 'px'; + + let playerX = gameContainer.clientWidth / 2 - playerSize / 2; + let playerY = gameContainer.clientHeight * 0.1; + const scoreElement = document.getElementById('score'); + const levelElement = document.getElementById('level'); + const livesElement = document.getElementById('lives'); + const highScoreElement = document.getElementById('high-score'); + + let pdfSize = gameContainer.clientWidth * 0.0625; // 6.25% of container width + let projectileWidth = gameContainer.clientWidth * 0.00625; // 0.625% of container width + let projectileHeight = gameContainer.clientHeight * 0.01667; // ~1.667% of container height + + let paused = false; + const fireRate = 200; // Time between shots in milliseconds + let lastProjectileTime = 0; + let lives = 3; + let highScore = localStorage.getItem('highScore') ? parseInt(localStorage.getItem('highScore')) : 0; + updateHighScore(); + + + + const keysPressed = {}; + const pdfs = []; + const projectiles = []; + let score = 0; + let level = 1; + let pdfSpeed = 1; + let gameOver = false; + + function handleKeys() { + if (keysPressed['ArrowLeft']) { + playerX -= 10; + } + if (keysPressed['ArrowRight']) { + playerX += 10; + } + if (keysPressed[' '] && !gameOver) { + const currentTime = new Date().getTime(); + if (currentTime - lastProjectileTime >= fireRate) { + shootProjectile(); + lastProjectileTime = currentTime; + } + } + updatePlayerPosition(); + } + + + + + document.addEventListener('keydown', (event) => { + if (event.key === ' ') { + event.preventDefault(); + } + keysPressed[event.key] = true; + handleKeys(); + }); + + document.addEventListener('keyup', (event) => { + keysPressed[event.key] = false; + }); + + + function updatePlayerPosition() { + player.style.left = playerX + 'px'; + player.style.bottom = playerY + 'px'; + } + + function updateLives() { + livesElement.textContent = 'Lives: ' + lives; + } + + function updateHighScore() { +
highScoreElement.textContent = 'High Score: ' + highScore; + } + + + function shootProjectile() { + const projectile = document.createElement('div'); + projectile.classList.add('projectile'); + projectile.style.backgroundColor = 'black'; + projectile.style.width = projectileWidth + 'px'; + projectile.style.height = projectileHeight + 'px'; + projectile.style.left = (playerX + playerSize / 2 - projectileWidth / 2) + 'px'; + projectile.style.top = (gameContainer.clientHeight - playerY - playerSize) + 'px'; + gameContainer.appendChild(projectile); + projectiles.push(projectile); + } + + + + function spawnPdf() { + const pdf = document.createElement('img'); + pdf.src = 'images/file-earmark-pdf.svg'; + pdf.classList.add('pdf'); + pdf.style.width = pdfSize + 'px'; + pdf.style.height = pdfSize + 'px'; + pdf.style.left = Math.floor(Math.random() * (gameContainer.clientWidth - pdfSize)) + 'px'; + pdf.style.top = '0px'; + gameContainer.appendChild(pdf); + pdfs.push(pdf); + } + + +function resetEnemies() { + pdfs.forEach((pdf) => gameContainer.removeChild(pdf)); + pdfs.length = 0; +} + + + function updateGame() { + if (gameOver || paused) return; + + pdfs.forEach((pdf, pdfIndex) => { + const pdfY = parseInt(pdf.style.top) + pdfSpeed; + if (pdfY + 50 > gameContainer.clientHeight) { + gameContainer.removeChild(pdf); + pdfs.splice(pdfIndex, 1); + + // Score penalty for a PDF getting past the player is currently disabled (deducts 0 points) + score -= 0; + updateScore(); + + // Decrease lives and check if game over + lives--; + updateLives(); + if (lives <= 0) { + endGame(); + return; + } + + } else { + pdf.style.top = pdfY + 'px'; + + // Check for collision with player + if (collisionDetected(player, pdf)) { + lives--; + updateLives(); + resetEnemies(); + if (lives <= 0) { + endGame(); + return; + } + } + } + }); + + + + + + projectiles.forEach((projectile, projectileIndex) => { + const projectileY = parseInt(projectile.style.top) - 10; + if (projectileY < 0) { + gameContainer.removeChild(projectile); +
projectiles.splice(projectileIndex, 1); + } else { + projectile.style.top = projectileY + 'px'; + } + + for (let pdfIndex = 0; pdfIndex < pdfs.length; pdfIndex++) { + const pdf = pdfs[pdfIndex]; + if (collisionDetected(projectile, pdf)) { + gameContainer.removeChild(pdf); + gameContainer.removeChild(projectile); + pdfs.splice(pdfIndex, 1); + projectiles.splice(projectileIndex, 1); + score = score + 10; + updateScore(); + break; + } + } + }); + + setTimeout(updateGame, 1000 / 60); + } +function resetGame() { + playerX = gameContainer.clientWidth / 2; + playerY = 50; + updatePlayerPosition(); + + pdfs.forEach((pdf) => gameContainer.removeChild(pdf)); + projectiles.forEach((projectile) => gameContainer.removeChild(projectile)); + + pdfs.length = 0; + projectiles.length = 0; + + score = 0; + level = 1; + lives = 3; + + gameOver = false; + + updateScore(); + updateLives(); + levelElement.textContent = 'Level: ' + level; + pdfSpeed = 1; + clearTimeout(spawnPdfTimeout); // Clear the existing spawnPdfTimeout + setTimeout(updateGame, 1000 / 60); + spawnPdfInterval(); +} + + + + function updateScore() { + scoreElement.textContent = 'Score: ' + score; + checkLevelUp(); + } + + + + function checkLevelUp() { + const newLevel = Math.floor(score / 100) + 1; + if (newLevel > level) { + level = newLevel; + levelElement.textContent = 'Level: ' + level; + pdfSpeed += 1; + } + } + + function collisionDetected(a, b) { + const rectA = a.getBoundingClientRect(); + const rectB = b.getBoundingClientRect(); + return ( + rectA.left < rectB.right && + rectA.right > rectB.left && + rectA.top < rectB.bottom && + rectA.bottom > rectB.top + ); + } + + function endGame() { + gameOver = true; + if (score > highScore) { + highScore = score; + localStorage.setItem('highScore', highScore); + updateHighScore(); + } + alert('Game Over! 
Your final score is: ' + score); + setTimeout(() => { // Wrap the resetGame() call in a setTimeout + resetGame(); + }, 0); + } + + + + + let spawnPdfTimeout; + + function spawnPdfInterval() { + console.log("spawnPdfInterval"); + if (gameOver || paused) { + console.log("spawnPdfInterval 2"); + clearTimeout(spawnPdfTimeout); + return; + } + console.log("spawnPdfInterval 3"); + spawnPdf(); + spawnPdfTimeout = setTimeout(spawnPdfInterval, 1000 - level * 50); + } + + updatePlayerPosition(); + updateGame(); + spawnPdfInterval(); + + + document.addEventListener('visibilitychange', function() { + if (document.hidden) { + paused = true; + } else { + paused = false; + updateGame(); + spawnPdfInterval(); + } + + }); + +} + +window.initializeGame = initializeGame; diff --git a/src/main/resources/static/rainbow.svg b/src/main/resources/static/rainbow.svg new file mode 100644 index 00000000..8e8aea78 --- /dev/null +++ b/src/main/resources/static/rainbow.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/src/main/resources/templates/about.html b/src/main/resources/templates/about.html new file mode 100644 index 00000000..3f53c928 --- /dev/null +++ b/src/main/resources/templates/about.html @@ -0,0 +1,22 @@ + + + + + + +
+
+
+

+
+
+
+ +
+
+
+
+
+ + + diff --git a/src/main/resources/templates/fragments/common.html b/src/main/resources/templates/fragments/common.html index 33154eff..0e5118e5 100644 --- a/src/main/resources/templates/fragments/common.html +++ b/src/main/resources/templates/fragments/common.html @@ -28,193 +28,235 @@ - + - -
-
- - -
-
-
-
- - + + +
+
+ + +
+
+
+
+ + + + + + + + + + diff --git a/src/main/resources/templates/home.html b/src/main/resources/templates/home.html index 7cee9a18..19b8fb8e 100644 --- a/src/main/resources/templates/home.html +++ b/src/main/resources/templates/home.html @@ -75,42 +75,39 @@ filter: invert(0.2) sepia(2) saturate(50) hue-rotate(190deg);
-
-
-
+
-
+
-
-
+
+
-
-
+
-
+
-
+
- +
diff --git a/src/main/resources/templates/other/adjust-contrast.html b/src/main/resources/templates/other/adjust-contrast.html new file mode 100644 index 00000000..87496d4f --- /dev/null +++ b/src/main/resources/templates/other/adjust-contrast.html @@ -0,0 +1,32 @@ + + + + + + + +
+
+
+

+
+
+
+

+ +
+
+
+ + +
+ +
+
+
+
+
+
+
+ + \ No newline at end of file diff --git a/src/main/resources/templates/other/extract-image-scans.html b/src/main/resources/templates/other/extract-image-scans.html new file mode 100644 index 00000000..cffddfc7 --- /dev/null +++ b/src/main/resources/templates/other/extract-image-scans.html @@ -0,0 +1,54 @@ + + + + + + + +
+
+
+

+
+
+
+

+ +
+
+
+ + + +
+
+ + + +
+
+ + + +
+
+ + + +
+
+ + + +
+ +
+ +
+
+
+
+
+
+ + \ No newline at end of file diff --git a/src/main/resources/templates/other/ocr-pdf.html b/src/main/resources/templates/other/ocr-pdf.html index a1470ac9..3e36b5eb 100644 --- a/src/main/resources/templates/other/ocr-pdf.html +++ b/src/main/resources/templates/other/ocr-pdf.html @@ -20,9 +20,9 @@
-
- - +
+ +

@@ -53,6 +53,19 @@
+
+ + +
+ + +
+ + +

From d0b57c0419391afcb86d47f77c06b2829ce92526 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Mon, 1 May 2023 22:16:06 +0100 Subject: [PATCH 2/4] Update push-docker.yml --- .github/workflows/push-docker.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/push-docker.yml b/.github/workflows/push-docker.yml index d0d8d165..4ef18b08 100644 --- a/.github/workflows/push-docker.yml +++ b/.github/workflows/push-docker.yml @@ -56,6 +56,17 @@ jobs: tags: | ${{ steps.versionNumber.outputs.versionNumber }}${{ github.ref == 'refs/heads/main' && '-alpha' || '' }} + - name: Generate tags 2 + id: meta2 + if: github.ref == 'refs/heads/master' + uses: docker/metadata-action@v4.4.0 + with: + images: | + ${{ secrets.DOCKER_HUB_USERNAME }}/s-pdf + ghcr.io/${{ github.repository_owner }}/s-pdf + tags: latest + + - name: Set up QEMU uses: docker/setup-qemu-action@v2.1.0 From d2aa72d8732b837e6938a20acb35a89bd3034091 Mon Sep 17 00:00:00 2001 From: Anthony Stirling <77850077+Frooodle@users.noreply.github.com> Date: Mon, 1 May 2023 22:31:30 +0100 Subject: [PATCH 3/4] Update Dockerfile --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 50bbc809..fbd71f5f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,8 +17,6 @@ ENV APP_HOME_NAME="Stirling PDF" #ENV APP_NAVBAR_NAME="Stirling PDF" # Run the application +RUN chmod +x /scripts/init.sh ENTRYPOINT ["/scripts/init.sh"] CMD ["java", "-jar", "/app.jar"] - - - From 7d90ecf91f3e7bd4cb8f3c3449e2f8b65323cd6a Mon Sep 17 00:00:00 2001 From: LeeAStone Date: Tue, 2 May 2023 10:04:57 +0100 Subject: [PATCH 4/4] Correct typo in messages_en_GB.properties (#126) Corrected spelling of "interactive" --- src/main/resources/messages_en_GB.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/resources/messages_en_GB.properties b/src/main/resources/messages_en_GB.properties index 
26e958ae..6ac9683c 100644 --- a/src/main/resources/messages_en_GB.properties +++ b/src/main/resources/messages_en_GB.properties @@ -143,7 +143,7 @@ ocr.selectText.2=Produce text file containing OCR text alongside the OCR'ed PDF ocr.selectText.3=Correct pages were scanned at a skewed angle by rotating them back into place ocr.selectText.4=Clean page so its less likely that OCR will find text in background noise. (No output change) ocr.selectText.5=Clean page so its less likely that OCR will find text in background noise, maintains cleanup in output. -ocr.selectText.6=Ignores pages that have interacive text on them, only OCRs pages that are images +ocr.selectText.6=Ignores pages that have interactive text on them, only OCRs pages that are images ocr.selectText.7=Force OCR, will OCR Every page removing all original text elements ocr.selectText.8=Normal (Will error if PDF contains text) ocr.selectText.9=Additional Settings