diff --git a/build.gradle b/build.gradle index 117b7c09..ce3c8f53 100644 --- a/build.gradle +++ b/build.gradle @@ -21,7 +21,7 @@ dependencies { // https://mvnrepository.com/artifact/org.apache.pdfbox/jbig2-imageio implementation group: 'org.apache.pdfbox', name: 'jbig2-imageio', version: '3.0.4' - + implementation 'commons-io:commons-io:2.11.0' //general PDF implementation 'org.apache.pdfbox:pdfbox:2.0.27' diff --git a/src/main/java/stirling/software/SPDF/controller/OCRController.java b/src/main/java/stirling/software/SPDF/controller/OCRController.java index 0e2bd04b..3713cbda 100644 --- a/src/main/java/stirling/software/SPDF/controller/OCRController.java +++ b/src/main/java/stirling/software/SPDF/controller/OCRController.java @@ -6,6 +6,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -29,6 +30,7 @@ import org.springframework.web.servlet.ModelAndView; import stirling.software.SPDF.utils.ProcessExecutor; //import com.spire.pdf.*; import java.util.concurrent.Semaphore; +import java.util.regex.Pattern; @Controller public class OCRController { @@ -41,8 +43,6 @@ public class OCRController { modelAndView.addObject("currentPage", "ocr-pdf"); return modelAndView; } - - private final Semaphore semaphore = new Semaphore(2); @PostMapping("/ocr-pdf") public ResponseEntity processPdfWithOCR(@RequestParam("fileInput") MultipartFile inputFile, @@ -59,9 +59,19 @@ public class OCRController { throw new IOException("Please select at least one language."); } + // Validate and sanitize selected languages using regex + String languagePattern = "^[a-zA-Z]{3}$"; // Regex pattern for three-letter language codes + selectedLanguages = selectedLanguages.stream() + .filter(lang -> Pattern.matches(languagePattern, lang)) + .collect(Collectors.toList()); + + + if (selectedLanguages.isEmpty()) { + throw new IOException("None of the selected languages are valid."); + } // Save the uploaded file to a temporary location Path tempInputFile = Files.createTempFile("input_", ".pdf"); - inputFile.transferTo(tempInputFile.toFile()); + Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING); // Prepare the output file path Path tempOutputFile = Files.createTempFile("output_", ".pdf"); diff --git a/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java b/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java index 71c8b461..ab2106aa 100644 --- a/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java +++ b/src/main/java/stirling/software/SPDF/controller/converters/ConvertOfficeController.java @@ -1,6 +1,7 @@ package stirling.software.SPDF.controller.converters; import java.io.IOException; +import java.nio.file.StandardCopyOption; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -14,7 +15,7 @@ import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.multipart.MultipartFile; - +import org.apache.commons.io.FilenameUtils; import stirling.software.SPDF.utils.PdfUtils; import stirling.software.SPDF.utils.ProcessExecutor; @Controller @@ -39,14 +40,20 @@ public class ConvertOfficeController { public byte[] convertToPdf(MultipartFile inputFile) throws IOException, InterruptedException { + // Check for valid file extension + String originalFilename = inputFile.getOriginalFilename(); + if (originalFilename == null || !isValidFileExtension(FilenameUtils.getExtension(originalFilename))) { + throw new IllegalArgumentException("Invalid file extension"); + } + // Save the uploaded file to a temporary location - Path tempInputFile = Files.createTempFile("input_", "." + getFileExtension(inputFile.getOriginalFilename())); - inputFile.transferTo(tempInputFile.toFile()); + Path tempInputFile = Files.createTempFile("input_", "." + FilenameUtils.getExtension(originalFilename)); + Files.copy(inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING); // Prepare the output file path Path tempOutputFile = Files.createTempFile("output_", ".pdf"); - // Run the LibreOffice command + // Run the LibreOffice command List command = new ArrayList<>(Arrays.asList("unoconv", "-vvv", "-f", "pdf", @@ -64,14 +71,8 @@ public byte[] convertToPdf(MultipartFile inputFile) throws IOException, Interrup return pdfBytes; } - - - -private String getFileExtension(String fileName) { - int dotIndex = fileName.lastIndexOf('.'); - if (dotIndex == -1) { - return ""; - } - return fileName.substring(dotIndex + 1); +private boolean isValidFileExtension(String fileExtension) { + String extensionPattern = "^(?i)[a-z0-9]{2,4}$"; + return fileExtension.matches(extensionPattern); } }