2023-04-16 23:03:30 +02:00
|
|
|
package stirling.software.SPDF.utils;
|
2023-04-22 13:51:01 +02:00
|
|
|
|
2023-04-16 23:03:30 +02:00
|
|
|
import java.io.ByteArrayOutputStream;
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.FileInputStream;
|
|
|
|
import java.io.IOException;
|
|
|
|
import java.nio.file.Files;
|
|
|
|
import java.nio.file.Path;
|
|
|
|
import java.nio.file.StandardCopyOption;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Arrays;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.zip.ZipEntry;
|
|
|
|
import java.util.zip.ZipOutputStream;
|
|
|
|
|
|
|
|
import org.apache.commons.io.FileUtils;
|
|
|
|
import org.apache.commons.io.IOUtils;
|
|
|
|
import org.springframework.http.HttpStatus;
|
|
|
|
import org.springframework.http.MediaType;
|
|
|
|
import org.springframework.http.ResponseEntity;
|
|
|
|
import org.springframework.web.multipart.MultipartFile;
|
2023-04-22 13:51:01 +02:00
|
|
|
|
2023-07-29 14:53:30 +02:00
|
|
|
import stirling.software.SPDF.utils.ProcessExecutor.ProcessExecutorResult;
|
|
|
|
|
2023-04-16 23:03:30 +02:00
|
|
|
public class PDFToFile {
|
2023-04-22 13:51:01 +02:00
|
|
|
public ResponseEntity<byte[]> processPdfToOfficeFormat(
|
|
|
|
MultipartFile inputFile, String outputFormat, String libreOfficeFilter)
|
|
|
|
throws IOException, InterruptedException {
|
2023-04-16 23:03:30 +02:00
|
|
|
|
|
|
|
if (!"application/pdf".equals(inputFile.getContentType())) {
|
|
|
|
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Get the original PDF file name without the extension
|
|
|
|
String originalPdfFileName = inputFile.getOriginalFilename();
|
|
|
|
String pdfBaseName = originalPdfFileName.substring(0, originalPdfFileName.lastIndexOf('.'));
|
|
|
|
|
|
|
|
// Validate output format
|
2023-04-22 13:51:01 +02:00
|
|
|
List<String> allowedFormats =
|
|
|
|
Arrays.asList(
|
|
|
|
"doc",
|
|
|
|
"docx",
|
|
|
|
"odt",
|
|
|
|
"ppt",
|
|
|
|
"pptx",
|
|
|
|
"odp",
|
|
|
|
"rtf",
|
|
|
|
"html",
|
|
|
|
"xml",
|
|
|
|
"txt:Text");
|
2023-04-16 23:03:30 +02:00
|
|
|
if (!allowedFormats.contains(outputFormat)) {
|
|
|
|
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
|
|
|
|
}
|
2023-04-22 13:51:01 +02:00
|
|
|
|
2023-04-16 23:03:30 +02:00
|
|
|
Path tempInputFile = null;
|
|
|
|
Path tempOutputDir = null;
|
|
|
|
byte[] fileBytes;
|
2023-05-01 22:57:48 +02:00
|
|
|
String fileName = "temp.file";
|
2023-04-16 23:03:30 +02:00
|
|
|
|
|
|
|
try {
|
|
|
|
// Save the uploaded file to a temporary location
|
|
|
|
tempInputFile = Files.createTempFile("input_", ".pdf");
|
|
|
|
Files.copy(
|
|
|
|
inputFile.getInputStream(), tempInputFile, StandardCopyOption.REPLACE_EXISTING);
|
|
|
|
|
|
|
|
// Prepare the output directory
|
|
|
|
tempOutputDir = Files.createTempDirectory("output_");
|
|
|
|
|
|
|
|
// Run the LibreOffice command
|
2023-04-22 13:51:01 +02:00
|
|
|
List<String> command =
|
|
|
|
new ArrayList<>(
|
|
|
|
Arrays.asList(
|
|
|
|
"soffice",
|
|
|
|
"--infilter=" + libreOfficeFilter,
|
|
|
|
"--convert-to",
|
|
|
|
outputFormat,
|
|
|
|
"--outdir",
|
|
|
|
tempOutputDir.toString(),
|
|
|
|
tempInputFile.toString()));
|
2023-07-29 14:53:30 +02:00
|
|
|
ProcessExecutorResult returnCode =
|
|
|
|
ProcessExecutor.getInstance(ProcessExecutor.Processes.LIBRE_OFFICE)
|
|
|
|
.runCommandWithOutputHandling(command);
|
2023-04-16 23:03:30 +02:00
|
|
|
|
|
|
|
// Get output files
|
|
|
|
List<File> outputFiles = Arrays.asList(tempOutputDir.toFile().listFiles());
|
|
|
|
|
|
|
|
if (outputFiles.size() == 1) {
|
|
|
|
// Return single output file
|
|
|
|
File outputFile = outputFiles.get(0);
|
2023-04-22 13:51:01 +02:00
|
|
|
if (outputFormat.equals("txt:Text")) {
|
|
|
|
outputFormat = "txt";
|
2023-04-16 23:03:30 +02:00
|
|
|
}
|
2023-05-01 22:57:48 +02:00
|
|
|
fileName = pdfBaseName + "." + outputFormat;
|
2023-04-16 23:03:30 +02:00
|
|
|
fileBytes = FileUtils.readFileToByteArray(outputFile);
|
|
|
|
} else {
|
|
|
|
// Return output files in a ZIP archive
|
2023-05-01 22:57:48 +02:00
|
|
|
fileName = pdfBaseName + "To" + outputFormat + ".zip";
|
2023-04-16 23:03:30 +02:00
|
|
|
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
|
|
|
|
ZipOutputStream zipOutputStream = new ZipOutputStream(byteArrayOutputStream);
|
|
|
|
|
|
|
|
for (File outputFile : outputFiles) {
|
|
|
|
ZipEntry entry = new ZipEntry(outputFile.getName());
|
|
|
|
zipOutputStream.putNextEntry(entry);
|
|
|
|
FileInputStream fis = new FileInputStream(outputFile);
|
|
|
|
IOUtils.copy(fis, zipOutputStream);
|
|
|
|
fis.close();
|
|
|
|
zipOutputStream.closeEntry();
|
|
|
|
}
|
|
|
|
|
|
|
|
zipOutputStream.close();
|
|
|
|
fileBytes = byteArrayOutputStream.toByteArray();
|
|
|
|
}
|
|
|
|
|
|
|
|
} finally {
|
|
|
|
// Clean up the temporary files
|
|
|
|
if (tempInputFile != null) Files.delete(tempInputFile);
|
|
|
|
if (tempOutputDir != null) FileUtils.deleteDirectory(tempOutputDir.toFile());
|
|
|
|
}
|
2023-05-31 21:15:48 +02:00
|
|
|
return WebResponseUtils.bytesToWebResponse(
|
|
|
|
fileBytes, fileName, MediaType.APPLICATION_OCTET_STREAM);
|
2023-04-16 23:03:30 +02:00
|
|
|
}
|
|
|
|
}
|