mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2024-11-23 15:21:25 +01:00
print (WIP), fake scan (WIP) and text conversion for ultra-lite (#1098)
* Changes! * lang * fake scan init, print init and pdf to text for exe * Hardening suggestions for Stirling-PDF / changes (#1099) * Switch order of literals to prevent NullPointerException * Introduced protections against predictable RNG abuse --------- Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com> * Update README.md * install custom fonts * Formats etc * version bump * disable WIP work * remove chinese font --------- Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com> Co-authored-by: systo <systo@host.docker.internal>
This commit is contained in:
parent
6c052a7b25
commit
71e93e3cb5
@ -25,15 +25,17 @@ ENV DOCKER_ENABLE_SECURITY=false \
|
||||
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
|
||||
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
|
||||
apk update && \
|
||||
apk add --no-cache \
|
||||
ca-certificates \
|
||||
tzdata \
|
||||
tini \
|
||||
openssl \
|
||||
openssl-dev \
|
||||
bash \
|
||||
curl \
|
||||
openjdk17-jre \
|
||||
su-exec \
|
||||
font-noto-cjk \
|
||||
shadow \
|
||||
# Doc conversion
|
||||
libreoffice@testing \
|
||||
@ -58,7 +60,8 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
|
||||
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
|
||||
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline && \
|
||||
chown stirlingpdfuser:stirlingpdfgroup /app.jar && \
|
||||
tesseract --list-langs
|
||||
tesseract --list-langs && \
|
||||
rm -rf /var/cache/apk/*
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
|
@ -121,6 +121,7 @@ docker run -d \
|
||||
-v /location/of/logs:/logs \
|
||||
-e DOCKER_ENABLE_SECURITY=false \
|
||||
-e INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \
|
||||
-e LANGS=en_GB \
|
||||
--name stirling-pdf \
|
||||
frooodle/s-pdf:latest
|
||||
|
||||
@ -147,6 +148,7 @@ services:
|
||||
environment:
|
||||
- DOCKER_ENABLE_SECURITY=false
|
||||
- INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false
|
||||
- LANGS=en_GB
|
||||
```
|
||||
|
||||
Note: Podman is CLI-compatible with Docker, so simply replace "docker" with "podman".
|
||||
@ -252,13 +254,13 @@ metrics:
|
||||
- ``SYSTEM_CONNECTIONTIMEOUTMINUTES`` to set custom connection timeout values
|
||||
- ``DOCKER_ENABLE_SECURITY`` to tell docker to download security jar (required as true for auth login)
|
||||
- ``INSTALL_BOOK_AND_ADVANCED_HTML_OPS`` to download calibre onto stirling-pdf enabling pdf to/from book and advanced html conversion
|
||||
- ``LANGS`` to define which custom font libraries to install for use in document conversions
|
||||
|
||||
## API
|
||||
|
||||
For those wanting to use Stirling-PDF's backend API to link with their own custom scripting to edit PDFs, you can view all existing API documentation
|
||||
[here](https://app.swaggerhub.com/apis-docs/Stirling-Tools/Stirling-PDF/), or navigate to /swagger-ui/index.html of your Stirling-PDF instance for your version's documentation (or follow the API button in the Stirling-PDF settings).
|
||||
|
||||
|
||||
## Login authentication
|
||||
|
||||
![stirling-login](images/login-light.png)
|
||||
|
@ -12,7 +12,7 @@ plugins {
|
||||
import com.github.jk1.license.render.*
|
||||
|
||||
group = 'stirling.software'
|
||||
version = '0.22.8'
|
||||
version = '0.23.0'
|
||||
sourceCompatibility = '17'
|
||||
|
||||
repositories {
|
||||
|
@ -21,6 +21,8 @@ services:
|
||||
environment:
|
||||
DOCKER_ENABLE_SECURITY: "false"
|
||||
SECURITY_ENABLELOGIN: "false"
|
||||
LANGS: "en_GB,en_US,ar_AR,de_DE,fr_FR,es_ES,zh_CN,zh_TW,ca_CA,it_IT,sv_SE,pl_PL,ro_RO,ko_KR,pt_BR,ru_RU,el_GR,hi_IN,hu_HU,tr_TR,id_ID"
|
||||
INSTALL_BOOK_AND_ADVANCED_HTML_OPS: "true"
|
||||
SYSTEM_DEFAULTLOCALE: en-US
|
||||
UI_APPNAME: Stirling-PDF
|
||||
UI_HOMEDESCRIPTION: Demo site for Stirling-PDF Latest
|
||||
|
@ -1,25 +1,30 @@
|
||||
#!/bin/sh
|
||||
#!/bin/bash
|
||||
|
||||
# Update the user and group IDs as per environment variables
|
||||
if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
|
||||
usermod -o -u "$PUID" stirlingpdfuser || true
|
||||
fi
|
||||
|
||||
|
||||
if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
|
||||
groupmod -o -g "$PGID" stirlingpdfgroup || true
|
||||
fi
|
||||
umask "$UMASK" || true
|
||||
|
||||
|
||||
if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" ]]; then
|
||||
apk add --no-cache calibre@testing
|
||||
fi
|
||||
|
||||
/scripts/download-security-jar.sh
|
||||
|
||||
if [[ -n "$LANGS" ]]; then
|
||||
/scripts/installFonts.sh $LANGS
|
||||
fi
|
||||
|
||||
echo "Setting permissions and ownership for necessary directories..."
|
||||
if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar; then
|
||||
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar || true
|
||||
# Attempt to change ownership of directories and files
|
||||
if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /app.jar; then
|
||||
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /app.jar || true
|
||||
# If chown succeeds, execute the command as stirlingpdfuser
|
||||
exec su-exec stirlingpdfuser "$@"
|
||||
else
|
||||
|
@ -13,18 +13,6 @@ if [ -d /usr/share/tesseract-ocr/5/tessdata ]; then
|
||||
cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true;
|
||||
fi
|
||||
|
||||
# Update the user and group IDs as per environment variables
|
||||
if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
|
||||
usermod -o -u "$PUID" stirlingpdfuser || true
|
||||
fi
|
||||
|
||||
|
||||
if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
|
||||
groupmod -o -g "$PGID" stirlingpdfgroup || true
|
||||
fi
|
||||
umask "$UMASK" || true
|
||||
|
||||
|
||||
# Check if TESSERACT_LANGS environment variable is set and is not empty
|
||||
if [[ -n "$TESSERACT_LANGS" ]]; then
|
||||
# Convert comma-separated values to a space-separated list
|
||||
@ -40,20 +28,4 @@ if [[ -n "$TESSERACT_LANGS" ]]; then
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" ]]; then
|
||||
apk add --no-cache calibre@testing
|
||||
fi
|
||||
|
||||
/scripts/download-security-jar.sh
|
||||
|
||||
echo "Setting permissions and ownership for necessary directories..."
|
||||
# Attempt to change ownership of directories and files
|
||||
if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar; then
|
||||
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar || true
|
||||
# If chown succeeds, execute the command as stirlingpdfuser
|
||||
exec su-exec stirlingpdfuser "$@"
|
||||
else
|
||||
# If chown fails, execute the command without changing the user context
|
||||
echo "[WARN] Chown failed, running as host user"
|
||||
exec "$@"
|
||||
fi
|
||||
/scripts/init-without-ocr.sh "$@"
|
67
scripts/installFonts.sh
Normal file
67
scripts/installFonts.sh
Normal file
@ -0,0 +1,67 @@
|
||||
#!/bin/bash
|
||||
|
||||
LANGS=$1
|
||||
|
||||
#######################################
# Install a single font package with apk.
# Installation is best-effort: a failing package is reported but does not
# abort the caller, so the remaining fonts are still attempted.
# Arguments: $1 - name of the apk package to install
# Outputs:   progress message on stdout; failure message on stderr
# Returns:   0 always (failures are only reported)
#######################################
install_font() {
  local package=$1

  echo "Installing font package: $package"
  if ! apk add --no-cache "$package"; then
    # Diagnostics belong on stderr so they are not mistaken for progress output.
    echo "Failed to install $package" >&2
  fi
  return 0
}
|
||||
|
||||
# Install common fonts used across many languages
|
||||
#common_fonts=(
|
||||
# font-terminus
|
||||
# font-dejavu
|
||||
# font-noto
|
||||
# font-noto-cjk
|
||||
# font-awesome
|
||||
# font-noto-extra
|
||||
#)
|
||||
#
|
||||
#for font in "${common_fonts[@]}"; do
|
||||
# install_font $font
|
||||
#done
|
||||
|
||||
# Map languages to the specific font packages they need.
# Each value is a space-separated list of apk package names.
declare -A language_fonts=(
  ["ar_AR"]="font-noto-arabic"
  ["zh_CN"]="font-isas-misc"
  ["zh_TW"]="font-isas-misc"
  ["ja_JP"]="font-noto font-noto-thai font-noto-tibetan font-ipa font-sony-misc font-jis-misc"
  ["ru_RU"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
  ["sr_LATN_RS"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
  ["uk_UA"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
  ["ko_KR"]="font-noto font-noto-thai font-noto-tibetan"
  ["el_GR"]="font-noto"
  ["hi_IN"]="font-noto-devanagari"
  ["bg_BG"]="font-vollkorn font-misc-cyrillic"
  ["GENERAL"]="font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra"
)

# Languages which generally do not need special packages beyond 'font-noto'
other_langs=("en_GB" "en_US" "de_DE" "fr_FR" "es_ES" "ca_CA" "it_IT" "pt_BR" "nl_NL" "sv_SE" "pl_PL" "ro_RO" "hu_HU" "tr_TR" "id_ID" "eu_ES")

# Packages already handed to install_font, so each one is installed only once
# even when several selected languages share it (e.g. font-noto).
declare -A requested_fonts=()

#######################################
# Install a font package unless it was already requested in this run.
# Globals:   requested_fonts (read/written)
# Arguments: $1 - apk package name
#######################################
install_font_once() {
  local font=$1

  if [[ -n "${requested_fonts[$font]:-}" ]]; then
    return 0
  fi
  requested_fonts[$font]=1
  install_font "$font"
}

#######################################
# Install the fonts required by a language selection.
# Globals:   language_fonts (read), other_langs (read)
# Arguments: $1 - "ALL" to install every mapped package, or a comma-separated
#                 list of language codes (e.g. "en_GB,ar_AR"); whitespace
#                 around codes and empty entries are ignored, unknown codes
#                 install nothing
# Outputs:   whatever install_font prints
# Returns:   0
#######################################
install_fonts_for_langs() {
  local selection=$1
  local -a lang_codes=()
  local code fonts font

  if [[ "$selection" == "ALL" ]]; then
    # Install all fonts from the language_fonts map
    for fonts in "${language_fonts[@]}"; do
      # shellcheck disable=SC2086 # intentional split of the package list
      for font in $fonts; do
        install_font_once "$font"
      done
    done
    return 0
  fi

  # Split comma-separated languages and install the necessary fonts
  IFS=',' read -ra lang_codes <<< "$selection"
  for code in "${lang_codes[@]}"; do
    # Tolerate "en_GB, de_DE" and stray commas: an empty code would otherwise
    # be used as an (invalid) associative-array subscript.
    code=${code//[[:space:]]/}
    [[ -n "$code" ]] || continue

    case " ${other_langs[*]} " in
      *" $code "*) fonts="font-noto" ;;
      *) fonts=${language_fonts[$code]:-} ;;
    esac

    # shellcheck disable=SC2086 # intentional split of the package list
    for font in $fonts; do
      install_font_once "$font"
    done
  done
  return 0
}

install_fonts_for_langs "$LANGS"
|
@ -62,6 +62,7 @@ public class SPdfApplication {
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException, InterruptedException {
|
||||
|
||||
SpringApplication app = new SpringApplication(SPdfApplication.class);
|
||||
app.addInitializers(new ConfigInitializer());
|
||||
if (Files.exists(Paths.get("configs/settings.yml"))) {
|
||||
|
@ -146,7 +146,6 @@ public class EndpointConfiguration {
|
||||
addEndpointToGroup("CLI", "xlsx-to-pdf");
|
||||
addEndpointToGroup("CLI", "pdf-to-word");
|
||||
addEndpointToGroup("CLI", "pdf-to-presentation");
|
||||
addEndpointToGroup("CLI", "pdf-to-text");
|
||||
addEndpointToGroup("CLI", "pdf-to-html");
|
||||
addEndpointToGroup("CLI", "pdf-to-xml");
|
||||
addEndpointToGroup("CLI", "ocr-pdf");
|
||||
@ -154,6 +153,7 @@ public class EndpointConfiguration {
|
||||
addEndpointToGroup("CLI", "url-to-pdf");
|
||||
addEndpointToGroup("CLI", "book-to-pdf");
|
||||
addEndpointToGroup("CLI", "pdf-to-book");
|
||||
addEndpointToGroup("CLI", "pdf-to-rtf");
|
||||
|
||||
// Calibre
|
||||
addEndpointToGroup("Calibre", "book-to-pdf");
|
||||
@ -175,7 +175,7 @@ public class EndpointConfiguration {
|
||||
addEndpointToGroup("LibreOffice", "xlsx-to-pdf");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-word");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-presentation");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-text");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-rtf");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-html");
|
||||
addEndpointToGroup("LibreOffice", "pdf-to-xml");
|
||||
|
||||
@ -218,6 +218,7 @@ public class EndpointConfiguration {
|
||||
addEndpointToGroup("Java", "overlay-pdf");
|
||||
addEndpointToGroup("Java", "split-pdf-by-sections");
|
||||
addEndpointToGroup("Java", REMOVE_BLANKS);
|
||||
addEndpointToGroup("Java", "pdf-to-text");
|
||||
|
||||
// Javascript
|
||||
addEndpointToGroup("Javascript", "pdf-organizer");
|
||||
|
@ -31,7 +31,8 @@ public class ConvertPDFToPDFA {
|
||||
summary = "Convert a PDF to a PDF/A",
|
||||
description =
|
||||
"This endpoint converts a PDF file to a PDF/A file. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO")
|
||||
public ResponseEntity<byte[]> pdfToPdfA(@ModelAttribute PdfToPdfARequest request) throws Exception {
|
||||
public ResponseEntity<byte[]> pdfToPdfA(@ModelAttribute PdfToPdfARequest request)
|
||||
throws Exception {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
String outputFormat = request.getOutputFormat();
|
||||
|
||||
|
@ -1,27 +1,25 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.AlphaComposite;
|
||||
import java.awt.Color;
|
||||
import java.awt.GradientPaint;
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.geom.AffineTransform;
|
||||
import java.awt.image.AffineTransformOp;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.image.BufferedImageOp;
|
||||
import java.awt.image.ConvolveOp;
|
||||
import java.awt.image.Kernel;
|
||||
import java.awt.image.RescaleOp;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.security.SecureRandom;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.LosslessFactory;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
|
||||
import org.apache.pdfbox.rendering.ImageType;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
@ -29,16 +27,17 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.github.pixee.security.Filenames;
|
||||
import io.swagger.v3.oas.annotations.Hidden;
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.PDFFile;
|
||||
import stirling.software.SPDF.utils.PdfUtils;
|
||||
import stirling.software.SPDF.utils.WebResponseUtils;
|
||||
|
||||
@RestController
|
||||
@ -49,97 +48,37 @@ public class FakeScanControllerWIP {
|
||||
private static final Logger logger = LoggerFactory.getLogger(FakeScanControllerWIP.class);
|
||||
|
||||
//TODO
|
||||
@Hidden
|
||||
// @PostMapping(consumes = "multipart/form-data", value = "/fakeScan")
|
||||
@Operation(
|
||||
summary = "Repair a PDF file",
|
||||
description =
|
||||
"This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response.")
|
||||
public ResponseEntity<byte[]> repairPdf(@ModelAttribute PDFFile request) throws IOException {
|
||||
//@PostMapping(consumes = "multipart/form-data", value = "/fake-scan")
|
||||
//@Operation(
|
||||
// summary = "Repair a PDF file",
|
||||
// description =
|
||||
// "This endpoint repairs a given PDF file by running Ghostscript command. The PDF is first saved to a temporary location, repaired, read back, and then returned as a response.")
|
||||
public ResponseEntity<byte[]> fakeScan(@ModelAttribute PDFFile request) throws IOException {
|
||||
MultipartFile inputFile = request.getFileInput();
|
||||
|
||||
// Load the PDF document
|
||||
PDDocument document = Loader.loadPDF(inputFile.getBytes());
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
pdfRenderer.setSubsamplingAllowed(true);
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page) {
|
||||
BufferedImage image = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
ImageIO.write(image, "png", new File("scanned-" + (page + 1) + ".png"));
|
||||
PDFRenderer renderer = new PDFRenderer(document);
|
||||
List<BufferedImage> images = new ArrayList<>();
|
||||
// Convert each page to an image
|
||||
for (int i = 0; i < document.getNumberOfPages(); i++) {
|
||||
BufferedImage image = renderer.renderImageWithDPI(i, 150, ImageType.GRAY);
|
||||
images.add(processImage(image));
|
||||
}
|
||||
document.close();
|
||||
|
||||
// Constants
|
||||
int scannedness = 90; // Value between 0 and 100
|
||||
int dirtiness = 0; // Value between 0 and 100
|
||||
|
||||
// Load the source image
|
||||
BufferedImage sourceImage = ImageIO.read(new File("scanned-1.png"));
|
||||
|
||||
// Create the destination image
|
||||
BufferedImage destinationImage =
|
||||
new BufferedImage(
|
||||
sourceImage.getWidth(), sourceImage.getHeight(), sourceImage.getType());
|
||||
|
||||
// Apply a brightness and contrast effect based on the "scanned-ness"
|
||||
float scaleFactor = 1.0f + (scannedness / 100.0f) * 0.5f; // Between 1.0 and 1.5
|
||||
float offset = scannedness * 1.5f; // Between 0 and 150
|
||||
BufferedImageOp op = new RescaleOp(scaleFactor, offset, null);
|
||||
op.filter(sourceImage, destinationImage);
|
||||
|
||||
// Apply a rotation effect
|
||||
double rotationRequired =
|
||||
Math.toRadians(
|
||||
(new SecureRandom().nextInt(3 - 1)
|
||||
+ 1)); // Random angle between 1 and 3 degrees
|
||||
double locationX = destinationImage.getWidth() / 2;
|
||||
double locationY = destinationImage.getHeight() / 2;
|
||||
AffineTransform tx =
|
||||
AffineTransform.getRotateInstance(rotationRequired, locationX, locationY);
|
||||
AffineTransformOp rotateOp = new AffineTransformOp(tx, AffineTransformOp.TYPE_BILINEAR);
|
||||
destinationImage = rotateOp.filter(destinationImage, null);
|
||||
|
||||
// Apply a blur effect based on the "scanned-ness"
|
||||
float blurIntensity = scannedness / 100.0f * 0.2f; // Between 0.0 and 0.2
|
||||
float[] matrix = {
|
||||
blurIntensity, blurIntensity, blurIntensity,
|
||||
blurIntensity, blurIntensity, blurIntensity,
|
||||
blurIntensity, blurIntensity, blurIntensity
|
||||
};
|
||||
BufferedImageOp blurOp =
|
||||
new ConvolveOp(new Kernel(3, 3, matrix), ConvolveOp.EDGE_NO_OP, null);
|
||||
destinationImage = blurOp.filter(destinationImage, null);
|
||||
|
||||
// Add noise to the image based on the "dirtiness"
|
||||
Random random = new SecureRandom();
|
||||
for (int y = 0; y < destinationImage.getHeight(); y++) {
|
||||
for (int x = 0; x < destinationImage.getWidth(); x++) {
|
||||
if (random.nextInt(100) < dirtiness) {
|
||||
// Change the pixel color to black randomly based on the "dirtiness"
|
||||
destinationImage.setRGB(x, y, Color.BLACK.getRGB());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save the image
|
||||
ImageIO.write(destinationImage, "PNG", new File("scanned-1.png"));
|
||||
|
||||
PDDocument documentOut = new PDDocument();
|
||||
for (int page = 1; page <= document.getNumberOfPages(); ++page) {
|
||||
BufferedImage bim = ImageIO.read(new File("scanned-" + page + ".png"));
|
||||
|
||||
// Adjust the dimensions of the page
|
||||
PDPage pdPage = new PDPage(new PDRectangle(bim.getWidth() - 1, bim.getHeight() - 1));
|
||||
documentOut.addPage(pdPage);
|
||||
|
||||
PDImageXObject pdImage = LosslessFactory.createFromImage(documentOut, bim);
|
||||
PDPageContentStream contentStream = new PDPageContentStream(documentOut, pdPage);
|
||||
|
||||
// Draw the image with a slight offset and enlarged dimensions
|
||||
contentStream.drawImage(pdImage, -1, -1, bim.getWidth() + 2, bim.getHeight() + 2);
|
||||
contentStream.close();
|
||||
}
|
||||
// Create a new PDF document with the processed images
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
documentOut.save(baos);
|
||||
documentOut.close();
|
||||
PDDocument newDocument = new PDDocument();
|
||||
for (BufferedImage img : images) {
|
||||
// PDPageContentStream contentStream = new PDPageContentStream(newDocument, new
|
||||
// PDPage());
|
||||
PDImageXObject pdImage = JPEGFactory.createFromImage(newDocument, img);
|
||||
PdfUtils.addImageToDocument(newDocument, pdImage, "maintainAspectRatio", false);
|
||||
}
|
||||
|
||||
newDocument.save(baos);
|
||||
newDocument.close();
|
||||
|
||||
// Return the optimized PDF as a response
|
||||
String outputFilename =
|
||||
@ -148,4 +87,164 @@ public class FakeScanControllerWIP {
|
||||
+ "_scanned.pdf";
|
||||
return WebResponseUtils.boasToWebResponse(baos, outputFilename);
|
||||
}
|
||||
|
||||
public BufferedImage processImage(BufferedImage image) {
|
||||
// Rotation
|
||||
|
||||
image = rotate(image);
|
||||
// image = softenEdges(image, 5);
|
||||
image = applyGaussianBlur(image, 0.5);
|
||||
addGaussianNoise(image, 0.25);
|
||||
image = linearStretch(image);
|
||||
|
||||
return image;
|
||||
}
|
||||
|
||||
/**
 * Rotates the image by a fixed one degree around its centre using bicubic interpolation.
 *
 * <p>NOTE(review): the centre is computed with integer division, so it is truncated for
 * odd widths/heights.
 *
 * @param image the image to rotate
 * @return a new image holding the rotated result
 */
private BufferedImage rotate(BufferedImage image) {
    final double angle = Math.toRadians(1.0);
    final double centreX = image.getWidth() / 2;
    final double centreY = image.getHeight() / 2;

    AffineTransformOp rotation =
            new AffineTransformOp(
                    AffineTransform.getRotateInstance(angle, centreX, centreY),
                    AffineTransformOp.TYPE_BICUBIC);
    return rotation.filter(image, null);
}
|
||||
|
||||
private BufferedImage applyGaussianBlur(BufferedImage image, double sigma) {
|
||||
int radius = 3; // Fixed radius size for simplicity
|
||||
|
||||
int size = 2 * radius + 1;
|
||||
float[] data = new float[size * size];
|
||||
double sum = 0.0;
|
||||
|
||||
for (int i = -radius; i <= radius; i++) {
|
||||
for (int j = -radius; j <= radius; j++) {
|
||||
double xDistance = i * i;
|
||||
double yDistance = j * j;
|
||||
double g = Math.exp(-(xDistance + yDistance) / (2 * sigma * sigma));
|
||||
data[(i + radius) * size + j + radius] = (float) g;
|
||||
sum += g;
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize the kernel
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
data[i] /= sum;
|
||||
}
|
||||
|
||||
Kernel kernel = new Kernel(size, size, data);
|
||||
BufferedImageOp op = new ConvolveOp(kernel, ConvolveOp.EDGE_NO_OP, null);
|
||||
return op.filter(image, null);
|
||||
}
|
||||
|
||||
public BufferedImage softenEdges(BufferedImage image, int featherRadius) {
|
||||
int width = image.getWidth();
|
||||
int height = image.getHeight();
|
||||
BufferedImage output = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
|
||||
|
||||
Graphics2D g2 = output.createGraphics();
|
||||
g2.drawImage(image, 0, 0, null);
|
||||
g2.setComposite(AlphaComposite.DstIn);
|
||||
g2.setPaint(
|
||||
new GradientPaint(
|
||||
0, 0, new Color(0, 0, 0, 1f), 0, featherRadius, new Color(0, 0, 0, 0f)));
|
||||
g2.fillRect(0, 0, width, featherRadius); // top edge
|
||||
g2.setPaint(
|
||||
new GradientPaint(
|
||||
0,
|
||||
height - featherRadius,
|
||||
new Color(0, 0, 0, 0f),
|
||||
0,
|
||||
height,
|
||||
new Color(0, 0, 0, 1f)));
|
||||
g2.fillRect(0, height - featherRadius, width, featherRadius); // bottom edge
|
||||
g2.setPaint(
|
||||
new GradientPaint(
|
||||
0, 0, new Color(0, 0, 0, 1f), featherRadius, 0, new Color(0, 0, 0, 0f)));
|
||||
g2.fillRect(0, 0, featherRadius, height); // left edge
|
||||
g2.setPaint(
|
||||
new GradientPaint(
|
||||
width - featherRadius,
|
||||
0,
|
||||
new Color(0, 0, 0, 0f),
|
||||
width,
|
||||
0,
|
||||
new Color(0, 0, 0, 1f)));
|
||||
g2.fillRect(width - featherRadius, 0, featherRadius, height); // right edge
|
||||
g2.dispose();
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private void addGaussianNoise(BufferedImage image, double strength) {
|
||||
Random rand = new SecureRandom();
|
||||
int width = image.getWidth();
|
||||
int height = image.getHeight();
|
||||
|
||||
for (int i = 0; i < width; i++) {
|
||||
for (int j = 0; j < height; j++) {
|
||||
int rgba = image.getRGB(i, j);
|
||||
int alpha = (rgba >> 24) & 0xff;
|
||||
int red = (rgba >> 16) & 0xff;
|
||||
int green = (rgba >> 8) & 0xff;
|
||||
int blue = rgba & 0xff;
|
||||
|
||||
// Apply Gaussian noise
|
||||
red = (int) (red + rand.nextGaussian() * strength);
|
||||
green = (int) (green + rand.nextGaussian() * strength);
|
||||
blue = (int) (blue + rand.nextGaussian() * strength);
|
||||
|
||||
// Clamping values to the 0-255 range
|
||||
red = Math.min(Math.max(0, red), 255);
|
||||
green = Math.min(Math.max(0, green), 255);
|
||||
blue = Math.min(Math.max(0, blue), 255);
|
||||
|
||||
image.setRGB(i, j, (alpha << 24) | (red << 16) | (green << 8) | blue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public BufferedImage linearStretch(BufferedImage image) {
|
||||
int width = image.getWidth();
|
||||
int height = image.getHeight();
|
||||
int min = 255;
|
||||
int max = 0;
|
||||
|
||||
// First pass: find the min and max grayscale values
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
int rgb = image.getRGB(x, y);
|
||||
int gray =
|
||||
(int)
|
||||
(((rgb >> 16) & 0xff) * 0.299
|
||||
+ ((rgb >> 8) & 0xff) * 0.587
|
||||
+ (rgb & 0xff) * 0.114); // Convert to grayscale
|
||||
if (gray < min) min = gray;
|
||||
if (gray > max) max = gray;
|
||||
}
|
||||
}
|
||||
|
||||
// Second pass: stretch the histogram
|
||||
for (int y = 0; y < height; y++) {
|
||||
for (int x = 0; x < width; x++) {
|
||||
int rgb = image.getRGB(x, y);
|
||||
int alpha = (rgb >> 24) & 0xff;
|
||||
int red = (rgb >> 16) & 0xff;
|
||||
int green = (rgb >> 8) & 0xff;
|
||||
int blue = rgb & 0xff;
|
||||
|
||||
// Apply linear stretch to each channel
|
||||
red = (int) (((red - min) / (float) (max - min)) * 255);
|
||||
green = (int) (((green - min) / (float) (max - min)) * 255);
|
||||
blue = (int) (((blue - min) / (float) (max - min)) * 255);
|
||||
|
||||
// Set new RGB value maintaining the alpha channel
|
||||
rgb = (alpha << 24) | (red << 16) | (green << 8) | blue;
|
||||
image.setRGB(x, y, rgb);
|
||||
}
|
||||
}
|
||||
|
||||
return image;
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,106 @@
|
||||
package stirling.software.SPDF.controller.api.misc;
|
||||
|
||||
import java.awt.Graphics;
|
||||
import java.awt.Graphics2D;
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.awt.print.PageFormat;
|
||||
import java.awt.print.Printable;
|
||||
import java.awt.print.PrinterException;
|
||||
import java.awt.print.PrinterJob;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import javax.imageio.ImageIO;
|
||||
import javax.print.PrintService;
|
||||
import javax.print.PrintServiceLookup;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.printing.PDFPageable;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ModelAttribute;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import io.swagger.v3.oas.annotations.Operation;
|
||||
import io.swagger.v3.oas.annotations.tags.Tag;
|
||||
|
||||
import stirling.software.SPDF.model.api.misc.PrintFileRequest;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/misc")
|
||||
@Tag(name = "Misc", description = "Miscellaneous APIs")
|
||||
public class PrintFileController {
|
||||
|
||||
//TODO
|
||||
//@PostMapping(value = "/print-file", consumes = "multipart/form-data")
|
||||
//@Operation(
|
||||
// summary = "Prints PDF/Image file to a set printer",
|
||||
// description =
|
||||
// "Input of PDF or Image along with a printer name/URL/IP to match against to send it to (Fire and forget) Input:Any Output:N/A Type:SISO")
|
||||
public ResponseEntity<String> printFile(@ModelAttribute PrintFileRequest request)
|
||||
throws IOException {
|
||||
MultipartFile file = request.getFileInput();
|
||||
String printerName = request.getPrinterName();
|
||||
String contentType = file.getContentType();
|
||||
try {
|
||||
// Find matching printer
|
||||
PrintService[] services = PrintServiceLookup.lookupPrintServices(null, null);
|
||||
PrintService selectedService =
|
||||
Arrays.stream(services)
|
||||
.filter(
|
||||
service ->
|
||||
service.getName().toLowerCase().contains(printerName))
|
||||
.findFirst()
|
||||
.orElseThrow(
|
||||
() ->
|
||||
new IllegalArgumentException(
|
||||
"No matching printer found"));
|
||||
|
||||
System.out.println("Selected Printer: " + selectedService.getName());
|
||||
|
||||
if ("application/pdf".equals(contentType)) {
|
||||
PDDocument document = Loader.loadPDF(file.getBytes());
|
||||
PrinterJob job = PrinterJob.getPrinterJob();
|
||||
job.setPrintService(selectedService);
|
||||
job.setPageable(new PDFPageable(document));
|
||||
job.print();
|
||||
document.close();
|
||||
} else if (contentType.startsWith("image/")) {
|
||||
BufferedImage image = ImageIO.read(file.getInputStream());
|
||||
PrinterJob job = PrinterJob.getPrinterJob();
|
||||
job.setPrintService(selectedService);
|
||||
job.setPrintable(
|
||||
new Printable() {
|
||||
public int print(
|
||||
Graphics graphics, PageFormat pageFormat, int pageIndex)
|
||||
throws PrinterException {
|
||||
if (pageIndex != 0) {
|
||||
return NO_SUCH_PAGE;
|
||||
}
|
||||
Graphics2D g2d = (Graphics2D) graphics;
|
||||
g2d.translate(
|
||||
pageFormat.getImageableX(), pageFormat.getImageableY());
|
||||
g2d.drawImage(
|
||||
image,
|
||||
0,
|
||||
0,
|
||||
(int) pageFormat.getImageableWidth(),
|
||||
(int) pageFormat.getImageableHeight(),
|
||||
null);
|
||||
return PAGE_EXISTS;
|
||||
}
|
||||
});
|
||||
job.print();
|
||||
}
|
||||
return new ResponseEntity<>(
|
||||
"File printed successfully to " + selectedService.getName(), HttpStatus.OK);
|
||||
} catch (Exception e) {
|
||||
System.err.println("Failed to print: " + e.getMessage());
|
||||
return new ResponseEntity<>(e.getMessage(), HttpStatus.BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
}
|
@ -54,6 +54,13 @@ public class OtherWebController {
|
||||
return "misc/add-page-numbers";
|
||||
}
|
||||
|
||||
@GetMapping("/fake-scan")
|
||||
@Hidden
|
||||
public String fakeScanForm(Model model) {
|
||||
model.addAttribute("currentPage", "fake-scan");
|
||||
return "misc/fake-scan";
|
||||
}
|
||||
|
||||
@GetMapping("/extract-images")
|
||||
@Hidden
|
||||
public String extractImagesForm(Model model) {
|
||||
@ -82,6 +89,13 @@ public class OtherWebController {
|
||||
return "misc/compare";
|
||||
}
|
||||
|
||||
@GetMapping("/print-file")
|
||||
@Hidden
|
||||
public String printFileForm(Model model) {
|
||||
model.addAttribute("currentPage", "print-file");
|
||||
return "misc/print-file";
|
||||
}
|
||||
|
||||
public List<String> getAvailableTesseractLanguages() {
|
||||
String tessdataDir = "/usr/share/tessdata";
|
||||
File[] files = new File(tessdataDir).listFiles();
|
||||
|
@ -0,0 +1,15 @@
|
||||
package stirling.software.SPDF.model.api.misc;
|
||||
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
|
||||
import lombok.Data;
|
||||
import lombok.EqualsAndHashCode;
|
||||
import stirling.software.SPDF.model.api.PDFFile;
|
||||
|
||||
/**
 * Request model that adds a printer name to whatever {@link PDFFile} already carries.
 *
 * <p>Lombok generates the accessors, and equals/hashCode include the superclass
 * state (callSuper = true).
 * NOTE(review): presumably bound by the print-file API endpoint - confirm against
 * the controller, which is not part of this class.
 */
@Data
|
||||
@EqualsAndHashCode(callSuper = true)
|
||||
public class PrintFileRequest extends PDFFile {
|
||||
|
||||
// Printer name supplied by the caller; documented in the schema as required
// and as a name "to match against".
@Schema(description = "Name of printer to match against", required = true)
|
||||
private String printerName;
|
||||
}
|
@ -336,7 +336,7 @@ public class PdfUtils {
|
||||
}
|
||||
}
|
||||
|
||||
private static void addImageToDocument(
|
||||
public static void addImageToDocument(
|
||||
PDDocument doc, PDImageXObject image, String fitOption, boolean autoRotate)
|
||||
throws IOException {
|
||||
boolean imageIsLandscape = image.getWidth() > image.getHeight();
|
||||
|
@ -19,14 +19,13 @@
|
||||
<div class="mb-3">
|
||||
<label th:text="#{PDFToText.selectText.1}"></label>
|
||||
<select class="form-control" name="outputFormat">
|
||||
<option value="rtf">RTF</option>
|
||||
<option th:if="${@endpointConfiguration.isEndpointEnabled('pdf-to-rtf')}" value="rtf">RTF</option>
|
||||
<option value="txt">TXT</option>
|
||||
</select>
|
||||
</div>
|
||||
<br>
|
||||
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{PDFToText.submit}"></button>
|
||||
</form>
|
||||
<p class="mt-3" th:text="#{PDFToText.credit}"></p>
|
||||
<p th:if="${@endpointConfiguration.isEndpointEnabled('pdf-to-rtf')}" class="mt-3" th:text="#{PDFToText.credit}"></p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
29
src/main/resources/templates/misc/fake-scan.html
Normal file
29
src/main/resources/templates/misc/fake-scan.html
Normal file
@ -0,0 +1,29 @@
|
||||
<!DOCTYPE html>
|
||||
<html th:lang="${#locale.language}" th:dir="#{language.direction}" th:data-language="${#locale.toString()}" xmlns:th="http://www.thymeleaf.org">
|
||||
<head>
|
||||
<th:block th:insert="~{fragments/common :: head(title=#{fakeScan.title}, header=#{fakeScan.header})}"></th:block>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<th:block th:insert="~{fragments/common :: game}"></th:block>
|
||||
<div id="page-container">
|
||||
<div id="content-wrap">
|
||||
<th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block>
|
||||
<br><br>
|
||||
<div class="container">
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-6">
|
||||
<h2 th:text="#{fakeScan.header}"></h2>
|
||||
<!-- Upload form: posts a single PDF (file selector named 'fileInput') as multipart form data to api/v1/misc/fake-scan. -->
<form method="post" enctype="multipart/form-data" th:action="@{api/v1/misc/fake-scan}">
|
||||
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf')}"></div>
|
||||
<br>
|
||||
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{fakeScan.submit}"></button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
35
src/main/resources/templates/misc/print-file.html
Normal file
35
src/main/resources/templates/misc/print-file.html
Normal file
@ -0,0 +1,35 @@
|
||||
<!DOCTYPE html>
|
||||
<html th:lang="${#locale.language}" th:dir="#{language.direction}" th:data-language="${#locale.toString()}" xmlns:th="http://www.thymeleaf.org">
|
||||
<head>
|
||||
<th:block th:insert="~{fragments/common :: head(title=#{printFile.title}, header=#{printFile.header})}"></th:block>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<th:block th:insert="~{fragments/common :: game}"></th:block>
|
||||
<div id="page-container">
|
||||
<div id="content-wrap">
|
||||
<th:block th:insert="~{fragments/navbar.html :: navbar}"></th:block>
|
||||
<br><br>
|
||||
<div class="container">
|
||||
<div class="row justify-content-center">
|
||||
<div class="col-md-6">
|
||||
<h2 th:text="#{printFile.header}"></h2>
|
||||
<!-- Upload form: posts one PDF or image file (file selector named 'fileInput') plus the "printerName" text field as multipart form data to api/v1/misc/print-file. -->
<form action="#" th:action="@{api/v1/misc/print-file}" method="post" enctype="multipart/form-data">
|
||||
<div th:replace="~{fragments/common :: fileSelector(name='fileInput', multiple=false, accept='application/pdf,image/*')}"></div>
|
||||
<div class="card mb-3">
|
||||
<div class="card-body">
|
||||
<h4 th:text="#{printFile.selectText.1}">Select Printer</h4> <!-- "Select Printer" is static fallback text; th:text resolves message key printFile.selectText.1 -->
|
||||
<label for="printerName" th:text="#{printFile.selectText.2}">Printer Name:</label> <!-- "Printer Name:" is static fallback text; th:text resolves message key printFile.selectText.2 -->
|
||||
<input type="text" name="printerName" id="printerName" class="form-control">
|
||||
</div>
|
||||
</div>
|
||||
<button type="submit" id="submitBtn" class="btn btn-primary" th:text="#{printFile.submit}"></button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<th:block th:insert="~{fragments/footer.html :: footer}"></th:block>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
8
test.sh
8
test.sh
@ -78,8 +78,8 @@ main() {
|
||||
|
||||
|
||||
# Building Docker images
|
||||
docker build --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest -f ./Dockerfile .
|
||||
docker build --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest-ultra-lite -f ./Dockerfile-ultra-lite .
|
||||
docker build --no-cache --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest -f ./Dockerfile .
|
||||
docker build --no-cache --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest-ultra-lite -f ./Dockerfile-ultra-lite .
|
||||
|
||||
# Test each configuration
|
||||
run_tests "Stirling-PDF-Ultra-Lite" "./exampleYmlFiles/docker-compose-latest-ultra-lite.yml"
|
||||
@ -94,8 +94,8 @@ main() {
|
||||
|
||||
|
||||
# Building Docker images with security enabled
|
||||
docker build --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest -f ./Dockerfile .
|
||||
docker build --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest-ultra-lite -f ./Dockerfile-ultra-lite .
|
||||
docker build --no-cache --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest -f ./Dockerfile .
|
||||
docker build --no-cache --build-arg VERSION_TAG=alpha -t frooodle/s-pdf:latest-ultra-lite -f ./Dockerfile-ultra-lite .
|
||||
|
||||
# Test each configuration with security
|
||||
run_tests "Stirling-PDF-Ultra-Lite-Security" "./exampleYmlFiles/docker-compose-latest-ultra-lite-security.yml"
|
||||
|
Loading…
Reference in New Issue
Block a user