mirror of
https://github.com/Stirling-Tools/Stirling-PDF.git
synced 2024-11-19 22:00:11 +01:00
39 lines
1.2 KiB
Python
39 lines
1.2 KiB
Python
import cv2
|
|
import numpy as np
|
|
import sys
|
|
|
|
def is_blank_image(
    image_path: str,
    threshold: float = 10,
    white_value: int = 255,
    blur_size: int = 5,
    min_white_percentage: float = 99,
) -> bool:
    """Report whether the image at *image_path* is an (almost) all-white page.

    The image is loaded as grayscale, smoothed with a Gaussian blur, and then
    binarised so that every pixel brighter than ``white_value - threshold``
    is set to ``white_value``.  The page counts as blank when more than
    ``min_white_percentage`` percent of its pixels end up white.

    Args:
        image_path: Path of the image file to inspect.
        threshold: How far below ``white_value`` a (blurred) pixel may be
            while still being counted as white.
        white_value: Gray level treated as white; also the value assigned to
            pixels that pass the threshold.
        blur_size: Side length of the square Gaussian kernel.  OpenCV only
            accepts positive odd kernel sizes, so even values are rounded up
            to the next odd number and values below 1 are treated as 1.
        min_white_percentage: Percentage of white pixels that must be
            exceeded for the image to be reported as blank.

    Returns:
        ``True`` if the image is considered blank, ``False`` otherwise --
        including when the file cannot be read (an error line is printed in
        that case).
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # imread reports an unreadable file by returning None, not by raising.
        print(f"Error: Unable to read the image file: {image_path}")
        return False

    # Apply Gaussian blur to reduce noise.  Force the kernel size to be a
    # positive odd integer; GaussianBlur raises on anything else.
    kernel = max(1, int(blur_size)) | 1
    blurred_image = cv2.GaussianBlur(image, (kernel, kernel), 0)

    _, thresholded_image = cv2.threshold(
        blurred_image, white_value - threshold, white_value, cv2.THRESH_BINARY
    )

    # Calculate the percentage of white pixels in the thresholded image.
    white_pixels = np.count_nonzero(thresholded_image == white_value)
    white_pixel_percentage = (white_pixels / thresholded_image.size) * 100

    # bool(...) so callers get a plain Python bool rather than numpy.bool_.
    return bool(white_pixel_percentage > min_white_percentage)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point.
    # Exit codes:
    #   0 - the image is not considered blank
    #   1 - the image is considered blank
    #   2 - the script was invoked with the wrong number of arguments
    if len(sys.argv) != 2:
        print("Usage: python detect_blank_page.py <image_path>", file=sys.stderr)
        # Use a code other than 1 so a bad invocation is never mistaken for
        # a "blank page" verdict by the calling process.
        sys.exit(2)

    image_path = sys.argv[1]

    if is_blank_image(image_path):
        # Return code 1: The image is considered blank.
        sys.exit(1)

    # Return code 0: The image is not considered blank.
    sys.exit(0)