diff --git a/scripts/detect-blank-pages.py b/scripts/detect-blank-pages.py index 474c2735..4ca724c2 100644 --- a/scripts/detect-blank-pages.py +++ b/scripts/detect-blank-pages.py @@ -1,10 +1,11 @@ import cv2 import sys import argparse +import numpy as np def is_blank_image(image_path, threshold=10, white_percent=99, white_value=255, blur_size=5): image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE) - + if image is None: print(f"Error: Unable to read the image file: {image_path}") return False @@ -15,19 +16,11 @@ def is_blank_image(image_path, threshold=10, white_percent=99, white_value=255, _, thresholded_image = cv2.threshold(blurred_image, white_value - threshold, white_value, cv2.THRESH_BINARY) # Calculate the percentage of white pixels in the thresholded image - white_pixels = 0 - total_pixels = thresholded_image.size - for i in range(0, thresholded_image.shape[0], 2): - for j in range(0, thresholded_image.shape[1], 2): - if thresholded_image[i, j] == white_value: - white_pixels += 1 - white_pixel_percentage = (white_pixels / (i * thresholded_image.shape[1] + j + 1)) * 100 - if white_pixel_percentage < white_percent: - return False + white_pixels = np.sum(thresholded_image == white_value) + white_pixel_percentage = (white_pixels / thresholded_image.size) * 100 print(f"Page has white pixel percent of {white_pixel_percentage}") - return True - + return white_pixel_percentage >= white_percent if __name__ == "__main__": @@ -39,9 +32,6 @@ if __name__ == "__main__": blank = is_blank_image(args.image_path, args.threshold, args.white_percent) - if blank: - # Return code 1: The image is considered blank. - sys.exit(1) - else: - # Return code 0: The image is not considered blank. - sys.exit(0) + # Return code 1: The image is considered blank. + # Return code 0: The image is not considered blank. + sys.exit(int(blank))