From e7f7ecd2b7ad4092722e6753e2533e81788d492c Mon Sep 17 00:00:00 2001 From: root Date: Mon, 24 Oct 2022 13:01:54 +0200 Subject: [PATCH] script now searches subdirectories too #1 --- DuplicateRemover.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/DuplicateRemover.py b/DuplicateRemover.py index 0048079..0c98140 100644 --- a/DuplicateRemover.py +++ b/DuplicateRemover.py @@ -4,10 +4,10 @@ import os import numpy as np import csv -def writeCSV(dirname, i1, i2): +def writeCSV(i1, i2): with open("duplicates.csv", "a") as file: writer = csv.writer(file, delimiter=',') - writer.writerow([dirname+i1, dirname+i2]) + writer.writerow([i1, i2]) class DuplicateRemover: extensions = ['png', 'jpg', 'jpeg', 'gif', 'webp'] @@ -20,26 +20,27 @@ class DuplicateRemover: Find and Delete Duplicates """ - fnames = os.listdir(self.dirname) + #fnames = os.listdir(self.dirname) hashes = {} duplicates = [] if verbose: print("Finding Duplicates Now!\n") - for image in fnames: - if list(image.lower().split('.'))[-1] in self.extensions: - try: - with Image.open(os.path.join(self.dirname,image)) as img: - temp_hash = imagehash.average_hash(img, self.hash_size) - if temp_hash in hashes: - if verbose: - print("Duplicate {} \nfound for Image {}!\n".format(image,hashes[temp_hash])) - if exportCSV: - writeCSV(self.dirname, image, hashes[temp_hash]) - duplicates.append(image) - else: - hashes[temp_hash] = image - except Exception as error: - print("Error: The following error occured:\n",image,"\n",error,"\n") + for path, subdirs, fnames in os.walk(self.dirname): + for image in fnames: + if list(image.lower().split('.'))[-1] in self.extensions: + try: + with Image.open(os.path.join(path,image)) as img: + temp_hash = imagehash.average_hash(img, self.hash_size) + if temp_hash in hashes: + if verbose: + print("Duplicate {} \nfound for Image {}!\n".format(os.path.join(path,image),os.path.join(path,hashes[temp_hash]))) + if exportCSV: + writeCSV(os.path.join(path,image), os.path.join(path,hashes[temp_hash])) + duplicates.append(os.path.join(path,image)) + else: + hashes[temp_hash] = os.path.join(path,image) + except Exception as error: + print("Error: The following error occured:\n",os.path.join(path,image),"\n",error,"\n") if len(duplicates) != 0: if verbose: @@ -49,9 +50,9 @@ class DuplicateRemover: space_saved = 0 if(a.strip().lower() == "y"): for duplicate in duplicates: - space_saved += os.path.getsize(os.path.join(self.dirname,duplicate)) + space_saved += os.path.getsize(duplicate) - os.remove(os.path.join(self.dirname,duplicate)) + os.remove(duplicate) if verbose: print("{} Deleted Succesfully!".format(duplicate))