From 90482e42d7ba542dd7968a2a9be641bf32812b90 Mon Sep 17 00:00:00 2001
From: inference
Date: Wed, 25 Jun 2025 21:53:57 +0000
Subject: [PATCH] feat(scanner): add duplicate-scanner functionality

Add functionality for the duplicate scanner. This uses the
previously-implemented code to derive SHA-256-based hashes and detect
duplicates before deleting them. Arguments are provided for a dry-run
mode and directory selection.
---
 duplicate_check.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/duplicate_check.py b/duplicate_check.py
index 5152b98..f4a6508 100644
--- a/duplicate_check.py
+++ b/duplicate_check.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 # Duplicate Check
-# Version: 0.14.0
+# Version: 0.15.0
 # Copyright 2025 Jake Winters
 # SPDX-License-Identifier: BSD-3-Clause
 
@@ -23,4 +23,26 @@ def hash_file(file_path):
     with open(file_path, 'rb') as f:
         for byte_block in iter(lambda: f.read(65536), b''):
             sha256_hash.update(byte_block)
-    return sha256_hash.hexdigest()
\ No newline at end of file
+    return sha256_hash.hexdigest()
+
+def find_and_delete_duplicates(directory, dry_run):
+    """Scan *directory* for files with identical SHA-256 digests.
+
+    The first file seen with a given digest is kept; every later match is
+    reported and, unless dry_run is true, deleted.
+    """
+    file_hashes = {}
+    # Sort for a deterministic "keeper": the lexicographically first file wins.
+    for entry in sorted(os.listdir(directory)):
+        # Join against the scanned directory instead of os.chdir(), so the
+        # process cwd is never mutated (chdir('..') would not restore it for
+        # absolute or nested paths).
+        file_path = os.path.abspath(os.path.join(directory, entry))
+        if os.path.isfile(file_path):
+            file_hash = hash_file(file_path)
+            if file_hash in file_hashes:
+                print(f"Duplicate detected: {file_path}")
+                if not dry_run:
+                    os.remove(file_path)
+                    print(f"Duplicate deleted: {file_path}")
+            else:
+                # Map digest -> first path seen, so the survivor is recorded.
+                file_hashes[file_hash] = file_path
+
+find_and_delete_duplicates(args.directory, args.dry_run)