feat(scanner): add duplicate-scanner functionality
Add functionality for the duplicate scanner. This uses the previously implemented hashing code to derive SHA-256 hashes, detect duplicate files, and then delete them. Command-line arguments are provided for directory selection and for a dry-run mode that reports duplicates without deleting anything.
This commit is contained in:
parent
a67a40c571
commit
90482e42d7
@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
# Duplicate Check
|
# Duplicate Check
|
||||||
# Version: 0.14.0
|
# Version: 0.15.0
|
||||||
|
|
||||||
# Copyright 2025 Jake Winters
|
# Copyright 2025 Jake Winters
|
||||||
# SPDX-License-Identifier: BSD-3-Clause
|
# SPDX-License-Identifier: BSD-3-Clause
|
||||||
@ -24,3 +24,22 @@ def hash_file(file_path):
|
|||||||
for byte_block in iter(lambda: f.read(65536), b''):
|
for byte_block in iter(lambda: f.read(65536), b''):
|
||||||
sha256_hash.update(byte_block)
|
sha256_hash.update(byte_block)
|
||||||
return sha256_hash.hexdigest()
|
return sha256_hash.hexdigest()
|
||||||
|
|
||||||
|
def find_and_delete_duplicates(directory, dry_run):
    """Find files with identical content in *directory* and delete the extras.

    Only regular files directly inside *directory* are examined; there is no
    recursion into subdirectories.  Each file is hashed with ``hash_file``.
    The first file seen with a given hash is kept; every later file with the
    same hash is reported as a duplicate and, unless *dry_run* is true,
    removed.

    Args:
        directory: Path of the directory to scan.
        dry_run: When true, only report duplicates; nothing is deleted.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be
            read or removed.
    """
    # Build absolute paths from the directory instead of calling os.chdir():
    # the process working directory is never touched, so there is nothing to
    # restore.  (A relative os.chdir('..') afterwards would only return to
    # the starting directory when *directory* is a direct child of it, and
    # would be skipped entirely if an exception were raised mid-scan.)
    directory = os.path.abspath(directory)

    # Only membership matters, so a set of hashes is sufficient.
    seen_hashes = set()

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # choice of which copy is kept deterministic.
    for name in sorted(os.listdir(directory)):
        file_path = os.path.join(directory, name)
        if not os.path.isfile(file_path):
            continue

        file_hash = hash_file(file_path)
        if file_hash not in seen_hashes:
            seen_hashes.add(file_hash)
            continue

        print(f"Duplicate detected: {file_path}")
        if not dry_run:
            os.remove(file_path)
            print(f"Duplicate deleted: {file_path}")
|
||||||
|
|
||||||
|
# Run the scan using the directory and dry-run options held in ``args``.
find_and_delete_duplicates(directory=args.directory, dry_run=args.dry_run)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user