import hashlib
import os
def get_file_hash(filepath, algorithm='md5', chunk_size=65536):
    """Return the hexadecimal digest of a file's contents.

    The file is read in fixed-size chunks so large files are hashed
    without being loaded into memory all at once.

    Args:
        filepath: Path of the file to hash.
        algorithm: Name of a fixed-length hashlib algorithm. Defaults to
            'md5', which is adequate for spotting identical content but is
            not collision-resistant against deliberately crafted files.
        chunk_size: Number of bytes read per iteration; must be positive.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If chunk_size is not positive or the algorithm name is
            not supported by hashlib.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b'' immediately, which would silently produce the
    # digest of empty input for every file.
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive integer')
    hasher = hashlib.new(algorithm)
    with open(filepath, 'rb') as f:
        # The two-argument iter() keeps calling read() until it returns the
        # b'' sentinel that marks end of file.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
def find_duplicates(folder_path):
    """Find files under folder_path whose content repeats an earlier file.

    The tree is walked in sorted order, so the first file encountered with
    a given content hash is kept and every later file with the same hash is
    reported. Symbolic links and non-regular files are ignored, and files
    that cannot be read are reported and skipped.

    Args:
        folder_path: Root directory to scan recursively.

    Returns:
        A list of paths whose content hash matches a file seen earlier in
        the walk. The first file of each group is not included.
    """
    first_seen = {}
    duplicates = []
    for dirpath, dirnames, filenames in os.walk(folder_path):
        # Sorting dirnames in place makes os.walk descend in a stable order,
        # so which copy counts as the "original" is reproducible.
        dirnames.sort()
        for filename in sorted(filenames):
            filepath = os.path.join(dirpath, filename)
            # A symlink hashes the same as its target; treating the pair as
            # duplicates could delete the real file and leave a dangling
            # link. FIFOs and device nodes could block forever when opened.
            if os.path.islink(filepath) or not os.path.isfile(filepath):
                continue
            try:
                file_hash = get_file_hash(filepath)
            except OSError as e:
                # One unreadable file must not abort the whole scan.
                print(f'跳过无法读取的文件: {filepath}, 错误: {e}')
                continue
            if file_hash in first_seen:
                duplicates.append(filepath)
            else:
                first_seen[file_hash] = filepath
    return duplicates
def delete_duplicates(file_list):
    """Delete each path in file_list and return the total bytes removed.

    Deletion is best-effort: a path that cannot be removed is reported on
    stdout and skipped, and the remaining paths are still processed.

    Args:
        file_list: Iterable of file paths to delete.

    Returns:
        The summed size in bytes of the entries that were deleted.
    """
    deleted_size = 0
    for filepath in file_list:
        try:
            # lstat measures the entry that is actually removed. getsize
            # follows symlinks, so it would report the target's size and
            # fail outright on a dangling link that os.remove can delete.
            file_size = os.lstat(filepath).st_size
            os.remove(filepath)
        except OSError as e:
            print(f'删除失败: {filepath}, 错误: {e}')
        else:
            deleted_size += file_size
            print(f'删除: {filepath}')
    return deleted_size
def main():
    """Interactively scan a folder for duplicate files and offer to delete them.

    Prompts for a folder path, lists the duplicates found by
    find_duplicates, and deletes them with delete_duplicates only after the
    user answers 'y' to the confirmation prompt.
    """
    # Pasted paths often carry stray surrounding whitespace.
    target_folder = input('请输入要清理的文件夹路径: ').strip()
    if not os.path.exists(target_folder):
        print('路径不存在')
        return
    # An existing regular file would otherwise be walked as an empty tree
    # and misreported as "no duplicates found".
    if not os.path.isdir(target_folder):
        print('路径不是文件夹')
        return

    print('正在扫描重复文件...')
    dup_files = find_duplicates(target_folder)
    if not dup_files:
        print('没有发现重复文件')
        return

    print(f'发现 {len(dup_files)} 个重复文件')
    # Show exactly what will be removed before asking for confirmation.
    for dup_path in dup_files:
        print(f'  {dup_path}')

    confirm = input('是否删除这些文件? (y/n): ')
    if confirm.strip().lower() != 'y':
        print('取消操作')
        return

    size_freed = delete_duplicates(dup_files)
    print(f'清理完成。释放空间: {size_freed/1024/1024:.2f} MB')


if __name__ == '__main__':
    main()