Coverage for nineties/nineties.py: 100%
35 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 20:53:48 +00:00
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 20:53:48 +00:00
1# -*- coding: utf-8 -*-
2"""Peel the onions from the Nineties."""
3import hashlib
4import os
5from typing import no_type_check
# Chunk size in bytes used when reading files for hashing (64 KiB).
BUFFER_BYTES = 1 << 16
# Raw value of the DEBUG_90S environment variable; None when it is not set.
DEBUG = os.getenv('DEBUG_90S')
@no_type_check
def list_dir(folder_path):
    """Return the names of the entries directly inside the folder as a sorted list."""
    entry_names = list(os.listdir(folder_path))
    entry_names.sort()
    return entry_names
@no_type_check
def elements_of_gen(folder_path):
    """Generate (name, file path) pairs for the folder's entries in sorted name order.

    Each path is the folder path joined with the entry name.
    """
    ordered_names = sorted(list_dir(folder_path))
    for entry_name in ordered_names:
        entry_path = os.path.join(folder_path, entry_name)
        yield entry_name, entry_path
@no_type_check
def read_folder(folder_path, get_size=os.path.getsize):
    """Return a map from SHA-256 hex digest to a list of (name, byte size) pairs.

    Every entry produced by elements_of_gen for the folder is opened in binary
    mode and hashed in chunks of BUFFER_BYTES. Entries sharing a digest end up
    in the same list, in the order they were visited (sorted by name).
    The size of each entry is obtained by calling get_size with its file path.
    """
    entries_by_digest = {}
    for name, file_path in elements_of_gen(folder_path):
        with open(file_path, 'rb') as handle:
            hasher = hashlib.sha256()
            while True:
                chunk = handle.read(BUFFER_BYTES)
                if chunk == b'':
                    # An empty read marks the end of the file.
                    break
                hasher.update(chunk)
            bucket = entries_by_digest.setdefault(hasher.hexdigest(), [])
            bucket.append((name, get_size(file_path)))
    return entries_by_digest
@no_type_check
def triage_hashes(hash_map):
    """Triage hash map into a pair of name lists: names to keep and names to remove.

    The hash map is expected to map a hash to a list of (name, size) pairs.
    Groups are processed in the iteration order of the map; within a group the
    order of the pairs is preserved.

    Four cases per group:

    0. size zero (read from the first entry of the group) => remove all
    1. unique hash => keep
    2. hash matching two entries => keep both
    3. hash with more than two entries => keep first and last, rest remove

    A group with no entries contributes nothing to either list.

    Returns a tuple (keep, remove) of lists of names.
    """
    keep, remove = [], []
    for info in hash_map.values():
        if not info:
            # Nothing recorded for this hash; avoid indexing into an empty list.
            continue
        names = [name for name, _ in info]
        if info[0][1] == 0:
            remove.extend(names)
        elif len(names) == 1:
            keep.extend(names)
        else:
            # For exactly two entries the middle slice is empty, so both are kept.
            keep.extend([names[0], names[-1]])
            remove.extend(names[1:-1])
    return keep, remove