Coverage for puhdistusalue/puhdistusalue.py: 100.00%
34 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 21:59:31 +00:00
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 21:59:31 +00:00
1# -*- coding: utf-8 -*-
2"""Purge monotonically named files in folders keeping range endpoints.
4Implementation uses sha256 hashes for identity and assumes that
5the natural order relates to the notion of fresher or better.
6"""
7import hashlib
8import os
9import typing
# Number of bytes requested per read call when hashing files (64 KiB).
BUFFER_BYTES = 64 * 1024
@typing.no_type_check
def list_dir(folder_path):
    """Return the list of local entry names found inside the folder.

    This is a thin wrapper around os.listdir; no sorting or filtering is applied here.
    """
    entry_names = os.listdir(folder_path)
    return entry_names
@typing.no_type_check
def elements_of_gen(folder_path):
    """Generate (name, file path) pairs for the entries of the folder in sorted name order.

    Each file path is the folder path joined with the local entry name.
    """
    local_names = list(list_dir(folder_path))
    local_names.sort()
    for local_name in local_names:
        yield local_name, os.path.join(folder_path, local_name)
@typing.no_type_check
def read_folder(folder_path, get_size=os.path.getsize):
    """Build a map from sha256 hex digest to the list of (name, byte size) pairs with that content.

    Entries are visited in sorted name order (hint: timestamp), so every list in the
    map is ordered by name as well. The size of each entry is obtained by calling
    get_size with the entry's file path.
    """
    digest_to_entries = {}
    for name, file_path in elements_of_gen(folder_path):
        with open(file_path, 'rb') as handle:
            hasher = hashlib.sha256()
            # Feed the hash chunk by chunk; an empty read marks the end of the file.
            while True:
                chunk = handle.read(BUFFER_BYTES)
                if chunk == b'':
                    break
                hasher.update(chunk)
            hex_digest = hasher.hexdigest()
            if hex_digest not in digest_to_entries:
                digest_to_entries[hex_digest] = []
            digest_to_entries[hex_digest].append((name, get_size(file_path)))
    return digest_to_entries
@typing.no_type_check
def triage_hashes(hash_map):
    """Triage hash map into a pair of name lists: names to keep and names to remove, in that order.

    The hash map relates a hash to a list of (name, byte size) entries. Four cases per hash:

    0. size zero regardless of hash => remove all entries
    1. unique hash => keep
    2. hash matching two entries => keep both
    3. hash with more than two entries => keep first and last, rest remove

    A hash that maps to an empty list contributes to neither result list.
    """
    keep, remove = [], []
    for info in hash_map.values():
        if not info:
            # Nothing recorded for this hash; avoid indexing into an empty list.
            continue
        names = [entry[0] for entry in info]
        if info[0][1] == 0:
            # Only the first entry's size is inspected to detect the empty-file case.
            remove.extend(names)
        elif len(names) == 1:
            keep.extend(names)
        else:
            # Keep the range endpoints; with exactly two entries the middle slice is empty.
            keep.extend([names[0], names[-1]])
            remove.extend(names[1:-1])
    return keep, remove