Coverage for nineties/nineties.py: 100%

35 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-04 20:53:48 +00:00

1# -*- coding: utf-8 -*- 

2"""Peel the onions from the Nineties.""" 

3import hashlib 

4import os 

5from typing import no_type_check 

6 

# Chunk size in bytes (64 KiB) requested per read when hashing file content.
BUFFER_BYTES = 1 << 16

# Raw value of the DEBUG_90S environment variable; None when it is not set.
DEBUG = os.environ.get('DEBUG_90S')

9 

10 

@no_type_check
def list_dir(folder_path):
    """Return the names of the entries inside folder_path as a sorted list.

    The names are local (not prefixed with folder_path), exactly as reported
    by os.listdir, and ordered ascending.
    """
    entry_names = os.listdir(folder_path)
    entry_names.sort()
    return entry_names

15 

16 

@no_type_check
def elements_of_gen(folder_path):
    """Generate (name, path) pairs for the entries of folder_path in name order.

    Each pair holds the local entry name and that name joined onto
    folder_path. The names are sorted here before being yielded.
    """
    ordered_names = sorted(list_dir(folder_path))
    for entry_name in ordered_names:
        entry_path = os.path.join(folder_path, entry_name)
        yield entry_name, entry_path

22 

23 

@no_type_check
def read_folder(folder_path, get_size=os.path.getsize):
    """Group the entries of folder_path by the SHA-256 digest of their content.

    Every entry produced by elements_of_gen is opened in binary mode and
    hashed in chunks of BUFFER_BYTES bytes. The result maps each hex digest
    to a list of (name, byte size) pairs in the order the entries were
    visited (sorted by name); the size comes from get_size(file_path).
    """
    digest_to_entries = {}
    for name, file_path in elements_of_gen(folder_path):
        with open(file_path, 'rb') as handle:
            hasher = hashlib.sha256()
            while True:
                chunk = handle.read(BUFFER_BYTES)
                if chunk == b'':
                    # An empty read marks the end of the file.
                    break
                hasher.update(chunk)
            same_content = digest_to_entries.setdefault(hasher.hexdigest(), [])
            same_content.append((name, get_size(file_path)))
    return digest_to_entries

35 

36 

@no_type_check
def triage_hashes(hash_map):
    """Triage hash map in pair of names to keep and to remove in that order.

    hash_map maps a hash to a list of (name, byte size) pairs. A hash with an
    empty list contributes nothing. Otherwise one of four cases applies:

    0. size zero (judged by the first entry of the hash) => remove all
    1. unique hash => keep
    2. hash matching two entries => keep both
    3. hash with more than two entries => keep first and last, rest remove

    Returns a tuple (keep, remove) of lists of names, filled in the iteration
    order of hash_map.
    """
    keep, remove = [], []
    for info in hash_map.values():
        if not info:
            # No entries recorded for this hash; info[0] below would raise IndexError.
            continue
        names = [name for name, _ in info]
        if info[0][1] == 0:
            remove.extend(names)
        elif len(names) == 1:
            keep.extend(names)
        else:
            # With exactly two entries the middle slice is empty, so both are kept.
            keep.extend([names[0], names[-1]])
            remove.extend(names[1:-1])
    return keep, remove