Coverage for puhdistusalue/puhdistusalue.py: 100.00%

34 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-04 21:59:31 +00:00

1# -*- coding: utf-8 -*- 

2"""Purge monotonically named files in folders keeping range endpoints. 

3 

4Implementation uses sha256 hashes for identity and assumes that 

5the natural order relates to the notion of fresher or better. 

6""" 

7import hashlib 

8import os 

9import typing 

10 

# Chunk size in bytes (64 KiB) used when reading files for hashing.
BUFFER_BYTES = 64 * 1024

12 

13 

@typing.no_type_check
def list_dir(folder_path):
    """Return the local entry names found inside the folder.

    Thin wrapper around ``os.listdir``: the names are handed back exactly as
    that call delivers them (no sorting, no folder path prefixed).
    """
    local_names = os.listdir(folder_path)
    return local_names

18 

19 

@typing.no_type_check
def elements_of_gen(folder_path):
    """Yield (name, path) pairs for the entries of a folder, ordered by name.

    The local names come from ``list_dir`` and are sorted before iteration;
    each path is the folder path joined with the local name.
    """
    # sorted() accepts any iterable, so the listing is passed in directly.
    for name in sorted(list_dir(folder_path)):
        yield name, os.path.join(folder_path, name)

25 

26 

@typing.no_type_check
def read_folder(folder_path, get_size=os.path.getsize):
    """Return a dict mapping sha256 hex digests to lists of (name, byte size) pairs.

    Every entry produced by ``elements_of_gen`` is opened in binary mode and
    hashed in chunks of ``BUFFER_BYTES``. Entries with identical content end up
    in the same list, in the name-sorted order they were visited
    (hint: timestamp-like names sort oldest first).

    ``get_size`` is called with the file path to obtain the byte size and
    defaults to ``os.path.getsize``.
    """
    by_digest = {}
    for name, file_path in elements_of_gen(folder_path):
        with open(file_path, 'rb') as in_file:
            digest = hashlib.sha256()
            # Read until the empty bytes object signals end of file.
            while True:
                chunk = in_file.read(BUFFER_BYTES)
                if chunk == b'':
                    break
                digest.update(chunk)
            hex_key = digest.hexdigest()
            if hex_key not in by_digest:
                by_digest[hex_key] = []
            by_digest[hex_key].append((name, get_size(file_path)))

    return by_digest

39 

40 

@typing.no_type_check
def triage_hashes(hash_map):
    """Triage hash map into a pair of name lists: names to keep, names to remove.

    Each value of ``hash_map`` is a list of ``(name, byte_size)`` pairs that
    share one hash. Four cases per hash:

    0. size zero regardless of hash => remove all
    1. unique hash => keep
    2. hash matching two entries => keep both
    3. hash with more than two entries => keep first and last, rest remove

    Only the size of the first entry of a list is inspected for case 0.
    A hash with an empty entry list contributes nothing to either result.

    Returns the tuple ``(keep, remove)`` of name lists.
    """
    keep, remove = [], []
    for info in hash_map.values():
        if not info:
            # Nothing recorded for this hash; avoid indexing an empty list.
            continue
        if info[0][1] == 0:
            remove.extend(name for name, _ in info)
        elif len(info) == 1:
            keep.extend(name for name, _ in info)
        else:
            # Range endpoints survive; with exactly two entries the slice is empty.
            first, last = info[0][0], info[-1][0]
            keep.extend([first, last])
            remove.extend(name for name, _ in info[1:-1])

    return keep, remove