Coverage for stativ/stativ.py: 7.92%

138 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-04 22:28:28 +00:00

1"""Harvest the release mappings to bucket store from the backup tree.""" 

2 

3import copy 

4import datetime as dti 

5import json 

6import os 

7import pathlib 

8from typing import no_type_check 

9 

10import stativ.delta_store as delta 

11 

# Name of the BRM instance; taken from the BRM_NAME environment variable, falling back to 'brm'.
12BRM_NAME = os.getenv('BRM_NAME', 'brm') 

# Anchor folder below which the instance lives; taken from BRM_ANCHOR, falling back to '/'.
13BRM_ANCHOR = os.getenv('BRM_ANCHOR', '/') 

# Root of the bucket store: <anchor>/<name>/data/filestore.
14BRM_STORE_ROOT = pathlib.Path(BRM_ANCHOR, BRM_NAME, 'data', 'filestore') 

# Text encoding used for every file opened by this module.
15ENCODING = 'utf-8' 

16 

# Root of the backup tree that is scanned for artifacts: <anchor>/<name>/backup.
17BRM_BACKUP_ROOT = pathlib.Path(BRM_ANCHOR, BRM_NAME, 'backup/') 

18 

19 

def _iso_from_epoch(seconds):
    """Return the local-zone ISO 8601 text for the POSIX timestamp *seconds*.

    The value is first built as a timezone-aware UTC datetime and only then
    converted to the local zone. Calling ``astimezone()`` on the naive result of
    ``utcfromtimestamp`` would re-interpret the UTC wall clock as local time and
    shift the instant on every host that is not running in UTC.
    """
    return dti.datetime.fromtimestamp(seconds, tz=dti.timezone.utc).astimezone().isoformat()


def _tag_text(record, tag):
    """Return the text between ``<tag>`` and ``</tag>`` within the single-line *record*."""
    return record.split(f'<{tag}>', 1)[1].split(f'</{tag}>')[0]


def _parse_properties(lines, name):
    """Collect the ``<name>.``-prefixed entries of a properties file into a dict.

    Args:
        lines: iterable of text lines in ``key=value`` form.
        name: instance name; only keys starting with ``<name>.`` are kept.

    Returns:
        Mapping of the key remainder (text after the first dot) to its value.
        The ``timestamp`` entry is read as epoch milliseconds and stored as
        ISO 8601 text; every other entry is stored verbatim.
    """
    meta = {}
    prefix = f'{name}.'
    for line in lines:
        record = line.strip()
        if not record:
            continue
        key, separator, value = record.partition('=')
        # Lines without '=' (e.g. comment lines) carry no entry; skip instead of failing.
        if not separator or not key.startswith(prefix):
            continue
        _, key = key.split('.', 1)
        if key == 'timestamp':
            meta[key] = _iso_from_epoch(float(value) / 1.0e3)
        else:
            # Store under the actual key name (not a literal 'key' slot that
            # every entry would overwrite).
            # NOTE(review): assumes this branch belongs to the timestamp test - confirm.
            meta[key] = value
    return meta


def _parse_file_record(lines):
    """Extract size, repository key, path, update time and hashes from a file XML.

    The parser is line based: it expects each element of interest to start its
    own line. A ``<type>sha1<``, ``<type>md5<`` or ``<type>sha256<`` line marks
    that hash kind as pending; a following ``<actual>`` line is assigned to the
    first pending kind in the order sha1, md5, sha256.

    Args:
        lines: iterable of text lines of the XML document.

    Returns:
        Dict with whichever of ``size_bytes``, ``repo_key``, ``path``,
        ``last_update``, ``sha1_lc_hex``, ``md5_lc_hex`` and ``sha256_lc_hex``
        were found.
    """
    meta = {}
    pending = {'sha1': False, 'md5': False, 'sha256': False}
    for line in lines:
        rec = line.strip()
        if not rec:
            continue
        if rec.startswith('<size>'):
            meta['size_bytes'] = int(_tag_text(rec, 'size'))
        elif rec.startswith('<repoKey>'):
            meta['repo_key'] = _tag_text(rec, 'repoKey')
        elif rec.startswith('<path>'):
            meta['path'] = _tag_text(rec, 'path')
        elif rec.startswith('<lastUpdated>'):
            # The element holds epoch milliseconds.
            meta['last_update'] = _iso_from_epoch(float(_tag_text(rec, 'lastUpdated')) / 1.0e3)
        elif rec.startswith('<type>'):
            for kind in pending:
                if rec.startswith(f'<type>{kind}<'):
                    pending[kind] = True
                    break
        elif rec.startswith('<actual>'):
            for kind, waiting in pending.items():
                if waiting:
                    meta[f'{kind}_lc_hex'] = _tag_text(rec, 'actual')
                    pending[kind] = False
                    break
    return meta


@no_type_check
def main(argv=None):
    """Harvest information.

    Walks the backup tree below ``BRM_BACKUP_ROOT``, compares every artifact
    record found there with the entries loaded from ``store/proxy.json`` and
    sorts them into entered (new), changed, kept and gone sets, which are then
    handed to the ``dump_*`` functions of the delta store module.

    Args:
        argv: must be empty or ``None``; any argument is rejected.

    Returns:
        ``2`` when arguments were given or the current directory is not a
        delta store, ``0`` after a completed harvest.
    """
    if argv:
        print('Unexpected arguments ...')
        return 2

    print(f'Starting execution in folder ({pathlib.Path(".")})')
    print(f'- Assuming BRM storage root at ({BRM_STORE_ROOT}) i.e. the bucket store')
    print(f'- Assuming BRM backup root at ({BRM_BACKUP_ROOT}) i.e. the archival class artifacts')

    if not delta.is_delta_store(pathlib.Path('')):
        print('There is no delta store in the current directory ...')
        return 2

    with open(pathlib.Path('store', 'proxy.json'), 'rt', encoding=ENCODING) as handle:
        proxy = json.load(handle)

    # Remember the stored version info up front so that a missing entry or a
    # second properties file in the tree does not raise KeyError.
    stored_meta = proxy.get('_meta')

    enter, change, gone, keep = {}, {}, {}, {}
    remain = {}
    entered, changed, removed, kept = 0, 0, 0, 0
    print('---')
    exclude = str(pathlib.Path(BRM_BACKUP_ROOT, 'backup-daily', 'current', 'etc'))  # HACK A DID ACK
    for path in BRM_BACKUP_ROOT.glob('**/*'):
        if str(path).startswith(exclude):
            continue
        if path.name == f'{BRM_NAME}.properties':
            with open(path, 'rt', encoding=ENCODING) as handle:
                meta = _parse_properties(handle, BRM_NAME)
            if stored_meta != meta:
                print(f'WARNING {BRM_NAME} version info changed')
            proxy.pop('_meta', None)
            remain['_meta'] = copy.deepcopy(meta)
        elif path.name == f'{BRM_NAME}-file.xml':
            with open(path, 'rt', encoding=ENCODING) as handle:
                meta = _parse_file_record(handle)

            # The bucket file is named after the SHA1 hash and sharded into a
            # folder named by the first two hex digits.
            bucket = meta['sha1_lc_hex']
            bucket_path = pathlib.Path(BRM_STORE_ROOT, bucket[:2], bucket)
            meta['bucket_path'] = str(bucket_path)
            bucket_present = bucket_path.is_file()
            meta['bucket_present'] = bucket_present
            if bucket_present:
                b_stat = bucket_path.stat()
                meta['bucket_size_bytes'] = b_stat.st_size
                meta['bucket_modify'] = _iso_from_epoch(b_stat.st_mtime)

            p_key = str(pathlib.Path(meta['repo_key'], meta['path']))
            if p_key not in proxy:
                print('INFO ENTER found new', p_key)
                enter[p_key] = copy.deepcopy(meta)
                entered += 1
            elif proxy.pop(p_key) != meta:  # seen entries leave the proxy either way
                print('INFO CHANGE found changed', p_key)
                change[p_key] = copy.deepcopy(meta)
                changed += 1
            else:
                keep[p_key] = copy.deepcopy(meta)
                kept += 1
            remain[p_key] = copy.deepcopy(meta)

    # Whatever is still left in the proxy was not seen in the backup tree.
    removed = len(proxy)
    print('INFO GONE processing', removed, 'gone entries')
    gone.update(proxy)

    delta.dump_gone(gone, indent=True)
    delta.dump_change(change, indent=True)
    delta.dump_enter(enter, indent=True)
    delta.dump_keep(keep, indent=True)
    delta.dump_remain(remain, indent=True)

    # The extra one accounts for the '_meta' entry in remain.
    if len(remain) != 1 + entered + changed + kept:
        print('WARNING: len(remain) != 1 + entered + changed + kept')

    print(
        f'SUMMARY: ENTER({entered}), CHANGE({changed}), KEPT({kept}), GONE({removed})'
        f' --> REMAIN({entered + changed + kept})'
    )
    return 0