Coverage for stativ/stativ.py: 7.92%
138 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 22:28:28 +00:00
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 22:28:28 +00:00
1"""Harvest the release mappings to bucket store from the backup tree."""
3import copy
4import datetime as dti
5import json
6import os
7import pathlib
8from typing import no_type_check
10import stativ.delta_store as delta
# Text encoding used whenever this module opens a file.
ENCODING = 'utf-8'

# Installation folder name and the folder it lives below; both may be
# overridden through environment variables of the same name.
BRM_NAME = os.environ.get('BRM_NAME', 'brm')
BRM_ANCHOR = os.environ.get('BRM_ANCHOR', '/')

# Root of the bucket store (main() reports it as "the bucket store").
BRM_STORE_ROOT = pathlib.Path(BRM_ANCHOR) / BRM_NAME / 'data' / 'filestore'

# Root of the backup tree that main() walks for properties and file XML records.
BRM_BACKUP_ROOT = pathlib.Path(BRM_ANCHOR) / BRM_NAME / 'backup/'
def _local_iso_from_epoch(seconds):
    """Return ISO 8601 text (local zone, explicit offset) for POSIX *seconds*."""
    # Build an aware UTC value first: calling astimezone() on the naive result
    # of utcfromtimestamp() would reinterpret the UTC wall clock as local time
    # and shift the instant by the local offset on any non-UTC host.
    return dti.datetime.fromtimestamp(seconds, tz=dti.timezone.utc).astimezone().isoformat()


def _tag_text(record, tag):
    """Return the text between ``<tag>`` and ``</tag>`` in the single line *record*."""
    return record.split(f'<{tag}>', 1)[1].split(f'</{tag}>')[0]


def _parse_properties(lines):
    """Map the ``key=value`` lines of a properties file to a dict.

    A leading ``{BRM_NAME}.`` is removed from keys; the ``timestamp`` value
    (epoch milliseconds) is converted to ISO 8601 text. Blank lines are
    skipped. A non-blank line without ``=`` raises ``ValueError``.
    """
    meta = {}
    for line in lines:
        entry = line.strip()
        if not entry:
            continue
        key, value = entry.split('=', 1)
        if key.startswith(f'{BRM_NAME}.'):
            _, key = key.split('.', 1)
        if key == 'timestamp':
            meta[key] = _local_iso_from_epoch(float(value) / 1.0e3)
        else:
            # Store under the parsed name; the literal 'key' used previously
            # made every property overwrite the same single entry.
            meta[key] = value
    return meta


def _parse_file_xml(lines):
    """Collect size, repo key, path, update time and checksums from file XML lines.

    The scan is line based: only lines that start with one of the known tags
    are considered, and an ``<actual>`` line is attributed to a checksum type
    announced by an earlier ``<type>...`` line.
    """
    hash_kinds = ('sha1', 'md5', 'sha256')
    awaiting = set()  # checksum types announced but not yet followed by <actual>
    meta = {}
    for line in lines:
        rec = line.strip()
        if not rec:
            continue
        if rec.startswith('<size>'):
            meta['size_bytes'] = int(_tag_text(rec, 'size'))
        elif rec.startswith('<repoKey>'):
            meta['repo_key'] = _tag_text(rec, 'repoKey')
        elif rec.startswith('<path>'):
            meta['path'] = _tag_text(rec, 'path')
        elif rec.startswith('<lastUpdated>'):
            # Value is epoch milliseconds.
            meta['last_update'] = _local_iso_from_epoch(float(_tag_text(rec, 'lastUpdated')) / 1.0e3)
        elif rec.startswith('<type>'):
            for kind in hash_kinds:
                if rec.startswith(f'<type>{kind}<'):
                    awaiting.add(kind)
                    break
        elif rec.startswith('<actual>'):
            # Same precedence as separate per-type flags: sha1, md5, sha256.
            for kind in hash_kinds:
                if kind in awaiting:
                    meta[f'{kind}_lc_hex'] = _tag_text(rec, 'actual')
                    awaiting.discard(kind)
                    break
    return meta


@no_type_check
def main(argv=None):
    """Harvest information.

    Walks BRM_BACKUP_ROOT, parses every ``{BRM_NAME}.properties`` and
    ``{BRM_NAME}-file.xml`` found, compares the result with the baseline in
    ``store/proxy.json`` and hands the entered, changed, kept, gone and
    remaining entries to the delta store dump functions.

    Args:
        argv: must be empty or None; any argument is rejected.

    Returns:
        0 after the dumps were written, 2 on unexpected arguments or when the
        current directory is not a delta store.

    Raises:
        KeyError: when a file XML record lacks the sha1 checksum, repo key
            or path.
    """
    if argv:
        print('Unexpected arguments ...')
        return 2

    print(f'Starting execution in folder ({pathlib.Path(".")})')
    print(f'- Assuming BRM storage root at ({BRM_STORE_ROOT}) i.e. the bucket store')
    print(f'- Assuming BRM backup root at ({BRM_BACKUP_ROOT}) i.e. the archival class artifacts')

    if not delta.is_delta_store(pathlib.Path('')):
        print('There is no delta store in the current directory ...')
        return 2

    with open(pathlib.Path('store', 'proxy.json'), 'rt', encoding=ENCODING) as handle:
        proxy = json.load(handle)

    enter, change, gone, keep = {}, {}, {}, {}
    remain = {}
    entered, changed, removed, kept = 0, 0, 0, 0
    print('---')
    exclude = str(pathlib.Path(BRM_BACKUP_ROOT, 'backup-daily', 'current', 'etc'))  # HACK A DID ACK
    for path in BRM_BACKUP_ROOT.glob('**/*'):
        if str(path).startswith(exclude):
            continue
        if path.name == f'{BRM_NAME}.properties':
            with open(path, 'rt', encoding=ENCODING) as handle:
                meta = _parse_properties(handle)
            # pop() with a default: a baseline without '_meta' (or a second
            # properties file) is reported as a change instead of a KeyError.
            # NOTE(review): a baseline written while properties were still
            # stored under the literal 'key' will presumably differ once.
            if proxy.pop('_meta', None) != meta:
                print(f'WARNING {BRM_NAME} version info changed')
            remain['_meta'] = copy.deepcopy(meta)
        elif path.name == f'{BRM_NAME}-file.xml':
            with open(path, 'rt', encoding=ENCODING) as handle:
                meta = _parse_file_xml(handle)

            # The bucket is named by the sha1 and filed below its first two hex digits.
            bucket = meta['sha1_lc_hex']
            bucket_path = pathlib.Path(BRM_STORE_ROOT, bucket[:2], bucket)
            meta['bucket_path'] = str(bucket_path)
            bucket_present = bucket_path.is_file()
            meta['bucket_present'] = bucket_present
            if bucket_present:
                b_stat = bucket_path.stat()
                meta['bucket_size_bytes'] = b_stat.st_size
                meta['bucket_modify'] = _local_iso_from_epoch(b_stat.st_mtime)

            p_key = str(pathlib.Path(meta['repo_key'], meta['path']))
            if p_key not in proxy:
                print('INFO ENTER found new', p_key)
                enter[p_key] = copy.deepcopy(meta)
                entered += 1
            elif proxy[p_key] != meta:
                print('INFO CHANGE found changed', p_key)
                change[p_key] = copy.deepcopy(meta)
                del proxy[p_key]
                changed += 1
            else:
                keep[p_key] = copy.deepcopy(meta)
                del proxy[p_key]
                kept += 1
            remain[p_key] = copy.deepcopy(meta)

    # Whatever is still in the baseline was not seen in the backup tree.
    removed = len(proxy)
    print('INFO GONE processing', removed, 'gone entries')
    gone.update(proxy)

    delta.dump_gone(gone, indent=True)
    delta.dump_change(change, indent=True)
    delta.dump_enter(enter, indent=True)
    delta.dump_keep(keep, indent=True)
    delta.dump_remain(remain, indent=True)

    # The extra 1 accounts for the '_meta' entry.
    if len(remain) != 1 + entered + changed + kept:
        print('WARNING: len(remain) != 1 + entered + changed + kept')

    print(
        f'SUMMARY: ENTER({entered}), CHANGE({changed}), KEPT({kept}), GONE({removed})'
        f' --> REMAIN({entered + changed + kept})'
    )
    return 0