Coverage for vitriini/processor.py: 10.65%

124 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-05 19:54:27 +00:00

1"""Showcase (Finnish: vitriini) some packaged content - guided by conventions - processor.""" 

2 

3import argparse 

4import datetime as dti 

5import logging 

6import pathlib 

7import shutil 

8import zipfile 

9 

10from vitriini import ( 

11 ENCODING, 

12 MAX_PACKED_BYTES, 

13 MAX_UNPACKED_BYTES, 

14 log, 

15) 

16 

# Conventional folder layout: archives arrive in INCOMING, are unpacked below
# PROCESSING and the validated result is written below STAGE.
INCOMING = pathlib.Path('incoming')
SAMPLE = 'Example _funnily_nameD - 42'
SAMPLE_PATH = INCOMING / f'{SAMPLE}.zip'  # fallback archive when none is given
PROCESSING = pathlib.Path('processing')
# Slug rule: underscores and spaces become dashes, everything lowercased.
SLUG = SAMPLE.replace('_', '-').replace(' ', '-').lower()
OUT_ROOT = PROCESSING / SLUG
STAGE = pathlib.Path('staging')

# Remote library URL expected inside the html file and its local replacement.
LOCALIZE_LIB_IN = 'https://code.createjs.com/1.0.0/createjs.min.js'
LOCALIZE_LIB_OUT = '/assets/js/createjs.min.js'
# The complete script tag that has to be present; derived from the URL above
# so that both values cannot drift apart.
MAGIC_LINE_LIB = f'<script src="{LOCALIZE_LIB_IN}"></script>'

29 

30 

def cess(options: argparse.Namespace) -> int:
    """Process the archive and content.

    The archive is size checked (packed and unpacked), unpacked below the
    processing folder, validated against the layout conventions (one top level
    ``images`` folder holding only files, exactly one html and one js file on
    top level, the html referencing the library and the js file) and finally
    written to a dated staging folder with the library reference localized.

    Args:
        options: Parsed command line options; the attributes ``verbose`` and
            ``archive_path`` are read. A falsy ``archive_path`` selects
            ``SAMPLE_PATH``.

    Returns:
        0 on success and 1 as soon as any check fails (the reason is logged
        on error level).
    """
    if options.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    archive_path = pathlib.Path(options.archive_path) if options.archive_path else SAMPLE_PATH
    if not archive_path.is_file():
        log.error('No file.')
        return 1

    slug = archive_path.stem.replace('_', '-').replace(' ', '-').lower()
    processing_root = PROCESSING / slug
    today_folder_name = dti.datetime.today().strftime('%Y%m%d')
    log.info(f'Date folder name will be {today_folder_name}')

    size_packed = archive_path.stat().st_size
    if size_packed > MAX_PACKED_BYTES:
        log.error('Archive too large')
        return 1

    log.info(f'{archive_path} -> {processing_root}/ if size[bytes] {size_packed} <= {MAX_PACKED_BYTES}')

    # Only the directory of the archive is read here; nothing is extracted yet.
    try:
        with zipfile.ZipFile(archive_path, mode='r') as archive:
            zip_info = archive.infolist()
    except zipfile.BadZipFile as err:
        log.error(f'Incoming file is corrupt or has wrong type: {err}')
        return 1

    size_unpacked = 0
    log.info(f'Information on {len(zip_info)} entries in zip archive info list:')
    for slot, info in enumerate(zip_info, start=1):
        # Archive metadata is untrusted: zero month or day fields would make
        # datetime raise, so fall back to the raw tuple instead of aborting.
        try:
            modified = dti.datetime(*info.date_time)
        except ValueError:
            modified = info.date_time
        log.info(f'- slot {slot}:')
        log.info(f' + Filename: {info.filename}')
        log.info(f' + Modified: {modified}')
        log.info(f' + Normal size: {info.file_size} bytes')
        log.info(f' + Compressed size: {info.compress_size} bytes')
        size_unpacked += info.file_size

    log.info(f'{archive_path} -> {processing_root}/ if size[bytes] unpacked {size_unpacked} <= {MAX_UNPACKED_BYTES}')

    if size_unpacked > MAX_UNPACKED_BYTES:
        log.error('Zip bomb?')
        return 1

    log.info(f'Compression factor is {round(size_unpacked / size_packed, 3) :5.3f} = {size_unpacked} / {size_packed}')

    # The file has been validated as zip above, so do not let shutil guess the
    # format from the file name extension.
    shutil.unpack_archive(archive_path, processing_root, format='zip')

    top_level_folders = [path for path in processing_root.iterdir() if path.is_dir() and path.name == 'images']
    if len(top_level_folders) != 1:
        log.error('No top level folder with name images found or too many folders.')
        return 1
    log.info(
        f'Found: top level folder {tuple(e.name for e in top_level_folders)} SHALL contain an images folder (SUCC)'
    )
    images_root = top_level_folders[0]

    if any(path.is_dir() for path in images_root.iterdir()):
        log.error('Images folder contains folders but shall not.')
        return 1
    log.info('Images folder SHALL NOT contain any folders (SUCC)')

    image_files = [path for path in images_root.iterdir() if path.is_file()]
    if not image_files:
        log.error('Images folder contains no files but shall contain.')
        return 1
    log.info('Images folder SHALL contain files (SUCC)')

    # Unexpected image suffixes are reported but do not fail the processing.
    for path in image_files:
        if path.suffix.lower() not in ('.gif', '.jpg', '.jpeg', '.png', '.svg', '.webp'):
            log.warning(f'{path.name} has unexpected suffix')

    top_level_files = [path for path in processing_root.iterdir() if path.is_file() and path.suffix in ('.js', '.html')]
    if not top_level_files:
        log.error('No top level files found.')
        return 1

    # Every entry is already known to be a file; only split by suffix.
    html_files = [path for path in top_level_files if path.suffix == '.html']
    js_files = [path for path in top_level_files if path.suffix == '.js']
    log.info(
        f'Found: top level files {tuple(e.name for e in top_level_files)} SHALL contain a html'
        f' ({"SUCC" if len(html_files) == 1 else "FAIL"})'
        f' and a js ({"SUCC" if len(js_files) == 1 else "FAIL"}) file'
    )
    if len(html_files) != 1 or len(js_files) != 1:
        log.error('Too few or too many JS and HTML files on top-level.')
        return 1
    html_file, js_file = html_files[0], js_files[0]

    # Read with the same encoding the index file is written with below and
    # close the handle deterministically.
    with html_file.open('rt', encoding=ENCODING) as handle:
        html_lines = [line.strip() for line in handle]
    log.info(f'Html file has {len(html_lines)} lines - SHALL have 2 special lines in it (pending).')

    if not any(MAGIC_LINE_LIB in line for line in html_lines):
        log.error('Did not find the library import line inside the html file.')
        return 1

    # NOTE(review): the lines are lowercased for the comparison but the file
    # name is not, so a js file name with upper case letters never matches -
    # confirm whether lower case js file names are a convention.
    consistent_reference = f'<script src="{js_file.name}?'
    if not any(consistent_reference in line.lower() for line in html_lines):
        log.error('Did not find the js file reference line inside the html file.')
        return 1

    for slot, line in enumerate(html_lines):
        if MAGIC_LINE_LIB not in line:
            continue
        html_lines[slot] = line.replace(LOCALIZE_LIB_IN, LOCALIZE_LIB_OUT)
        log.info(f'Did replace ({LOCALIZE_LIB_IN}) with ({LOCALIZE_LIB_OUT}) in slot {slot}')
        follower = slot + 1
        if follower >= len(html_lines):
            # Library import is the very last line: nothing follows it.
            continue
        # Presumably the line after the library import holds the js file
        # reference whose case gets normalized here - verify.
        old = html_lines[follower]
        html_lines[follower] = old.lower()
        log.info(f'Did lowercase ({old}) into ({html_lines[follower]}) in slot {follower}')

    # Refuse to overwrite a previous staging result of the same day and slug.
    local_out_root = STAGE / today_folder_name / slug
    try:
        local_out_root.mkdir(parents=True, exist_ok=False)
    except FileExistsError:
        log.error('Staging folder is already there')
        return 1
    log.info(f'Created staging folder {local_out_root}')

    local_out_images_root = local_out_root / 'images'
    try:
        local_out_images_root.mkdir(parents=True, exist_ok=False)
    except FileExistsError:
        log.error('Images folder is already there')
        return 1
    log.info(f'Created images folder {local_out_images_root}')

    with open(local_out_root / 'index.html', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(html_lines))
        handle.write('\n')
    log.info('Created index file in the staging folder')

    shutil.copy(js_file, local_out_root / js_file.name)
    log.info(f'Copied js file {js_file.name} to the staging folder')

    for path in image_files:
        shutil.copy(path, local_out_images_root / path.name)
        log.info(f'Copied image file {path.name} to the images folder')

    log.info('OK - Looks good for now (prototype)')

    return 0