8.88%

266 statements  

« prev     ^ index     » next       coverage.py v7.6.8, created at 2024-11-25 15:36:16 +00:00

1"""Render the concat document to pdf.""" 

2 

3import json 

4import os 

5import pathlib 

6import re 

7import shutil 

8import time 

9from typing import Union, no_type_check 

10 

11import yaml 

12 

13import liitos.captions as cap 

14import liitos.concat as con 

15import liitos.description_lists as dsc 

16import liitos.figures as fig 

17import liitos.gather as gat 

18import liitos.labels as lab 

19import liitos.patch as pat 

20import liitos.tables as tab 

21import liitos.tools as too 

22from liitos import ( 

23 CONTEXT, 

24 ENCODING, 

25 FROM_FORMAT_SPEC, 

26 LATEX_PAYLOAD_NAME, 

27 LOG_SEPARATOR, 

28 log, 

29 parse_csl, 

30) 

31 

# Relative locations: the document base sits two levels up, the structure file directly below it.
DOC_BASE = pathlib.Path('..', '..')
STRUCTURE_PATH = DOC_BASE / 'structure.yml'

# Folders (relative to the render folder) scanned for SVG assets that get exported to PNG.
IMAGES_FOLDER = 'images/'
DIAGRAMS_FOLDER = 'diagrams/'

# File name of the optional user patch spec (YAML).
PATCH_SPEC_NAME = 'patch.yml'

# Polling budget used while waiting for exported PNG files to show up before moving them.
INTER_PROCESS_SYNC_SECS = 0.1
INTER_PROCESS_SYNC_ATTEMPTS = 10

# Matches markdown image lines whose source carries an extra dotted part before the
# format suffix, i.e. `](<stem>.<indicator>.svg ` followed by more text.
VENDORED_SVG_PAT = re.compile(r'^.+\]\([^.]+\.[^.]+\.svg\ .+$')

40 

41 

@no_type_check
def read_patches(folder_path: pathlib.Path, patches_path: pathlib.Path) -> tuple[list[tuple[str, str]], bool]:
    """Load the user patch pairs from the patch spec file, if one is present and non-empty.

    A non-empty patch spec file is copied into ``folder_path`` (as ``PATCH_SPEC_NAME``)
    and then parsed as YAML. Every top-level entry of the parsed document must unpack
    into exactly two values, which are collected as one patch pair.

    Any failure to read, decode, or parse the spec is logged and disables patching
    instead of raising.

    Args:
        folder_path: Folder receiving the copy of the patch spec file.
        patches_path: Location of the patch spec file to inspect.

    Returns:
        A tuple ``(patches, need_patching)``: the list of patch pairs (empty when
        nothing usable was loaded) and a flag that is true only when at least one
        pair was loaded.
    """
    patches = []
    need_patching = False
    log.info(f'inspecting any patch spec file ({patches_path}) ...')

    if patches_path.is_file() and patches_path.stat().st_size:
        target_path = folder_path / PATCH_SPEC_NAME
        shutil.copy(patches_path, target_path)
        try:
            with open(patches_path, 'rt', encoding=ENCODING) as handle:
                patch_spec = yaml.safe_load(handle)
            need_patching = True
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as err:
            # Malformed YAML is treated like an unreadable file: report and skip patching.
            log.error(f'failed to load patch spec from ({patches_path}) with ({err}) - patching will be skipped')
            need_patching = False

        if need_patching:
            try:
                patches = [(rep, lace) for rep, lace in patch_spec]
            except (TypeError, ValueError) as err:
                # TypeError: spec is not iterable (e.g. comments only -> None) or an entry is a scalar.
                # ValueError: an entry does not unpack into exactly two values.
                log.error(f'- failed to parse patch spec from ({patch_spec}) with ({err}) - patching will be skipped')
                need_patching = False
            else:
                patch_pair_count = len(patches)
                if not patch_pair_count:
                    need_patching = False
                    log.warning('- ignoring empty patch spec')
                else:
                    log.info(
                        f'- loaded {patch_pair_count} patch pair{"" if patch_pair_count == 1 else "s"}'
                        f' from patch spec file ({patches_path})'
                    )
    else:
        if patches_path.is_file():
            log.warning(f'- ignoring empty patch spec file ({patches_path})')
        else:
            log.info(f'- no patch spec file ({patches_path}) detected')

    return patches, need_patching

79 

80 

@no_type_check
def der(
    doc_root: Union[str, pathlib.Path],
    structure_name: str,
    target_key: str,
    facet_key: str,
    options: dict[str, Union[bool, str]],
) -> int:
    """Render the concatenated markdown document of a target and facet to PDF.

    The steps, in order:

    1. validate the request and run the gather prelude
    2. create ``render/pdf/``, load any user patches, and change into that folder
    3. export SVG assets below the images and diagrams folders to PNG via ``svgexport``
    4. rewrite ``.svg`` image references in ``document.md`` to ``.png`` and move
       exported files whose reference lost its application indicator
    5. convert markdown to a JSON AST (for mermaid captions) and to LaTeX via ``pandoc``
    6. apply the internal LaTeX filters and, if requested, the user patches
    7. run ``lualatex`` three times, optionally label the PDF, and copy it to ``../index.pdf``

    Side effect: the current working directory of the process is changed
    (``os.chdir`` into ``render/pdf/``) and not restored.

    Args:
        doc_root: Document root handed to the gather prelude.
        structure_name: Structure name handed to the gather prelude.
        target_key: Target code; must not be blank.
        facet_key: Facet code; must not be blank.
        options: Request options; the keys read here are ``from_format_spec``,
            ``filter_cs_list``, ``patch_tables``, and ``label``.

    Returns:
        ``0`` on success (or when the target loads fine but yields no aspects),
        ``1`` when loading the target fails, ``2`` for a blank target or facet,
        ``0xFADECAFE`` when the aspects disable rendering, or the non-zero code
        reported by a failing pandoc / lualatex delegation.
    """
    log.info(LOG_SEPARATOR)
    log.info('entered render function ...')
    target_code = target_key
    facet_code = facet_key
    if not facet_code.strip() or not target_code.strip():
        log.error(f'render requires non-empty target ({target_code}) and facet ({facet_code}) codes')
        return 2
    log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')

    from_format_spec = options.get('from_format_spec', FROM_FORMAT_SPEC)
    filter_cs_list = parse_csl(options.get('filter_cs_list', ''))
    if filter_cs_list:
        log.info(f'parsed from-format-spec ({from_format_spec}) and filters ({", ".join(filter_cs_list)}) from request')
    else:
        log.info(f'parsed from-format-spec ({from_format_spec}) and no filters from request')

    # The results are not used below; the call is kept for its effects and the unpacking check.
    structure, asset_map = gat.prelude(
        doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='render'
    )
    log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')

    rel_concat_folder_path = pathlib.Path('render/pdf/')
    rel_concat_folder_path.mkdir(parents=True, exist_ok=True)

    # Patches are read before changing folder, as the spec path is relative to the current folder.
    patches, need_patching = read_patches(rel_concat_folder_path, pathlib.Path(PATCH_SPEC_NAME))

    os.chdir(rel_concat_folder_path)
    log.info(f'render (this processor) teleported into the render/pdf location ({os.getcwd()}/)')

    log.info(LOG_SEPARATOR)
    log.info('Assessing the local version control status (compared to upstream) ...')
    too.ensure_separate_log_lines(too.vcs_probe)
    CONTEXT['builder_node_id'] = too.node_id()
    log.info('Context noted with:')
    log.info(f'- builder-node-id({CONTEXT.get("builder_node_id")})')
    log.info(f'- source-hash({CONTEXT.get("source_hash")})')
    log.info(f'- source-hint({CONTEXT.get("source_hint")})')

    ok, aspect_map = too.load_target(target_code, facet_code)
    if not ok or not aspect_map:
        return 0 if ok else 1

    do_render = aspect_map.get('render', None)
    if do_render is not None:
        log.info(f'found render instruction with value ({aspect_map["render"]})')

    # A missing render instruction counts as "do render".
    if do_render is None or do_render:
        log.info('we will render ...')
    else:
        log.warning('we will not render ...')
        return 0xFADECAFE

    log.info(LOG_SEPARATOR)
    log.info('transforming SVG assets to high resolution PNG bitmaps ...')
    for path_to_dir in (IMAGES_FOLDER, DIAGRAMS_FOLDER):
        the_folder = pathlib.Path(path_to_dir)
        if not the_folder.is_dir():
            log.error(
                f'svg-to-png directory ({the_folder}) in ({pathlib.Path().cwd()}) does not exist or is no directory'
            )
            continue
        for svg in the_folder.iterdir():
            if svg.is_file() and svg.suffix == '.svg':
                # Replace-all on purpose: the reference rewrite below uses the same rule.
                png = str(svg).replace('.svg', '.png')
                svg_to_png_command = ['svgexport', svg, png, '100%']
                too.delegate(svg_to_png_command, 'svg-to-png')

    # Pairs of (exported png path, png path as referenced after the rewrite).
    special_patching = []
    log.info(LOG_SEPARATOR)
    log.info('rewriting src attribute values of SVG to PNG sources ...')
    with open('document.md', 'rt', encoding=ENCODING) as handle:
        lines = [line.rstrip() for line in handle.readlines()]
    for slot, line in enumerate(lines):
        if line.startswith('![') and '](' in line:
            if VENDORED_SVG_PAT.match(line):
                if '.svg' in line and line.count('.') >= 2:
                    caption, src, alt, rest = con.parse_markdown_image(line)
                    stem, app_indicator, format_suffix = src.rsplit('.', 2)
                    log.info(f'- removing application indicator ({app_indicator}) from src ...')
                    if format_suffix != 'svg':
                        log.warning(f' + format_suffix (.{format_suffix}) unexpected in <<{line.rstrip()}>> ...')
                    fine = f'![{caption}]({stem}.png "{alt}"){rest}'
                    log.info(f' transform[#{slot + 1}]: {line}')
                    log.info(f' into[#{slot + 1}]: {fine}')
                    lines[slot] = fine
                    dia_path_old = src.replace('.svg', '.png')
                    dia_path_new = f'{stem}.png'
                    dia_fine_rstrip = dia_path_new.rstrip()
                    if dia_path_old and dia_path_new:
                        special_patching.append((dia_path_old, dia_path_new))
                        log.info(
                            f'post-action[#{slot + 1}]: adding to queue for sync move: ({dia_path_old})'
                            f' -> ({dia_path_new})'
                        )
                    else:
                        log.warning(f'- old: {src.rstrip()}')
                        log.warning(f'- new: {dia_fine_rstrip}')
                    continue
            if '.svg' in line:
                fine = line.replace('.svg', '.png')
                log.info(f' transform[#{slot + 1}]: {line}')
                log.info(f' into[#{slot + 1}]: {fine}')
                lines[slot] = fine
                continue
    with open('document.md', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(lines))

    log.info(LOG_SEPARATOR)
    log.info('ensure diagram files can be found when patched ...')
    if special_patching:
        for old, mew in special_patching:
            source_asset = pathlib.Path(old)
            target_asset = pathlib.Path(mew)
            log.info(f'- moving: ({source_asset}) -> ({target_asset})')
            present = False
            remaining_attempts = INTER_PROCESS_SYNC_ATTEMPTS
            # Poll for the exported file, as it may not be visible yet.
            while remaining_attempts > 0 and not present:
                try:
                    present = source_asset.is_file()
                except Exception as ex:
                    log.error(f' * probing for resource ({old}) failed with ({ex}) ... continuing')
                attempt = INTER_PROCESS_SYNC_ATTEMPTS + 1 - remaining_attempts
                log.info(
                    f' + resource ({old}) is{" " if present else " NOT "}present at ({source_asset})'
                    f' - attempt {attempt} of {INTER_PROCESS_SYNC_ATTEMPTS} ...'
                )
                if present:
                    break
                time.sleep(INTER_PROCESS_SYNC_SECS)
                remaining_attempts -= 1
            if not source_asset.is_file():
                # Report the attempts actually spent, not the (exhausted) remainder.
                attempts_made = INTER_PROCESS_SYNC_ATTEMPTS - remaining_attempts
                log.warning(
                    f'- resource ({old}) still not present at ({source_asset})'
                    f' as seen from ({os.getcwd()}) after {attempts_made} attempts'
                    f' and ({attempts_made * INTER_PROCESS_SYNC_SECS:.0f} seconds waiting)'
                )
            # NOTE(review): the move is attempted even after the warning above and
            # presumably raises when the source is still missing - confirm this is intended.
            shutil.move(source_asset, target_asset)
    else:
        log.info('post-action queue (from reference renaming) is empty - nothing to move')
    log.info(LOG_SEPARATOR)

    # prototyping >>>
    fmt_spec = from_format_spec
    in_doc = 'document.md'
    out_doc = 'ast-no-filter.json'
    markdown_to_ast_command = [
        'pandoc',
        '--verbose',
        '-f',
        fmt_spec,
        '-t',
        'json',
        in_doc,
        '-o',
        out_doc,
    ]
    log.info(LOG_SEPARATOR)
    log.info(f'executing ({" ".join(markdown_to_ast_command)}) ...')
    if code := too.delegate(markdown_to_ast_command, 'markdown-to-ast'):
        return code

    log.info(LOG_SEPARATOR)

    mermaid_caption_map = too.mermaid_captions_from_json_ast(out_doc)
    log.info(LOG_SEPARATOR)
    # Log the caption map one line per record to keep the log lines separate.
    for line in json.dumps(mermaid_caption_map, indent=2).split('\n'):
        log.info(line)
    log.info(LOG_SEPARATOR)

    # <<< prototyping

    fmt_spec = from_format_spec
    in_doc = 'document.md'
    out_doc = LATEX_PAYLOAD_NAME
    markdown_to_latex_command = [
        'pandoc',
        '--verbose',
        '-f',
        fmt_spec,
        '-t',
        'latex',
        in_doc,
        '-o',
        out_doc,
    ]
    if filter_cs_list:
        # Every requested filter becomes a `--filter <expr>` argument pair.
        filters = [added_prefix for expr in filter_cs_list for added_prefix in ('--filter', expr)]
        markdown_to_latex_command += filters
    log.info(LOG_SEPARATOR)
    log.info(f'executing ({" ".join(markdown_to_latex_command)}) ...')
    if code := too.delegate(markdown_to_latex_command, 'markdown-to-latex'):
        return code

    log.info(LOG_SEPARATOR)
    log.info(f'load text lines from intermediate {LATEX_PAYLOAD_NAME} file before internal transforms ...')
    with open(LATEX_PAYLOAD_NAME, 'rt', encoding=ENCODING) as handle:
        lines = [line.rstrip() for line in handle.readlines()]

    lines = too.execute_filter(
        cap.weave,
        head='move any captions below tables ...',
        backup='document-before-caps-patch.tex.txt',
        label='captions-below-tables',
        text_lines=lines,
        lookup=None,
    )

    lines = too.execute_filter(
        lab.inject,
        head='inject stem (derived from file name) labels ...',
        backup='document-before-inject-stem-label-patch.tex.txt',
        label='inject-stem-derived-labels',
        text_lines=lines,
        lookup=mermaid_caption_map,
    )

    lines = too.execute_filter(
        fig.scale,
        head='scale figures ...',
        backup='document-before-scale-figures-patch.tex.txt',
        label='inject-scale-figures',
        text_lines=lines,
        lookup=None,
    )

    lines = too.execute_filter(
        dsc.options,
        head='add options to descriptions (definition lists) ...',
        backup='document-before-description-options-patch.tex.txt',
        label='inject-description-options',
        text_lines=lines,
        lookup=None,
    )

    if options.get('patch_tables', False):
        lines = too.execute_filter(
            tab.patch,
            head='patching tables EXPERIMENTAL (table-shape) ...',
            backup='document-before-table-shape-patch.tex.txt',
            label='changed-table-shape',
            text_lines=lines,
            lookup=None,
        )
    else:
        log.info(LOG_SEPARATOR)
        log.info('not patching tables but commenting out (ignoring) any columns command (table-shape) ...')
        patched_lines = [f'%IGNORED_{v}' if v.startswith(r'\columns=') else v for v in lines]
        patched_lines = [f'%IGNORED_{v}' if v.startswith(r'\tablefontsize=') else v for v in patched_lines]
        log.info('diff of the (ignore-table-shape-if-not-patched) filter result:')
        too.log_unified_diff(lines, patched_lines)
        # NOTE(review): this branch only updates the in-memory lines; whether they reach the
        # LaTeX payload file without user patches depends on code not visible here - confirm.
        lines = patched_lines
        log.info(LOG_SEPARATOR)

    if need_patching:
        log.info(LOG_SEPARATOR)
        log.info('apply user patches ...')
        doc_before_user_patch = 'document-before-user-patch.tex.txt'
        with open(doc_before_user_patch, 'wt', encoding=ENCODING) as handle:
            handle.write('\n'.join(lines))
        patched_lines = pat.apply(patches, lines)
        with open(LATEX_PAYLOAD_NAME, 'wt', encoding=ENCODING) as handle:
            handle.write('\n'.join(patched_lines))
        log.info('diff of the (user-patches) filter result:')
        too.log_unified_diff(lines, patched_lines)
        lines = patched_lines
    else:
        log.info(LOG_SEPARATOR)
        log.info('skipping application of user patches ...')

    log.info(LOG_SEPARATOR)
    log.info(f'Internal text line buffer counts {len(lines)} lines')

    log.info(LOG_SEPARATOR)
    log.info('cp -a driver.tex this.tex ...')
    source_asset = 'driver.tex'
    target_asset = 'this.tex'
    shutil.copy(source_asset, target_asset)

    # Three identical passes; the first failing pass ends the rendering with its code.
    latex_to_pdf_command = ['lualatex', '--shell-escape', 'this.tex']
    for run in (1, 2, 3):
        log.info(LOG_SEPARATOR)
        log.info(f'{run}/3) lualatex --shell-escape this.tex ...')
        if code := too.delegate(latex_to_pdf_command, f'latex-to-pdf({run}/3)'):
            return code

    if str(options.get('label', '')).strip():
        labeling_call = str(options['label']).strip().split()
        labeling_call.extend(
            [
                '--key-value-pairs',
                (
                    f'BuilderNodeID={CONTEXT["builder_node_id"]}'
                    f',SourceHash={CONTEXT["source_hash"]}'
                    f',SourceHint={CONTEXT["source_hint"]}'
                ),
            ]
        )
        log.info(LOG_SEPARATOR)
        log.info(f'Labeling the resulting pdf file per ({" ".join(labeling_call)})')
        # The outcome of the labeling call does not influence the return code.
        too.delegate(labeling_call, 'label-pdf')
        log.info(LOG_SEPARATOR)

    log.info(LOG_SEPARATOR)
    log.info('Moving stuff around (result phase) ...')
    source_asset = 'this.pdf'
    target_asset = '../index.pdf'
    shutil.copy(source_asset, target_asset)

    log.info(LOG_SEPARATOR)
    log.info('Deliverable taxonomy: ...')
    too.report_taxonomy(pathlib.Path(target_asset))

    # The outcome of the font assessment does not influence the return code.
    pdffonts_command = ['pdffonts', target_asset]
    too.delegate(pdffonts_command, 'assess-pdf-fonts')

    log.info(LOG_SEPARATOR)
    log.info('done.')
    log.info(LOG_SEPARATOR)

    return 0