Coverage for liitos/render.py: 80.97%

291 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-07 19:29:53 +00:00

1"""Render the concat document to pdf.""" 

2 

3import json 

4import os 

5import pathlib 

6import re 

7import shutil 

8import time 

9from typing import Union, no_type_check 

10 

11import yaml 

12 

13import liitos.captions as cap 

14import liitos.concat as con 

15import liitos.description_lists as dsc 

16import liitos.figures as fig 

17import liitos.gather as gat 

18import liitos.labels as lab 

19import liitos.patch as pat 

20import liitos.tables as tab 

21import liitos.tools as too 

22from liitos import ( 

23 CONTEXT, 

24 ENCODING, 

25 FROM_FORMAT_SPEC, 

26 LATEX_PAYLOAD_NAME, 

27 LOG_SEPARATOR, 

28 OptionsType, 

29 log, 

30 parse_csl, 

31) 

32 

# Relative location two directory levels up; only used below to derive STRUCTURE_PATH.
DOC_BASE = pathlib.Path('..', '..')
STRUCTURE_PATH = DOC_BASE / 'structure.yml'
# Folders (relative to the render/pdf working directory) scanned for SVG files to convert to PNG.
IMAGES_FOLDER = 'images/'
DIAGRAMS_FOLDER = 'diagrams/'
# File name of the optional user patch spec (search-replace pairs), see read_patches.
PATCH_SPEC_NAME = 'patch.yml'
# Pause between, and maximum number of, probes for a converted asset to show up on disk.
INTER_PROCESS_SYNC_SECS = 0.1
INTER_PROCESS_SYNC_ATTEMPTS = 10
# Matches a line whose image target has the shape <stem>.<indicator>.svg followed by a space
# and further text (two dots in the target, the last suffix being svg).
VENDORED_SVG_PAT = re.compile(r'^.+\]\([^.]+\.[^.]+\.svg\ .+$')

41 

42 

@no_type_check
def read_patches(folder_path: pathlib.Path, patches_path: pathlib.Path) -> tuple[list[tuple[str, str]], bool]:
    """Obtain any search-replace pairs from user patching file.

    A non-empty patch spec file at ``patches_path`` is copied into ``folder_path``
    (as ``PATCH_SPEC_NAME``) and then parsed as YAML; the top-level value is
    expected to be a sequence of two-element (search, replace) entries.

    Returns a tuple ``(patches, need_patching)``. Whenever the file is missing,
    empty, unreadable, not valid YAML, or not a sequence of pairs the result is
    ``([], False)`` and the reason is logged - rendering is never aborted because
    of a broken patch spec.
    """
    patches = []
    need_patching = False
    log.info(f'inspecting any patch spec file ({patches_path}) ...')

    if not (patches_path.is_file() and patches_path.stat().st_size):
        if patches_path.is_file():
            log.warning(f'- ignoring empty patch spec file ({patches_path})')
        else:
            log.info(f'- no patch spec file ({patches_path}) detected')
        return patches, need_patching

    # Keep a copy of the spec next to the render artifacts.
    target_path = folder_path / PATCH_SPEC_NAME
    shutil.copy(patches_path, target_path)

    try:
        with open(patches_path, 'rt', encoding=ENCODING) as handle:
            patch_spec = yaml.safe_load(handle)
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as err:
        # yaml.YAMLError covers syntactically broken specs, which previously escaped as a crash.
        log.error(f'failed to load patch spec from ({patches_path}) with ({err}) - patching will be skipped')
        return patches, need_patching

    try:
        patches = [(rep, lace) for rep, lace in patch_spec]
    except (TypeError, ValueError) as err:
        # TypeError: spec is None or a scalar (e.g. comment-only file) or an entry is not iterable;
        # ValueError: an entry does not hold exactly two elements.
        log.error(f'- failed to parse patch spec from ({patch_spec}) with ({err}) - patching will be skipped')
        return [], False

    patch_pair_count = len(patches)
    if not patch_pair_count:
        log.warning('- ignoring empty patch spec')
        return patches, False

    log.info(
        f'- loaded {patch_pair_count} patch pair{"" if patch_pair_count == 1 else "s"}'
        f' from patch spec file ({patches_path})'
    )
    return patches, True

80 

81 

@no_type_check
def der(
    doc_root: Union[str, pathlib.Path],
    structure_name: str,
    target_key: str,
    facet_key: str,
    options: OptionsType,
) -> int:
    """Render the document as PDF, eventually.

    Phases, in order:

    1. validate target and facet, read format spec and filters from ``options``
    2. enter the document root (via ``gat.prelude``) and then ``render/pdf/``
       - the working directory of the process stays changed after returning
    3. convert SVG assets below the images and diagrams folders to PNG
    4. rewrite SVG references in ``document.md`` to PNG and move renamed assets
    5. run pandoc (markdown to JSON AST, markdown to LaTeX)
    6. apply the internal LaTeX filters and any user patches
    7. run lualatex three times, optionally label the PDF, copy it to ``../index.pdf``

    Option keys read from ``options``: ``from_format_spec``, ``filter_cs_list``,
    ``quiet``, ``force``, ``table_caption_below``, ``patch_tables``,
    ``table_uglify``, and ``label``.

    Returns:
        0 on success (or when the target loads fine but yields no aspects),
        1 when loading the target failed,
        2 when target or facet code is empty,
        0xFADECAFE when the aspect map disables rendering and it is not forced,
        or the non-zero code of the first failing pandoc / lualatex delegation.
    """
    log.info(LOG_SEPARATOR)
    log.info('entered render function ...')
    target_code = target_key
    facet_code = facet_key
    if not facet_code.strip() or not target_code.strip():
        log.error(f'render requires non-empty target ({target_code}) and facet ({facet_code}) codes')
        return 2
    log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')

    from_format_spec = options.get('from_format_spec', FROM_FORMAT_SPEC)
    filter_cs_list = parse_csl(options.get('filter_cs_list', ''))
    if filter_cs_list:
        log.info(f'parsed from-format-spec ({from_format_spec}) and filters ({", ".join(filter_cs_list)}) from request')
    else:
        log.info(f'parsed from-format-spec ({from_format_spec}) and no filters from request')

    # The returned values are not used here; the call is kept for its effects (see log line below).
    structure, asset_map = gat.prelude(
        doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='render'
    )
    log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')

    rel_concat_folder_path = pathlib.Path('render/pdf/')
    rel_concat_folder_path.mkdir(parents=True, exist_ok=True)

    # Read the patch spec while still in the document root, i.e. before changing into render/pdf/.
    patches, need_patching = read_patches(rel_concat_folder_path, pathlib.Path(PATCH_SPEC_NAME))

    os.chdir(rel_concat_folder_path)
    log.info(f'render (this processor) teleported into the render/pdf location ({os.getcwd()}/)')

    log.info(LOG_SEPARATOR)
    log.warning('Assessing the local version control status (compared to upstream) ...')
    too.ensure_separate_log_lines(too.vcs_probe, log.warning)
    CONTEXT['builder_node_id'] = too.node_id()
    log.warning('Context noted with:')
    log.warning(f'- builder-node-id({CONTEXT.get("builder_node_id")})')
    log.warning(f'- source-hash({CONTEXT.get("source_hash")})')
    log.warning(f'- source-hint({CONTEXT.get("source_hint")})')

    ok, aspect_map = too.load_target(target_code, facet_code)
    if not ok or not aspect_map:
        return 0 if ok else 1

    is_quiet = options.get('quiet', False)
    do_render = aspect_map.get('render', None)
    if do_render is not None:
        log.info(f'found render instruction with value ({aspect_map["render"]})')

    # Read force like every other option so that a missing key means "not forced" instead of KeyError.
    if do_render is None or do_render or options.get('force', False):
        why = 'default-render' if do_render is None else ('render-true' if do_render else 'render-force')
        log.warning(f'we will render ({why=}) ...')
    else:
        log.warning('we will not render ...')
        return 0xFADECAFE

    log.info(LOG_SEPARATOR)
    log.info('transforming SVG assets to high resolution PNG bitmaps ...')
    for path_to_dir in (IMAGES_FOLDER, DIAGRAMS_FOLDER):
        the_folder = pathlib.Path(path_to_dir)
        if not the_folder.is_dir():
            log.info(
                f'svg-to-png directory ({the_folder}) in ({pathlib.Path().cwd()}) does not exist or is no directory'
                f' - trying to create {the_folder}'
            )
            try:
                the_folder.mkdir(parents=True, exist_ok=True)
            except FileExistsError as err:
                # The path exists but is not a directory - nothing to convert there.
                log.error(f'failed to create {the_folder} - detail: {err}')
                continue
        for svg in the_folder.iterdir():
            if svg.is_file() and svg.suffix == '.svg':
                # Only swap the final suffix; a plain string replace would also hit '.svg' elsewhere in the path.
                png = str(svg.with_suffix('.png'))
                svg_to_png_command = ['svgexport', svg, png, '100%']
                too.delegate(svg_to_png_command, 'svg-to-png', is_quiet=is_quiet)

    special_patching = []
    log.info(LOG_SEPARATOR)
    log.info('rewriting src attribute values of SVG to PNG sources ...')
    with open('document.md', 'rt', encoding=ENCODING) as handle:
        lines = [line.rstrip() for line in handle.readlines()]
    for slot, line in enumerate(lines):
        if line.startswith('![') and '](' in line:
            if VENDORED_SVG_PAT.match(line):
                if '.svg' in line and line.count('.') >= 2:
                    caption, src, alt, rest = con.parse_markdown_image(line)
                    for msg in too.incoherent_math_mode_in_caption(caption, phase_info=f'for SVG image ({src})'):
                        log.warning(msg)
                    # src has the shape <stem>.<application-indicator>.<format>; the indicator is dropped.
                    stem, app_indicator, format_suffix = src.rsplit('.', 2)
                    log.info(f'- removing application indicator ({app_indicator}) from src ...')
                    if format_suffix != 'svg':
                        log.warning(f' + format_suffix (.{format_suffix}) unexpected in <<{line.rstrip()}>> ...')
                    fine = f'![{caption}]({stem}.png "{alt}"){rest}'
                    log.info(f' transform[#{slot + 1}]: {line}')
                    log.info(f' into[#{slot + 1}]: {fine}')
                    lines[slot] = fine
                    # The converted bitmap still carries the indicator in its name; queue a move to the short name.
                    dia_path_old = src.replace('.svg', '.png')
                    dia_path_new = f'{stem}.png'
                    if dia_path_old and dia_path_new:
                        special_patching.append((dia_path_old, dia_path_new))
                        log.info(
                            f'post-action[#{slot + 1}]: adding to queue for sync move: ({dia_path_old})'
                            f' -> ({dia_path_new})'
                        )
                    else:
                        log.warning(f'- old: {src.rstrip()}')
                        log.warning(f'- new: {dia_path_new.rstrip()}')
                    continue
            if '.svg' in line:
                fine = line.replace('.svg', '.png')
                log.info(f' transform[#{slot + 1}]: {line}')
                log.info(f' into[#{slot + 1}]: {fine}')
                lines[slot] = fine
                continue
    with open('document.md', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(lines))

    log.info(LOG_SEPARATOR)
    log.info('ensure diagram files can be found when patched ...')
    if special_patching:
        for old, mew in special_patching:
            source_asset = pathlib.Path(old)
            target_asset = pathlib.Path(mew)
            log.info(f'- moving: ({source_asset}) -> ({target_asset})')
            # Poll for the converted bitmap; count the probes so the log lines below report real numbers.
            present = False
            attempts_made = 0
            while attempts_made < INTER_PROCESS_SYNC_ATTEMPTS and not present:
                attempts_made += 1
                try:
                    present = source_asset.is_file()
                except Exception as ex:  # best effort probe - keep polling
                    log.error(f' * probing for resource ({old}) failed with ({ex}) ... continuing')
                log.info(
                    f' + resource ({old}) is{" " if present else " NOT "}present at ({source_asset})'
                    f' - attempt {attempts_made} of {INTER_PROCESS_SYNC_ATTEMPTS} ...'
                )
                if present:
                    break
                time.sleep(INTER_PROCESS_SYNC_SECS)
            if not source_asset.is_file():
                log.warning(
                    f'- resource ({old}) still not present at ({source_asset})'
                    f' as seen from ({os.getcwd()}) after {attempts_made} attempts'
                    f' and ({round(attempts_made * INTER_PROCESS_SYNC_SECS, 0):.0f} seconds waiting)'
                )
            elif target_asset.is_file():
                # NOTE(review): the message announces an overwrite, but the move below only runs when
                # the target does not exist yet - confirm which of the two is intended.
                log.warning(f'overwriting existing {target_asset} from {source_asset}')
            if not target_asset.is_file() and source_asset.is_file():
                try:
                    shutil.move(source_asset, target_asset)
                except FileNotFoundError as err:
                    log.error(f'{source_asset} (existing) to {target_asset} (not-yet) move failed with: {err}')
            elif target_asset.is_file() and not source_asset.is_file():
                log.warning(f'Houston, we have a problem> {source_asset} missing and {target_asset} present (ignored)')
    else:
        log.info('post-action queue (from reference renaming) is empty - nothing to move')
    log.info(LOG_SEPARATOR)

    fmt_spec = from_format_spec
    in_doc = 'document.md'
    out_doc = 'ast-no-filter.json'
    markdown_to_ast_command = [
        'pandoc',
        '--verbose',
        '-f',
        fmt_spec,
        '-t',
        'json',
        in_doc,
        '-o',
        out_doc,
    ]
    log.info(LOG_SEPARATOR)
    log.info(f'executing ({" ".join(markdown_to_ast_command)}) ...')
    if code := too.delegate(markdown_to_ast_command, 'markdown-to-ast', is_quiet=is_quiet):
        return code

    log.info(LOG_SEPARATOR)

    mermaid_caption_map = too.mermaid_captions_from_json_ast(out_doc)
    log.info(LOG_SEPARATOR)
    # One log record per line of the pretty printed map.
    for line in json.dumps(mermaid_caption_map, indent=2).split('\n'):
        log.info(line)
    log.info(LOG_SEPARATOR)

    fmt_spec = from_format_spec
    in_doc = 'document.md'
    out_doc = LATEX_PAYLOAD_NAME
    markdown_to_latex_command = [
        'pandoc',
        '--verbose',
        '-f',
        fmt_spec,
        '-t',
        'latex',
        in_doc,
        '-o',
        out_doc,
    ]
    if filter_cs_list:
        # Every requested filter becomes its own '--filter <expr>' argument pair.
        filters = [added_prefix for expr in filter_cs_list for added_prefix in ('--filter', expr)]
        markdown_to_latex_command += filters
    log.info(LOG_SEPARATOR)
    log.warning(f'executing ({" ".join(markdown_to_latex_command)}) ...')
    if code := too.delegate(markdown_to_latex_command, 'markdown-to-latex', is_quiet=is_quiet):
        return code

    log.info(LOG_SEPARATOR)
    log.info(f'load text lines from intermediate {LATEX_PAYLOAD_NAME} file before internal transforms ...')
    with open(LATEX_PAYLOAD_NAME, 'rt', encoding=ENCODING) as handle:
        lines = [line.rstrip() for line in handle.readlines()]

    # The counter numbers the backup file names in the order the transforms are applied.
    patch_counter = 1
    if options.get('table_caption_below', False):
        lines = too.execute_filter(
            cap.weave,
            head='move any captions below tables ...',
            backup=f'document-before-caps-patch-{patch_counter}.tex.txt',
            label='captions-below-tables',
            text_lines=lines,
            lookup=None,
        )
        patch_counter += 1
    else:
        log.info('NOT moving captions below tables!')

    lines = too.execute_filter(
        lab.inject,
        head='inject stem (derived from file name) labels ...',
        backup=f'document-before-inject-stem-label-patch-{patch_counter}.tex.txt',
        label='inject-stem-derived-labels',
        text_lines=lines,
        lookup=mermaid_caption_map,
    )
    patch_counter += 1

    lines = too.execute_filter(
        fig.scale,
        head='scale figures ...',
        backup=f'document-before-scale-figures-patch-{patch_counter}.tex.txt',
        label='inject-scale-figures',
        text_lines=lines,
        lookup=None,
    )
    patch_counter += 1

    lines = too.execute_filter(
        dsc.options,
        head='add options to descriptions (definition lists) ...',
        backup=f'document-before-description-options-patch-{patch_counter}.tex.txt',
        label='inject-description-options',
        text_lines=lines,
        lookup=None,
    )
    patch_counter += 1

    if options.get('patch_tables', False):
        lookup_tunnel = {'table_style': 'ugly' if options.get('table_uglify', False) else 'readable'}
        lines = too.execute_filter(
            tab.patch,
            head='patching tables EXPERIMENTAL (table-shape) ...',
            backup=f'document-before-table-shape-patch-{patch_counter}.tex.txt',
            label='changed-table-shape',
            text_lines=lines,
            lookup=lookup_tunnel,
        )
        patch_counter += 1
    else:
        log.info(LOG_SEPARATOR)
        log.info('not patching tables but commenting out (ignoring) any columns command (table-shape) ...')
        patched_lines = [f'%IGNORED_{v}' if v.startswith(r'\columns=') else v for v in lines]
        patched_lines = [f'%IGNORED_{v}' if v.startswith(r'\tablefontsize=') else v for v in patched_lines]
        log.info('diff of the (ignore-table-shape-if-not-patched) filter result:')
        too.log_unified_diff(lines, patched_lines)
        # NOTE(review): within this function the result of this branch only reaches the payload
        # file when user patches are applied below - verify that this is intended.
        lines = patched_lines
        log.info(LOG_SEPARATOR)

    if need_patching:
        log.info(LOG_SEPARATOR)
        log.info('apply user patches ...')
        doc_before_user_patch = f'document-before-user-patch-{patch_counter}.tex.txt'
        patch_counter += 1
        with open(doc_before_user_patch, 'wt', encoding=ENCODING) as handle:
            handle.write('\n'.join(lines))
        patched_lines = pat.apply(patches, lines)
        with open(LATEX_PAYLOAD_NAME, 'wt', encoding=ENCODING) as handle:
            handle.write('\n'.join(patched_lines))
        log.info('diff of the (user-patches) filter result:')
        too.log_unified_diff(lines, patched_lines)
        lines = patched_lines
    else:
        log.info(LOG_SEPARATOR)
        log.info('skipping application of user patches ...')

    log.info(LOG_SEPARATOR)
    log.info(f'Internal text line buffer counts {len(lines)} lines')

    log.info(LOG_SEPARATOR)
    log.info('cp -a driver.tex this.tex ...')
    source_asset = 'driver.tex'
    target_asset = 'this.tex'
    shutil.copy(source_asset, target_asset)

    # The same lualatex call is executed three times in a row; stop at the first failing pass.
    latex_to_pdf_command = ['lualatex', '--shell-escape', 'this.tex']
    latex_passes = 3
    for latex_pass in range(1, latex_passes + 1):
        log.info(LOG_SEPARATOR)
        log.warning(f'{latex_pass}/{latex_passes}) lualatex --shell-escape this.tex ...')
        pass_label = f'latex-to-pdf({latex_pass}/{latex_passes})'
        if code := too.delegate(latex_to_pdf_command, pass_label, is_quiet=is_quiet):
            return code

    if str(options.get('label', '')).strip():
        # The label option holds a whitespace separated command line; context facts are appended to it.
        labeling_call = str(options['label']).strip().split()
        labeling_call.extend(
            [
                '--key-value-pairs',
                (
                    f'BuilderNodeID={CONTEXT["builder_node_id"]}'
                    f',SourceHash={CONTEXT.get("source_hash", "no-source-hash-given")}'
                    f',SourceHint={CONTEXT.get("source_hint", "no-source-hint-given")}'
                ),
            ]
        )
        log.info(LOG_SEPARATOR)
        log.warning(f'Labeling the resulting pdf file per ({" ".join(labeling_call)})')
        too.delegate(labeling_call, 'label-pdf', is_quiet=is_quiet)
        log.info(LOG_SEPARATOR)

    log.info(LOG_SEPARATOR)
    log.warning('Moving stuff around (result phase) ...')
    source_asset = 'this.pdf'
    target_asset = '../index.pdf'
    shutil.copy(source_asset, target_asset)

    log.info(LOG_SEPARATOR)
    log.warning('Deliverable taxonomy: ...')
    too.report_taxonomy(pathlib.Path(target_asset))

    pdffonts_command = ['pdffonts', target_asset]
    too.delegate(pdffonts_command, 'assess-pdf-fonts', is_quiet=is_quiet)

    log.info(LOG_SEPARATOR)
    log.warning('done.')
    log.info(LOG_SEPARATOR)

    return 0