# Coverage for liitos/render.py: 80.97% (291 statements)
# coverage.py v7.10.7, created at 2025-10-07 19:29:53 +00:00
"""Render the concat document to pdf."""

import json
import os
import pathlib
import re
import shutil
import time
from typing import Union, no_type_check

import yaml

import liitos.captions as cap
import liitos.concat as con
import liitos.description_lists as dsc
import liitos.figures as fig
import liitos.gather as gat
import liitos.labels as lab
import liitos.patch as pat
import liitos.tables as tab
import liitos.tools as too
from liitos import (
    CONTEXT,
    ENCODING,
    FROM_FORMAT_SPEC,
    LATEX_PAYLOAD_NAME,
    LOG_SEPARATOR,
    OptionsType,
    log,
    parse_csl,
)

# Document root is two levels up - processing happens inside render/pdf/ below it
DOC_BASE = pathlib.Path('..', '..')
STRUCTURE_PATH = DOC_BASE / 'structure.yml'
IMAGES_FOLDER = 'images/'
DIAGRAMS_FOLDER = 'diagrams/'
PATCH_SPEC_NAME = 'patch.yml'
# Polling cadence and budget when waiting for renamed diagram assets to appear on disk
INTER_PROCESS_SYNC_SECS = 0.1
INTER_PROCESS_SYNC_ATTEMPTS = 10
# Matches markdown image lines referencing vendored stem.app.svg assets, e.g. ...](dia.mmd.svg ...
VENDORED_SVG_PAT = re.compile(r'^.+\]\([^.]+\.[^.]+\.svg\ .+$')
@no_type_check
def read_patches(folder_path: pathlib.Path, patches_path: pathlib.Path) -> tuple[list[tuple[str, str]], bool]:
    """Obtain any search-replace pairs from user patching file.

    Copies the patch spec into folder_path (for traceability), loads it as YAML,
    and validates that it is a sequence of (search, replace) pairs.

    Returns a (patches, need_patching) pair; need_patching is False whenever the
    spec file is missing, empty, unreadable, or malformed - patching is then skipped.
    """
    patches: list[tuple[str, str]] = []
    need_patching = False
    log.info(f'inspecting any patch spec file ({patches_path}) ...')
    if patches_path.is_file() and patches_path.stat().st_size:
        target_path = folder_path / PATCH_SPEC_NAME
        # keep a copy of the spec next to the render artifacts
        shutil.copy(patches_path, target_path)
        try:
            with open(patches_path, 'rt', encoding=ENCODING) as handle:
                patch_spec = yaml.safe_load(handle)
            need_patching = True
        # yaml.YAMLError added: a malformed spec must degrade to "skip patching",
        # not crash the render (matches the logged intent below)
        except (OSError, UnicodeDecodeError, yaml.YAMLError) as err:
            log.error(f'failed to load patch spec from ({patches_path}) with ({err}) - patching will be skipped')
            need_patching = False
        if need_patching:
            try:
                # validate shape: every entry must unpack into a (search, replace) pair
                patches = [(rep, lace) for rep, lace in patch_spec]
                patch_pair_count = len(patches)
                if not patch_pair_count:
                    need_patching = False
                    log.warning('- ignoring empty patch spec')
                else:
                    log.info(
                        f'- loaded {patch_pair_count} patch pair{"" if patch_pair_count == 1 else "s"}'
                        f' from patch spec file ({patches_path})'
                    )
            # TypeError added: a non-iterable spec (e.g. a scalar) raises TypeError,
            # not ValueError, on unpacking - both mean "malformed, skip patching"
            except (TypeError, ValueError) as err:
                log.error(f'- failed to parse patch spec from ({patch_spec}) with ({err}) - patching will be skipped')
                need_patching = False
    else:
        if patches_path.is_file():
            log.warning(f'- ignoring empty patch spec file ({patches_path})')
        else:
            log.info(f'- no patch spec file ({patches_path}) detected')
    return patches, need_patching
@no_type_check
def der(
    doc_root: Union[str, pathlib.Path],
    structure_name: str,
    target_key: str,
    facet_key: str,
    options: OptionsType,
) -> int:
    """Render the document as PDF, eventually.

    Orchestrates the render pipeline inside render/pdf/ below the document root:
    SVG to PNG asset conversion, markdown source rewriting, two pandoc passes
    (AST probe and LaTeX emission), a chain of LaTeX text filters, optional user
    patches, three lualatex runs, optional labeling, and publication of ../index.pdf.

    Returns 0 on success, 2 on empty target/facet codes, 0/1 on target load
    results, a delegate exit code on subprocess failure, or the magic marker
    0xFADECAFE when rendering is declined.
    """
    log.info(LOG_SEPARATOR)
    log.info('entered render function ...')
    target_code = target_key
    facet_code = facet_key
    if not facet_code.strip() or not target_code.strip():
        log.error(f'render requires non-empty target ({target_code}) and facet ({facet_code}) codes')
        return 2
    log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')

    from_format_spec = options.get('from_format_spec', FROM_FORMAT_SPEC)
    filter_cs_list = parse_csl(options.get('filter_cs_list', ''))
    if filter_cs_list:
        log.info(f'parsed from-format-spec ({from_format_spec}) and filters ({", ".join(filter_cs_list)}) from request')
    else:
        log.info(f'parsed from-format-spec ({from_format_spec}) and no filters from request')

    # prelude changes the working directory into the document root (per the log below)
    structure, asset_map = gat.prelude(
        doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='render'
    )
    log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')

    rel_concat_folder_path = pathlib.Path('render/pdf/')
    rel_concat_folder_path.mkdir(parents=True, exist_ok=True)

    # read the patch spec before chdir - it lives relative to the document root
    patches, need_patching = read_patches(rel_concat_folder_path, pathlib.Path(PATCH_SPEC_NAME))

    os.chdir(rel_concat_folder_path)
    log.info(f'render (this processor) teleported into the render/pdf location ({os.getcwd()}/)')

    log.info(LOG_SEPARATOR)
    log.warning('Assessing the local version control status (compared to upstream) ...')
    too.ensure_separate_log_lines(too.vcs_probe, log.warning)
    CONTEXT['builder_node_id'] = too.node_id()
    log.warning('Context noted with:')
    log.warning(f'- builder-node-id({CONTEXT.get("builder_node_id")})')
    log.warning(f'- source-hash({CONTEXT.get("source_hash")})')
    log.warning(f'- source-hint({CONTEXT.get("source_hint")})')

    ok, aspect_map = too.load_target(target_code, facet_code)
    if not ok or not aspect_map:
        return 0 if ok else 1

    is_quiet = options.get('quiet', False)
    do_render = aspect_map.get('render', None)
    if do_render is not None:
        log.info(f'found render instruction with value ({aspect_map["render"]})')

    # fix: use .get with default like every other option access here, so a missing
    # 'force' key no longer raises KeyError when the aspect map disables rendering
    if do_render is None or do_render or options.get('force', False):
        why = 'default-render' if do_render is None else ('render-true' if do_render else 'render-force')
        log.warning(f'we will render ({why=}) ...')
    else:
        log.warning('we will not render ...')
        return 0xFADECAFE

    log.info(LOG_SEPARATOR)
    log.info('transforming SVG assets to high resolution PNG bitmaps ...')
    for path_to_dir in (IMAGES_FOLDER, DIAGRAMS_FOLDER):
        the_folder = pathlib.Path(path_to_dir)
        if not the_folder.is_dir():
            log.info(
                f'svg-to-png directory ({the_folder}) in ({pathlib.Path().cwd()}) does not exist or is no directory'
                f' - trying to create {the_folder}'
            )
            try:
                the_folder.mkdir(parents=True, exist_ok=True)
            except FileExistsError as err:
                # a non-directory entry of that name exists - skip this folder
                log.error(f'failed to create {the_folder} - detail: {err}')
                continue
        for svg in pathlib.Path(path_to_dir).iterdir():
            if svg.is_file() and svg.suffix == '.svg':
                png = str(svg).replace('.svg', '.png')
                svg_to_png_command = ['svgexport', svg, png, '100%']
                too.delegate(svg_to_png_command, 'svg-to-png', is_quiet=is_quiet)

    special_patching = []
    log.info(LOG_SEPARATOR)
    log.info('rewriting src attribute values of SVG to PNG sources ...')
    with open('document.md', 'rt', encoding=ENCODING) as handle:
        lines = [line.rstrip() for line in handle.readlines()]
    for slot, line in enumerate(lines):
        # NOTE(review): this condition was garbled in the source at hand; markdown image
        # lines start with '![' which fits the surrounding logic - confirm upstream
        if line.startswith('!['):
            if '.svg' in line and line.count('.') >= 2:
                # vendored src of shape stem.app_indicator.svg - strip the indicator
                caption, src, alt, rest = con.parse_markdown_image(line)
                for msg in too.incoherent_math_mode_in_caption(caption, phase_info=f'for SVG image ({src})'):
                    log.warning(msg)
                stem, app_indicator, format_suffix = src.rsplit('.', 2)
                log.info(f'- removing application indicator ({app_indicator}) from src ...')
                if format_suffix != 'svg':
                    log.warning(f' + format_suffix (.{format_suffix}) unexpected in <<{line.rstrip()}>> ...')
                # NOTE(review): reconstructed - this assignment was truncated in the source
                # at hand; per the surrounding log messages the rewritten line keeps caption
                # and alt text and points the src at the stem png - confirm upstream
                fine = f'![{caption}]({stem}.png "{alt}"){rest}'
                log.info(f' transform[#{slot + 1}]: {line}')
                log.info(f' into[#{slot + 1}]: {fine}')
                lines[slot] = fine
                dia_path_old = src.replace('.svg', '.png')
                dia_path_new = f'{stem}.png'
                if dia_path_old and dia_path_new:
                    # the converter produced stem.app.png - queue the rename to stem.png
                    special_patching.append((dia_path_old, dia_path_new))
                    log.info(
                        f'post-action[#{slot + 1}]: adding to queue for sync move: ({dia_path_old})'
                        f' -> ({dia_path_new})'
                    )
                else:
                    log.warning(f'- old: {src.rstrip()}')
                    log.warning(f'- new: {dia_path_new.rstrip()}')
                continue
            if '.svg' in line:
                # simple case: only the suffix changes
                fine = line.replace('.svg', '.png')
                log.info(f' transform[#{slot + 1}]: {line}')
                log.info(f' into[#{slot + 1}]: {fine}')
                lines[slot] = fine
                continue
    with open('document.md', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(lines))

    log.info(LOG_SEPARATOR)
    log.info('ensure diagram files can be found when patched ...')
    if special_patching:
        for old, mew in special_patching:
            source_asset = pathlib.Path(old)
            target_asset = pathlib.Path(mew)
            log.info(f'- moving: ({source_asset}) -> ({target_asset})')
            present = False
            remaining_attempts = INTER_PROCESS_SYNC_ATTEMPTS
            # the png may still be written by the svg-to-png child - poll for it
            while remaining_attempts > 0 and not present:
                try:
                    present = source_asset.is_file()
                except Exception as ex:
                    log.error(f' * probing for resource ({old}) failed with ({ex}) ... continuing')
                log.info(
                    f' + resource ({old}) is{" " if present else " NOT "}present at ({source_asset})'
                    # fix: derive the attempt number from the constant (was hard-coded 11)
                    f' - attempt {INTER_PROCESS_SYNC_ATTEMPTS + 1 - remaining_attempts}'
                    f' of {INTER_PROCESS_SYNC_ATTEMPTS} ...'
                )
                if present:
                    break
                time.sleep(INTER_PROCESS_SYNC_SECS)
                remaining_attempts -= 1
            if not source_asset.is_file():
                # fix: report the attempts/seconds actually spent - remaining_attempts
                # is 0 when the budget is exhausted, so it reported "0 attempts" before
                log.warning(
                    f'- resource ({old}) still not present at ({source_asset})'
                    f' as seen from ({os.getcwd()}) after {INTER_PROCESS_SYNC_ATTEMPTS} attempts'
                    f' and ({round(INTER_PROCESS_SYNC_ATTEMPTS * INTER_PROCESS_SYNC_SECS, 0):.0f} seconds waiting)'
                )
            elif target_asset.is_file():
                # NOTE(review): despite this message, the move below only runs when the
                # target is absent - an existing target is actually kept; confirm intent
                log.warning(f'overwriting existing {target_asset} from {source_asset}')
            if not target_asset.is_file() and source_asset.is_file():
                try:
                    shutil.move(source_asset, target_asset)
                except FileNotFoundError as err:
                    log.error(f'{source_asset} (existing) to {target_asset} (not-yet) move failed with: {err}')
            elif target_asset.is_file() and not source_asset.is_file():
                log.warning(f'Houston, we have a problem> {source_asset} missing and {target_asset} present (ignored)')
    else:
        log.info('post-action queue (from reference renaming) is empty - nothing to move')
    log.info(LOG_SEPARATOR)

    # first pandoc pass: markdown -> JSON AST (unfiltered) to harvest captions
    fmt_spec = from_format_spec
    in_doc = 'document.md'
    out_doc = 'ast-no-filter.json'
    markdown_to_ast_command = [
        'pandoc',
        '--verbose',
        '-f',
        fmt_spec,
        '-t',
        'json',
        in_doc,
        '-o',
        out_doc,
    ]
    log.info(LOG_SEPARATOR)
    log.info(f'executing ({" ".join(markdown_to_ast_command)}) ...')
    if code := too.delegate(markdown_to_ast_command, 'markdown-to-ast', is_quiet=is_quiet):
        return code

    log.info(LOG_SEPARATOR)

    mermaid_caption_map = too.mermaid_captions_from_json_ast(out_doc)
    log.info(LOG_SEPARATOR)
    # log the caption map one line at a time (the dumped lines contain no newlines,
    # so the former nested re-split was a no-op and has been removed)
    for caption_line in json.dumps(mermaid_caption_map, indent=2).split('\n'):
        log.info(caption_line)
    log.info(LOG_SEPARATOR)

    # second pandoc pass: markdown -> LaTeX payload, with any requested filters
    fmt_spec = from_format_spec
    in_doc = 'document.md'
    out_doc = LATEX_PAYLOAD_NAME
    markdown_to_latex_command = [
        'pandoc',
        '--verbose',
        '-f',
        fmt_spec,
        '-t',
        'latex',
        in_doc,
        '-o',
        out_doc,
    ]
    if filter_cs_list:
        # every filter expression becomes a ('--filter', expr) pair on the command line
        filters = [added_prefix for expr in filter_cs_list for added_prefix in ('--filter', expr)]
        markdown_to_latex_command += filters
    log.info(LOG_SEPARATOR)
    log.warning(f'executing ({" ".join(markdown_to_latex_command)}) ...')
    if code := too.delegate(markdown_to_latex_command, 'markdown-to-latex', is_quiet=is_quiet):
        return code

    log.info(LOG_SEPARATOR)
    log.info(f'load text lines from intermediate {LATEX_PAYLOAD_NAME} file before internal transforms ...')
    with open(LATEX_PAYLOAD_NAME, 'rt', encoding=ENCODING) as handle:
        lines = [line.rstrip() for line in handle.readlines()]

    patch_counter = 1
    if options.get('table_caption_below', False):
        lines = too.execute_filter(
            cap.weave,
            head='move any captions below tables ...',
            backup=f'document-before-caps-patch-{patch_counter}.tex.txt',
            label='captions-below-tables',
            text_lines=lines,
            lookup=None,
        )
        patch_counter += 1
    else:
        log.info('NOT moving captions below tables!')

    lines = too.execute_filter(
        lab.inject,
        head='inject stem (derived from file name) labels ...',
        backup=f'document-before-inject-stem-label-patch-{patch_counter}.tex.txt',
        label='inject-stem-derived-labels',
        text_lines=lines,
        lookup=mermaid_caption_map,
    )
    patch_counter += 1

    lines = too.execute_filter(
        fig.scale,
        head='scale figures ...',
        backup=f'document-before-scale-figures-patch-{patch_counter}.tex.txt',
        label='inject-scale-figures',
        text_lines=lines,
        lookup=None,
    )
    patch_counter += 1

    lines = too.execute_filter(
        dsc.options,
        head='add options to descriptions (definition lists) ...',
        backup=f'document-before-description-options-patch-{patch_counter}.tex.txt',
        label='inject-description-options',
        text_lines=lines,
        lookup=None,
    )
    patch_counter += 1

    if options.get('patch_tables', False):
        lookup_tunnel = {'table_style': 'ugly' if options.get('table_uglify', False) else 'readable'}
        lines = too.execute_filter(
            tab.patch,
            head='patching tables EXPERIMENTAL (table-shape) ...',
            backup=f'document-before-table-shape-patch-{patch_counter}.tex.txt',
            label='changed-table-shape',
            text_lines=lines,
            lookup=lookup_tunnel,
        )
        patch_counter += 1
    else:
        log.info(LOG_SEPARATOR)
        log.info('not patching tables but commenting out (ignoring) any columns command (table-shape) ...')
        patched_lines = [f'%IGNORED_{v}' if v.startswith(r'\columns=') else v for v in lines]
        patched_lines = [f'%IGNORED_{v}' if v.startswith(r'\tablefontsize=') else v for v in patched_lines]
        log.info('diff of the (ignore-table-shape-if-not-patched) filter result:')
        too.log_unified_diff(lines, patched_lines)
        lines = patched_lines
        log.info(LOG_SEPARATOR)

    # NOTE(review): the filtered lines reach LATEX_PAYLOAD_NAME on disk only via the
    # user-patch branch below - presumably too.execute_filter persists its own result;
    # confirm against the tools module
    if need_patching:
        log.info(LOG_SEPARATOR)
        log.info('apply user patches ...')
        doc_before_user_patch = f'document-before-user-patch-{patch_counter}.tex.txt'
        patch_counter += 1
        with open(doc_before_user_patch, 'wt', encoding=ENCODING) as handle:
            handle.write('\n'.join(lines))
        patched_lines = pat.apply(patches, lines)
        with open(LATEX_PAYLOAD_NAME, 'wt', encoding=ENCODING) as handle:
            handle.write('\n'.join(patched_lines))
        log.info('diff of the (user-patches) filter result:')
        too.log_unified_diff(lines, patched_lines)
        lines = patched_lines
    else:
        log.info(LOG_SEPARATOR)
        log.info('skipping application of user patches ...')

    log.info(LOG_SEPARATOR)
    log.info(f'Internal text line buffer counts {len(lines)} lines')

    log.info(LOG_SEPARATOR)
    log.info('cp -a driver.tex this.tex ...')
    source_asset = 'driver.tex'
    target_asset = 'this.tex'
    shutil.copy(source_asset, target_asset)

    # three lualatex passes so references, TOC, and labels converge
    latex_to_pdf_command = ['lualatex', '--shell-escape', 'this.tex']
    log.info(LOG_SEPARATOR)
    log.warning('1/3) lualatex --shell-escape this.tex ...')
    if code := too.delegate(latex_to_pdf_command, 'latex-to-pdf(1/3)', is_quiet=is_quiet):
        return code

    log.info(LOG_SEPARATOR)
    log.warning('2/3) lualatex --shell-escape this.tex ...')
    if code := too.delegate(latex_to_pdf_command, 'latex-to-pdf(2/3)', is_quiet=is_quiet):
        return code

    log.info(LOG_SEPARATOR)
    log.warning('3/3) lualatex --shell-escape this.tex ...')
    if code := too.delegate(latex_to_pdf_command, 'latex-to-pdf(3/3)', is_quiet=is_quiet):
        return code

    if str(options.get('label', '')).strip():
        # optional labeling tool invocation with build provenance key-value pairs
        labeling_call = str(options['label']).strip().split()
        labeling_call.extend(
            [
                '--key-value-pairs',
                (
                    f'BuilderNodeID={CONTEXT["builder_node_id"]}'
                    f',SourceHash={CONTEXT.get("source_hash", "no-source-hash-given")}'
                    f',SourceHint={CONTEXT.get("source_hint", "no-source-hint-given")}'
                ),
            ]
        )
        log.info(LOG_SEPARATOR)
        log.warning(f'Labeling the resulting pdf file per ({" ".join(labeling_call)})')
        too.delegate(labeling_call, 'label-pdf', is_quiet=is_quiet)
        log.info(LOG_SEPARATOR)

    log.info(LOG_SEPARATOR)
    log.warning('Moving stuff around (result phase) ...')
    source_asset = 'this.pdf'
    target_asset = '../index.pdf'
    shutil.copy(source_asset, target_asset)

    log.info(LOG_SEPARATOR)
    log.warning('Deliverable taxonomy: ...')
    too.report_taxonomy(pathlib.Path(target_asset))

    pdffonts_command = ['pdffonts', target_asset]
    too.delegate(pdffonts_command, 'assess-pdf-fonts', is_quiet=is_quiet)

    log.info(LOG_SEPARATOR)
    log.warning('done.')
    log.info(LOG_SEPARATOR)

    return 0