Coverage for liitos/concat.py: 88.65%
519 statements
coverage.py v7.10.7, created at 2025-10-07 19:29:53 +00:00
1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs."""
3import json
4import os
5import pathlib
6import re
7import shutil
8from typing import Union, no_type_check
10import treelib
11import yaml
13import liitos.gather as gat
14import liitos.meta as met
15import liitos.placeholder as plh
16import liitos.tools as too
17from liitos import ENCODING, LOG_SEPARATOR, PathLike, log
19ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC'
20CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT'
21DOC_BASE = pathlib.Path('..', '..')
22STRUCTURE_PATH = DOC_BASE / 'structure.yml'
23SLASH = '/'
24IMAGES_FOLDER = 'images/'
25DIAGRAMS_FOLDER = 'diagrams/'
27"""
28```{.python .cb.run}
29with open('sub/as.md') as fp:
30 print(fp.read())
31```
32"""
33READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}'
34READ_SLOT_CONTEXT_BEGIN = 'with open('
35READ_SLOT_FENCE_END = '```'
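# A minimal sketch, not from concat.py itself: how the path inside a read-slot context
# line is extracted once a fence has opened the region ('sub/as.md' follows the
# convention documented above; the extraction mirrors what concatenate does further below).
_slot_line = "with open('sub/as.md') as fp:"
_slot_path = _slot_line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
# _slot_path == 'sub/as.md'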
37r"""
38\include{markdown_file_path}
39"""
40INCLUDE_SLOT = '\\include{'
42"""
43
44
45
46
47"""
48IMG_LINE_STARTSWITH = '!['
49MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
50MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
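# A minimal sketch, not from concat.py itself: the groups MD_IMG_PATTERN yields for a
# conventional image line; the caption and file name are made-up examples.
_demo_parts = MD_IMG_PATTERN.match('![Engine](images/engine.png "Engine diagram")').groupdict()
# _demo_parts == {'cap': 'Engine', 'src': 'images/engine.png', 'alt': 'Engine diagram', 'rest': ''}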
52NL = '\n'
55@no_type_check
56def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]:
57 """Best effort loading of approvals data.
59 Examples:
61 >>> aspects = {gat.KEY_APPROVALS: 'missing-file'}
62 >>> process_approvals(aspects)
63 1
65 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
66 >>> approvals_name = 'empty-as-approvals.yml'
67 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)}
68 >>> process_approvals(aspects)
69 1
71 >>> DOC_BASE = pathlib.Path('.')
72 >>> aspects = {gat.KEY_APPROVALS: __file__}
73 >>> process_approvals(aspects)
74 1
76 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
77 >>> approvals_name = 'space-as-approvals.yml'
78 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)}
79 >>> process_approvals(aspects)
80 1
81 """
82 approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS]
83 if not approvals_path.is_file() or not approvals_path.stat().st_size:
84 log.error(f'destructure failed to find non-empty approvals file at {approvals_path}')
85 return 1
86 if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
87 log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported')
88 return 1
89 approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json'
90 with open(approvals_path, 'rt', encoding=ENCODING) as handle:
91 approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle)
92 if not approvals:  # 92 ↛ 93: condition on line 92 was never true
93 log.error(f'empty approvals file? Please add approvals to ({approvals_path})')
94 return 1
95 if approvals_channel == 'yaml':  # 95 ↛ 99: condition on line 95 was always true
96 with open('approvals.yml', 'wt', encoding=ENCODING) as handle:
97 yaml.dump(approvals, handle, default_flow_style=False)
98 else:
99 with open('approvals.json', 'wt', encoding=ENCODING) as handle:
100 json.dump(approvals, handle, indent=2)
101 return approvals
104@no_type_check
105def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]:
106 """Best effort loading of binder data.
108 Examples:
110 >>> aspects = {gat.KEY_BIND: 'missing-file'}
111 >>> process_binder(aspects)
112 1
114 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
115 >>> binder_name = 'empty-as-bind.txt'
116 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)}
117 >>> process_binder(aspects)
118 1
120 >>> DOC_BASE = pathlib.Path('.')
121 >>> aspects = {gat.KEY_BIND: __file__}
122 >>> process_binder(aspects)
123 1
125 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
126 >>> binder_name = 'space-as-bind.txt'
127 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)}
128 >>> process_binder(aspects)
129 1
130 """
131 bind_path = DOC_BASE / aspects[gat.KEY_BIND]
132 if not bind_path.is_file() or not bind_path.stat().st_size:
133 log.error(f'destructure failed to find non-empty bind file at {bind_path}')
134 return 1
135 if bind_path.suffix.lower() not in ('.txt',):
136 log.error(f'bind file format per suffix ({bind_path.suffix}) not supported')
137 return 1
138 with open(bind_path, 'rt', encoding=ENCODING) as handle:
139 binder = [line.strip() for line in handle.readlines() if line.strip()]
140 if not binder:  # 140 ↛ 141: condition on line 140 was never true
141 log.error(f'empty bind file? Please add component paths to ({bind_path})')
142 return 1
143 with open('bind.txt', 'wt', encoding=ENCODING) as handle:
144 handle.write('\n'.join(binder) + '\n')
145 return binder
148@no_type_check
149def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]:
150 """Best effort loading of changes data.
152 Examples:
154 >>> aspects = {gat.KEY_CHANGES: 'missing-file'}
155 >>> process_changes(aspects)
156 1
158 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
159 >>> changes_name = 'empty-as-changtes.yml'
160 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)}
161 >>> process_changes(aspects)
162 1
164 >>> DOC_BASE = pathlib.Path('.')
165 >>> aspects = {gat.KEY_CHANGES: __file__}
166 >>> process_changes(aspects)
167 1
169 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
170 >>> changes_name = 'space-as-changes.yml'
171 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)}
172 >>> process_changes(aspects)
173 1
174 """
175 changes_path = DOC_BASE / aspects[gat.KEY_CHANGES]
176 if not changes_path.is_file() or not changes_path.stat().st_size:
177 log.error(f'destructure failed to find non-empty changes file at {changes_path}')
178 return 1
179 if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
180 log.error(f'changes file format per suffix ({changes_path.suffix}) not supported')
181 return 1
182 changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json'
183 with open(changes_path, 'rt', encoding=ENCODING) as handle:
184 changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle)
185 if not changes:  # 185 ↛ 186: condition on line 185 was never true
186 log.error(f'empty changes file? Please add changes data to ({changes_path})')
187 return 1
188 if changes_channel == 'yaml':  # 188 ↛ 192: condition on line 188 was always true
189 with open('changes.yml', 'wt', encoding=ENCODING) as handle:
190 yaml.dump(changes, handle, default_flow_style=False)
191 else:
192 with open('changes.json', 'wt', encoding=ENCODING) as handle:
193 json.dump(changes, handle, indent=2)
194 return changes
197@no_type_check
198def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]:
199 """Parse a markdown image line within our conventions into caption, src, alt, and optional rest.
201 Examples:
203 >>> t = ''
204 >>> parse_markdown_image(t)
205 ('', '', '', '')
207 >>> t = '![]()'
208 >>> parse_markdown_image(t)
209 ('', '', '', '![]()')
211 >>> t = '![a](b "c")'
212 >>> parse_markdown_image(t)
213 ('a', 'b', 'c', '')
215 >>> t = '![a](liitos/placeholders/this-resource-is-missing.png "c")'
216 >>> parse_markdown_image(t)
217 ('a', 'liitos/placeholders/this-resource-is-missing.png', 'c', '')
218 """
219 invalid_marker = ('', '', '', text_line)
221 exclam = '!'
222 osb = '['
223 if not text_line or not text_line.startswith(f'{exclam}{osb}'):
224 log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>')
225 return invalid_marker
227 csb = ']'
228 osb_cnt = text_line.count(osb)
229 csb_cnt = text_line.count(csb)
230 if osb_cnt + csb_cnt < 2:
231 log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
232 return invalid_marker
233 if osb_cnt != csb_cnt:
234 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')
236 orb = '('
237 cap_src_boundary = f'{csb}{orb}'
238 if cap_src_boundary not in text_line:
239 log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>')
240 return invalid_marker
242 crb = ')'
243 orb_cnt = text_line.count(orb)
244 crb_cnt = text_line.count(crb)
245 if orb_cnt + crb_cnt < 2:
246 log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
247 return invalid_marker
248 if orb_cnt != crb_cnt:
249 log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')
251 quo = '"'
252 quo_cnt = text_line.count(quo)
253 if quo_cnt < 2:
254 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>')
255 if quo_cnt % 2:
256 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')
258 sp = ' '
259 sp_cnt = text_line.count(sp)
260 if not sp_cnt:
261 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>')
263 dot = '.'
264 sla = '/'
265 abs_path_indicator = f'{csb}{orb}{sla}'
266 may_have_abs_path = abs_path_indicator in text_line
267 if may_have_abs_path:
268 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>')
269 naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}'
270 may_have_upwards_path = naive_upwards_path_indicator in text_line
271 if may_have_upwards_path:
272 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>')
274 log.info('- parsing the markdown image text line ...')
275 if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt:
276 # The regex is not safe for orb inside caption
277 left, right = text_line.split(cap_src_boundary, 1)
278 match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right)
279 if not match_right:
280 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>')
281 return invalid_marker
283 parts = match_right.groupdict()
284 cap = left[2:]
285 if not cap:
286 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
287 cap = CAP_INJECTOR_HACK
289 src = parts['src']
290 alt = parts['alt']
291 rest = parts['rest']
292 if orb in alt or crb in alt:
293 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>')
294 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=}')
295 for msg in too.incoherent_math_mode_in_caption(cap, phase_info=f'detected in markdown image ({src}) parsing'):  # 295 ↛ 296: loop on line 295 never started
296 log.warning(msg)
298 return cap, src, alt, rest
300 match = MD_IMG_PATTERN.match(text_line)
301 if not match:
302 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>')
303 return invalid_marker
305 parts = match.groupdict()
306 cap = parts['cap']
307 if not cap:
308 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
309 cap = CAP_INJECTOR_HACK
311 src = parts['src']
312 alt = parts['alt']
313 rest = parts['rest']
314 if orb in alt or crb in alt:  # 314 ↛ 315: condition on line 314 was never true
315 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>')
316 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=}')
318 return cap, src, alt, rest
321@no_type_check
322def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str:
323 """YES."""
324 cap, src, alt, rest = parse_markdown_image(text_line)
325 if not src:  # 325 ↛ 326: condition on line 325 was never true
326 log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>')
327 return text_line
329 log.info(f'called adapt_image({text_line}, ..., {upstream=}, {root=}) --> {src=}')
330 the_aquarium = pathlib.Path(upstream).parent
331 the_bowl = pathlib.Path(src).parent
332 the_fish = pathlib.Path(src).name
333 dest_path = (pathlib.Path(root) / '../../' / the_aquarium / the_bowl).resolve() / the_fish
334 img_path = os.path.relpath(dest_path, start=root)
335 log.info(f'path remapped to {img_path}')
336 collector.append(img_path)
337 img_hack = img_path
338 if f'/{IMAGES_FOLDER}' in img_path:
339 img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1]
340 elif f'/{DIAGRAMS_FOLDER}' in img_path:
341 img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1]
343 if img_hack != img_path:
344 log.info(f'{img_hack} <--- OK? --- {img_path}')
346 alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"'
347 belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}'
348 log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-')
349 return belte_og_seler
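# A minimal sketch, not from concat.py itself, of the remapping adapt_image performs: a
# hypothetical part 'part/intro.md' references 'images/engine.png' while the processor
# works inside '<doc_root>/render/pdf/'; '/opt/doc' stands in for the document root.
_dest = os.path.normpath('/opt/doc/render/pdf/../../part/images/engine.png')
_rel = os.path.relpath(_dest, start='/opt/doc/render/pdf')  # '../../part/images/engine.png'
_flat_ref = IMAGES_FOLDER + _rel.split(f'/{IMAGES_FOLDER}', 1)[1]  # 'images/engine.png'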
352@no_type_check
353def harvest_include(
354 text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str
355) -> None:
356 r"""TODO.
358 Examples:
360 >>> text = 'baz\n\\include{c}\nquux'
361 >>> slot = 0
362 >>> regions = {SLASH: [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]}
363 >>> tr = treelib.Tree()
364 >>> root = SLASH
365 >>> tr.create_node(root, root)
366 Node(tag=/, identifier=/, data=None)
367 >>> harvest_include(text, slot, regions, tr, root)
368 >>> print(tr)
369 /
370 └── /c}
371 quux
372 <BLANKLINE>
373 """
374 include_local = text_line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
375 include = str(pathlib.Path(parent).parent / include_local)
376 regions[parent].append(((slot, slot), include))
377 tree.create_node(include, include, parent=parent)
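# A minimal sketch, not from concat.py itself, of the include extraction harvest_include
# applies to a LaTeX-style slot line; 'part/sub.md' and 'other/more.md' are made-up names.
_include_local = '\\include{other/more.md}'.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()  # 'other/more.md'
_anchored = str(pathlib.Path('part/sub.md').parent / _include_local)  # 'part/other/more.md'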
380@no_type_check
381def rollup(
382 jobs: list[list[str]],
383 docs: dict[str, list[str]],
384 regions: dict[str, list[tuple[tuple[int, int], str]]],
385 flat: dict[str, str],
386) -> list[list[str]]:
387 r"""TODO.
389 Examples:
391 >>> jobs = [['a', 'b'], ['b', 'c']]
392 >>> docs = {'a': ['a1', 'a2'], 'b': ['b1', 'b2'], 'c': ['c1', 'c2', 'c3']}
393 >>> regions = {'a': [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]}
394 >>> flat = {'a': 'a1\na2', 'b': 'b1\nb2', 'c': 'c1\nc2\nc3'}
395 >>> rollup(jobs, docs, regions, flat)
396 [[], []]
397 >>> flat
398 {'a': 'b1\nb2\n', 'b': 'c1\nc2\nc3\n', 'c': 'c1\nc2\nc3'}
400 >>> jobs = [['/', 'b'], ['/', 'c']]
401 >>> docs, regions, flat = {}, {}, {'baz': 'quux'}
402 >>> rollup(jobs, docs, regions, flat)
403 [[]]
404 >>> flat
405 {'baz': 'quux'}
406 """
407 tackle = [those[0] for those in jobs if those and those[0] != SLASH]
408 if tackle:
409 log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(tackle)))}) remaining')
410 else:
411 return [[]]
412 for that in tackle:
413 buf = []
414 for slot, line in enumerate(docs[that]):
415 special = False
416 the_first = False
417 the_include = ''
418 for pair, include in regions[that]:
419 low, high = pair
420 if low <= slot <= high:
421 special = True
422 if low == slot:
423 the_first = True
424 the_include = include
425 if not special:
426 buf.append(line)
427 continue
428 if the_first:
429 buf.append(flat[the_include])
430 flat[that] = '\n'.join(buf) + '\n'
432 return [[job for job in chain if job not in flat] for chain in jobs]
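# A minimal sketch, not from concat.py itself: rollup is meant to be driven to a fixed
# point over bottom-up leaf paths, as concatenate does further below; the part names and
# contents are made up, with 'c' included by 'b' and 'b' included by 'a'.
_docs = {'a': ['a1', 'a2'], 'b': ['b1', 'b2'], 'c': ['c1', 'c2']}
_regions = {'a': [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': []}
_flat = {'c': 'c1\nc2\n'}  # leaf 'c' has no includes and is concatenated up front
_todo = [['b', 'a']]  # bottom-up path with already-flat parts filtered out
while _todo != [[]]:
    _todo = rollup(_todo, _docs, _regions, _flat)
# _flat['a'] now carries the fully expanded content of 'b', which in turn carries 'c'.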
435def copy_eventually(src_base: pathlib.Path, tgt_base: pathlib.Path, local_path: PathLike) -> None:
436 """Copy visual assets eventually and warn on overwrites."""
437 if not tgt_base.is_dir():
438 try:
439 tgt_base.mkdir(parents=True, exist_ok=True)
440 except FileExistsError as err:
441 log.error(f'failed to create folder {tgt_base} - detail: {err}')
442 source_asset = src_base / local_path
443 if not source_asset.is_file():  # 443 ↛ 446: condition on line 443 was always true
444 log.info(f'falling back to {local_path} instead of {source_asset=}, ignoring {src_base=}')
445 source_asset = pathlib.Path(local_path) # TODO: Since adapt_image fix receive paths incl. src_base
446 target_asset = tgt_base / pathlib.Path(local_path).name
447 if target_asset.is_file():
448 log.warning(f'overwriting existing {target_asset} from {source_asset}')
449 try:
450 shutil.copy(source_asset, target_asset)
451 except FileNotFoundError as err:
452 log.error(err)
453 code, msg = plh.dump_placeholder(target_asset)
454 log.warning(msg) if code else log.info(msg)
455 except NotADirectoryError as err:
456 log.error(err)
457 code, msg = plh.dump_placeholder(target_asset)
458 log.warning(msg) if code else log.info(msg)
461@no_type_check
462def collect_assets(
463 collector: list[str],
464 doc_base: Union[PathLike, None] = None,
465 images_folder: Union[PathLike, None] = None,
466 diagrams_folder: Union[PathLike, None] = None,
467) -> None:
468 """Collect assets into the rendering space.
470 Examples:
472 >>> c = ['foo']
473 >>> collect_assets(c)
475 >>> import tempfile
476 >>> with tempfile.TemporaryDirectory() as imaf:
477 ... c = [imaf + 'foo']
478 ... collect_assets(c, doc_base='.', images_folder=imaf)
480 >>> import tempfile
481 >>> with tempfile.TemporaryDirectory() as imaf:
482 ... with tempfile.TemporaryDirectory() as diaf:
483 ... c = [imaf + 'foo', diaf + 'bar']
484 ... collect_assets(c, doc_base='.', images_folder=imaf, diagrams_folder=diaf)
486 >>> import tempfile
487 >>> with tempfile.TemporaryDirectory() as imaf:
488 ... ima = pathlib.Path(imaf) / 'images'
489 ... ima.touch()
490 ... with tempfile.TemporaryDirectory() as diaf:
491 ... dia = pathlib.Path(diaf) / 'diagrams'
492 ... dia.touch()
493 ... c = [str(ima / 'foo'), str(dia / 'bar')]
494 ... collect_assets(c, doc_base='.', images_folder=ima, diagrams_folder=dia)
495 """
496 doc_base = pathlib.Path(doc_base) if doc_base else DOC_BASE
497 img_part = str(images_folder) if images_folder else IMAGES_FOLDER
498 dia_part = str(diagrams_folder) if diagrams_folder else DIAGRAMS_FOLDER
499 for img_path in collector:
500 where_to = img_part if img_part in img_path else (dia_part if dia_part in img_path else None)
501 if where_to is not None:
502 log.info(f'calling copy_eventually({doc_base}, {pathlib.Path(where_to)}, {img_path})')
503 copy_eventually(doc_base, pathlib.Path(where_to), img_path)
504 else:
505 log.error(f'asset collection for neither images nor diagrams requested per {img_path} - ignoring')
508@no_type_check
509def concatenate(
510 doc_root: Union[str, pathlib.Path],
511 structure_name: str,
512 target_key: str,
513 facet_key: str,
514 options: dict[str, Union[bool, str]],
515) -> int:
516 """Later alligator.
518 Examples:
520 >>> restore_cwd = os.getcwd()
521 >>> dr = '.'
522 >>> sn = 'foo'
523 >>> tk = ''
524 >>> fk = ''
525 >>> op = {'bar': True}
526 >>> concatenate(dr, sn, tk, fk, op, )
527 2
528 >>> os.chdir(restore_cwd)
530 >>> restore_cwd = os.getcwd()
531 >>> dr = 'example/no-renda'
532 >>> sn = 'structure.yml'
533 >>> tk = 'prod_kind'
534 >>> fk = 'no-renda'
535 >>> op = {'force': True}
536 >>> concatenate(dr, sn, tk, fk, op)
537 0
538 >>> os.chdir(restore_cwd)
540 >>> restore_cwd = os.getcwd()
541 >>> dr = 'example/tuna'
542 >>> sn = 'structure.yml'
543 >>> tk = 'prod_kind'
544 >>> fk = 'non-existing-facet-key'
545 >>> op = {'bar': True}
546 >>> concatenate(dr, sn, tk, fk, op)
547 1
548 >>> os.chdir(restore_cwd)
550 >>> restore_cwd = os.getcwd()
551 >>> dr = 'test/fixtures/basic/'
552 >>> sn = 'structure.yml'
553 >>> tk = 'abc'
554 >>> fk = 'missing'
555 >>> op = {'bar': True}
556 >>> concatenate(dr, sn, tk, fk, op)
557 2
558 >>> os.chdir(restore_cwd)
560 >>> restore_cwd = os.getcwd()
561 >>> dr = 'example/tuna'
562 >>> sn = 'structure.yml'
563 >>> tk = 'prod_kind'
564 >>> fk = 'tuna'
565 >>> op = {'bar': True}
566 >>> concatenate(dr, sn, tk, fk, op)
567 0
568 >>> os.chdir(restore_cwd)
570 >>> restore_cwd = os.getcwd()
571 >>> dr = 'example/tuna'
572 >>> sn = 'structure.yml'
573 >>> tk = 'prod_kind'
574 >>> fk = 'tuna'
575 >>> op = {'bar': True}
576 >>> try:
577 ... code = concatenate(dr, sn, tk, fk, op)
578 ... except FileNotFoundError:
579 ... code = -1
580 >>> os.chdir(restore_cwd)
581 >>> code
582 0
584 >>> restore_cwd = os.getcwd()
585 >>> dr = 'example/ejected-templates'
586 >>> sn = 'structure.yml'
587 >>> tk = 'prod_kind'
588 >>> fk = 'ejected-templates'
589 >>> op = {'bar': True}
590 >>> try:
591 ... code = concatenate(dr, sn, tk, fk, op)
592 ... except FileNotFoundError:
593 ... code = -1
594 >>> os.chdir(restore_cwd)
595 >>> code
596 0
598 >>> restore_cwd = os.getcwd()
599 >>> dr = 'example/ejected-templates'
600 >>> sn = 'structure.yml'
601 >>> tk = 'prod_kind'
602 >>> fk = 'ejected-templates-borked'
603 >>> op = {'bar': True}
604 >>> try:
605 ... code = concatenate(dr, sn, tk, fk, op)
606 ... except FileNotFoundError:
607 ... code = -1
608 >>> os.chdir(restore_cwd)
609 >>> code
610 0
612 >>> restore_cwd = os.getcwd()
613 >>> dr = 'example/tuna'
614 >>> sn = 'structure.yml'
615 >>> tk = 'prod_kind'
616 >>> fk = 'tuna'
617 >>> op = {'bar': True}
618 >>> abs_here = pathlib.Path().resolve()
619 >>> try:
620 ... code = concatenate(dr, sn, tk, fk, op)
621 ... except FileNotFoundError:
622 ... code = -1
623 >>> os.chdir(restore_cwd)
624 >>> code
625 0
627 """
628 log.info(LOG_SEPARATOR)
629 log.warning('entered concat function ...')
630 target_code = target_key
631 facet_code = facet_key
632 if not facet_code.strip() or not target_code.strip():
633 log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes')
634 return 2
636 log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')
638 structure, asset_map = gat.prelude(
639 doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat'
640 )
641 log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')
642 rel_concat_folder_path = pathlib.Path('render/pdf/')
643 rel_concat_folder_path.mkdir(parents=True, exist_ok=True)
644 os.chdir(rel_concat_folder_path)
645 log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)')
647 ok, aspect_map = too.load_target(target_code, facet_code)
648 if not ok or not aspect_map:
649 return 0 if ok else 1
651 may_render = aspect_map.get('render', True)
652 if not may_render:
653 topic = f'structure({pathlib.Path(doc_root) / structure_name}) for target: {target_key} and facet: {facet_key}'
654 log.warning(f'- render is declared as false in {topic}')
655 if not options['force']:  # 655 ↛ 656: condition on line 655 was never true
656 return 42
657 else:
658 log.warning(' + overwritten by force mode')
660 approvals = process_approvals(aspect_map)
661 if isinstance(approvals, int):
662 return 2
663 binder = process_binder(aspect_map)
664 if isinstance(binder, int):  # 664 ↛ 665: condition on line 664 was never true
665 return 3
666 changes = process_changes(aspect_map)
667 if isinstance(changes, int):  # 667 ↛ 668: condition on line 667 was never true
668 return 4
669 metadata = met.load(aspect_map)
670 if isinstance(metadata, int):  # 670 ↛ 671: condition on line 670 was never true
671 return 5
673 root = SLASH
674 root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH
675 tree = treelib.Tree()
676 tree.create_node(root, root)
677 documents = {}
678 insert_regions = {}
679 img_collector = []
680 log.info(LOG_SEPARATOR)
681 log.info('processing binder ...')
682 for entry in binder:
683 ref_path = DOC_BASE / entry
684 log.debug(f'- {entry} as {ref_path}')
685 with open(ref_path, 'rt', encoding=ENCODING) as handle:
686 documents[entry] = [line.rstrip() for line in handle.readlines()]
687 insert_regions[entry] = []
688 in_region = False
689 begin, end = 0, 0
690 include = ''
691 tree.create_node(entry, entry, parent=root)
692 for slot, line in enumerate(documents[entry]):
693 if line.startswith(IMG_LINE_STARTSWITH):
694 documents[entry][slot] = adapt_image(line, img_collector, entry, root_path)
695 log.debug(f'{slot :02d}|{line.rstrip()}')
696 if not in_region:
697 if line.startswith(READ_SLOT_FENCE_BEGIN):
698 in_region = True
699 begin = slot
700 continue
701 if line.startswith(INCLUDE_SLOT):
702 include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
703 insert_regions[entry].append(((slot, slot), include))
704 tree.create_node(include, include, parent=entry)
705 include = ''
706 continue
707 if in_region:
708 if line.startswith(READ_SLOT_CONTEXT_BEGIN):
709 include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
710 elif line.startswith(READ_SLOT_FENCE_END):
711 end = slot
712 insert_regions[entry].append(((begin, end), include))
713 tree.create_node(include, include, parent=entry)
714 in_region = False
715 begin, end = 0, 0
716 include = ''
718 for coords, include in insert_regions[entry]: # include is anchored on DOC_BASE
719 ref_path = DOC_BASE / include
720 with open(ref_path, 'rt', encoding=ENCODING) as handle:
721 documents[include] = [line.rstrip() for line in handle.readlines()]
722 insert_regions[include] = []
723 in_region = False
724 begin, end = 0, 0
725 sub_include = ''
726 for slot, line in enumerate(documents[include]):
727 if line.startswith(IMG_LINE_STARTSWITH):  # 727 ↛ 728: condition on line 727 was never true
728 documents[include][slot] = adapt_image(line, img_collector, include, root_path)
729 log.debug(f'{slot :02d}|{line.rstrip()}')
730 if not in_region:
731 if line.startswith(READ_SLOT_FENCE_BEGIN):
732 in_region = True
733 begin = slot
734 continue
735 if line.startswith(INCLUDE_SLOT):
736 harvest_include(line, slot, insert_regions, tree, include)
737 continue
738 if in_region:
739 if line.startswith(READ_SLOT_CONTEXT_BEGIN):
740 sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
741 sub_include = str(pathlib.Path(include).parent / sub_include)
742 elif line.startswith(READ_SLOT_FENCE_END):
743 end = slot
744 insert_regions[include].append(((begin, end), sub_include))
745 tree.create_node(sub_include, sub_include, parent=include)
746 in_region = False
747 begin, end = 0, 0
748 sub_include = ''
750 for coords, sub_include in insert_regions[include]:
751 ref_path = DOC_BASE / sub_include
752 with open(ref_path, 'rt', encoding=ENCODING) as handle:
753 documents[sub_include] = [line.rstrip() for line in handle.readlines()]
754 insert_regions[sub_include] = []
755 in_region = False
756 begin, end = 0, 0
757 sub_sub_include = ''
758 for slot, line in enumerate(documents[sub_include]):
759 if line.startswith(IMG_LINE_STARTSWITH):
760 documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path)
761 log.debug(f'{slot :02d}|{line.rstrip()}')
762 if not in_region:
763 if line.startswith(READ_SLOT_FENCE_BEGIN):
764 in_region = True
765 begin = slot
766 continue
767 if line.startswith(INCLUDE_SLOT):  # 767 ↛ 768: condition on line 767 was never true
768 harvest_include(line, slot, insert_regions, tree, sub_include)
769 continue
770 if in_region:
771 if line.startswith(READ_SLOT_CONTEXT_BEGIN):
772 sub_sub_include = (
773 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
774 )
775 sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include)
776 elif line.startswith(READ_SLOT_FENCE_END):
777 end = slot
778 insert_regions[sub_include].append(((begin, end), sub_sub_include))
779 tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include)
780 in_region = False
781 begin, end = 0, 0
782 sub_sub_include = ''
784 for coords, sub_sub_include in insert_regions[sub_include]:
785 ref_path = DOC_BASE / sub_sub_include
786 with open(ref_path, 'rt', encoding=ENCODING) as handle:
787 documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()]
788 insert_regions[sub_sub_include] = []
789 in_region = False
790 begin, end = 0, 0
791 sub_sub_sub_include = ''
792 for slot, line in enumerate(documents[sub_sub_include]):
793 if line.startswith(IMG_LINE_STARTSWITH):
794 documents[sub_sub_include][slot] = adapt_image(
795 line, img_collector, sub_sub_include, root_path
796 )
797 log.debug(f'{slot :02d}|{line.rstrip()}')
798 if not in_region:  # 798 ↛ 806: condition on line 798 was always true
799 if line.startswith(READ_SLOT_FENCE_BEGIN):  # 799 ↛ 800: condition on line 799 was never true
800 in_region = True
801 begin = slot
802 continue
803 if line.startswith(INCLUDE_SLOT):  # 803 ↛ 804: condition on line 803 was never true
804 harvest_include(line, slot, insert_regions, tree, sub_sub_include)
805 continue
806 if in_region:  # 806 ↛ 807: condition on line 806 was never true
807 if line.startswith(READ_SLOT_CONTEXT_BEGIN):
808 sub_sub_sub_include = (
809 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
810 )
811 sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include)
812 elif line.startswith(READ_SLOT_FENCE_END):
813 end = slot
814 insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include))
815 tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include)
816 in_region = False
817 begin, end = 0, 0
818 sub_sub_sub_include = ''
820 for coords, sub_sub_sub_include in insert_regions[sub_sub_include]:
821 ref_path = DOC_BASE / sub_sub_sub_include
822 with open(ref_path, 'rt', encoding=ENCODING) as handle:
823 documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()]
824 insert_regions[sub_sub_sub_include] = []
825 in_region = False
826 begin, end = 0, 0
827 sub_sub_sub_sub_include = ''
828 for slot, line in enumerate(documents[sub_sub_sub_include]):
829 if line.startswith(IMG_LINE_STARTSWITH):
830 documents[sub_sub_sub_include][slot] = adapt_image(
831 line, img_collector, sub_sub_sub_include, root_path
832 )
833 log.debug(f'{slot :02d}|{line.rstrip()}')
834 if not in_region:  # 834 ↛ 842: condition on line 834 was always true
835 if line.startswith(READ_SLOT_FENCE_BEGIN):  # 835 ↛ 836: condition on line 835 was never true
836 in_region = True
837 begin = slot
838 continue
839 if line.startswith(INCLUDE_SLOT):  # 839 ↛ 840: condition on line 839 was never true
840 harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include)
841 continue
842 if in_region:  # 842 ↛ 843: condition on line 842 was never true
843 if line.startswith(READ_SLOT_CONTEXT_BEGIN):
844 sub_sub_sub_sub_include = (
845 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
846 )
847 sub_sub_sub_sub_include = str(
848 pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include
849 )
850 elif line.startswith(READ_SLOT_FENCE_END):
851 end = slot
852 insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include))
853 tree.create_node(
854 sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include
855 )
856 in_region = False
857 begin, end = 0, 0
858 sub_sub_sub_sub_include = ''
860 top_down_paths = tree.paths_to_leaves()
861 bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths]
862 log.info(LOG_SEPARATOR)
863 log.info('resulting tree:')
864 for edge in str(tree).split(NL):
865 log.info(edge)
867 log.info(LOG_SEPARATOR)
868 log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:')
869 for num, leaf_path in enumerate(bottom_up_paths):
870 the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}'
871 log.info(f'{num :2d}: {the_way_up}')
873 concat = {}
874 log.info(LOG_SEPARATOR)
875 log.info(f'dependencies for the {len(insert_regions)} document parts:')
876 for key, regions in insert_regions.items():
877 num_in = len(regions)
878 dashes = '-' * num_in
879 incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )'
880 indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}'
881 log.info(f'- part {key} {indicator}')
882 for region in regions:
883 between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}'
884 insert = f'include fragment {region[1]}'
885 log.info(f' + {between} {insert}')
886 if not regions: # No includes
887 concat[key] = '\n'.join(documents[key]) + '\n'
888 log.info(f' * did concat {key} document for insertion')
890 chains = [leaf_path for leaf_path in bottom_up_paths]
891 log.info(LOG_SEPARATOR)
892 log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:')
893 todo = [[job for job in chain if job not in concat] for chain in chains]
894 while todo != [[]]:
895 todo = rollup(todo, documents, insert_regions, concat)
897 log.info(LOG_SEPARATOR)
898 log.info('writing final concat markdown to document.md')
899 with open('document.md', 'wt', encoding=ENCODING) as handle:
900 handle.write('\n'.join(concat[bind] for bind in binder) + '\n')
902 log.info(LOG_SEPARATOR)
903 log.info('collecting assets (images and diagrams)')
904 collect_assets(img_collector)
905 log.info(LOG_SEPARATOR)
906 log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)')
907 log.info(LOG_SEPARATOR)
908 log.info('processing complete - SUCCESS')
909 log.info(LOG_SEPARATOR)
910 return 0