Coverage for liitos/concat.py: 88.81%
507 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-08-31 13:07:35 +00:00
1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs."""
3import json
4import os
5import pathlib
6import re
7import shutil
8from typing import Union, no_type_check
10import treelib
11import yaml
13import liitos.gather as gat
14import liitos.meta as met
15import liitos.placeholder as plh
16import liitos.tools as too
17from liitos import ENCODING, LOG_SEPARATOR, PathLike, log
# Alt text injected when an image reference carries none, so that pandoc still
# wraps the image in a figure environment.
ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC'
# Caption text injected when an image reference carries none, marking the gap in the output.
CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT'
# The concat processor runs from two levels below the document root (render/pdf/).
DOC_BASE = pathlib.Path('..', '..')
STRUCTURE_PATH = DOC_BASE / 'structure.yml'
SLASH = '/'
# Folder name fragments used to rewrite and collect image assets.
IMAGES_FOLDER = 'images/'
DIAGRAMS_FOLDER = 'diagrams/'

# Shape of a "read slot" (code-block include) this processor recognizes:
"""
```{.python .cb.run}
with open('sub/as.md') as fp:
    print(fp.read())
```
"""
READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}'
READ_SLOT_CONTEXT_BEGIN = 'with open('
READ_SLOT_FENCE_END = '```'

# Shape of a LaTeX include slot this processor recognizes:
r"""
\include{markdown_file_path}
"""
INCLUDE_SLOT = '\\include{'

# NOTE(review): placeholder string literal retained from the original source
# (appears to be an emptied example block) — confirm before removing.
"""




"""
# Markdown image lines start with this token.
IMG_LINE_STARTSWITH = '!['
# Full-line image pattern: ![cap](src "alt")rest — cap must not contain '('.
MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
# Right-hand side pattern used after splitting on '](' when parens appear in the caption.
MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')

NL = '\n'
@no_type_check
def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]:
    """Best effort loading of approvals data.

    Loads the approvals file referenced in aspects (JSON or YAML per suffix),
    writes a normalized copy (approvals.yml or approvals.json) into the current
    working directory, and returns the data. Returns the integer 1 on any failure.

    Examples:

    >>> aspects = {gat.KEY_APPROVALS: 'missing-file'}
    >>> process_approvals(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> approvals_name = 'empty-as-approvals.yml'
    >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)}
    >>> process_approvals(aspects)
    1

    >>> DOC_BASE = pathlib.Path('.')
    >>> aspects = {gat.KEY_APPROVALS: __file__}
    >>> process_approvals(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> approvals_name = 'space-as-approvals.yml'
    >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)}
    >>> process_approvals(aspects)
    1
    """
    approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS]
    if not approvals_path.is_file() or not approvals_path.stat().st_size:
        log.error(f'destructure failed to find non-empty approvals file at {approvals_path}')
        return 1
    if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported')
        return 1
    approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(approvals_path, 'rt', encoding=ENCODING) as handle:
        approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle)
    if not approvals:
        log.error(f'empty approvals file? Please add approvals to ({approvals_path})')
        return 1
    # Persist a normalized copy next to the render artifacts in the channel's own format.
    if approvals_channel == 'yaml':
        with open('approvals.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(approvals, handle, default_flow_style=False)
    else:
        with open('approvals.json', 'wt', encoding=ENCODING) as handle:
            json.dump(approvals, handle, indent=2)
    return approvals
@no_type_check
def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]:
    """Best effort loading of binder data.

    Loads the plain-text bind file referenced in aspects (one component path per
    line), writes a normalized copy (bind.txt) into the current working directory,
    and returns the list of entries. Returns the integer 1 on any failure.

    Examples:

    >>> aspects = {gat.KEY_BIND: 'missing-file'}
    >>> process_binder(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> binder_name = 'empty-as-bind.txt'
    >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)}
    >>> process_binder(aspects)
    1

    >>> DOC_BASE = pathlib.Path('.')
    >>> aspects = {gat.KEY_BIND: __file__}
    >>> process_binder(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> binder_name = 'space-as-bind.txt'
    >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)}
    >>> process_binder(aspects)
    1
    """
    bind_path = DOC_BASE / aspects[gat.KEY_BIND]
    if not bind_path.is_file() or not bind_path.stat().st_size:
        log.error(f'destructure failed to find non-empty bind file at {bind_path}')
        return 1
    if bind_path.suffix.lower() not in ('.txt',):
        log.error(f'bind file format per suffix ({bind_path.suffix}) not supported')
        return 1
    with open(bind_path, 'rt', encoding=ENCODING) as handle:
        # Keep only non-blank lines, stripped of surrounding whitespace.
        binder = [line.strip() for line in handle.readlines() if line.strip()]
    if not binder:
        log.error(f'empty bind file? Please add component paths to ({bind_path})')
        return 1
    with open('bind.txt', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(binder) + '\n')
    return binder
@no_type_check
def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]:
    """Best effort loading of changes data.

    Loads the changes file referenced in aspects (JSON or YAML per suffix),
    writes a normalized copy (changes.yml or changes.json) into the current
    working directory, and returns the data. Returns the integer 1 on any failure.

    Examples:

    >>> aspects = {gat.KEY_CHANGES: 'missing-file'}
    >>> process_changes(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> changes_name = 'empty-as-changtes.yml'
    >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)}
    >>> process_changes(aspects)
    1

    >>> DOC_BASE = pathlib.Path('.')
    >>> aspects = {gat.KEY_CHANGES: __file__}
    >>> process_changes(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> changes_name = 'space-as-changes.yml'
    >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)}
    >>> process_changes(aspects)
    1
    """
    changes_path = DOC_BASE / aspects[gat.KEY_CHANGES]
    if not changes_path.is_file() or not changes_path.stat().st_size:
        log.error(f'destructure failed to find non-empty changes file at {changes_path}')
        return 1
    if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'changes file format per suffix ({changes_path.suffix}) not supported')
        return 1
    changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(changes_path, 'rt', encoding=ENCODING) as handle:
        changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle)
    if not changes:
        log.error(f'empty changes file? Please add changes data to ({changes_path})')
        return 1
    # Persist a normalized copy next to the render artifacts in the channel's own format.
    if changes_channel == 'yaml':
        with open('changes.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(changes, handle, default_flow_style=False)
    else:
        with open('changes.json', 'wt', encoding=ENCODING) as handle:
            json.dump(changes, handle, indent=2)
    return changes
@no_type_check
def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]:
    """Parse a markdown image line within our conventions into caption, src, alt, and optional rest.

    Returns the invalid marker ('', '', '', text_line) when the line cannot be parsed;
    a missing caption is replaced with CAP_INJECTOR_HACK.

    Examples:

    >>> t = ''
    >>> parse_markdown_image(t)
    ('', '', '', '')

    >>> t = '![]()'
    >>> parse_markdown_image(t)
    ('', '', '', '![]()')

    >>> t = '![a](b "c")'
    >>> parse_markdown_image(t)
    ('a', 'b', 'c', '')

    >>> t = '![a](liitos/placeholders/this-resource-is-missing.png "c")'
    >>> parse_markdown_image(t)
    ('a', 'liitos/placeholders/this-resource-is-missing.png', 'c', '')
    """
    invalid_marker = ('', '', '', text_line)

    exclam = '!'
    osb = '['
    if not text_line or not text_line.startswith(f'{exclam}{osb}'):
        log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>')
        return invalid_marker

    csb = ']'
    osb_cnt = text_line.count(osb)
    csb_cnt = text_line.count(csb)
    if osb_cnt + csb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if osb_cnt != csb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    orb = '('
    cap_src_boundary = f'{csb}{orb}'
    if cap_src_boundary not in text_line:
        log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>')
        return invalid_marker

    crb = ')'
    orb_cnt = text_line.count(orb)
    crb_cnt = text_line.count(crb)
    if orb_cnt + crb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if orb_cnt != crb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    quo = '"'
    quo_cnt = text_line.count(quo)
    if quo_cnt < 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>')
    if quo_cnt % 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    sp = ' '
    sp_cnt = text_line.count(sp)
    if not sp_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>')

    dot = '.'
    sla = '/'
    abs_path_indicator = f'{csb}{orb}{sla}'
    may_have_abs_path = abs_path_indicator in text_line
    if may_have_abs_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>')
    naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}'
    may_have_upwards_path = naive_upwards_path_indicator in text_line
    if may_have_upwards_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>')

    log.info('- parsing the markdown image text line ...')
    if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt:
        # The regex is not safe for orb inside caption - split at the boundary instead.
        left, right = text_line.split(cap_src_boundary, 1)
        match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right)
        if not match_right:
            log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>')
            return invalid_marker

        parts = match_right.groupdict()
        cap = left[2:]  # strip the leading '![' from the left part
        if not cap:
            log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
            cap = CAP_INJECTOR_HACK

        src = parts['src']
        alt = parts['alt']
        rest = parts['rest']
        if orb in alt or crb in alt:
            log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>')
        # Message fix: the closing parenthesis was missing from the parsed-as report.
        log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

        return cap, src, alt, rest

    match = MD_IMG_PATTERN.match(text_line)
    if not match:
        log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>')
        return invalid_marker

    parts = match.groupdict()
    cap = parts['cap']
    if not cap:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
        cap = CAP_INJECTOR_HACK

    src = parts['src']
    alt = parts['alt']
    rest = parts['rest']
    if orb in alt or crb in alt:
        log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>')
    # Message fix: the closing parenthesis was missing from the parsed-as report.
    log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

    return cap, src, alt, rest
@no_type_check
def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str:
    """Rewrite a markdown image line to reference the local images/ or diagrams/ folder.

    Parameters:
        text_line: The markdown image line to adapt.
        collector: Receives the resolved image path (root prefix stripped) for asset collection.
        upstream: Path of the document the line came from; src resolves relative to its parent.
        root: Absolute render-root prefix to strip from resolved image paths.

    Returns the rebuilt image line, or text_line unchanged when parsing yields no src.
    """
    cap, src, alt, rest = parse_markdown_image(text_line)
    if not src:
        log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>')
        return text_line

    img_path = str((pathlib.Path(upstream).parent / src).resolve()).replace(root, '')
    collector.append(img_path)
    # Rebase the path onto the local images/ or diagrams/ folder when applicable.
    img_hack = img_path
    if f'/{IMAGES_FOLDER}' in img_path:
        img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1]
    elif f'/{DIAGRAMS_FOLDER}' in img_path:
        img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1]

    if img_hack != img_path:
        log.info(f'{img_hack} <--- OK? --- {img_path}')

    # Inject alt text when missing so pandoc keeps the figure environment.
    alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"'
    # Fix: rebuild the complete image reference - previously cap/img_hack/alt_text were
    # computed but dropped and only rest was returned, erasing the image from the output.
    belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}'
    log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-')
    return belte_og_seler
@no_type_check
def harvest_include(
    text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str
) -> None:
    r"""Record an \include{...} slot found at *slot* in *parent* and grow the include tree.

    The include target is anchored on the parent's folder and registered both as a
    single-line region of the parent and as a child node in the tree.

    Examples:

    >>> text = 'baz\n\\include{c}\nquux'
    >>> slot = 0
    >>> regions = {SLASH: [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]}
    >>> tr = treelib.Tree()
    >>> root = SLASH
    >>> tr.create_node(root, root)
    Node(tag=/, identifier=/, data=None)
    >>> harvest_include(text, slot, regions, tr, root)
    >>> print(tr)
    /
    └── /c}
        quux
    <BLANKLINE>
    """
    payload = text_line.split(INCLUDE_SLOT, 1)[1]
    local_ref = payload.rstrip('}').strip()
    anchored = str(pathlib.Path(parent).parent / local_ref)
    regions[parent].append(((slot, slot), anchored))
    tree.create_node(anchored, anchored, parent=parent)
@no_type_check
def rollup(
    jobs: list[list[str]],
    docs: dict[str, list[str]],
    regions: dict[str, list[tuple[tuple[int, int], str]]],
    flat: dict[str, str],
) -> list[list[str]]:
    r"""Perform one bottom-up insertion pass, splicing already-flattened parts into their parents.

    For every chain head (except the root marker) the recorded include regions are
    replaced with the flattened content from *flat*; the result is stored back into
    *flat*. Returns the remaining per-chain work, or [[]] when nothing is left.

    Examples:

    >>> jobs = [['a', 'b'], ['b', 'c']]
    >>> docs = {'a': ['a1', 'a2'], 'b': ['b1', 'b2'], 'c': ['c1', 'c2', 'c3']}
    >>> regions = {'a': [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]}
    >>> flat = {'a': 'a1\na2', 'b': 'b1\nb2', 'c': 'c1\nc2\nc3'}
    >>> rollup(jobs, docs, regions, flat)
    [[], []]
    >>> flat
    {'a': 'b1\nb2\n', 'b': 'c1\nc2\nc3\n', 'c': 'c1\nc2\nc3'}

    >>> jobs = [['/', 'b'], ['/', 'c']]
    >>> docs, regions, flat = {}, {}, {'baz': 'quux'}
    >>> rollup(jobs, docs, regions, flat)
    [[]]
    >>> flat
    {'baz': 'quux'}
    """
    pending = [chain[0] for chain in jobs if chain and chain[0] != SLASH]
    if not pending:
        return [[]]
    log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(pending)))}) remaining')
    for part in pending:
        assembled = []
        for position, text in enumerate(docs[part]):
            inside = False
            starts_here = False
            insert_key = ''
            for (low, high), fragment in regions[part]:
                if low <= position <= high:
                    inside = True
                    if position == low:
                        starts_here = True
                        insert_key = fragment
            if not inside:
                assembled.append(text)
            elif starts_here:
                # Only the first line of a region triggers the splice; the rest is dropped.
                assembled.append(flat[insert_key])
        flat[part] = '\n'.join(assembled) + '\n'

    return [[task for task in chain if task not in flat] for chain in jobs]
def copy_eventually(src_base: pathlib.Path, tgt_base: pathlib.Path, local_path: PathLike) -> None:
    """Copy visual assets eventually and warn on overwrites.

    Creates the target folder when absent; on a failed copy a placeholder asset is
    dumped instead so downstream rendering can proceed.
    """
    if not tgt_base.is_dir():
        try:
            tgt_base.mkdir(parents=True, exist_ok=True)
        except FileExistsError as err:
            # A file blocks the folder path - report and fall through to the copy attempt.
            log.error(f'failed to create folder {tgt_base} - detail: {err}')
    source_asset = src_base / local_path
    target_asset = tgt_base / pathlib.Path(local_path).name
    if target_asset.is_file():
        log.warning(f'overwriting existing {target_asset} from {source_asset}')
    try:
        shutil.copy(source_asset, target_asset)
    except (FileNotFoundError, NotADirectoryError) as err:
        # Source missing or path component wrong - substitute a placeholder asset.
        log.error(err)
        code, msg = plh.dump_placeholder(target_asset)
        if code:
            log.warning(msg)
        else:
            log.info(msg)
@no_type_check
def collect_assets(
    collector: list[str],
    doc_base: Union[PathLike, None] = None,
    images_folder: Union[PathLike, None] = None,
    diagrams_folder: Union[PathLike, None] = None,
) -> None:
    """Collect assets into the rendering space.

    Each collected path is copied below its matching folder (images or diagrams);
    paths matching neither are reported and skipped.

    Examples:

    >>> c = ['foo']
    >>> collect_assets(c)

    >>> import tempfile
    >>> with tempfile.TemporaryDirectory() as imaf:
    ...     c = [imaf + 'foo']
    ...     collect_assets(c, doc_base='.', images_folder=imaf)

    >>> import tempfile
    >>> with tempfile.TemporaryDirectory() as imaf:
    ...     with tempfile.TemporaryDirectory() as diaf:
    ...         c = [imaf + 'foo', diaf + 'bar']
    ...         collect_assets(c, doc_base='.', images_folder=imaf, diagrams_folder=diaf)

    >>> import tempfile
    >>> with tempfile.TemporaryDirectory() as imaf:
    ...     ima = pathlib.Path(imaf) / 'images'
    ...     ima.touch()
    ...     with tempfile.TemporaryDirectory() as diaf:
    ...         dia = pathlib.Path(diaf) / 'diagrams'
    ...         dia.touch()
    ...         c = [str(ima / 'foo'), str(dia / 'bar')]
    ...         collect_assets(c, doc_base='.', images_folder=ima, diagrams_folder=dia)
    """
    if doc_base:
        base = pathlib.Path(doc_base)
    else:
        base = DOC_BASE
    if images_folder:
        img_part = str(images_folder)
    else:
        img_part = IMAGES_FOLDER
    if diagrams_folder:
        dia_part = str(diagrams_folder)
    else:
        dia_part = DIAGRAMS_FOLDER
    for img_path in collector:
        if img_part in img_path:
            where_to = img_part
        elif dia_part in img_path:
            where_to = dia_part
        else:
            log.error(f'asset collection for neither images nor diagrams requested per {img_path} - ignoring')
            continue
        copy_eventually(base, pathlib.Path(where_to), img_path)
@no_type_check
def concatenate(
    doc_root: Union[str, pathlib.Path],
    structure_name: str,
    target_key: str,
    facet_key: str,
    options: dict[str, Union[bool, str]],
) -> int:
    """Later alligator.

    Concatenate the markdown tree for target/facet below doc_root into
    render/pdf/document.md, rewriting image references and collecting assets.
    Include nesting is handled up to four levels deep.

    Returns 0 on success; 1 when the target cannot be loaded; 2 for empty
    target/facet codes or broken approvals; 3/4/5 for broken binder, changes,
    or metadata; 42 when render is declared false and force is not set.

    Examples:

    >>> restore_cwd = os.getcwd()
    >>> dr = '.'
    >>> sn = 'foo'
    >>> tk = ''
    >>> fk = ''
    >>> op = {'bar': True}
    >>> concatenate(dr, sn, tk, fk, op, )
    2
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/no-renda'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'no-renda'
    >>> op = {'force': True}
    >>> concatenate(dr, sn, tk, fk, op)
    0
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/tuna'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'non-existing-facet-key'
    >>> op = {'bar': True}
    >>> concatenate(dr, sn, tk, fk, op)
    1
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'test/fixtures/basic/'
    >>> sn = 'structure.yml'
    >>> tk = 'abc'
    >>> fk = 'missing'
    >>> op = {'bar': True}
    >>> concatenate(dr, sn, tk, fk, op)
    2
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/tuna'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'tuna'
    >>> op = {'bar': True}
    >>> concatenate(dr, sn, tk, fk, op)
    0
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/tuna'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'tuna'
    >>> op = {'bar': True}
    >>> try:
    ...     code = concatenate(dr, sn, tk, fk, op)
    ... except FileNotFoundError:
    ...     code = -1
    >>> os.chdir(restore_cwd)
    >>> code
    0

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/ejected-templates'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'ejected-templates'
    >>> op = {'bar': True}
    >>> try:
    ...     code = concatenate(dr, sn, tk, fk, op)
    ... except FileNotFoundError:
    ...     code = -1
    >>> os.chdir(restore_cwd)
    >>> code
    0

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/ejected-templates'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'ejected-templates-borked'
    >>> op = {'bar': True}
    >>> try:
    ...     code = concatenate(dr, sn, tk, fk, op)
    ... except FileNotFoundError:
    ...     code = -1
    >>> os.chdir(restore_cwd)
    >>> code
    0

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/tuna'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'tuna'
    >>> op = {'bar': True}
    >>> abs_here = pathlib.Path().resolve()
    >>> try:
    ...     code = concatenate(dr, sn, tk, fk, op)
    ... except FileNotFoundError:
    ...     code = -1
    >>> os.chdir(restore_cwd)
    >>> code
    0
    """
    log.info(LOG_SEPARATOR)
    log.warning('entered concat function ...')
    target_code = target_key
    facet_code = facet_key
    if not facet_code.strip() or not target_code.strip():
        log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes')
        return 2

    log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')

    structure, asset_map = gat.prelude(
        doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat'
    )
    log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')
    rel_concat_folder_path = pathlib.Path('render/pdf/')
    rel_concat_folder_path.mkdir(parents=True, exist_ok=True)
    os.chdir(rel_concat_folder_path)
    log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)')

    ok, aspect_map = too.load_target(target_code, facet_code)
    if not ok or not aspect_map:
        return 0 if ok else 1

    may_render = aspect_map.get('render', True)
    if not may_render:
        topic = f'structure({pathlib.Path(doc_root) / structure_name}) for target: {target_key} and facet: {facet_key}'
        log.warning(f'- render is declared as false in {topic}')
        if not options['force']:
            return 42
        else:
            log.warning(' + overwritten by force mode')

    # Load and normalize the four structural inputs; any integer result signals failure.
    approvals = process_approvals(aspect_map)
    if isinstance(approvals, int):
        return 2
    binder = process_binder(aspect_map)
    if isinstance(binder, int):
        return 3
    changes = process_changes(aspect_map)
    if isinstance(changes, int):
        return 4
    metadata = met.load(aspect_map)
    if isinstance(metadata, int):
        return 5

    root = SLASH
    root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH
    tree = treelib.Tree()
    tree.create_node(root, root)
    documents = {}
    insert_regions = {}
    img_collector = []
    log.info(LOG_SEPARATOR)
    log.info('processing binder ...')
    for entry in binder:
        ref_path = DOC_BASE / entry
        log.debug(f'- {entry} as {ref_path}')
        with open(ref_path, 'rt', encoding=ENCODING) as handle:
            documents[entry] = [line.rstrip() for line in handle.readlines()]
        insert_regions[entry] = []
        in_region = False
        begin, end = 0, 0
        include = ''
        tree.create_node(entry, entry, parent=root)
        for slot, line in enumerate(documents[entry]):
            if line.startswith(IMG_LINE_STARTSWITH):
                documents[entry][slot] = adapt_image(line, img_collector, entry, root_path)
            log.debug(f'{slot :02d}|{line.rstrip()}')
            if not in_region:
                if line.startswith(READ_SLOT_FENCE_BEGIN):
                    in_region = True
                    begin = slot
                    continue
                if line.startswith(INCLUDE_SLOT):
                    include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
                    insert_regions[entry].append(((slot, slot), include))
                    tree.create_node(include, include, parent=entry)
                    include = ''
                    continue
            if in_region:
                if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                    include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                elif line.startswith(READ_SLOT_FENCE_END):
                    end = slot
                    insert_regions[entry].append(((begin, end), include))
                    tree.create_node(include, include, parent=entry)
                    in_region = False
                    begin, end = 0, 0
                    include = ''

        for coords, include in insert_regions[entry]:  # include is anchored on DOC_BASE
            ref_path = DOC_BASE / include
            with open(ref_path, 'rt', encoding=ENCODING) as handle:
                documents[include] = [line.rstrip() for line in handle.readlines()]
            insert_regions[include] = []
            in_region = False
            begin, end = 0, 0
            sub_include = ''
            for slot, line in enumerate(documents[include]):
                if line.startswith(IMG_LINE_STARTSWITH):
                    documents[include][slot] = adapt_image(line, img_collector, include, root_path)
                log.debug(f'{slot :02d}|{line.rstrip()}')
                if not in_region:
                    if line.startswith(READ_SLOT_FENCE_BEGIN):
                        in_region = True
                        begin = slot
                        continue
                    if line.startswith(INCLUDE_SLOT):
                        harvest_include(line, slot, insert_regions, tree, include)
                        continue
                if in_region:
                    if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                        sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                        sub_include = str(pathlib.Path(include).parent / sub_include)
                    elif line.startswith(READ_SLOT_FENCE_END):
                        end = slot
                        insert_regions[include].append(((begin, end), sub_include))
                        tree.create_node(sub_include, sub_include, parent=include)
                        in_region = False
                        begin, end = 0, 0
                        sub_include = ''

            for coords, sub_include in insert_regions[include]:
                ref_path = DOC_BASE / sub_include
                with open(ref_path, 'rt', encoding=ENCODING) as handle:
                    documents[sub_include] = [line.rstrip() for line in handle.readlines()]
                insert_regions[sub_include] = []
                in_region = False
                begin, end = 0, 0
                sub_sub_include = ''
                for slot, line in enumerate(documents[sub_include]):
                    if line.startswith(IMG_LINE_STARTSWITH):
                        documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path)
                    log.debug(f'{slot :02d}|{line.rstrip()}')
                    if not in_region:
                        if line.startswith(READ_SLOT_FENCE_BEGIN):
                            in_region = True
                            begin = slot
                            continue
                        if line.startswith(INCLUDE_SLOT):
                            harvest_include(line, slot, insert_regions, tree, sub_include)
                            continue
                    if in_region:
                        if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                            sub_sub_include = (
                                line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                            )
                            sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include)
                        elif line.startswith(READ_SLOT_FENCE_END):
                            end = slot
                            insert_regions[sub_include].append(((begin, end), sub_sub_include))
                            tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include)
                            in_region = False
                            begin, end = 0, 0
                            sub_sub_include = ''

                for coords, sub_sub_include in insert_regions[sub_include]:
                    ref_path = DOC_BASE / sub_sub_include
                    with open(ref_path, 'rt', encoding=ENCODING) as handle:
                        documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()]
                    insert_regions[sub_sub_include] = []
                    in_region = False
                    begin, end = 0, 0
                    sub_sub_sub_include = ''
                    for slot, line in enumerate(documents[sub_sub_include]):
                        if line.startswith(IMG_LINE_STARTSWITH):
                            documents[sub_sub_include][slot] = adapt_image(
                                line, img_collector, sub_sub_include, root_path
                            )
                        log.debug(f'{slot :02d}|{line.rstrip()}')
                        if not in_region:
                            if line.startswith(READ_SLOT_FENCE_BEGIN):
                                in_region = True
                                begin = slot
                                continue
                            if line.startswith(INCLUDE_SLOT):
                                harvest_include(line, slot, insert_regions, tree, sub_sub_include)
                                continue
                        if in_region:
                            if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                                sub_sub_sub_include = (
                                    line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                                )
                                sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include)
                            elif line.startswith(READ_SLOT_FENCE_END):
                                end = slot
                                insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include))
                                tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include)
                                in_region = False
                                begin, end = 0, 0
                                sub_sub_sub_include = ''

                    # FIX: iterate the regions recorded for sub_sub_include just above;
                    # was insert_regions[sub_include], breaking the per-level pattern
                    # established by the three shallower levels.
                    for coords, sub_sub_sub_include in insert_regions[sub_sub_include]:
                        ref_path = DOC_BASE / sub_sub_sub_include
                        with open(ref_path, 'rt', encoding=ENCODING) as handle:
                            documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()]
                        insert_regions[sub_sub_sub_include] = []
                        in_region = False
                        begin, end = 0, 0
                        sub_sub_sub_sub_include = ''
                        for slot, line in enumerate(documents[sub_sub_sub_include]):
                            if line.startswith(IMG_LINE_STARTSWITH):
                                documents[sub_sub_sub_include][slot] = adapt_image(
                                    line, img_collector, sub_sub_sub_include, root_path
                                )
                            log.debug(f'{slot :02d}|{line.rstrip()}')
                            if not in_region:
                                if line.startswith(READ_SLOT_FENCE_BEGIN):
                                    in_region = True
                                    begin = slot
                                    continue
                                if line.startswith(INCLUDE_SLOT):
                                    harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include)
                                    continue
                            if in_region:
                                if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                                    sub_sub_sub_sub_include = (
                                        line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                                    )
                                    sub_sub_sub_sub_include = str(
                                        pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include
                                    )
                                elif line.startswith(READ_SLOT_FENCE_END):
                                    end = slot
                                    # FIX: record the freshly harvested sub_sub_sub_sub_include;
                                    # was sub_sub_sub_include, a self-reference contradicting the
                                    # tree.create_node call right below.
                                    insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include))
                                    tree.create_node(
                                        sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include
                                    )
                                    in_region = False
                                    begin, end = 0, 0
                                    sub_sub_sub_sub_include = ''

    top_down_paths = tree.paths_to_leaves()
    bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths]
    log.info(LOG_SEPARATOR)
    log.info('resulting tree:')
    for edge in str(tree).split(NL):
        log.info(edge)

    log.info(LOG_SEPARATOR)
    log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:')
    for num, leaf_path in enumerate(bottom_up_paths):
        the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}'
        log.info(f'{num :2d}: {the_way_up}')

    concat = {}
    log.info(LOG_SEPARATOR)
    log.info(f'dependencies for the {len(insert_regions)} document parts:')
    for key, regions in insert_regions.items():
        num_in = len(regions)
        dashes = '-' * num_in
        incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )'
        indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}'
        log.info(f'- part {key} {indicator}')
        for region in regions:
            between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}'
            insert = f'include fragment {region[1]}'
            log.info(f' + {between} {insert}')
        if not regions:  # No includes - the part is already flat and ready for insertion.
            concat[key] = '\n'.join(documents[key]) + '\n'
            log.info(f' * did concat {key} document for insertion')

    chains = [leaf_path for leaf_path in bottom_up_paths]
    log.info(LOG_SEPARATOR)
    log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:')
    todo = [[job for job in chain if job not in concat] for chain in chains]
    while todo != [[]]:
        todo = rollup(todo, documents, insert_regions, concat)

    log.info(LOG_SEPARATOR)
    log.info('writing final concat markdown to document.md')
    with open('document.md', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(concat[bind] for bind in binder) + '\n')

    log.info(LOG_SEPARATOR)
    log.info('collecting assets (images and diagrams)')
    collect_assets(img_collector)
    log.info(LOG_SEPARATOR)
    log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)')
    log.info(LOG_SEPARATOR)
    log.info('processing complete - SUCCESS')
    log.info(LOG_SEPARATOR)
    return 0