Coverage for liitos/concat.py: 90.01%
515 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-01-05 17:22:35 +00:00
1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs."""
3import json
4import os
5import pathlib
6import re
7import shutil
8from typing import Union, no_type_check
10import treelib # type: ignore
11import yaml
13import liitos.gather as gat
14import liitos.meta as met
15import liitos.placeholder as plh
16import liitos.tools as too
17from liitos import ENCODING, LOG_SEPARATOR, PathLike, log
# Sentinel texts injected while rewriting markdown image lines: the alt sentinel
# forces pandoc to wrap the image in a figure environment; the cap sentinel makes
# a missing caption visible in the output.
ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC'
CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT'
# The concat processor chdirs into render/pdf/, two levels below the document root.
DOC_BASE = pathlib.Path('..', '..')
STRUCTURE_PATH = DOC_BASE / 'structure.yml'
SLASH = '/'
# Folder names (with trailing slash) used to detect and stage image/diagram assets.
IMAGES_FOLDER = 'images/'
DIAGRAMS_FOLDER = 'diagrams/'
# Example of a codebraid "read slot" fence that pulls another markdown file in:
"""
```{.python .cb.run}
with open('sub/as.md') as fp:
 print(fp.read())
```
"""
READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}'
READ_SLOT_CONTEXT_BEGIN = 'with open('
READ_SLOT_FENCE_END = '```'
# Example of a LaTeX-style include slot:
r"""
\include{markdown_file_path}
"""
INCLUDE_SLOT = '\\include{'
# Example markdown image lines the parser below must cope with:
"""
![Alt Text Red](images/red.png "Caption Text Red")
![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime")
![Alt Text Blue](images/blue.png "Caption Text Blue")
![Alt Text Sting Red](other/images/red.png "Caption Text Sting Red")
"""
IMG_LINE_STARTSWITH = '!['
# Full-line image pattern; the RIGHT_SPLIT variant matches only the part after
# '](' and is used when parentheses inside the caption defeat the full pattern.
MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
NL = '\n'
@no_type_check
def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]:
    """Best effort loading of approvals data.

    Reads the approvals file referenced by the aspects map (JSON or YAML per
    suffix), validates it is present and non-empty, and writes a normalized
    copy (approvals.yml or approvals.json) into the current working directory.

    Returns the parsed approvals data on success, or the int 1 on any failure.

    Examples:

    >>> aspects = {gat.KEY_APPROVALS: 'missing-file'}
    >>> process_approvals(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> approvals_name = 'empty-as-approvals.yml'
    >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)}
    >>> process_approvals(aspects)
    1

    >>> DOC_BASE = pathlib.Path('.')
    >>> aspects = {gat.KEY_APPROVALS: __file__}
    >>> process_approvals(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> approvals_name = 'space-as-approvals.yml'
    >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)}
    >>> process_approvals(aspects)
    1
    """
    approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS]
    if not approvals_path.is_file() or not approvals_path.stat().st_size:
        log.error(f'destructure failed to find non-empty approvals file at {approvals_path}')
        return 1
    if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported')
        return 1
    # Channel selection drives both the parser and the name of the normalized copy:
    approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(approvals_path, 'rt', encoding=ENCODING) as handle:
        approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle)
    if not approvals:
        log.error(f'empty approvals file? Please add approvals to ({approvals_path})')
        return 1
    if approvals_channel == 'yaml':
        with open('approvals.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(approvals, handle, default_flow_style=False)
    else:
        with open('approvals.json', 'wt', encoding=ENCODING) as handle:
            json.dump(approvals, handle, indent=2)
    return approvals
@no_type_check
def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]:
    """Best effort loading of binder data.

    Reads the bind file (plain .txt, one component path per line) referenced by
    the aspects map, drops blank lines, and writes a normalized copy (bind.txt)
    into the current working directory.

    Returns the list of component paths on success, or the int 1 on any failure.

    Examples:

    >>> aspects = {gat.KEY_BIND: 'missing-file'}
    >>> process_binder(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> binder_name = 'empty-as-bind.txt'
    >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)}
    >>> process_binder(aspects)
    1

    >>> DOC_BASE = pathlib.Path('.')
    >>> aspects = {gat.KEY_BIND: __file__}
    >>> process_binder(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> binder_name = 'space-as-bind.txt'
    >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)}
    >>> process_binder(aspects)
    1
    """
    bind_path = DOC_BASE / aspects[gat.KEY_BIND]
    if not bind_path.is_file() or not bind_path.stat().st_size:
        log.error(f'destructure failed to find non-empty bind file at {bind_path}')
        return 1
    if bind_path.suffix.lower() not in ('.txt',):
        log.error(f'bind file format per suffix ({bind_path.suffix}) not supported')
        return 1
    with open(bind_path, 'rt', encoding=ENCODING) as handle:
        # Whitespace-only lines carry no component path and are dropped.
        binder = [line.strip() for line in handle.readlines() if line.strip()]
    if not binder:
        log.error(f'empty bind file? Please add component paths to ({bind_path})')
        return 1
    with open('bind.txt', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(binder) + '\n')
    return binder
@no_type_check
def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]:
    """Best effort loading of changes data.

    Reads the changes file referenced by the aspects map (JSON or YAML per
    suffix), validates it is present and non-empty, and writes a normalized
    copy (changes.yml or changes.json) into the current working directory.

    Returns the parsed changes data on success, or the int 1 on any failure.

    Examples:

    >>> aspects = {gat.KEY_CHANGES: 'missing-file'}
    >>> process_changes(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> changes_name = 'empty-as-changtes.yml'
    >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)}
    >>> process_changes(aspects)
    1

    >>> DOC_BASE = pathlib.Path('.')
    >>> aspects = {gat.KEY_CHANGES: __file__}
    >>> process_changes(aspects)
    1

    >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/'
    >>> changes_name = 'space-as-changes.yml'
    >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)}
    >>> process_changes(aspects)
    1
    """
    changes_path = DOC_BASE / aspects[gat.KEY_CHANGES]
    if not changes_path.is_file() or not changes_path.stat().st_size:
        log.error(f'destructure failed to find non-empty changes file at {changes_path}')
        return 1
    if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'changes file format per suffix ({changes_path.suffix}) not supported')
        return 1
    # Channel selection drives both the parser and the name of the normalized copy:
    changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(changes_path, 'rt', encoding=ENCODING) as handle:
        changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle)
    if not changes:
        log.error(f'empty changes file? Please add changes data to ({changes_path})')
        return 1
    if changes_channel == 'yaml':
        with open('changes.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(changes, handle, default_flow_style=False)
    else:
        with open('changes.json', 'wt', encoding=ENCODING) as handle:
            json.dump(changes, handle, indent=2)
    return changes
@no_type_check
def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]:
    """Parse a markdown image line within our conventions into caption, src, alt, and optional rest.

    A line of the shape ``![cap](src "alt")rest`` is diagnosed token by token
    (brackets, parentheses, quotes, spaces) with warnings for suspicious but
    recoverable shapes. On any unrecoverable shape the invalid marker
    ``('', '', '', text_line)`` is returned so the caller can keep the line as-is.
    A missing caption is replaced with the CAP_INJECTOR_HACK sentinel.

    Examples:

    >>> t = ''
    >>> parse_markdown_image(t)
    ('', '', '', '')

    >>> t = '![]()'
    >>> parse_markdown_image(t)
    ('', '', '', '![]()')

    >>> t = '![a](b "c")'
    >>> parse_markdown_image(t)
    ('a', 'b', 'c', '')

    >>> t = '![a](liitos/placeholders/this-resource-is-missing.png "c")'
    >>> parse_markdown_image(t)
    ('a', 'liitos/placeholders/this-resource-is-missing.png', 'c', '')
    """
    invalid_marker = ('', '', '', text_line)

    exclam = '!'
    osb = '['
    if not text_line or not text_line.startswith(f'{exclam}{osb}'):
        log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>')
        return invalid_marker

    csb = ']'
    osb_cnt = text_line.count(osb)
    csb_cnt = text_line.count(csb)
    if osb_cnt + csb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if osb_cnt != csb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    orb = '('
    cap_src_boundary = f'{csb}{orb}'
    if cap_src_boundary not in text_line:
        log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>')
        return invalid_marker

    crb = ')'
    orb_cnt = text_line.count(orb)
    crb_cnt = text_line.count(crb)
    if orb_cnt + crb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if orb_cnt != crb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    quo = '"'
    quo_cnt = text_line.count(quo)
    if quo_cnt < 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>')
    if quo_cnt % 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    sp = ' '
    sp_cnt = text_line.count(sp)
    if not sp_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>')

    dot = '.'
    sla = '/'
    abs_path_indicator = f'{csb}{orb}{sla}'
    may_have_abs_path = abs_path_indicator in text_line
    if may_have_abs_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>')
    naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}'
    may_have_upwards_path = naive_upwards_path_indicator in text_line
    if may_have_upwards_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>')

    log.info('- parsing the markdown image text line ...')
    if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt:
        # The regex is not safe for orb inside caption - split at the first ']('
        # and match only the right side.
        left, right = text_line.split(cap_src_boundary, 1)
        match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right)
        if not match_right:
            log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>')
            return invalid_marker

        parts = match_right.groupdict()
        cap = left[2:]  # strip the leading '![' from the caption side
        if not cap:
            log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
            cap = CAP_INJECTOR_HACK

        src = parts['src']
        alt = parts['alt']
        rest = parts['rest']
        if orb in alt or crb in alt:
            log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>')
            log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

        return cap, src, alt, rest

    match = MD_IMG_PATTERN.match(text_line)
    if not match:
        log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>')
        return invalid_marker

    parts = match.groupdict()
    cap = parts['cap']
    if not cap:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
        cap = CAP_INJECTOR_HACK

    src = parts['src']
    alt = parts['alt']
    rest = parts['rest']
    if orb in alt or crb in alt:
        log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>')
        log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

    return cap, src, alt, rest
@no_type_check
def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str:
    """Rewrite a markdown image line so the src points below images/ or diagrams/.

    The src is resolved relative to the folder of the upstream document, made
    relative to root, and appended to collector so collect_assets can later copy
    the asset next to the concatenated document. When no alt text is present the
    ALT_INJECTOR_HACK sentinel is injected to keep pandoc emitting a figure.

    Returns the rewritten line, or the input line unchanged when parsing fails.
    """
    cap, src, alt, rest = parse_markdown_image(text_line)
    if not src:
        log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>')
        return text_line

    img_path = str((pathlib.Path(upstream).parent / src).resolve()).replace(root, '')
    collector.append(img_path)
    # Rebase the path onto the local images/ or diagrams/ staging folder:
    img_hack = img_path
    if f'/{IMAGES_FOLDER}' in img_path:
        img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1]
    elif f'/{DIAGRAMS_FOLDER}' in img_path:
        img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1]

    if img_hack != img_path:
        log.info(f'{img_hack} <--- OK? --- {img_path}')

    alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"'
    belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}'
    log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-')
    return belte_og_seler
@no_type_check
def harvest_include(
    text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str
) -> None:
    r"""Record one \include{...} slot found in a document part.

    The included path is anchored on the folder of the parent part, registered
    as a single-line region (slot, slot) under the parent, and added to the
    include tree as a child of the parent node.

    Examples:

    >>> text = 'baz\n\\include{c}\nquux'
    >>> slot = 0
    >>> regions = {SLASH: [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]}
    >>> tr = treelib.Tree()
    >>> root = SLASH
    >>> tr.create_node(root, root)
    Node(tag=/, identifier=/, data=None)
    >>> harvest_include(text, slot, regions, tr, root)
    >>> print(tr)
    /
    └── /c}
    quux
    <BLANKLINE>
    """
    _, _, raw_target = text_line.partition(INCLUDE_SLOT)
    anchored_target = str(pathlib.Path(parent).parent / raw_target.rstrip('}').strip())
    regions[parent].append(((slot, slot), anchored_target))
    tree.create_node(anchored_target, anchored_target, parent=parent)
@no_type_check
def rollup(
    jobs: list[list[str]],
    docs: dict[str, list[str]],
    regions: dict[str, list[tuple[tuple[int, int], str]]],
    flat: dict[str, str],
) -> list[list[str]]:
    r"""Perform one bottom-up insertion pass over the inclusion chains.

    For every chain head (skipping the root SLASH) the part's lines are copied,
    with each include region replaced by the already-flattened text of its
    include; the result lands in flat. Returns the chains with all now-flat
    parts removed, or [[]] when nothing remained to do.

    Examples:

    >>> jobs = [['a', 'b'], ['b', 'c']]
    >>> docs = {'a': ['a1', 'a2'], 'b': ['b1', 'b2'], 'c': ['c1', 'c2', 'c3']}
    >>> regions = {'a': [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]}
    >>> flat = {'a': 'a1\na2', 'b': 'b1\nb2', 'c': 'c1\nc2\nc3'}
    >>> rollup(jobs, docs, regions, flat)
    [[], []]
    >>> flat
    {'a': 'b1\nb2\n', 'b': 'c1\nc2\nc3\n', 'c': 'c1\nc2\nc3'}

    >>> jobs = [['/', 'b'], ['/', 'c']]
    >>> docs, regions, flat = {}, {}, {'baz': 'quux'}
    >>> rollup(jobs, docs, regions, flat)
    [[]]
    >>> flat
    {'baz': 'quux'}
    """
    heads = [chain[0] for chain in jobs if chain and chain[0] != SLASH]
    if not heads:
        return [[]]
    log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(heads)))}) remaining')

    for part in heads:
        assembled = []
        for slot, line in enumerate(docs[part]):
            covered = False
            starts_here = False
            payload = ''
            for (low, high), include in regions[part]:
                if low <= slot <= high:
                    covered = True
                    if slot == low:
                        starts_here = True
                        payload = include
            if not covered:
                # Plain line outside every include region - keep it verbatim.
                assembled.append(line)
            elif starts_here:
                # First line of a region - splice in the flattened include;
                # the remaining region lines are dropped.
                assembled.append(flat[payload])
        flat[part] = NL.join(assembled) + NL

    return [[job for job in chain if job not in flat] for chain in jobs]
@no_type_check
def _stage_asset(doc_base: pathlib.Path, img_path: str, target_folder: pathlib.Path) -> None:
    """Copy one asset below target_folder, creating the folder on demand.

    On a missing or unreachable source a placeholder is dumped at the target
    position instead (best effort - failures are logged, never raised).
    """
    if not target_folder.is_dir():
        try:
            target_folder.mkdir(parents=True, exist_ok=True)
        except FileExistsError as err:  # a non-directory entry blocks the path
            log.error(f'failed to create {target_folder} - detail: {err}')
    source_asset = doc_base / img_path
    target_asset = target_folder / pathlib.Path(img_path).name
    try:
        shutil.copy(source_asset, target_asset)
    except (FileNotFoundError, NotADirectoryError) as err:
        log.error(err)
        code, msg = plh.dump_placeholder(target_asset)
        if code:
            log.warning(msg)
        else:
            log.info(msg)


@no_type_check
def collect_assets(
    collector: list[str],
    doc_base: Union[PathLike, None] = None,
    images_folder: Union[PathLike, None] = None,
    diagrams_folder: Union[PathLike, None] = None,
) -> None:
    """Copy every collected image/diagram asset into the local staging folders.

    Paths containing the images folder go below images/, paths containing the
    diagrams folder below diagrams/; other entries are ignored. Missing assets
    are replaced with placeholders (see _stage_asset).

    Examples:

    >>> c = ['foo']
    >>> collect_assets(c)

    >>> import tempfile
    >>> with tempfile.TemporaryDirectory() as imaf:
    ...     c = [imaf + 'foo']
    ...     collect_assets(c, doc_base='.', images_folder=imaf)

    >>> import tempfile
    >>> with tempfile.TemporaryDirectory() as imaf:
    ...     with tempfile.TemporaryDirectory() as diaf:
    ...         c = [imaf + 'foo', diaf + 'bar']
    ...         collect_assets(c, doc_base='.', images_folder=imaf, diagrams_folder=diaf)

    >>> import tempfile
    >>> with tempfile.TemporaryDirectory() as imaf:
    ...     ima = pathlib.Path(imaf) / 'images'
    ...     ima.touch()
    ...     with tempfile.TemporaryDirectory() as diaf:
    ...         dia = pathlib.Path(diaf) / 'diagrams'
    ...         dia.touch()
    ...         c = [str(ima / 'foo'), str(dia / 'bar')]
    ...         collect_assets(c, doc_base='.', images_folder=ima, diagrams_folder=dia)
    """
    doc_base = pathlib.Path(doc_base) if doc_base else DOC_BASE
    images_folder = str(images_folder) if images_folder else IMAGES_FOLDER
    diagrams_folder = str(diagrams_folder) if diagrams_folder else DIAGRAMS_FOLDER

    images = pathlib.Path(images_folder)
    diagrams = pathlib.Path(diagrams_folder)
    for img_path in collector:
        if images_folder in img_path:
            _stage_asset(doc_base, img_path, images)
            continue
        if diagrams_folder in img_path:
            _stage_asset(doc_base, img_path, diagrams)
@no_type_check
def _parse_part(
    part: str,
    documents: dict[str, list[str]],
    insert_regions: dict[str, list[tuple[tuple[int, int], str]]],
    tree: treelib.Tree,
    img_collector: list[str],
    root_path: str,
    anchored: bool,
) -> None:
    """Load one markdown part, rewrite its image lines, and record its include regions.

    Fills documents[part] with the part's (rstripped) lines, image lines already
    rewritten via adapt_image, and fills insert_regions[part] with one
    ((begin, end), include) pair per discovered include slot, adding a tree node
    per include. Two slot syntaxes are recognized: a \\include{...} line, and a
    codebraid read-slot fence (READ_SLOT_FENCE_BEGIN .. READ_SLOT_FENCE_END with
    a READ_SLOT_CONTEXT_BEGIN line naming the file).

    When anchored is True (parts below the binder level) include paths are
    resolved relative to the folder of part; when False (binder entries) they
    are taken verbatim, anchored on DOC_BASE by convention.
    """
    with open(DOC_BASE / part, 'rt', encoding=ENCODING) as handle:
        documents[part] = [line.rstrip() for line in handle.readlines()]
    insert_regions[part] = []
    in_region = False
    begin, end = 0, 0
    include = ''
    for slot, line in enumerate(documents[part]):
        if line.startswith(IMG_LINE_STARTSWITH):
            documents[part][slot] = adapt_image(line, img_collector, part, root_path)
        log.debug(f'{slot :02d}|{line.rstrip()}')
        if not in_region:
            if line.startswith(READ_SLOT_FENCE_BEGIN):
                in_region = True
                begin = slot
                continue
            if line.startswith(INCLUDE_SLOT):
                if anchored:
                    harvest_include(line, slot, insert_regions, tree, part)
                else:
                    include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
                    insert_regions[part].append(((slot, slot), include))
                    tree.create_node(include, include, parent=part)
                    include = ''
                continue
        if in_region:
            if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                if anchored:
                    include = str(pathlib.Path(part).parent / include)
            elif line.startswith(READ_SLOT_FENCE_END):
                end = slot
                insert_regions[part].append(((begin, end), include))
                tree.create_node(include, include, parent=part)
                in_region = False
                begin, end = 0, 0
                include = ''


@no_type_check
def concatenate(
    doc_root: Union[str, pathlib.Path],
    structure_name: str,
    target_key: str,
    facet_key: str,
    options: dict[str, Union[bool, str]],
) -> int:
    """Concatenate the markdown tree of the target/facet into render/pdf/document.md.

    Resolves the binder, recursively parses every included part (see
    _parse_part), rolls the parts up bottom-to-top (see rollup), writes the
    final document.md, and stages the referenced assets (see collect_assets).
    Returns 0 on success and a small positive error code otherwise.

    Examples:

    >>> restore_cwd = os.getcwd()
    >>> dr = '.'
    >>> sn = 'foo'
    >>> tk = ''
    >>> fk = ''
    >>> op = {'bar': True}
    >>> concatenate(dr, sn, tk, fk, op, )
    2
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/tuna'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'non-existing-facet-key'
    >>> op = {'bar': True}
    >>> concatenate(dr, sn, tk, fk, op)
    1
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'test/fixtures/basic/'
    >>> sn = 'structure.yml'
    >>> tk = 'abc'
    >>> fk = 'missing'
    >>> op = {'bar': True}
    >>> concatenate(dr, sn, tk, fk, op)
    2
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/tuna'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'tuna'
    >>> op = {'bar': True}
    >>> concatenate(dr, sn, tk, fk, op)
    0
    >>> os.chdir(restore_cwd)

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/tuna'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'tuna'
    >>> op = {'bar': True}
    >>> try:
    ...     code = concatenate(dr, sn, tk, fk, op)
    ... except FileNotFoundError:
    ...     code = -1
    >>> os.chdir(restore_cwd)
    >>> code
    0

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/ejected-templates'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'ejected-templates'
    >>> op = {'bar': True}
    >>> try:
    ...     code = concatenate(dr, sn, tk, fk, op)
    ... except FileNotFoundError:
    ...     code = -1
    >>> os.chdir(restore_cwd)
    >>> code
    0

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/ejected-templates'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'ejected-templates-borked'
    >>> op = {'bar': True}
    >>> try:
    ...     code = concatenate(dr, sn, tk, fk, op)
    ... except FileNotFoundError:
    ...     code = -1
    >>> os.chdir(restore_cwd)
    >>> code
    0

    >>> restore_cwd = os.getcwd()
    >>> dr = 'example/tuna'
    >>> sn = 'structure.yml'
    >>> tk = 'prod_kind'
    >>> fk = 'tuna'
    >>> op = {'bar': True}
    >>> abs_here = pathlib.Path().resolve()
    >>> try:
    ...     code = concatenate(dr, sn, tk, fk, op)
    ... except FileNotFoundError:
    ...     code = -1
    >>> os.chdir(restore_cwd)
    >>> code
    0
    """
    log.info(LOG_SEPARATOR)
    log.info('entered concat function ...')
    target_code = target_key
    facet_code = facet_key
    if not facet_code.strip() or not target_code.strip():
        log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes')
        return 2

    log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')

    structure, asset_map = gat.prelude(
        doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat'
    )
    log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')
    rel_concat_folder_path = pathlib.Path('render/pdf/')
    rel_concat_folder_path.mkdir(parents=True, exist_ok=True)
    os.chdir(rel_concat_folder_path)
    log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)')

    ok, aspect_map = too.load_target(target_code, facet_code)
    if not ok or not aspect_map:
        return 0 if ok else 1

    approvals = process_approvals(aspect_map)
    if isinstance(approvals, int):
        return 2
    binder = process_binder(aspect_map)
    if isinstance(binder, int):
        return 3
    changes = process_changes(aspect_map)
    if isinstance(changes, int):
        return 4
    metadata = met.load(aspect_map)
    if isinstance(metadata, int):
        return 5

    root = SLASH
    root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH
    tree = treelib.Tree()
    tree.create_node(root, root)
    documents = {}
    insert_regions = {}
    img_collector = []
    log.info(LOG_SEPARATOR)
    log.info('processing binder ...')
    for entry in binder:
        log.debug(f'- {entry} as {DOC_BASE / entry}')
        tree.create_node(entry, entry, parent=root)
        _parse_part(entry, documents, insert_regions, tree, img_collector, root_path, anchored=False)
        # Depth-first descent through the include regions replaces the former
        # four hard-coded nesting levels and supports arbitrary include depth.
        pending = [included for _, included in insert_regions[entry]]
        while pending:
            part = pending.pop(0)
            _parse_part(part, documents, insert_regions, tree, img_collector, root_path, anchored=True)
            pending[0:0] = [included for _, included in insert_regions[part]]

    top_down_paths = tree.paths_to_leaves()
    bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths]
    log.info(LOG_SEPARATOR)
    log.info('resulting tree:')
    for edge in str(tree).split(NL):
        log.info(edge)

    log.info(LOG_SEPARATOR)
    log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:')
    for num, leaf_path in enumerate(bottom_up_paths):
        the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}'
        log.info(f'{num :2d}: {the_way_up}')

    concat = {}
    log.info(LOG_SEPARATOR)
    log.info(f'dependencies for the {len(insert_regions)} document parts:')
    for key, regions in insert_regions.items():
        num_in = len(regions)
        dashes = '-' * num_in
        incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )'
        indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}'
        log.info(f'- part {key} {indicator}')
        for region in regions:
            between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}'
            insert = f'include fragment {region[1]}'
            log.info(f' + {between} {insert}')
        if not regions:  # No includes - leaf parts can be flattened immediately
            concat[key] = '\n'.join(documents[key]) + '\n'
            log.info(f' * did concat {key} document for insertion')

    chains = [leaf_path for leaf_path in bottom_up_paths]
    log.info(LOG_SEPARATOR)
    log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:')
    todo = [[job for job in chain if job not in concat] for chain in chains]
    while todo != [[]]:
        todo = rollup(todo, documents, insert_regions, concat)

    log.info(LOG_SEPARATOR)
    log.info('writing final concat markdown to document.md')
    with open('document.md', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(concat[bind] for bind in binder) + '\n')

    log.info(LOG_SEPARATOR)
    log.info('collecting assets (images and diagrams)')
    collect_assets(img_collector)
    log.info(LOG_SEPARATOR)
    log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)')
    log.info(LOG_SEPARATOR)
    log.info('processing complete - SUCCESS')
    log.info(LOG_SEPARATOR)
    return 0