1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs."""
3import json
4import os
5import pathlib
6import re
7import shutil
8import sys
9from io import StringIO
10from typing import Union, no_type_check
12import treelib # type: ignore
13import yaml
15import liitos.gather as gat
16import liitos.placeholder as plh
17import liitos.tools as too
18from liitos import ENCODING, LOG_SEPARATOR, log

ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC'
CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT'
DOC_BASE = pathlib.Path('..', '..')
STRUCTURE_PATH = DOC_BASE / 'structure.yml'
SLASH = '/'
IMAGES_FOLDER = 'images/'
DIAGRAMS_FOLDER = 'diagrams/'

"""
```{.python .cb.run}
with open('sub/as.md') as fp:
    print(fp.read())
```
"""
READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}'
READ_SLOT_CONTEXT_BEGIN = 'with open('
READ_SLOT_FENCE_END = '```'

r"""
\include{markdown_file_path}
"""
INCLUDE_SLOT = '\\include{'

"""
![Alt Text Red](images/red.png "Caption Text Red")
![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime")
![Alt Text Blue](images/blue.png "Caption Text Blue")
![Alt Text Sting Red](other/images/red.png "Caption Text Sting Red")
"""
IMG_LINE_STARTSWITH = '!['
MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
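
# Illustrative decomposition (matching the first example line above against MD_IMG_PATTERN):
#
#     ![Alt Text Red](images/red.png "Caption Text Red")
#     --> cap='Alt Text Red', src='images/red.png', alt='Caption Text Red', rest=''
#
# Note the naming convention used throughout this module: the group `cap` captures the text
# between the square brackets and `alt` captures the quoted title string after the source path.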

NL = '\n'


@no_type_check
class RedirectedStdout:
    @no_type_check
    def __init__(self):
        self._stdout = None
        self._string_io = None

    @no_type_check
    def __enter__(self):
        self._stdout = sys.stdout
        sys.stdout = self._string_io = StringIO()
        return self

    @no_type_check
    def __exit__(self, type, value, traceback):
        sys.stdout = self._stdout

    @no_type_check
    def __str__(self):
        return self._string_io.getvalue()
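
# Minimal usage sketch (illustrative only): capture anything printed inside the context
# and forward it to the module logger afterwards.
#
#     with RedirectedStdout() as captured:
#         print('hello from a noisy helper')
#     log.info(str(captured))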


@no_type_check
def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]:
    """Read the approvals file declared in the aspects map and persist a copy in the working folder."""
    approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS]
    if not approvals_path.is_file() or not approvals_path.stat().st_size:
        log.error(f'destructure failed to find non-empty approvals file at {approvals_path}')
        return 1
    if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported')
        return 1
    approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(approvals_path, 'rt', encoding=ENCODING) as handle:
        approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle)
    if not approvals:
        log.error(f'empty approvals file? Please add approvals to ({approvals_path})')
        return 1
    if approvals_channel == 'yaml':
        with open('approvals.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(approvals, handle, default_flow_style=False)
    else:
        with open('approvals.json', 'wt', encoding=ENCODING) as handle:
            json.dump(approvals, handle, indent=2)
    return approvals


@no_type_check
def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]:
    """Read the bind file declared in the aspects map and persist the component list as bind.txt."""
    bind_path = DOC_BASE / aspects[gat.KEY_BIND]
    if not bind_path.is_file() or not bind_path.stat().st_size:
        log.error(f'destructure failed to find non-empty bind file at {bind_path}')
        return 1
    if bind_path.suffix.lower() not in ('.txt',):
        log.error(f'bind file format per suffix ({bind_path.suffix}) not supported')
        return 1
    with open(bind_path, 'rt', encoding=ENCODING) as handle:
        binder = [line.strip() for line in handle.readlines() if line.strip()]
    if not binder:
        log.error(f'empty bind file? Please add component paths to ({bind_path})')
        return 1
    with open('bind.txt', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(binder) + '\n')
    return binder
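
# A bind file is expected to be plain text with one component path per line, relative to
# the document base, e.g. (hypothetical paths):
#
#     part/a.md
#     part/b.md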


@no_type_check
def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]:
    """Read the changes file declared in the aspects map and persist a copy in the working folder."""
    changes_path = DOC_BASE / aspects[gat.KEY_CHANGES]
    if not changes_path.is_file() or not changes_path.stat().st_size:
        log.error(f'destructure failed to find non-empty changes file at {changes_path}')
        return 1
    if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'changes file format per suffix ({changes_path.suffix}) not supported')
        return 1
    changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(changes_path, 'rt', encoding=ENCODING) as handle:
        changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle)
    if not changes:
        log.error(f'empty changes file? Please add changes data to ({changes_path})')
        return 1
    if changes_channel == 'yaml':
        with open('changes.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(changes, handle, default_flow_style=False)
    else:
        with open('changes.json', 'wt', encoding=ENCODING) as handle:
            json.dump(changes, handle, indent=2)
    return changes


@no_type_check
def process_meta(aspects: dict[str, str]) -> Union[gat.Meta, int]:
    """Read the metadata file declared in the aspects map, apply an optional base import with patches, and persist metadata.yml."""
    meta_path = DOC_BASE / aspects[gat.KEY_META]
    if not meta_path.is_file() or not meta_path.stat().st_size:
        log.error(f'destructure failed to find non-empty meta file at {meta_path}')
        return 1
    if meta_path.suffix.lower() not in ('.yaml', '.yml'):
        log.error(f'meta file format per suffix ({meta_path.suffix}) not supported')
        return 1
    with open(meta_path, 'rt', encoding=ENCODING) as handle:
        metadata = yaml.safe_load(handle)
    if not metadata:
        log.error(f'empty metadata file? Please add metadata to ({meta_path})')
        return 1
    if 'import' in metadata['document']:
        base_meta_path = DOC_BASE / metadata['document']['import']
        if not base_meta_path.is_file() or not base_meta_path.stat().st_size:
            log.error(
                f'metadata declares import of base data from ({base_meta_path.name})'
                f' but failed to find non-empty base file at {base_meta_path}'
            )
            return 1
        with open(base_meta_path, 'rt', encoding=ENCODING) as handle:
            base_data = yaml.safe_load(handle)
        for key, value in metadata['document']['patch'].items():
            base_data['document']['common'][key] = value
        metadata = base_data
    with open('metadata.yml', 'wt', encoding=ENCODING) as handle:
        yaml.dump(metadata, handle, default_flow_style=False)
    return metadata
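
# Illustrative metadata layering (hypothetical file names and keys): a facet metadata file
# may import a base file and patch selected keys of its document.common mapping.
#
#     # meta-facet.yml
#     document:
#       import: meta-base.yml
#       patch:
#         header_id: XYZ-42
#
#     # meta-base.yml
#     document:
#       common:
#         header_id: TBD
#         title: Example
#
# The patch above yields document.common.header_id == 'XYZ-42' in the persisted metadata.yml.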


@no_type_check
def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]:
    """Parse a markdown image line within our conventions into caption, src, alt, and optional rest."""
    invalid_marker = ('', '', '', text_line)

    exclam = '!'
    osb = '['
    if not text_line or not text_line.startswith(f'{exclam}{osb}'):
        log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>')
        return invalid_marker

    csb = ']'
    osb_cnt = text_line.count(osb)
    csb_cnt = text_line.count(csb)
    if osb_cnt + csb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if osb_cnt != csb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    orb = '('
    cap_src_boundary = f'{csb}{orb}'
    if cap_src_boundary not in text_line:
        log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>')
        return invalid_marker

    crb = ')'
    orb_cnt = text_line.count(orb)
    crb_cnt = text_line.count(crb)
    if orb_cnt + crb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if orb_cnt != crb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    quo = '"'
    quo_cnt = text_line.count(quo)
    if quo_cnt < 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>')
    if quo_cnt % 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    sp = ' '
    sp_cnt = text_line.count(sp)
    if not sp_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>')

    dot = '.'
    sla = '/'
    abs_path_indicator = f'{csb}{orb}{sla}'
    may_have_abs_path = abs_path_indicator in text_line
    if may_have_abs_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>')
    naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}'
    may_have_upwards_path = naive_upwards_path_indicator in text_line
    if may_have_upwards_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>')

    log.info('- parsing the markdown image text line ...')
    if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt:
        # The regex is not safe for orb inside caption
        left, right = text_line.split(cap_src_boundary, 1)
        match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right)
        if not match_right:
            log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>')
            return invalid_marker

        parts = match_right.groupdict()
        cap = left[2:]
        if not cap:
            log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
            cap = CAP_INJECTOR_HACK

        src = parts['src']
        alt = parts['alt']
        rest = parts['rest']
        if orb in alt or crb in alt:
            log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>')
        log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

        return cap, src, alt, rest

    match = MD_IMG_PATTERN.match(text_line)
    if not match:
        log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>')
        return invalid_marker

    parts = match.groupdict()
    cap = parts['cap']
    if not cap:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
        cap = CAP_INJECTOR_HACK

    src = parts['src']
    alt = parts['alt']
    rest = parts['rest']
    if orb in alt or crb in alt:
        log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>')
    log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

    return cap, src, alt, rest


@no_type_check
def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str:
    """Rewrite the src of a markdown image line to the local asset location and register the resolved original path."""
    cap, src, alt, rest = parse_markdown_image(text_line)
    if not src:
        log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>')
        return text_line

    img_path = str((pathlib.Path(upstream).parent / src).resolve()).replace(root, '')
    collector.append(img_path)
    img_hack = img_path
    if f'/{IMAGES_FOLDER}' in img_path:
        img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1]
    elif f'/{DIAGRAMS_FOLDER}' in img_path:
        img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1]

    if img_hack != img_path:
        log.info(f'{img_hack} <--- OK? --- {img_path}')

    alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"'
    belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}'
    log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-')
    return belte_og_seler
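
# Illustrative rewrite (hypothetical upstream part path 'part/example.md' below the render root):
#
#     in : ![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime")
#     out: ![Alt Text Dot Dot Lime](images/lime.png "Caption Text Dot Dot Lime")
#
# The resolved original path is appended to the collector so collect_assets can copy it later.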


@no_type_check
def harvest_include(
    text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str
) -> None:
    """Record an \\include{...} directive at slot as a single-line insert region of parent and as a tree node."""
    include_local = text_line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
    include = str(pathlib.Path(parent).parent / include_local)
    regions[parent].append(((slot, slot), include))
    tree.create_node(include, include, parent=parent)


@no_type_check
def rollup(
    jobs: list[list[str]],
    docs: dict[str, list[str]],
    regions: dict[str, list[tuple[tuple[int, int], str]]],
    flat: dict[str, str],
) -> list[list[str]]:
    """Flatten the heads of the bottom-up inclusion chains by splicing already flat parts into their parents."""
    tackle = [those[0] for those in jobs if those and those[0] != SLASH]
    if tackle:
        log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(tackle)))}) remaining')
    else:
        return [[]]
    for that in tackle:
        buf = []
        for slot, line in enumerate(docs[that]):
            special = False
            the_first = False
            the_include = ''
            for pair, include in regions[that]:
                low, high = pair
                if low <= slot <= high:
                    special = True
                    if low == slot:
                        the_first = True
                        the_include = include
            if not special:
                buf.append(line)
                continue
            if the_first:
                buf.append(flat[the_include])
        flat[that] = '\n'.join(buf) + '\n'

    return [[job for job in chain if job not in flat] for chain in jobs]
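
# Control-flow sketch for the caller in concatenate below: parts without includes are
# flattened first; each rollup pass then replaces the include regions of the current chain
# heads with the already flat text of the included parts and drops flattened jobs from the
# chains; once every remaining chain head is the root marker, rollup returns [[]] and the
# caller's while loop stops.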


@no_type_check
def collect_assets(collector: list[str]) -> None:
    """Copy the collected image and diagram assets into local images/ and diagrams/ folders (placeholder on miss)."""
    images = pathlib.Path(IMAGES_FOLDER)
    images.mkdir(parents=True, exist_ok=True)
    diagrams = pathlib.Path(DIAGRAMS_FOLDER)
    diagrams.mkdir(parents=True, exist_ok=True)
    for img_path in collector:
        if IMAGES_FOLDER in img_path:
            source_asset = DOC_BASE / img_path
            target_asset = images / pathlib.Path(img_path).name
            try:
                shutil.copy(source_asset, target_asset)
            except FileNotFoundError as err:
                log.error(err)
                code, msg = plh.dump_placeholder(target_asset)
                log.warning(msg) if code else log.info(msg)
            continue
        if DIAGRAMS_FOLDER in img_path:
            source_asset = DOC_BASE / img_path
            target_asset = diagrams / pathlib.Path(img_path).name
            try:
                shutil.copy(source_asset, target_asset)
            except FileNotFoundError as err:
                log.error(err)
                code, msg = plh.dump_placeholder(target_asset)
                log.warning(msg) if code else log.info(msg)


@no_type_check
def concatenate(
    doc_root: Union[str, pathlib.Path],
    structure_name: str,
    target_key: str,
    facet_key: str,
    options: dict[str, Union[bool, str]],
) -> int:
    """Concatenate the markdown tree of the target and facet into a single document.md and collect its assets."""
    log.info(LOG_SEPARATOR)
    log.info('entered concat function ...')
    target_code = target_key
    facet_code = facet_key
    if not facet_code.strip() or not target_code.strip():
        log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes')
        return 2

    log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')

    structure, asset_map = gat.prelude(
        doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat'
    )
    log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')
    rel_concat_folder_path = pathlib.Path('render/pdf/')
    rel_concat_folder_path.mkdir(parents=True, exist_ok=True)
    os.chdir(rel_concat_folder_path)
    log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)')

    ok, aspect_map = too.load_target(target_code, facet_code)
    if not ok or not aspect_map:
        return 0 if ok else 1

    approvals = process_approvals(aspect_map)
    if isinstance(approvals, int):
        return 1
    binder = process_binder(aspect_map)
    if isinstance(binder, int):
        return 1
    changes = process_changes(aspect_map)
    if isinstance(changes, int):
        return 1
    metadata = process_meta(aspect_map)
    if isinstance(metadata, int):
        return 1

    root = SLASH
    root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH
    tree = treelib.Tree()
    tree.create_node(root, root)
    documents = {}
    insert_regions = {}
    img_collector = []
    log.info(LOG_SEPARATOR)
    log.info('processing binder ...')
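    # The nested loops below scan the binder entries and then every discovered include,
    # up to four levels deep, for two kinds of insertion slots:
    #   - code-block read slots (READ_SLOT_FENCE_BEGIN ... with open('...') ... READ_SLOT_FENCE_END)
    #   - \include{...} directives
    # Every hit becomes an insert region plus a node in the inclusion tree; image lines are
    # rewritten on the fly via adapt_image.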
    for entry in binder:
        ref_path = DOC_BASE / entry
        log.debug(f'- {entry} as {ref_path}')
        with open(ref_path, 'rt', encoding=ENCODING) as handle:
            documents[entry] = [line.rstrip() for line in handle.readlines()]
        insert_regions[entry] = []
        in_region = False
        begin, end = 0, 0
        include = ''
        tree.create_node(entry, entry, parent=root)
        for slot, line in enumerate(documents[entry]):
            if line.startswith(IMG_LINE_STARTSWITH):
                documents[entry][slot] = adapt_image(line, img_collector, entry, root_path)
            log.debug(f'{slot :02d}|{line.rstrip()}')
            if not in_region:
                if line.startswith(READ_SLOT_FENCE_BEGIN):
                    in_region = True
                    begin = slot
                    continue
                if line.startswith(INCLUDE_SLOT):
                    include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
                    insert_regions[entry].append(((slot, slot), include))
                    tree.create_node(include, include, parent=entry)
                    include = ''
                    continue
            if in_region:
                if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                    include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                elif line.startswith(READ_SLOT_FENCE_END):
                    end = slot
                    insert_regions[entry].append(((begin, end), include))
                    tree.create_node(include, include, parent=entry)
                    in_region = False
                    begin, end = 0, 0
                    include = ''

        for coords, include in insert_regions[entry]:  # include is anchored on DOC_BASE
            ref_path = DOC_BASE / include
            with open(ref_path, 'rt', encoding=ENCODING) as handle:
                documents[include] = [line.rstrip() for line in handle.readlines()]
            insert_regions[include] = []
            in_region = False
            begin, end = 0, 0
            sub_include = ''
            for slot, line in enumerate(documents[include]):
                if line.startswith(IMG_LINE_STARTSWITH):
                    documents[include][slot] = adapt_image(line, img_collector, include, root_path)
                log.debug(f'{slot :02d}|{line.rstrip()}')
                if not in_region:
                    if line.startswith(READ_SLOT_FENCE_BEGIN):
                        in_region = True
                        begin = slot
                        continue
                    if line.startswith(INCLUDE_SLOT):
                        harvest_include(line, slot, insert_regions, tree, include)
                        continue
                if in_region:
                    if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                        sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                        sub_include = str(pathlib.Path(include).parent / sub_include)
                    elif line.startswith(READ_SLOT_FENCE_END):
                        end = slot
                        insert_regions[include].append(((begin, end), sub_include))
                        tree.create_node(sub_include, sub_include, parent=include)
                        in_region = False
                        begin, end = 0, 0
                        sub_include = ''

            for coords, sub_include in insert_regions[include]:
                ref_path = DOC_BASE / sub_include
                with open(ref_path, 'rt', encoding=ENCODING) as handle:
                    documents[sub_include] = [line.rstrip() for line in handle.readlines()]
                insert_regions[sub_include] = []
                in_region = False
                begin, end = 0, 0
                sub_sub_include = ''
                for slot, line in enumerate(documents[sub_include]):
                    if line.startswith(IMG_LINE_STARTSWITH):
                        documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path)
                    log.debug(f'{slot :02d}|{line.rstrip()}')
                    if not in_region:
                        if line.startswith(READ_SLOT_FENCE_BEGIN):
                            in_region = True
                            begin = slot
                            continue
                        if line.startswith(INCLUDE_SLOT):
                            harvest_include(line, slot, insert_regions, tree, sub_include)
                            continue
                    if in_region:
                        if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                            sub_sub_include = (
                                line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                            )
                            sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include)
                        elif line.startswith(READ_SLOT_FENCE_END):
                            end = slot
                            insert_regions[sub_include].append(((begin, end), sub_sub_include))
                            tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include)
                            in_region = False
                            begin, end = 0, 0
                            sub_sub_include = ''

                for coords, sub_sub_include in insert_regions[sub_include]:
                    ref_path = DOC_BASE / sub_sub_include
                    with open(ref_path, 'rt', encoding=ENCODING) as handle:
                        documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()]
                    insert_regions[sub_sub_include] = []
                    in_region = False
                    begin, end = 0, 0
                    sub_sub_sub_include = ''
                    for slot, line in enumerate(documents[sub_sub_include]):
                        if line.startswith(IMG_LINE_STARTSWITH):
                            documents[sub_sub_include][slot] = adapt_image(
                                line, img_collector, sub_sub_include, root_path
                            )
                        log.debug(f'{slot :02d}|{line.rstrip()}')
                        if not in_region:
                            if line.startswith(READ_SLOT_FENCE_BEGIN):
                                in_region = True
                                begin = slot
                                continue
                            if line.startswith(INCLUDE_SLOT):
                                harvest_include(line, slot, insert_regions, tree, sub_sub_include)
                                continue
                        if in_region:
                            if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                                sub_sub_sub_include = (
                                    line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                                )
                                sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include)
                            elif line.startswith(READ_SLOT_FENCE_END):
                                end = slot
                                insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include))
                                tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include)
                                in_region = False
                                begin, end = 0, 0
                                sub_sub_sub_include = ''

                    for coords, sub_sub_sub_include in insert_regions[sub_sub_include]:
                        ref_path = DOC_BASE / sub_sub_sub_include
                        with open(ref_path, 'rt', encoding=ENCODING) as handle:
                            documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()]
                        insert_regions[sub_sub_sub_include] = []
                        in_region = False
                        begin, end = 0, 0
                        sub_sub_sub_sub_include = ''
                        for slot, line in enumerate(documents[sub_sub_sub_include]):
                            if line.startswith(IMG_LINE_STARTSWITH):
                                documents[sub_sub_sub_include][slot] = adapt_image(
                                    line, img_collector, sub_sub_sub_include, root_path
                                )
                            log.debug(f'{slot :02d}|{line.rstrip()}')
                            if not in_region:
                                if line.startswith(READ_SLOT_FENCE_BEGIN):
                                    in_region = True
                                    begin = slot
                                    continue
                                if line.startswith(INCLUDE_SLOT):
                                    harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include)
                                    continue
                            if in_region:
                                if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                                    sub_sub_sub_sub_include = (
                                        line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                                    )
                                    sub_sub_sub_sub_include = str(
                                        pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include
                                    )
                                elif line.startswith(READ_SLOT_FENCE_END):
                                    end = slot
                                    insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include))
                                    tree.create_node(
                                        sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include
                                    )
                                    in_region = False
                                    begin, end = 0, 0
                                    sub_sub_sub_sub_include = ''

    top_down_paths = tree.paths_to_leaves()
    bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths]
    log.info(LOG_SEPARATOR)
    log.info('resulting tree:')
    for edge in str(tree).split(NL):
        log.info(edge)

    log.info(LOG_SEPARATOR)
    log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:')
    for num, leaf_path in enumerate(bottom_up_paths):
        the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}'
        log.info(f'{num :2d}: {the_way_up}')

    concat = {}
    log.info(LOG_SEPARATOR)
    log.info(f'dependencies for the {len(insert_regions)} document parts:')
    for key, regions in insert_regions.items():
        num_in = len(regions)
        dashes = '-' * num_in
        incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )'
        indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}'
        log.info(f'- part {key} {indicator}')
        for region in regions:
            between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}'
            insert = f'include fragment {region[1]}'
            log.info(f' + {between} {insert}')
        if not regions:  # No includes
            concat[key] = '\n'.join(documents[key]) + '\n'
            log.info(f' * did concat {key} document for insertion')

    chains = [leaf_path for leaf_path in bottom_up_paths]
    log.info(LOG_SEPARATOR)
    log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:')
    todo = [[job for job in chain if job not in concat] for chain in chains]
    while todo != [[]]:
        todo = rollup(todo, documents, insert_regions, concat)

    log.info(LOG_SEPARATOR)
    log.info('writing final concat markdown to document.md')
    with open('document.md', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(concat[bind] for bind in binder) + '\n')

    log.info(LOG_SEPARATOR)
    log.info('collecting assets (images and diagrams)')
    collect_assets(img_collector)
    log.info(LOG_SEPARATOR)
    log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)')
    log.info(LOG_SEPARATOR)
    log.info('processing complete - SUCCESS')
    log.info(LOG_SEPARATOR)
    return 0
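

# Minimal invocation sketch (illustrative only; the import path and argument values are
# hypothetical, and the options mapping is not referenced inside this function):
#
#     import liitos.concat as concat
#
#     rc = concat.concatenate(
#         doc_root='example/deep',
#         structure_name='structure.yml',
#         target_key='prod_kit',
#         facet_key='deep',
#         options={},
#     )
#     raise SystemExit(rc)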