1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs.""" 

2 

3import json 

4import os 

5import pathlib 

6import re 

7import shutil 

8import sys 

9from io import StringIO 

10from typing import Union, no_type_check 

11 

12import treelib # type: ignore 

13import yaml 

14 

15import liitos.gather as gat 

16import liitos.placeholder as plh 

17import liitos.tools as too 

18from liitos import ENCODING, LOG_SEPARATOR, log 

19 

20ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC' 

21CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT' 

22DOC_BASE = pathlib.Path('..', '..') 

23STRUCTURE_PATH = DOC_BASE / 'structure.yml' 

24SLASH = '/' 

25IMAGES_FOLDER = 'images/' 

26DIAGRAMS_FOLDER = 'diagrams/' 

27 

28""" 

29```{.python .cb.run} 

30with open('sub/as.md') as fp: 

31 print(fp.read()) 

32``` 

33""" 

34READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}' 

35READ_SLOT_CONTEXT_BEGIN = 'with open(' 

36READ_SLOT_FENCE_END = '```' 

37 

38r""" 

39\include{markdown_file_path} 

40""" 

41INCLUDE_SLOT = '\\include{' 

42 

43""" 

44![Alt Text Red](images/red.png "Caption Text Red") 

45![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime") 

46![Alt Text Blue](images/blue.png "Caption Text Blue") 

47![Alt Text Sting Red](other/images/red.png "Caption Text Sting Red") 

48""" 

49IMG_LINE_STARTSWITH = '![' 

50MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 

51MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 
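
# Illustrative only: for the first sample line in the block above, MD_IMG_PATTERN yields
#
#     MD_IMG_PATTERN.match('![Alt Text Red](images/red.png "Caption Text Red")').groupdict()
#     # -> {'cap': 'Alt Text Red', 'src': 'images/red.png', 'alt': 'Caption Text Red', 'rest': ''}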

NL = '\n'


@no_type_check
class RedirectedStdout:
    """Context manager that redirects stdout to an in-memory buffer readable via str()."""

    @no_type_check
    def __init__(self):
        self._stdout = None
        self._string_io = None

    @no_type_check
    def __enter__(self):
        self._stdout = sys.stdout
        sys.stdout = self._string_io = StringIO()
        return self

    @no_type_check
    def __exit__(self, type, value, traceback):
        sys.stdout = self._stdout

    @no_type_check
    def __str__(self):
        return self._string_io.getvalue()
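
# A minimal usage sketch (illustrative only; the names below are not part of this module):
#
#     with RedirectedStdout() as captured:
#         print('hello')
#     text = str(captured)  # -> 'hello\n'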


@no_type_check
def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]:
    """Load the approvals data, write it as approvals.yml or approvals.json, and return it (or 1 on error)."""
    approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS]
    if not approvals_path.is_file() or not approvals_path.stat().st_size:
        log.error(f'destructure failed to find non-empty approvals file at {approvals_path}')
        return 1
    if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported')
        return 1
    approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(approvals_path, 'rt', encoding=ENCODING) as handle:
        approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle)
    if not approvals:
        log.error(f'empty approvals file? Please add approvals to ({approvals_path})')
        return 1
    if approvals_channel == 'yaml':
        with open('approvals.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(approvals, handle, default_flow_style=False)
    else:
        with open('approvals.json', 'wt', encoding=ENCODING) as handle:
            json.dump(approvals, handle, indent=2)
    return approvals


@no_type_check
def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]:
    """Read the bind file of component paths, write a local bind.txt, and return the entries (or 1 on error)."""
    bind_path = DOC_BASE / aspects[gat.KEY_BIND]
    if not bind_path.is_file() or not bind_path.stat().st_size:
        log.error(f'destructure failed to find non-empty bind file at {bind_path}')
        return 1
    if bind_path.suffix.lower() not in ('.txt',):
        log.error(f'bind file format per suffix ({bind_path.suffix}) not supported')
        return 1
    with open(bind_path, 'rt', encoding=ENCODING) as handle:
        binder = [line.strip() for line in handle.readlines() if line.strip()]
    if not binder:
        log.error(f'empty bind file? Please add component paths to ({bind_path})')
        return 1
    with open('bind.txt', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(binder) + '\n')
    return binder


@no_type_check
def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]:
    """Load the changes data, write it as changes.yml or changes.json, and return it (or 1 on error)."""
    changes_path = DOC_BASE / aspects[gat.KEY_CHANGES]
    if not changes_path.is_file() or not changes_path.stat().st_size:
        log.error(f'destructure failed to find non-empty changes file at {changes_path}')
        return 1
    if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'changes file format per suffix ({changes_path.suffix}) not supported')
        return 1
    changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(changes_path, 'rt', encoding=ENCODING) as handle:
        changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle)
    if not changes:
        log.error(f'empty changes file? Please add changes data to ({changes_path})')
        return 1
    if changes_channel == 'yaml':
        with open('changes.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(changes, handle, default_flow_style=False)
    else:
        with open('changes.json', 'wt', encoding=ENCODING) as handle:
            json.dump(changes, handle, indent=2)
    return changes


@no_type_check
def process_meta(aspects: dict[str, str]) -> Union[gat.Meta, int]:
    """Load the metadata, apply any base import and patch, write metadata.yml, and return it (or 1 on error)."""
    meta_path = DOC_BASE / aspects[gat.KEY_META]
    if not meta_path.is_file() or not meta_path.stat().st_size:
        log.error(f'destructure failed to find non-empty meta file at {meta_path}')
        return 1
    if meta_path.suffix.lower() not in ('.yaml', '.yml'):
        log.error(f'meta file format per suffix ({meta_path.suffix}) not supported')
        return 1
    with open(meta_path, 'rt', encoding=ENCODING) as handle:
        metadata = yaml.safe_load(handle)
    if not metadata:
        log.error(f'empty metadata file? Please add metadata to ({meta_path})')
        return 1
    if 'import' in metadata['document']:
        base_meta_path = DOC_BASE / metadata['document']['import']
        if not base_meta_path.is_file() or not base_meta_path.stat().st_size:
            log.error(
                f'metadata declares import of base data from ({base_meta_path.name})'
                f' but failed to find non-empty base file at {base_meta_path}'
            )
            return 1
        with open(base_meta_path, 'rt', encoding=ENCODING) as handle:
            base_data = yaml.safe_load(handle)
        for key, value in metadata['document']['patch'].items():
            base_data['document']['common'][key] = value
        metadata = base_data
    with open('metadata.yml', 'wt', encoding=ENCODING) as handle:
        yaml.dump(metadata, handle, default_flow_style=False)
    return metadata


@no_type_check
def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]:
    """Parse a markdown image line within our conventions into caption, src, alt, and optional rest."""
    invalid_marker = ('', '', '', text_line)

    exclam = '!'
    osb = '['
    if not text_line or not text_line.startswith(f'{exclam}{osb}'):
        log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>')
        return invalid_marker

    csb = ']'
    osb_cnt = text_line.count(osb)
    csb_cnt = text_line.count(csb)
    if osb_cnt + csb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if osb_cnt != csb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    orb = '('
    cap_src_boundary = f'{csb}{orb}'
    if cap_src_boundary not in text_line:
        log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>')
        return invalid_marker

    crb = ')'
    orb_cnt = text_line.count(orb)
    crb_cnt = text_line.count(crb)
    if orb_cnt + crb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if orb_cnt != crb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    quo = '"'
    quo_cnt = text_line.count(quo)
    if quo_cnt < 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>')
    if quo_cnt % 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    sp = ' '
    sp_cnt = text_line.count(sp)
    if not sp_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>')

    dot = '.'
    sla = '/'
    abs_path_indicator = f'{csb}{orb}{sla}'
    may_have_abs_path = abs_path_indicator in text_line
    if may_have_abs_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>')
    naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}'
    may_have_upwards_path = naive_upwards_path_indicator in text_line
    if may_have_upwards_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>')

    log.info('- parsing the markdown image text line ...')
    if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt:
        # The regex is not safe for orb inside caption
        left, right = text_line.split(cap_src_boundary, 1)
        match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right)
        if not match_right:
            log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>')
            return invalid_marker

        parts = match_right.groupdict()
        cap = left[2:]
        if not cap:
            log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
            cap = CAP_INJECTOR_HACK

        src = parts['src']
        alt = parts['alt']
        rest = parts['rest']
        if orb in alt or crb in alt:
            log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>')
            log.warning(f'  + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

        return cap, src, alt, rest

    match = MD_IMG_PATTERN.match(text_line)
    if not match:
        log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>')
        return invalid_marker

    parts = match.groupdict()
    cap = parts['cap']
    if not cap:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
        cap = CAP_INJECTOR_HACK

    src = parts['src']
    alt = parts['alt']
    rest = parts['rest']
    if orb in alt or crb in alt:
        log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>')
        log.warning(f'  + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

    return cap, src, alt, rest
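
# Illustrative call (documentation only), using the first sample line from the module constants:
#
#     parse_markdown_image('![Alt Text Red](images/red.png "Caption Text Red")')
#     # -> ('Alt Text Red', 'images/red.png', 'Caption Text Red', '')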


@no_type_check
def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str:
    """Rewrite the image reference to the local images/ or diagrams/ folder and collect the source path."""
    cap, src, alt, rest = parse_markdown_image(text_line)
    if not src:
        log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>')
        return text_line

    img_path = str((pathlib.Path(upstream).parent / src).resolve()).replace(root, '')
    collector.append(img_path)
    img_hack = img_path
    if f'/{IMAGES_FOLDER}' in img_path:
        img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1]
    elif f'/{DIAGRAMS_FOLDER}' in img_path:
        img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1]

    if img_hack != img_path:
        log.info(f'{img_hack} <--- OK? --- {img_path}')

    alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"'
    belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}'
    log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-')
    return belte_og_seler
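
# Illustrative rewrite (assumptions: POSIX paths and a root equal to the current working
# directory with a trailing slash, which is how concatenate() passes it):
#
#     root = str(pathlib.Path.cwd().resolve()).rstrip('/') + '/'
#     adapt_image('![Cap](other/images/red.png "Alt")', [], 'part/a.md', root)
#     # -> '![Cap](images/red.png "Alt")'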


@no_type_check
def harvest_include(
    text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str
) -> None:
    """Record the include found at slot as an insert region of parent and as a node in the tree."""
    include_local = text_line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
    include = str(pathlib.Path(parent).parent / include_local)
    regions[parent].append(((slot, slot), include))
    tree.create_node(include, include, parent=parent)
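
# Illustrative effect (hypothetical paths; the tree node for the parent must already exist):
#
#     regions = {'chapter/main.md': []}
#     tree = treelib.Tree()
#     tree.create_node('chapter/main.md', 'chapter/main.md')
#     harvest_include('\\include{other/part.md}', 3, regions, tree, 'chapter/main.md')
#     # regions['chapter/main.md'] == [((3, 3), 'chapter/other/part.md')]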


@no_type_check
def rollup(
    jobs: list[list[str]],
    docs: dict[str, list[str]],
    regions: dict[str, list[tuple[tuple[int, int], str]]],
    flat: dict[str, str],
) -> list[list[str]]:
    """Splice already flattened parts into their parents (one level per call) and return the remaining jobs."""
    tackle = [those[0] for those in jobs if those and those[0] != SLASH]
    if tackle:
        log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(tackle)))}) remaining')
    else:
        return [[]]
    for that in tackle:
        buf = []
        for slot, line in enumerate(docs[that]):
            special = False
            the_first = False
            the_include = ''
            for pair, include in regions[that]:
                low, high = pair
                if low <= slot <= high:
                    special = True
                if low == slot:
                    the_first = True
                    the_include = include
            if not special:
                buf.append(line)
                continue
            if the_first:
                buf.append(flat[the_include])
        flat[that] = '\n'.join(buf) + '\n'

    return [[job for job in chain if job not in flat] for chain in jobs]
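
# Illustrative single pass (hypothetical two-part chain where b.md has already been
# flattened into flat and a.md includes it at its second line):
#
#     docs = {'a.md': ['# A', '\\include{b.md}', 'tail']}
#     regions = {'a.md': [((1, 1), 'b.md')], 'b.md': []}
#     flat = {'b.md': '# B\n'}
#     rollup([['a.md', '/']], docs, regions, flat)  # -> [['/']]
#     # flat['a.md'] == '# A\n# B\n\ntail\n'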


@no_type_check
def collect_assets(collector: list[str]) -> None:
    """Copy collected image and diagram assets into local folders, dumping placeholders for missing files."""
    images = pathlib.Path(IMAGES_FOLDER)
    images.mkdir(parents=True, exist_ok=True)
    diagrams = pathlib.Path(DIAGRAMS_FOLDER)
    diagrams.mkdir(parents=True, exist_ok=True)
    for img_path in collector:
        if IMAGES_FOLDER in img_path:
            source_asset = DOC_BASE / img_path
            target_asset = images / pathlib.Path(img_path).name
            try:
                shutil.copy(source_asset, target_asset)
            except FileNotFoundError as err:
                log.error(err)
                code, msg = plh.dump_placeholder(target_asset)
                log.warning(msg) if code else log.info(msg)
            continue
        if DIAGRAMS_FOLDER in img_path:
            source_asset = DOC_BASE / img_path
            target_asset = diagrams / pathlib.Path(img_path).name
            try:
                shutil.copy(source_asset, target_asset)
            except FileNotFoundError as err:
                log.error(err)
                code, msg = plh.dump_placeholder(target_asset)
                log.warning(msg) if code else log.info(msg)


@no_type_check
def concatenate(
    doc_root: Union[str, pathlib.Path],
    structure_name: str,
    target_key: str,
    facet_key: str,
    options: dict[str, Union[bool, str]],
) -> int:
    """Concatenate the markdown tree for the given target and facet into a single document.md file."""
    log.info(LOG_SEPARATOR)
    log.info('entered concat function ...')
    target_code = target_key
    facet_code = facet_key
    if not facet_code.strip() or not target_code.strip():
        log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes')
        return 2

    log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')

    structure, asset_map = gat.prelude(
        doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat'
    )
    log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')
    rel_concat_folder_path = pathlib.Path('render/pdf/')
    rel_concat_folder_path.mkdir(parents=True, exist_ok=True)
    os.chdir(rel_concat_folder_path)
    log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)')

    ok, aspect_map = too.load_target(target_code, facet_code)
    if not ok or not aspect_map:
        return 0 if ok else 1

    approvals = process_approvals(aspect_map)
    if isinstance(approvals, int):
        return 1
    binder = process_binder(aspect_map)
    if isinstance(binder, int):
        return 1
    changes = process_changes(aspect_map)
    if isinstance(changes, int):
        return 1
    metadata = process_meta(aspect_map)
    if isinstance(metadata, int):
        return 1

    root = SLASH
    root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH
    tree = treelib.Tree()
    tree.create_node(root, root)
    documents = {}
    insert_regions = {}
    img_collector = []
    log.info(LOG_SEPARATOR)
    log.info('processing binder ...')

    for entry in binder:
        ref_path = DOC_BASE / entry
        log.debug(f'- {entry} as {ref_path}')
        with open(ref_path, 'rt', encoding=ENCODING) as handle:
            documents[entry] = [line.rstrip() for line in handle.readlines()]
        insert_regions[entry] = []
        in_region = False
        begin, end = 0, 0
        include = ''
        tree.create_node(entry, entry, parent=root)
        for slot, line in enumerate(documents[entry]):
            if line.startswith(IMG_LINE_STARTSWITH):
                documents[entry][slot] = adapt_image(line, img_collector, entry, root_path)
            log.debug(f'{slot :02d}|{line.rstrip()}')
            if not in_region:
                if line.startswith(READ_SLOT_FENCE_BEGIN):
                    in_region = True
                    begin = slot
                    continue
                if line.startswith(INCLUDE_SLOT):
                    include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
                    insert_regions[entry].append(((slot, slot), include))
                    tree.create_node(include, include, parent=entry)
                    include = ''
                    continue
            if in_region:
                if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                    include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                elif line.startswith(READ_SLOT_FENCE_END):
                    end = slot
                    insert_regions[entry].append(((begin, end), include))
                    tree.create_node(include, include, parent=entry)
                    in_region = False
                    begin, end = 0, 0
                    include = ''

        for coords, include in insert_regions[entry]:  # include is anchored on DOC_BASE
            ref_path = DOC_BASE / include
            with open(ref_path, 'rt', encoding=ENCODING) as handle:
                documents[include] = [line.rstrip() for line in handle.readlines()]
            insert_regions[include] = []
            in_region = False
            begin, end = 0, 0
            sub_include = ''
            for slot, line in enumerate(documents[include]):
                if line.startswith(IMG_LINE_STARTSWITH):
                    documents[include][slot] = adapt_image(line, img_collector, include, root_path)
                log.debug(f'{slot :02d}|{line.rstrip()}')
                if not in_region:
                    if line.startswith(READ_SLOT_FENCE_BEGIN):
                        in_region = True
                        begin = slot
                        continue
                    if line.startswith(INCLUDE_SLOT):
                        harvest_include(line, slot, insert_regions, tree, include)
                        continue
                if in_region:
                    if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                        sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                        sub_include = str(pathlib.Path(include).parent / sub_include)
                    elif line.startswith(READ_SLOT_FENCE_END):
                        end = slot
                        insert_regions[include].append(((begin, end), sub_include))
                        tree.create_node(sub_include, sub_include, parent=include)
                        in_region = False
                        begin, end = 0, 0
                        sub_include = ''

            for coords, sub_include in insert_regions[include]:
                ref_path = DOC_BASE / sub_include
                with open(ref_path, 'rt', encoding=ENCODING) as handle:
                    documents[sub_include] = [line.rstrip() for line in handle.readlines()]
                insert_regions[sub_include] = []
                in_region = False
                begin, end = 0, 0
                sub_sub_include = ''
                for slot, line in enumerate(documents[sub_include]):
                    if line.startswith(IMG_LINE_STARTSWITH):
                        documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path)
                    log.debug(f'{slot :02d}|{line.rstrip()}')
                    if not in_region:
                        if line.startswith(READ_SLOT_FENCE_BEGIN):
                            in_region = True
                            begin = slot
                            continue
                        if line.startswith(INCLUDE_SLOT):
                            harvest_include(line, slot, insert_regions, tree, sub_include)
                            continue
                    if in_region:
                        if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                            sub_sub_include = (
                                line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                            )
                            sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include)
                        elif line.startswith(READ_SLOT_FENCE_END):
                            end = slot
                            insert_regions[sub_include].append(((begin, end), sub_sub_include))
                            tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include)
                            in_region = False
                            begin, end = 0, 0
                            sub_sub_include = ''

                for coords, sub_sub_include in insert_regions[sub_include]:
                    ref_path = DOC_BASE / sub_sub_include
                    with open(ref_path, 'rt', encoding=ENCODING) as handle:
                        documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()]
                    insert_regions[sub_sub_include] = []
                    in_region = False
                    begin, end = 0, 0
                    sub_sub_sub_include = ''
                    for slot, line in enumerate(documents[sub_sub_include]):
                        if line.startswith(IMG_LINE_STARTSWITH):
                            documents[sub_sub_include][slot] = adapt_image(
                                line, img_collector, sub_sub_include, root_path
                            )
                        log.debug(f'{slot :02d}|{line.rstrip()}')
                        if not in_region:
                            if line.startswith(READ_SLOT_FENCE_BEGIN):
                                in_region = True
                                begin = slot
                                continue
                            if line.startswith(INCLUDE_SLOT):
                                harvest_include(line, slot, insert_regions, tree, sub_sub_include)
                                continue
                        if in_region:
                            if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                                sub_sub_sub_include = (
                                    line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                                )
                                sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include)
                            elif line.startswith(READ_SLOT_FENCE_END):
                                end = slot
                                insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include))
                                tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include)
                                in_region = False
                                begin, end = 0, 0
                                sub_sub_sub_include = ''

                    for coords, sub_sub_sub_include in insert_regions[sub_sub_include]:
                        ref_path = DOC_BASE / sub_sub_sub_include
                        with open(ref_path, 'rt', encoding=ENCODING) as handle:
                            documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()]
                        insert_regions[sub_sub_sub_include] = []
                        in_region = False
                        begin, end = 0, 0
                        sub_sub_sub_sub_include = ''
                        for slot, line in enumerate(documents[sub_sub_sub_include]):
                            if line.startswith(IMG_LINE_STARTSWITH):
                                documents[sub_sub_sub_include][slot] = adapt_image(
                                    line, img_collector, sub_sub_sub_include, root_path
                                )
                            log.debug(f'{slot :02d}|{line.rstrip()}')
                            if not in_region:
                                if line.startswith(READ_SLOT_FENCE_BEGIN):
                                    in_region = True
                                    begin = slot
                                    continue
                                if line.startswith(INCLUDE_SLOT):
                                    harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include)
                                    continue
                            if in_region:
                                if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                                    sub_sub_sub_sub_include = (
                                        line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                                    )
                                    sub_sub_sub_sub_include = str(
                                        pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include
                                    )
                                elif line.startswith(READ_SLOT_FENCE_END):
                                    end = slot
                                    insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include))
                                    tree.create_node(
                                        sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include
                                    )
                                    in_region = False
                                    begin, end = 0, 0
                                    sub_sub_sub_sub_include = ''

    top_down_paths = tree.paths_to_leaves()
    bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths]
    log.info(LOG_SEPARATOR)
    log.info('resulting tree:')
    for edge in str(tree).split(NL):
        log.info(edge)

    log.info(LOG_SEPARATOR)
    log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:')
    for num, leaf_path in enumerate(bottom_up_paths):
        the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}'
        log.info(f'{num :2d}: {the_way_up}')

    concat = {}
    log.info(LOG_SEPARATOR)
    log.info(f'dependencies for the {len(insert_regions)} document parts:')
    for key, regions in insert_regions.items():
        num_in = len(regions)
        dashes = '-' * num_in
        incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )'
        indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}'
        log.info(f'- part {key} {indicator}')
        for region in regions:
            between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}'
            insert = f'include fragment {region[1]}'
            log.info(f'  + {between} {insert}')
        if not regions:  # No includes
            concat[key] = '\n'.join(documents[key]) + '\n'
            log.info(f'  * did concat {key} document for insertion')

    chains = [leaf_path for leaf_path in bottom_up_paths]
    log.info(LOG_SEPARATOR)
    log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:')
    todo = [[job for job in chain if job not in concat] for chain in chains]
    while todo != [[]]:
        todo = rollup(todo, documents, insert_regions, concat)

    log.info(LOG_SEPARATOR)
    log.info('writing final concat markdown to document.md')
    with open('document.md', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(concat[bind] for bind in binder) + '\n')

    log.info(LOG_SEPARATOR)
    log.info('collecting assets (images and diagrams)')
    collect_assets(img_collector)
    log.info(LOG_SEPARATOR)
    log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)')
    log.info(LOG_SEPARATOR)
    log.info('processing complete - SUCCESS')
    log.info(LOG_SEPARATOR)
    return 0