Coverage for liitos/concat.py: 88.65%

519 statements  

coverage.py v7.10.7, created at 2025-10-07 19:29:53 +00:00

1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs.""" 

2 

3import json 

4import os 

5import pathlib 

6import re 

7import shutil 

8from typing import Union, no_type_check 

9 

10import treelib 

11import yaml 

12 

13import liitos.gather as gat 

14import liitos.meta as met 

15import liitos.placeholder as plh 

16import liitos.tools as too 

17from liitos import ENCODING, LOG_SEPARATOR, PathLike, log 

18 

19ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC' 

20CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT' 

21DOC_BASE = pathlib.Path('..', '..') 

22STRUCTURE_PATH = DOC_BASE / 'structure.yml' 

23SLASH = '/' 

24IMAGES_FOLDER = 'images/' 

25DIAGRAMS_FOLDER = 'diagrams/' 

26 

27""" 

28```{.python .cb.run} 

29with open('sub/as.md') as fp: 

30 print(fp.read()) 

31``` 

32""" 

33READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}' 

34READ_SLOT_CONTEXT_BEGIN = 'with open(' 

35READ_SLOT_FENCE_END = '```' 

36 

37r""" 

38\include{markdown_file_path} 

39""" 

40INCLUDE_SLOT = '\\include{' 

41 

42""" 

43![Alt Text Red](images/red.png "Caption Text Red") 

44![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime") 

45![Alt Text Blue](images/blue.png "Caption Text Blue") 

46![Alt Text Sting Red](other/images/red.png "Caption Text Sting Red") 

47""" 

48IMG_LINE_STARTSWITH = '![' 

49MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 

50MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 
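# Illustrative sketch (not part of the measured module; the expected groups are an assumption
# from reading the pattern, not captured from a run) of how MD_IMG_PATTERN splits one of the
# example lines above into its named groups:
#
#     m = MD_IMG_PATTERN.match('![Alt Text Red](images/red.png "Caption Text Red")')
#     m.groupdict()
#     # {'cap': 'Alt Text Red', 'src': 'images/red.png', 'alt': 'Caption Text Red', 'rest': ''}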

51 

52NL = '\n' 

53 

54 

55@no_type_check 

56def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]: 

57 """Best effort loading of approvals data. 

58 

59 Examples: 

60 

61 >>> aspects = {gat.KEY_APPROVALS: 'missing-file'} 

62 >>> process_approvals(aspects) 

63 1 

64 

65 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

66 >>> approvals_name = 'empty-as-approvals.yml' 

67 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)} 

68 >>> process_approvals(aspects) 

69 1 

70 

71 >>> DOC_BASE = pathlib.Path('.') 

72 >>> aspects = {gat.KEY_APPROVALS: __file__} 

73 >>> process_approvals(aspects) 

74 1 

75 

76 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

77 >>> approvals_name = 'space-as-approvals.yml' 

78 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)} 

79 >>> process_approvals(aspects) 

80 1 

81 """ 

82 approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS] 

83 if not approvals_path.is_file() or not approvals_path.stat().st_size: 

84 log.error(f'destructure failed to find non-empty approvals file at {approvals_path}') 

85 return 1 

86 if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'): 

87 log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported') 

88 return 1 

89 approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json' 

90 with open(approvals_path, 'rt', encoding=ENCODING) as handle: 

91 approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle) 

92 if not approvals:  [92 ↛ 93] line 92 didn't jump to line 93 because the condition on line 92 was never true

93 log.error(f'empty approvals file? Please add approvals to ({approvals_path})') 

94 return 1 

95 if approvals_channel == 'yaml':  [95 ↛ 99] line 95 didn't jump to line 99 because the condition on line 95 was always true

96 with open('approvals.yml', 'wt', encoding=ENCODING) as handle: 

97 yaml.dump(approvals, handle, default_flow_style=False) 

98 else: 

99 with open('approvals.json', 'wt', encoding=ENCODING) as handle: 

100 json.dump(approvals, handle, indent=2) 

101 return approvals 

102 

103 

104@no_type_check 

105def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]: 

106 """Best effort loading of binder data. 

107 

108 Examples: 

109 

110 >>> aspects = {gat.KEY_BIND: 'missing-file'} 

111 >>> process_binder(aspects) 

112 1 

113 

114 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

115 >>> binder_name = 'empty-as-bind.txt' 

116 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)} 

117 >>> process_binder(aspects) 

118 1 

119 

120 >>> DOC_BASE = pathlib.Path('.') 

121 >>> aspects = {gat.KEY_BIND: __file__} 

122 >>> process_binder(aspects) 

123 1 

124 

125 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

126 >>> binder_name = 'space-as-bind.txt' 

127 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)} 

128 >>> process_binder(aspects) 

129 1 

130 """ 

131 bind_path = DOC_BASE / aspects[gat.KEY_BIND] 

132 if not bind_path.is_file() or not bind_path.stat().st_size: 

133 log.error(f'destructure failed to find non-empty bind file at {bind_path}') 

134 return 1 

135 if bind_path.suffix.lower() not in ('.txt',): 

136 log.error(f'bind file format per suffix ({bind_path.suffix}) not supported') 

137 return 1 

138 with open(bind_path, 'rt', encoding=ENCODING) as handle: 

139 binder = [line.strip() for line in handle.readlines() if line.strip()] 

140 if not binder:  [140 ↛ 141] line 140 didn't jump to line 141 because the condition on line 140 was never true

141 log.error(f'empty bind file? Please add component paths to ({bind_path})') 

142 return 1 

143 with open('bind.txt', 'wt', encoding=ENCODING) as handle: 

144 handle.write('\n'.join(binder) + '\n') 

145 return binder 

146 

147 

148@no_type_check 

149def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]: 

150 """Best effort loading of changes data. 

151 

152 Examples: 

153 

154 >>> aspects = {gat.KEY_CHANGES: 'missing-file'} 

155 >>> process_changes(aspects) 

156 1 

157 

158 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

159 >>> changes_name = 'empty-as-changtes.yml' 

160 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)} 

161 >>> process_changes(aspects) 

162 1 

163 

164 >>> DOC_BASE = pathlib.Path('.') 

165 >>> aspects = {gat.KEY_CHANGES: __file__} 

166 >>> process_changes(aspects) 

167 1 

168 

169 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

170 >>> changes_name = 'space-as-changes.yml' 

171 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)} 

172 >>> process_changes(aspects) 

173 1 

174 """ 

175 changes_path = DOC_BASE / aspects[gat.KEY_CHANGES] 

176 if not changes_path.is_file() or not changes_path.stat().st_size: 

177 log.error(f'destructure failed to find non-empty changes file at {changes_path}') 

178 return 1 

179 if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'): 

180 log.error(f'changes file format per suffix ({changes_path.suffix}) not supported') 

181 return 1 

182 changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json' 

183 with open(changes_path, 'rt', encoding=ENCODING) as handle: 

184 changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle) 

185 if not changes:  [185 ↛ 186] line 185 didn't jump to line 186 because the condition on line 185 was never true

186 log.error(f'empty changes file? Please add changes data to ({changes_path})') 

187 return 1 

188 if changes_channel == 'yaml':  [188 ↛ 192] line 188 didn't jump to line 192 because the condition on line 188 was always true

189 with open('changes.yml', 'wt', encoding=ENCODING) as handle: 

190 yaml.dump(changes, handle, default_flow_style=False) 

191 else: 

192 with open('changes.json', 'wt', encoding=ENCODING) as handle: 

193 json.dump(changes, handle, indent=2) 

194 return changes 

195 

196 

197@no_type_check 

198def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]: 

199 """Parse a markdown image line within our conventions into caption, src, alt, and optional rest. 

200 

201 Examples: 

202 

203 >>> t = '' 

204 >>> parse_markdown_image(t) 

205 ('', '', '', '') 

206 

207 >>> t = '![]()' 

208 >>> parse_markdown_image(t) 

209 ('', '', '', '![]()') 

210 

211 >>> t = '![a](b "c")' 

212 >>> parse_markdown_image(t) 

213 ('a', 'b', 'c', '') 

214 

215 >>> t = '![a](liitos/placeholders/this-resource-is-missing.png "c")' 

216 >>> parse_markdown_image(t) 

217 ('a', 'liitos/placeholders/this-resource-is-missing.png', 'c', '') 

218 """ 

219 invalid_marker = ('', '', '', text_line) 

220 

221 exclam = '!' 

222 osb = '[' 

223 if not text_line or not text_line.startswith(f'{exclam}{osb}'): 

224 log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>') 

225 return invalid_marker 

226 

227 csb = ']' 

228 osb_cnt = text_line.count(osb) 

229 csb_cnt = text_line.count(csb) 

230 if osb_cnt + csb_cnt < 2: 

231 log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

232 return invalid_marker 

233 if osb_cnt != csb_cnt: 

234 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

235 

236 orb = '(' 

237 cap_src_boundary = f'{csb}{orb}' 

238 if cap_src_boundary not in text_line: 

239 log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>') 

240 return invalid_marker 

241 

242 crb = ')' 

243 orb_cnt = text_line.count(orb) 

244 crb_cnt = text_line.count(crb) 

245 if orb_cnt + crb_cnt < 2: 

246 log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

247 return invalid_marker 

248 if orb_cnt != crb_cnt: 

249 log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

250 

251 quo = '"' 

252 quo_cnt = text_line.count(quo) 

253 if quo_cnt < 2: 

254 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

255 if quo_cnt % 2: 

256 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

257 

258 sp = ' ' 

259 sp_cnt = text_line.count(sp) 

260 if not sp_cnt: 

261 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

262 

263 dot = '.' 

264 sla = '/' 

265 abs_path_indicator = f'{csb}{orb}{sla}' 

266 may_have_abs_path = abs_path_indicator in text_line 

267 if may_have_abs_path: 

268 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>') 

269 naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}' 

270 may_have_upwards_path = naive_upwards_path_indicator in text_line 

271 if may_have_upwards_path: 

272 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>') 

273 

274 log.info('- parsing the markdown image text line ...') 

275 if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt: 

276 # The regex is not safe for orb inside caption 

277 left, right = text_line.split(cap_src_boundary, 1) 

278 match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right) 

279 if not match_right: 

280 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>') 

281 return invalid_marker 

282 

283 parts = match_right.groupdict() 

284 cap = left[2:] 

285 if not cap: 

286 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>') 

287 cap = CAP_INJECTOR_HACK 

288 

289 src = parts['src'] 

290 alt = parts['alt'] 

291 rest = parts['rest'] 

292 if orb in alt or crb in alt: 

293 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>') 

294 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

295 for msg in too.incoherent_math_mode_in_caption(cap, phase_info=f'detected in markdown image ({src}) parsing'):  [295 ↛ 296] line 295 didn't jump to line 296 because the loop on line 295 never started

296 log.warning(msg) 

297 

298 return cap, src, alt, rest 

299 

300 match = MD_IMG_PATTERN.match(text_line) 

301 if not match: 

302 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>') 

303 return invalid_marker 

304 

305 parts = match.groupdict() 

306 cap = parts['cap'] 

307 if not cap: 

308 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>') 

309 cap = CAP_INJECTOR_HACK 

310 

311 src = parts['src'] 

312 alt = parts['alt'] 

313 rest = parts['rest'] 

314 if orb in alt or crb in alt:  [314 ↛ 315] line 314 didn't jump to line 315 because the condition on line 314 was never true

315 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>') 

316 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

317 

318 return cap, src, alt, rest 
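# Illustrative sketch (assumption, not part of the measured module): a caption or title that
# itself contains parentheses takes the splitting branch above instead of the full-line regex:
#
#     parse_markdown_image('![Fig (1)](images/x.png "cap (x)")')
#     # expected to yield ('Fig (1)', 'images/x.png', 'cap (x)', '') and to log a
#     # MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH warning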

319 

320 

321@no_type_check 

322def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str: 

323 """YES.""" 

324 cap, src, alt, rest = parse_markdown_image(text_line) 

325 if not src:  [325 ↛ 326] line 325 didn't jump to line 326 because the condition on line 325 was never true

326 log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>') 

327 return text_line 

328 

329 log.info(f'called adapt_image({text_line}, ..., {upstream=}, {root=}) --> {src=}') 

330 the_aquarium = pathlib.Path(upstream).parent 

331 the_bowl = pathlib.Path(src).parent 

332 the_fish = pathlib.Path(src).name 

333 dest_path = (pathlib.Path(root) / '../../' / the_aquarium / the_bowl).resolve() / the_fish 

334 img_path = os.path.relpath(dest_path, start=root) 

335 log.info(f'path remapped to {img_path}') 

336 collector.append(img_path) 

337 img_hack = img_path 

338 if f'/{IMAGES_FOLDER}' in img_path: 

339 img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1] 

340 elif f'/{DIAGRAMS_FOLDER}' in img_path: 

341 img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1] 

342 

343 if img_hack != img_path: 

344 log.info(f'{img_hack} <--- OK? --- {img_path}') 

345 

346 alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"' 

347 belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}' 

348 log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-') 

349 return belte_og_seler 
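# Illustrative sketch (assumption, not part of the measured module): called from render/pdf/
# for a binder part 'part/a.md', an image line keeps its images/ source while the resolved
# relative path is collected for the later asset copy:
#
#     collector: list[str] = []
#     adapt_image('![Cap](images/red.png "Alt")', collector, upstream='part/a.md', root=os.getcwd() + '/')
#     # expected: '![Cap](images/red.png "Alt")' returned, '../../part/images/red.png' collected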

350 

351 

352@no_type_check 

353def harvest_include( 

354 text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str 

355) -> None: 

356 r"""TODO. 

357 

358 Examples: 

359 

360 >>> text = 'baz\n\\include{c}\nquux' 

361 >>> slot = 0 

362 >>> regions = {SLASH: [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]} 

363 >>> tr = treelib.Tree() 

364 >>> root = SLASH 

365 >>> tr.create_node(root, root) 

366 Node(tag=/, identifier=/, data=None) 

367 >>> harvest_include(text, slot, regions, tr, root) 

368 >>> print(tr) 

369 / 

370 └── /c} 

371 quux 

372 <BLANKLINE> 

373 """ 

374 include_local = text_line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip() 

375 include = str(pathlib.Path(parent).parent / include_local) 

376 regions[parent].append(((slot, slot), include)) 

377 tree.create_node(include, include, parent=parent) 

378 

379 

380@no_type_check 

381def rollup( 

382 jobs: list[list[str]], 

383 docs: dict[str, list[str]], 

384 regions: dict[str, list[tuple[tuple[int, int], str]]], 

385 flat: dict[str, str], 

386) -> list[list[str]]: 

387 r"""TODO. 

388 

389 Examples: 

390 

391 >>> jobs = [['a', 'b'], ['b', 'c']] 

392 >>> docs = {'a': ['a1', 'a2'], 'b': ['b1', 'b2'], 'c': ['c1', 'c2', 'c3']} 

393 >>> regions = {'a': [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]} 

394 >>> flat = {'a': 'a1\na2', 'b': 'b1\nb2', 'c': 'c1\nc2\nc3'} 

395 >>> rollup(jobs, docs, regions, flat) 

396 [[], []] 

397 >>> flat 

398 {'a': 'b1\nb2\n', 'b': 'c1\nc2\nc3\n', 'c': 'c1\nc2\nc3'} 

399 

400 >>> jobs = [['/', 'b'], ['/', 'c']] 

401 >>> docs, regions, flat = {}, {}, {'baz': 'quux'} 

402 >>> rollup(jobs, docs, regions, flat) 

403 [[]] 

404 >>> flat 

405 {'baz': 'quux'} 

406 """ 

407 tackle = [those[0] for those in jobs if those and those[0] != SLASH] 

408 if tackle: 

409 log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(tackle)))}) remaining') 

410 else: 

411 return [[]] 

412 for that in tackle: 

413 buf = [] 

414 for slot, line in enumerate(docs[that]): 

415 special = False 

416 the_first = False 

417 the_include = '' 

418 for pair, include in regions[that]: 

419 low, high = pair 

420 if low <= slot <= high: 

421 special = True 

422 if low == slot: 

423 the_first = True 

424 the_include = include 

425 if not special: 

426 buf.append(line) 

427 continue 

428 if the_first: 

429 buf.append(flat[the_include]) 

430 flat[that] = '\n'.join(buf) + '\n' 

431 

432 return [[job for job in chain if job not in flat] for chain in jobs] 

433 

434 

435def copy_eventually(src_base: pathlib.Path, tgt_base: pathlib.Path, local_path: PathLike) -> None: 

436 """Copy visual assets eventually and warn on overwrites.""" 

437 if not tgt_base.is_dir(): 

438 try: 

439 tgt_base.mkdir(parents=True, exist_ok=True) 

440 except FileExistsError as err: 

441 log.error(f'failed to create folder {tgt_base} - detail: {err}') 

442 source_asset = src_base / local_path 

443 if not source_asset.is_file():  [443 ↛ 446] line 443 didn't jump to line 446 because the condition on line 443 was always true

444 log.info(f'falling back to {local_path} instead of {source_asset=}, ignoring {src_base=}') 

445 source_asset = pathlib.Path(local_path) # TODO: Since adapt_image fix receive paths incl. src_base 

446 target_asset = tgt_base / pathlib.Path(local_path).name 

447 if target_asset.is_file(): 

448 log.warning(f'overwriting existing {target_asset} from {source_asset}') 

449 try: 

450 shutil.copy(source_asset, target_asset) 

451 except FileNotFoundError as err: 

452 log.error(err) 

453 code, msg = plh.dump_placeholder(target_asset) 

454 log.warning(msg) if code else log.info(msg) 

455 except NotADirectoryError as err: 

456 log.error(err) 

457 code, msg = plh.dump_placeholder(target_asset) 

458 log.warning(msg) if code else log.info(msg) 
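# Illustrative sketch (assumption, not part of the measured module): this mirrors how
# collect_assets drives the copy - from the document base two levels up into the local
# images/ folder, with a placeholder dumped if the source cannot be found:
#
#     copy_eventually(pathlib.Path('..', '..'), pathlib.Path('images/'), '../../part/images/red.png')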

459 

460 

461@no_type_check 

462def collect_assets( 

463 collector: list[str], 

464 doc_base: Union[PathLike, None] = None, 

465 images_folder: Union[PathLike, None] = None, 

466 diagrams_folder: Union[PathLike, None] = None, 

467) -> None: 

468 """Collect assets into the rendering space. 

469 

470 Examples: 

471 

472 >>> c = ['foo'] 

473 >>> collect_assets(c) 

474 

475 >>> import tempfile 

476 >>> with tempfile.TemporaryDirectory() as imaf: 

477 ... c = [imaf + 'foo'] 

478 ... collect_assets(c, doc_base='.', images_folder=imaf) 

479 

480 >>> import tempfile 

481 >>> with tempfile.TemporaryDirectory() as imaf: 

482 ... with tempfile.TemporaryDirectory() as diaf: 

483 ... c = [imaf + 'foo', diaf + 'bar'] 

484 ... collect_assets(c, doc_base='.', images_folder=imaf, diagrams_folder=diaf) 

485 

486 >>> import tempfile 

487 >>> with tempfile.TemporaryDirectory() as imaf: 

488 ... ima = pathlib.Path(imaf) / 'images' 

489 ... ima.touch() 

490 ... with tempfile.TemporaryDirectory() as diaf: 

491 ... dia = pathlib.Path(diaf) / 'diagrams' 

492 ... dia.touch() 

493 ... c = [str(ima / 'foo'), str(dia / 'bar')] 

494 ... collect_assets(c, doc_base='.', images_folder=ima, diagrams_folder=dia) 

495 """ 

496 doc_base = pathlib.Path(doc_base) if doc_base else DOC_BASE 

497 img_part = str(images_folder) if images_folder else IMAGES_FOLDER 

498 dia_part = str(diagrams_folder) if diagrams_folder else DIAGRAMS_FOLDER 

499 for img_path in collector: 

500 where_to = img_part if img_part in img_path else (dia_part if dia_part in img_path else None) 

501 if where_to is not None: 

502 log.info(f'calling copy_eventually({doc_base}, {pathlib.Path(where_to)}, {img_path})') 

503 copy_eventually(doc_base, pathlib.Path(where_to), img_path) 

504 else: 

505 log.error(f'asset collection for neither images nor diagrams requested per {img_path} - ignoring') 

506 

507 

508@no_type_check 

509def concatenate( 

510 doc_root: Union[str, pathlib.Path], 

511 structure_name: str, 

512 target_key: str, 

513 facet_key: str, 

514 options: dict[str, Union[bool, str]], 

515) -> int: 

516 """Later alligator. 

517 

518 Examples: 

519 

520 >>> restore_cwd = os.getcwd() 

521 >>> dr = '.' 

522 >>> sn = 'foo' 

523 >>> tk = '' 

524 >>> fk = '' 

525 >>> op = {'bar': True} 

526 >>> concatenate(dr, sn, tk, fk, op, ) 

527 2 

528 >>> os.chdir(restore_cwd) 

529 

530 >>> restore_cwd = os.getcwd() 

531 >>> dr = 'example/no-renda' 

532 >>> sn = 'structure.yml' 

533 >>> tk = 'prod_kind' 

534 >>> fk = 'no-renda' 

535 >>> op = {'force': True} 

536 >>> concatenate(dr, sn, tk, fk, op) 

537 0 

538 >>> os.chdir(restore_cwd) 

539 

540 >>> restore_cwd = os.getcwd() 

541 >>> dr = 'example/tuna' 

542 >>> sn = 'structure.yml' 

543 >>> tk = 'prod_kind' 

544 >>> fk = 'non-existing-facet-key' 

545 >>> op = {'bar': True} 

546 >>> concatenate(dr, sn, tk, fk, op) 

547 1 

548 >>> os.chdir(restore_cwd) 

549 

550 >>> restore_cwd = os.getcwd() 

551 >>> dr = 'test/fixtures/basic/' 

552 >>> sn = 'structure.yml' 

553 >>> tk = 'abc' 

554 >>> fk = 'missing' 

555 >>> op = {'bar': True} 

556 >>> concatenate(dr, sn, tk, fk, op) 

557 2 

558 >>> os.chdir(restore_cwd) 

559 

560 >>> restore_cwd = os.getcwd() 

561 >>> dr = 'example/tuna' 

562 >>> sn = 'structure.yml' 

563 >>> tk = 'prod_kind' 

564 >>> fk = 'tuna' 

565 >>> op = {'bar': True} 

566 >>> concatenate(dr, sn, tk, fk, op) 

567 0 

568 >>> os.chdir(restore_cwd) 

569 

570 >>> restore_cwd = os.getcwd() 

571 >>> dr = 'example/tuna' 

572 >>> sn = 'structure.yml' 

573 >>> tk = 'prod_kind' 

574 >>> fk = 'tuna' 

575 >>> op = {'bar': True} 

576 >>> try: 

577 ... code = concatenate(dr, sn, tk, fk, op) 

578 ... except FileNotFoundError: 

579 ... code = -1 

580 >>> os.chdir(restore_cwd) 

581 >>> code 

582 0 

583 

584 >>> restore_cwd = os.getcwd() 

585 >>> dr = 'example/ejected-templates' 

586 >>> sn = 'structure.yml' 

587 >>> tk = 'prod_kind' 

588 >>> fk = 'ejected-templates' 

589 >>> op = {'bar': True} 

590 >>> try: 

591 ... code = concatenate(dr, sn, tk, fk, op) 

592 ... except FileNotFoundError: 

593 ... code = -1 

594 >>> os.chdir(restore_cwd) 

595 >>> code 

596 0 

597 

598 >>> restore_cwd = os.getcwd() 

599 >>> dr = 'example/ejected-templates' 

600 >>> sn = 'structure.yml' 

601 >>> tk = 'prod_kind' 

602 >>> fk = 'ejected-templates-borked' 

603 >>> op = {'bar': True} 

604 >>> try: 

605 ... code = concatenate(dr, sn, tk, fk, op) 

606 ... except FileNotFoundError: 

607 ... code = -1 

608 >>> os.chdir(restore_cwd) 

609 >>> code 

610 0 

611 

612 >>> restore_cwd = os.getcwd() 

613 >>> dr = 'example/tuna' 

614 >>> sn = 'structure.yml' 

615 >>> tk = 'prod_kind' 

616 >>> fk = 'tuna' 

617 >>> op = {'bar': True} 

618 >>> abs_here = pathlib.Path().resolve() 

619 >>> try: 

620 ... code = concatenate(dr, sn, tk, fk, op) 

621 ... except FileNotFoundError: 

622 ... code = -1 

623 >>> os.chdir(restore_cwd) 

624 >>> code 

625 0 

626 

627 """ 

628 log.info(LOG_SEPARATOR) 

629 log.warning('entered concat function ...') 

630 target_code = target_key 

631 facet_code = facet_key 

632 if not facet_code.strip() or not target_code.strip(): 

633 log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes') 

634 return 2 

635 

636 log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request') 

637 

638 structure, asset_map = gat.prelude( 

639 doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat' 

640 ) 

641 log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)') 

642 rel_concat_folder_path = pathlib.Path('render/pdf/') 

643 rel_concat_folder_path.mkdir(parents=True, exist_ok=True) 

644 os.chdir(rel_concat_folder_path) 

645 log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)') 

646 

647 ok, aspect_map = too.load_target(target_code, facet_code) 

648 if not ok or not aspect_map: 

649 return 0 if ok else 1 

650 

651 may_render = aspect_map.get('render', True) 

652 if not may_render: 

653 topic = f'structure({pathlib.Path(doc_root) / structure_name}) for target: {target_key} and facet: {facet_key}' 

654 log.warning(f'- render is declared as false in {topic}') 

655 if not options['force']:  [655 ↛ 656] line 655 didn't jump to line 656 because the condition on line 655 was never true

656 return 42 

657 else: 

658 log.warning(' + overwritten by force mode') 

659 

660 approvals = process_approvals(aspect_map) 

661 if isinstance(approvals, int): 

662 return 2 

663 binder = process_binder(aspect_map) 

664 if isinstance(binder, int):  [664 ↛ 665] line 664 didn't jump to line 665 because the condition on line 664 was never true

665 return 3 

666 changes = process_changes(aspect_map) 

667 if isinstance(changes, int):  [667 ↛ 668] line 667 didn't jump to line 668 because the condition on line 667 was never true

668 return 4 

669 metadata = met.load(aspect_map) 

670 if isinstance(metadata, int):  [670 ↛ 671] line 670 didn't jump to line 671 because the condition on line 670 was never true

671 return 5 

672 

673 root = SLASH 

674 root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH 

675 tree = treelib.Tree() 

676 tree.create_node(root, root) 

677 documents = {} 

678 insert_regions = {} 

679 img_collector = [] 

680 log.info(LOG_SEPARATOR) 

681 log.info('processing binder ...') 

682 for entry in binder: 

683 ref_path = DOC_BASE / entry 

684 log.debug(f'- {entry} as {ref_path}') 

685 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

686 documents[entry] = [line.rstrip() for line in handle.readlines()] 

687 insert_regions[entry] = [] 

688 in_region = False 

689 begin, end = 0, 0 

690 include = '' 

691 tree.create_node(entry, entry, parent=root) 

692 for slot, line in enumerate(documents[entry]): 

693 if line.startswith(IMG_LINE_STARTSWITH): 

694 documents[entry][slot] = adapt_image(line, img_collector, entry, root_path) 

695 log.debug(f'{slot :02d}|{line.rstrip()}') 

696 if not in_region: 

697 if line.startswith(READ_SLOT_FENCE_BEGIN): 

698 in_region = True 

699 begin = slot 

700 continue 

701 if line.startswith(INCLUDE_SLOT): 

702 include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip() 

703 insert_regions[entry].append(((slot, slot), include)) 

704 tree.create_node(include, include, parent=entry) 

705 include = '' 

706 continue 

707 if in_region: 

708 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

709 include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

710 elif line.startswith(READ_SLOT_FENCE_END): 

711 end = slot 

712 insert_regions[entry].append(((begin, end), include)) 

713 tree.create_node(include, include, parent=entry) 

714 in_region = False 

715 begin, end = 0, 0 

716 include = '' 

717 

718 for coords, include in insert_regions[entry]: # include is anchored on DOC_BASE 

719 ref_path = DOC_BASE / include 

720 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

721 documents[include] = [line.rstrip() for line in handle.readlines()] 

722 insert_regions[include] = [] 

723 in_region = False 

724 begin, end = 0, 0 

725 sub_include = '' 

726 for slot, line in enumerate(documents[include]): 

727 if line.startswith(IMG_LINE_STARTSWITH):  [727 ↛ 728] line 727 didn't jump to line 728 because the condition on line 727 was never true

728 documents[include][slot] = adapt_image(line, img_collector, include, root_path) 

729 log.debug(f'{slot :02d}|{line.rstrip()}') 

730 if not in_region: 

731 if line.startswith(READ_SLOT_FENCE_BEGIN): 

732 in_region = True 

733 begin = slot 

734 continue 

735 if line.startswith(INCLUDE_SLOT): 

736 harvest_include(line, slot, insert_regions, tree, include) 

737 continue 

738 if in_region: 

739 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

740 sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

741 sub_include = str(pathlib.Path(include).parent / sub_include) 

742 elif line.startswith(READ_SLOT_FENCE_END): 

743 end = slot 

744 insert_regions[include].append(((begin, end), sub_include)) 

745 tree.create_node(sub_include, sub_include, parent=include) 

746 in_region = False 

747 begin, end = 0, 0 

748 sub_include = '' 

749 

750 for coords, sub_include in insert_regions[include]: 

751 ref_path = DOC_BASE / sub_include 

752 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

753 documents[sub_include] = [line.rstrip() for line in handle.readlines()] 

754 insert_regions[sub_include] = [] 

755 in_region = False 

756 begin, end = 0, 0 

757 sub_sub_include = '' 

758 for slot, line in enumerate(documents[sub_include]): 

759 if line.startswith(IMG_LINE_STARTSWITH): 

760 documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path) 

761 log.debug(f'{slot :02d}|{line.rstrip()}') 

762 if not in_region: 

763 if line.startswith(READ_SLOT_FENCE_BEGIN): 

764 in_region = True 

765 begin = slot 

766 continue 

767 if line.startswith(INCLUDE_SLOT):  [767 ↛ 768] line 767 didn't jump to line 768 because the condition on line 767 was never true

768 harvest_include(line, slot, insert_regions, tree, sub_include) 

769 continue 

770 if in_region: 

771 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

772 sub_sub_include = ( 

773 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

774 ) 

775 sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include) 

776 elif line.startswith(READ_SLOT_FENCE_END): 

777 end = slot 

778 insert_regions[sub_include].append(((begin, end), sub_sub_include)) 

779 tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include) 

780 in_region = False 

781 begin, end = 0, 0 

782 sub_sub_include = '' 

783 

784 for coords, sub_sub_include in insert_regions[sub_include]: 

785 ref_path = DOC_BASE / sub_sub_include 

786 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

787 documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()] 

788 insert_regions[sub_sub_include] = [] 

789 in_region = False 

790 begin, end = 0, 0 

791 sub_sub_sub_include = '' 

792 for slot, line in enumerate(documents[sub_sub_include]): 

793 if line.startswith(IMG_LINE_STARTSWITH): 

794 documents[sub_sub_include][slot] = adapt_image( 

795 line, img_collector, sub_sub_include, root_path 

796 ) 

797 log.debug(f'{slot :02d}|{line.rstrip()}') 

798 if not in_region:  [798 ↛ 806] line 798 didn't jump to line 806 because the condition on line 798 was always true

799 if line.startswith(READ_SLOT_FENCE_BEGIN):  [799 ↛ 800] line 799 didn't jump to line 800 because the condition on line 799 was never true

800 in_region = True 

801 begin = slot 

802 continue 

803 if line.startswith(INCLUDE_SLOT):  [803 ↛ 804] line 803 didn't jump to line 804 because the condition on line 803 was never true

804 harvest_include(line, slot, insert_regions, tree, sub_sub_include) 

805 continue 

806 if in_region:  [806 ↛ 807] line 806 didn't jump to line 807 because the condition on line 806 was never true

807 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

808 sub_sub_sub_include = ( 

809 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

810 ) 

811 sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include) 

812 elif line.startswith(READ_SLOT_FENCE_END): 

813 end = slot 

814 insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include)) 

815 tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include) 

816 in_region = False 

817 begin, end = 0, 0 

818 sub_sub_sub_include = '' 

819 

820 for coords, sub_sub_sub_include in insert_regions[sub_sub_include]:

821 ref_path = DOC_BASE / sub_sub_sub_include 

822 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

823 documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()] 

824 insert_regions[sub_sub_sub_include] = [] 

825 in_region = False 

826 begin, end = 0, 0 

827 sub_sub_sub_sub_include = '' 

828 for slot, line in enumerate(documents[sub_sub_sub_include]): 

829 if line.startswith(IMG_LINE_STARTSWITH): 

830 documents[sub_sub_sub_include][slot] = adapt_image( 

831 line, img_collector, sub_sub_sub_include, root_path 

832 ) 

833 log.debug(f'{slot :02d}|{line.rstrip()}') 

834 if not in_region:  [834 ↛ 842] line 834 didn't jump to line 842 because the condition on line 834 was always true

835 if line.startswith(READ_SLOT_FENCE_BEGIN):  [835 ↛ 836] line 835 didn't jump to line 836 because the condition on line 835 was never true

836 in_region = True 

837 begin = slot 

838 continue 

839 if line.startswith(INCLUDE_SLOT):  [839 ↛ 840] line 839 didn't jump to line 840 because the condition on line 839 was never true

840 harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include) 

841 continue 

842 if in_region:  [842 ↛ 843] line 842 didn't jump to line 843 because the condition on line 842 was never true

843 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

844 sub_sub_sub_sub_include = ( 

845 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

846 ) 

847 sub_sub_sub_sub_include = str( 

848 pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include 

849 ) 

850 elif line.startswith(READ_SLOT_FENCE_END): 

851 end = slot 

852 insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include))

853 tree.create_node( 

854 sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include 

855 ) 

856 in_region = False 

857 begin, end = 0, 0 

858 sub_sub_sub_sub_include = '' 
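# (as read from the repeated blocks above: documents are loaded and scanned for binder parts
#  and up to four levels of nested includes; deeper references are only registered, not loaded)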

859 

860 top_down_paths = tree.paths_to_leaves() 

861 bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths] 

862 log.info(LOG_SEPARATOR) 

863 log.info('resulting tree:') 

864 for edge in str(tree).split(NL): 

865 log.info(edge) 

866 

867 log.info(LOG_SEPARATOR) 

868 log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:') 

869 for num, leaf_path in enumerate(bottom_up_paths): 

870 the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}' 

871 log.info(f'{num :2d}: {the_way_up}') 

872 

873 concat = {} 

874 log.info(LOG_SEPARATOR) 

875 log.info(f'dependencies for the {len(insert_regions)} document parts:') 

876 for key, regions in insert_regions.items(): 

877 num_in = len(regions) 

878 dashes = '-' * num_in 

879 incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )' 

880 indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}' 

881 log.info(f'- part {key} {indicator}') 

882 for region in regions: 

883 between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}' 

884 insert = f'include fragment {region[1]}' 

885 log.info(f' + {between} {insert}') 

886 if not regions: # No includes 

887 concat[key] = '\n'.join(documents[key]) + '\n' 

888 log.info(f' * did concat {key} document for insertion') 

889 

890 chains = [leaf_path for leaf_path in bottom_up_paths] 

891 log.info(LOG_SEPARATOR) 

892 log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:') 

893 todo = [[job for job in chain if job not in concat] for chain in chains] 
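# (the loop below repeatedly splices fully flattened parts into their parents via rollup;
#  an empty job list - [[]] - signals that every inclusion chain has been resolved)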

894 while todo != [[]]: 

895 todo = rollup(todo, documents, insert_regions, concat) 

896 

897 log.info(LOG_SEPARATOR) 

898 log.info('writing final concat markdown to document.md') 

899 with open('document.md', 'wt', encoding=ENCODING) as handle: 

900 handle.write('\n'.join(concat[bind] for bind in binder) + '\n') 

901 

902 log.info(LOG_SEPARATOR) 

903 log.info('collecting assets (images and diagrams)') 

904 collect_assets(img_collector) 

905 log.info(LOG_SEPARATOR) 

906 log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)') 

907 log.info(LOG_SEPARATOR) 

908 log.info('processing complete - SUCCESS') 

909 log.info(LOG_SEPARATOR) 

910 return 0