Coverage for liitos/concat.py: 88.81%

507 statements  

coverage.py v7.10.6, created at 2025-08-31 13:07:35 +00:00

1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs.""" 

2 

3import json 

4import os 

5import pathlib 

6import re 

7import shutil 

8from typing import Union, no_type_check 

9 

10import treelib 

11import yaml 

12 

13import liitos.gather as gat 

14import liitos.meta as met 

15import liitos.placeholder as plh 

16import liitos.tools as too 

17from liitos import ENCODING, LOG_SEPARATOR, PathLike, log 

18 

19ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC' 

20CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT' 

21DOC_BASE = pathlib.Path('..', '..') 

22STRUCTURE_PATH = DOC_BASE / 'structure.yml' 

23SLASH = '/' 

24IMAGES_FOLDER = 'images/' 

25DIAGRAMS_FOLDER = 'diagrams/' 

26 

27""" 

28```{.python .cb.run} 

29with open('sub/as.md') as fp: 

30 print(fp.read()) 

31``` 

32""" 

33READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}' 

34READ_SLOT_CONTEXT_BEGIN = 'with open(' 

35READ_SLOT_FENCE_END = '```' 
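
As an aside, the three READ_SLOT_* constants drive the fence scan that concatenate() performs for every binder entry further below; a condensed sketch (not the module's own helper, assumes liitos is importable) shows the ((begin, end), include) shape that ends up in the insert regions:

```python
# Condensed sketch of the read-slot scan; illustration only, not the module's own helper.
from liitos.concat import READ_SLOT_CONTEXT_BEGIN, READ_SLOT_FENCE_BEGIN, READ_SLOT_FENCE_END

lines = ['intro', READ_SLOT_FENCE_BEGIN, "with open('sub/as.md') as fp:", '    print(fp.read())', READ_SLOT_FENCE_END, 'outro']
regions, in_region, begin, include = [], False, 0, ''
for slot, line in enumerate(lines):
    if not in_region and line.startswith(READ_SLOT_FENCE_BEGIN):
        in_region, begin = True, slot               # remember where the fence opened
    elif in_region and line.startswith(READ_SLOT_CONTEXT_BEGIN):
        include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
    elif in_region and line.startswith(READ_SLOT_FENCE_END):
        regions.append(((begin, slot), include))    # the region spans the whole fenced block
        in_region = False
print(regions)  # [((1, 4), 'sub/as.md')]
```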

36 

37r""" 

38\include{markdown_file_path} 

39""" 

40INCLUDE_SLOT = '\\include{' 

41 

42""" 

43![Alt Text Red](images/red.png "Caption Text Red") 

44![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime") 

45![Alt Text Blue](images/blue.png "Caption Text Blue") 

46![Alt Text Sting Red](other/images/red.png "Caption Text Sting Red") 

47""" 

48IMG_LINE_STARTSWITH = '![' 

49MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 

50MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 
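
Since the regular expressions are dense, here is a small standalone illustration (not part of the module) of the groups MD_IMG_PATTERN extracts from a well-formed image line like the ones shown above:

```python
# Standalone illustration of the full-match image pattern defined above.
import re

MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
match = MD_IMG_PATTERN.match('![Alt Text Red](images/red.png "Caption Text Red")')
print(match.groupdict())
# {'cap': 'Alt Text Red', 'src': 'images/red.png', 'alt': 'Caption Text Red', 'rest': ''}
```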

51 

52NL = '\n' 

53 

54 

55@no_type_check 

56def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]: 

57 """Best effort loading of approvals data. 

58 

59 Examples: 

60 

61 >>> aspects = {gat.KEY_APPROVALS: 'missing-file'} 

62 >>> process_approvals(aspects) 

63 1 

64 

65 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

66 >>> approvals_name = 'empty-as-approvals.yml' 

67 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)} 

68 >>> process_approvals(aspects) 

69 1 

70 

71 >>> DOC_BASE = pathlib.Path('.') 

72 >>> aspects = {gat.KEY_APPROVALS: __file__} 

73 >>> process_approvals(aspects) 

74 1 

75 

76 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

77 >>> approvals_name = 'space-as-approvals.yml' 

78 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)} 

79 >>> process_approvals(aspects) 

80 1 

81 """ 

82 approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS] 

83 if not approvals_path.is_file() or not approvals_path.stat().st_size: 

84 log.error(f'destructure failed to find non-empty approvals file at {approvals_path}') 

85 return 1 

86 if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'): 

87 log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported') 

88 return 1 

89 approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json' 

90 with open(approvals_path, 'rt', encoding=ENCODING) as handle: 

91 approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle) 

92 if not approvals:  [92 ↛ 93: the condition on line 92 was never true] 

93 log.error(f'empty approvals file? Please add approvals to ({approvals_path})') 

94 return 1 

95 if approvals_channel == 'yaml':  [95 ↛ 99: the condition on line 95 was always true] 

96 with open('approvals.yml', 'wt', encoding=ENCODING) as handle: 

97 yaml.dump(approvals, handle, default_flow_style=False) 

98 else: 

99 with open('approvals.json', 'wt', encoding=ENCODING) as handle: 

100 json.dump(approvals, handle, indent=2) 

101 return approvals 

102 

103 

104@no_type_check 

105def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]: 

106 """Best effort loading of binder data. 

107 

108 Examples: 

109 

110 >>> aspects = {gat.KEY_BIND: 'missing-file'} 

111 >>> process_binder(aspects) 

112 1 

113 

114 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

115 >>> binder_name = 'empty-as-bind.txt' 

116 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)} 

117 >>> process_binder(aspects) 

118 1 

119 

120 >>> DOC_BASE = pathlib.Path('.') 

121 >>> aspects = {gat.KEY_BIND: __file__} 

122 >>> process_binder(aspects) 

123 1 

124 

125 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

126 >>> binder_name = 'space-as-bind.txt' 

127 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)} 

128 >>> process_binder(aspects) 

129 1 

130 """ 

131 bind_path = DOC_BASE / aspects[gat.KEY_BIND] 

132 if not bind_path.is_file() or not bind_path.stat().st_size: 

133 log.error(f'destructure failed to find non-empty bind file at {bind_path}') 

134 return 1 

135 if bind_path.suffix.lower() not in ('.txt',): 

136 log.error(f'bind file format per suffix ({bind_path.suffix}) not supported') 

137 return 1 

138 with open(bind_path, 'rt', encoding=ENCODING) as handle: 

139 binder = [line.strip() for line in handle.readlines() if line.strip()] 

140 if not binder:  [140 ↛ 141: the condition on line 140 was never true] 

141 log.error(f'empty bind file? Please add component paths to ({bind_path})') 

142 return 1 

143 with open('bind.txt', 'wt', encoding=ENCODING) as handle: 

144 handle.write('\n'.join(binder) + '\n') 

145 return binder 

146 

147 

148@no_type_check 

149def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]: 

150 """Best effort loading of changes data. 

151 

152 Examples: 

153 

154 >>> aspects = {gat.KEY_CHANGES: 'missing-file'} 

155 >>> process_changes(aspects) 

156 1 

157 

158 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

159 >>> changes_name = 'empty-as-changes.yml' 

160 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)} 

161 >>> process_changes(aspects) 

162 1 

163 

164 >>> DOC_BASE = pathlib.Path('.') 

165 >>> aspects = {gat.KEY_CHANGES: __file__} 

166 >>> process_changes(aspects) 

167 1 

168 

169 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

170 >>> changes_name = 'space-as-changes.yml' 

171 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)} 

172 >>> process_changes(aspects) 

173 1 

174 """ 

175 changes_path = DOC_BASE / aspects[gat.KEY_CHANGES] 

176 if not changes_path.is_file() or not changes_path.stat().st_size: 

177 log.error(f'destructure failed to find non-empty changes file at {changes_path}') 

178 return 1 

179 if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'): 

180 log.error(f'changes file format per suffix ({changes_path.suffix}) not supported') 

181 return 1 

182 changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json' 

183 with open(changes_path, 'rt', encoding=ENCODING) as handle: 

184 changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle) 

185 if not changes:  [185 ↛ 186: the condition on line 185 was never true] 

186 log.error(f'empty changes file? Please add changes data to ({changes_path})') 

187 return 1 

188 if changes_channel == 'yaml':  [188 ↛ 192: the condition on line 188 was always true] 

189 with open('changes.yml', 'wt', encoding=ENCODING) as handle: 

190 yaml.dump(changes, handle, default_flow_style=False) 

191 else: 

192 with open('changes.json', 'wt', encoding=ENCODING) as handle: 

193 json.dump(changes, handle, indent=2) 

194 return changes 

195 

196 

197@no_type_check 

198def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]: 

199 """Parse a markdown image line within our conventions into caption, src, alt, and optional rest. 

200 

201 Examples: 

202 

203 >>> t = '' 

204 >>> parse_markdown_image(t) 

205 ('', '', '', '') 

206 

207 >>> t = '![]()' 

208 >>> parse_markdown_image(t) 

209 ('', '', '', '![]()') 

210 

211 >>> t = '![a](b "c")' 

212 >>> parse_markdown_image(t) 

213 ('a', 'b', 'c', '') 

214 

215 >>> t = '![a](liitos/placeholders/this-resource-is-missing.png "c")' 

216 >>> parse_markdown_image(t) 

217 ('a', 'liitos/placeholders/this-resource-is-missing.png', 'c', '') 

218 """ 

219 invalid_marker = ('', '', '', text_line) 

220 

221 exclam = '!' 

222 osb = '[' 

223 if not text_line or not text_line.startswith(f'{exclam}{osb}'): 

224 log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>') 

225 return invalid_marker 

226 

227 csb = ']' 

228 osb_cnt = text_line.count(osb) 

229 csb_cnt = text_line.count(csb) 

230 if osb_cnt + csb_cnt < 2: 

231 log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

232 return invalid_marker 

233 if osb_cnt != csb_cnt: 

234 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

235 

236 orb = '(' 

237 cap_src_boundary = f'{csb}{orb}' 

238 if cap_src_boundary not in text_line: 

239 log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>') 

240 return invalid_marker 

241 

242 crb = ')' 

243 orb_cnt = text_line.count(orb) 

244 crb_cnt = text_line.count(crb) 

245 if orb_cnt + crb_cnt < 2: 

246 log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

247 return invalid_marker 

248 if orb_cnt != crb_cnt: 

249 log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

250 

251 quo = '"' 

252 quo_cnt = text_line.count(quo) 

253 if quo_cnt < 2: 

254 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

255 if quo_cnt % 2: 

256 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

257 

258 sp = ' ' 

259 sp_cnt = text_line.count(sp) 

260 if not sp_cnt: 

261 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

262 

263 dot = '.' 

264 sla = '/' 

265 abs_path_indicator = f'{csb}{orb}{sla}' 

266 may_have_abs_path = abs_path_indicator in text_line 

267 if may_have_abs_path: 

268 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>') 

269 naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}' 

270 may_have_upwards_path = naive_upwards_path_indicator in text_line 

271 if may_have_upwards_path: 

272 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>') 

273 

274 log.info('- parsing the markdown image text line ...') 

275 if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt: 

276 # The regex is not safe for orb inside caption 

277 left, right = text_line.split(cap_src_boundary, 1) 

278 match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right) 

279 if not match_right: 

280 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>') 

281 return invalid_marker 

282 

283 parts = match_right.groupdict() 

284 cap = left[2:] 

285 if not cap: 

286 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>') 

287 cap = CAP_INJECTOR_HACK 

288 

289 src = parts['src'] 

290 alt = parts['alt'] 

291 rest = parts['rest'] 

292 if orb in alt or crb in alt: 

293 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>') 

294 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})') 

295 

296 return cap, src, alt, rest 

297 

298 match = MD_IMG_PATTERN.match(text_line) 

299 if not match: 

300 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>') 

301 return invalid_marker 

302 

303 parts = match.groupdict() 

304 cap = parts['cap'] 

305 if not cap: 

306 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>') 

307 cap = CAP_INJECTOR_HACK 

308 

309 src = parts['src'] 

310 alt = parts['alt'] 

311 rest = parts['rest'] 

312 if orb in alt or crb in alt:  [312 ↛ 313: the condition on line 312 was never true] 

313 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>') 

314 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})') 

315 

316 return cap, src, alt, rest 

317 

318 

319@no_type_check 

320def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str: 

321 """YES.""" 

322 cap, src, alt, rest = parse_markdown_image(text_line) 

323 if not src:  [323 ↛ 324: the condition on line 323 was never true] 

324 log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>') 

325 return text_line 

326 

327 img_path = str((pathlib.Path(upstream).parent / src).resolve()).replace(root, '') 

328 collector.append(img_path) 

329 img_hack = img_path 

330 if f'/{IMAGES_FOLDER}' in img_path: 

331 img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1] 

332 elif f'/{DIAGRAMS_FOLDER}' in img_path: 

333 img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1] 

334 

335 if img_hack != img_path: 

336 log.info(f'{img_hack} <--- OK? --- {img_path}') 

337 

338 alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"' 

339 belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}' 

340 log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-') 

341 return belte_og_seler 
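
adapt_image ships without a doctest, so a brief usage sketch may help; it assumes liitos is importable and that the current working directory stands in for the render root (the file names are hypothetical):

```python
# Illustration only: rewriting an upward-pointing image reference found in part/deep/intro.md.
import pathlib
from liitos.concat import adapt_image

root = str(pathlib.Path.cwd().resolve()).rstrip('/') + '/'
collector = []
line = '![Alt Text Lime](../images/lime.png "Caption Text Lime")'
print(adapt_image(line, collector, 'part/deep/intro.md', root))
# ![Alt Text Lime](images/lime.png "Caption Text Lime")
print(collector)
# ['part/images/lime.png']
```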

342 

343 

344@no_type_check 

345def harvest_include( 

346 text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str 

347) -> None: 

348 r"""TODO. 

349 

350 Examples: 

351 

352 >>> text = 'baz\n\\include{c}\nquux' 

353 >>> slot = 0 

354 >>> regions = {SLASH: [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]} 

355 >>> tr = treelib.Tree() 

356 >>> root = SLASH 

357 >>> tr.create_node(root, root) 

358 Node(tag=/, identifier=/, data=None) 

359 >>> harvest_include(text, slot, regions, tr, root) 

360 >>> print(tr) 

361 / 

362 └── /c} 

363 quux 

364 <BLANKLINE> 

365 """ 

366 include_local = text_line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip() 

367 include = str(pathlib.Path(parent).parent / include_local) 

368 regions[parent].append(((slot, slot), include)) 

369 tree.create_node(include, include, parent=parent) 

370 

371 

372@no_type_check 

373def rollup( 

374 jobs: list[list[str]], 

375 docs: dict[str, list[str]], 

376 regions: dict[str, list[tuple[tuple[int, int], str]]], 

377 flat: dict[str, str], 

378) -> list[list[str]]: 

379 r"""TODO. 

380 

381 Examples: 

382 

383 >>> jobs = [['a', 'b'], ['b', 'c']] 

384 >>> docs = {'a': ['a1', 'a2'], 'b': ['b1', 'b2'], 'c': ['c1', 'c2', 'c3']} 

385 >>> regions = {'a': [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]} 

386 >>> flat = {'a': 'a1\na2', 'b': 'b1\nb2', 'c': 'c1\nc2\nc3'} 

387 >>> rollup(jobs, docs, regions, flat) 

388 [[], []] 

389 >>> flat 

390 {'a': 'b1\nb2\n', 'b': 'c1\nc2\nc3\n', 'c': 'c1\nc2\nc3'} 

391 

392 >>> jobs = [['/', 'b'], ['/', 'c']] 

393 >>> docs, regions, flat = {}, {}, {'baz': 'quux'} 

394 >>> rollup(jobs, docs, regions, flat) 

395 [[]] 

396 >>> flat 

397 {'baz': 'quux'} 

398 """ 

399 tackle = [those[0] for those in jobs if those and those[0] != SLASH] 

400 if tackle: 

401 log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(tackle)))}) remaining') 

402 else: 

403 return [[]] 

404 for that in tackle: 

405 buf = [] 

406 for slot, line in enumerate(docs[that]): 

407 special = False 

408 the_first = False 

409 the_include = '' 

410 for pair, include in regions[that]: 

411 low, high = pair 

412 if low <= slot <= high: 

413 special = True 

414 if low == slot: 

415 the_first = True 

416 the_include = include 

417 if not special: 

418 buf.append(line) 

419 continue 

420 if the_first: 

421 buf.append(flat[the_include]) 

422 flat[that] = '\n'.join(buf) + '\n' 

423 

424 return [[job for job in chain if job not in flat] for chain in jobs] 
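
The doctest above exercises single calls; in concatenate() further below, rollup() is driven to a fixed point, roughly as in this sketch with hypothetical part names (assumes liitos is importable):

```python
# Illustration only: folding one include chain bottom up until nothing is left to do.
from liitos.concat import rollup

docs = {'part.md': ['intro', '<include slot>'], 'leaf.md': ['leaf line']}
regions = {'part.md': [((1, 1), 'leaf.md')], 'leaf.md': []}
flat = {'leaf.md': 'leaf line'}    # parts without includes are seeded before the loop
todo = [['part.md', '/']]          # one bottom-up chain, pseudo root last
while todo != [[]]:                # same driver loop as in concatenate()
    todo = rollup(todo, docs, regions, flat)
print(repr(flat['part.md']))       # 'intro\nleaf line\n'
```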

425 

426 

427def copy_eventually(src_base: pathlib.Path, tgt_base: pathlib.Path, local_path: PathLike) -> None: 

428 """Copy visual assets eventually and warn on overwrites.""" 

429 if not tgt_base.is_dir(): 

430 try: 

431 tgt_base.mkdir(parents=True, exist_ok=True) 

432 except FileExistsError as err: 

433 log.error(f'failed to create folder {tgt_base} - detail: {err}') 

434 source_asset = src_base / local_path 

435 target_asset = tgt_base / pathlib.Path(local_path).name 

436 if target_asset.is_file(): 

437 log.warning(f'overwriting existing {target_asset} from {source_asset}') 

438 try: 

439 shutil.copy(source_asset, target_asset) 

440 except FileNotFoundError as err: 

441 log.error(err) 

442 code, msg = plh.dump_placeholder(target_asset) 

443 log.warning(msg) if code else log.info(msg) 

444 except NotADirectoryError as err: 

445 log.error(err) 

446 code, msg = plh.dump_placeholder(target_asset) 

447 log.warning(msg) if code else log.info(msg) 
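
copy_eventually also has no doctest; a quick sketch with throwaway temporary folders (hypothetical file names, assumes liitos is importable) shows the happy path:

```python
# Illustration only: copy one collected asset into the render tree.
import pathlib
import tempfile
from liitos.concat import copy_eventually

with tempfile.TemporaryDirectory() as doc, tempfile.TemporaryDirectory() as render:
    (pathlib.Path(doc) / 'images').mkdir()
    (pathlib.Path(doc) / 'images' / 'red.png').write_bytes(b'not really a png')
    copy_eventually(pathlib.Path(doc), pathlib.Path(render) / 'images', 'images/red.png')
    print((pathlib.Path(render) / 'images' / 'red.png').is_file())  # True
```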

448 

449 

450@no_type_check 

451def collect_assets( 

452 collector: list[str], 

453 doc_base: Union[PathLike, None] = None, 

454 images_folder: Union[PathLike, None] = None, 

455 diagrams_folder: Union[PathLike, None] = None, 

456) -> None: 

457 """Collect assets into the rendering space. 

458 

459 Examples: 

460 

461 >>> c = ['foo'] 

462 >>> collect_assets(c) 

463 

464 >>> import tempfile 

465 >>> with tempfile.TemporaryDirectory() as imaf: 

466 ... c = [imaf + 'foo'] 

467 ... collect_assets(c, doc_base='.', images_folder=imaf) 

468 

469 >>> import tempfile 

470 >>> with tempfile.TemporaryDirectory() as imaf: 

471 ... with tempfile.TemporaryDirectory() as diaf: 

472 ... c = [imaf + 'foo', diaf + 'bar'] 

473 ... collect_assets(c, doc_base='.', images_folder=imaf, diagrams_folder=diaf) 

474 

475 >>> import tempfile 

476 >>> with tempfile.TemporaryDirectory() as imaf: 

477 ... ima = pathlib.Path(imaf) / 'images' 

478 ... ima.touch() 

479 ... with tempfile.TemporaryDirectory() as diaf: 

480 ... dia = pathlib.Path(diaf) / 'diagrams' 

481 ... dia.touch() 

482 ... c = [str(ima / 'foo'), str(dia / 'bar')] 

483 ... collect_assets(c, doc_base='.', images_folder=ima, diagrams_folder=dia) 

484 """ 

485 doc_base = pathlib.Path(doc_base) if doc_base else DOC_BASE 

486 img_part = str(images_folder) if images_folder else IMAGES_FOLDER 

487 dia_part = str(diagrams_folder) if diagrams_folder else DIAGRAMS_FOLDER 

488 for img_path in collector: 

489 where_to = img_part if img_part in img_path else (dia_part if dia_part in img_path else None) 

490 if where_to is not None: 

491 copy_eventually(doc_base, pathlib.Path(where_to), img_path) 

492 else: 

493 log.error(f'asset collection for neither images nor diagrams requested per {img_path} - ignoring') 

494 

495 

496@no_type_check 

497def concatenate( 

498 doc_root: Union[str, pathlib.Path], 

499 structure_name: str, 

500 target_key: str, 

501 facet_key: str, 

502 options: dict[str, Union[bool, str]], 

503) -> int: 

504 """Later alligator. 

505 

506 Examples: 

507 

508 >>> restore_cwd = os.getcwd() 

509 >>> dr = '.' 

510 >>> sn = 'foo' 

511 >>> tk = '' 

512 >>> fk = '' 

513 >>> op = {'bar': True} 

514 >>> concatenate(dr, sn, tk, fk, op) 

515 2 

516 >>> os.chdir(restore_cwd) 

517 

518 >>> restore_cwd = os.getcwd() 

519 >>> dr = 'example/no-renda' 

520 >>> sn = 'structure.yml' 

521 >>> tk = 'prod_kind' 

522 >>> fk = 'no-renda' 

523 >>> op = {'force': True} 

524 >>> concatenate(dr, sn, tk, fk, op) 

525 0 

526 >>> os.chdir(restore_cwd) 

527 

528 >>> restore_cwd = os.getcwd() 

529 >>> dr = 'example/tuna' 

530 >>> sn = 'structure.yml' 

531 >>> tk = 'prod_kind' 

532 >>> fk = 'non-existing-facet-key' 

533 >>> op = {'bar': True} 

534 >>> concatenate(dr, sn, tk, fk, op) 

535 1 

536 >>> os.chdir(restore_cwd) 

537 

538 >>> restore_cwd = os.getcwd() 

539 >>> dr = 'test/fixtures/basic/' 

540 >>> sn = 'structure.yml' 

541 >>> tk = 'abc' 

542 >>> fk = 'missing' 

543 >>> op = {'bar': True} 

544 >>> concatenate(dr, sn, tk, fk, op) 

545 2 

546 >>> os.chdir(restore_cwd) 

547 

548 >>> restore_cwd = os.getcwd() 

549 >>> dr = 'example/tuna' 

550 >>> sn = 'structure.yml' 

551 >>> tk = 'prod_kind' 

552 >>> fk = 'tuna' 

553 >>> op = {'bar': True} 

554 >>> concatenate(dr, sn, tk, fk, op) 

555 0 

556 >>> os.chdir(restore_cwd) 

557 

558 >>> restore_cwd = os.getcwd() 

559 >>> dr = 'example/tuna' 

560 >>> sn = 'structure.yml' 

561 >>> tk = 'prod_kind' 

562 >>> fk = 'tuna' 

563 >>> op = {'bar': True} 

564 >>> try: 

565 ... code = concatenate(dr, sn, tk, fk, op) 

566 ... except FileNotFoundError: 

567 ... code = -1 

568 >>> os.chdir(restore_cwd) 

569 >>> code 

570 0 

571 

572 >>> restore_cwd = os.getcwd() 

573 >>> dr = 'example/ejected-templates' 

574 >>> sn = 'structure.yml' 

575 >>> tk = 'prod_kind' 

576 >>> fk = 'ejected-templates' 

577 >>> op = {'bar': True} 

578 >>> try: 

579 ... code = concatenate(dr, sn, tk, fk, op) 

580 ... except FileNotFoundError: 

581 ... code = -1 

582 >>> os.chdir(restore_cwd) 

583 >>> code 

584 0 

585 

586 >>> restore_cwd = os.getcwd() 

587 >>> dr = 'example/ejected-templates' 

588 >>> sn = 'structure.yml' 

589 >>> tk = 'prod_kind' 

590 >>> fk = 'ejected-templates-borked' 

591 >>> op = {'bar': True} 

592 >>> try: 

593 ... code = concatenate(dr, sn, tk, fk, op) 

594 ... except FileNotFoundError: 

595 ... code = -1 

596 >>> os.chdir(restore_cwd) 

597 >>> code 

598 0 

599 

600 >>> restore_cwd = os.getcwd() 

601 >>> dr = 'example/tuna' 

602 >>> sn = 'structure.yml' 

603 >>> tk = 'prod_kind' 

604 >>> fk = 'tuna' 

605 >>> op = {'bar': True} 

606 >>> abs_here = pathlib.Path().resolve() 

607 >>> try: 

608 ... code = concatenate(dr, sn, tk, fk, op) 

609 ... except FileNotFoundError: 

610 ... code = -1 

611 >>> os.chdir(restore_cwd) 

612 >>> code 

613 0 

614 

615 """ 

616 log.info(LOG_SEPARATOR) 

617 log.warning('entered concat function ...') 

618 target_code = target_key 

619 facet_code = facet_key 

620 if not facet_code.strip() or not target_code.strip(): 

621 log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes') 

622 return 2 

623 

624 log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request') 

625 

626 structure, asset_map = gat.prelude( 

627 doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat' 

628 ) 

629 log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)') 

630 rel_concat_folder_path = pathlib.Path('render/pdf/') 

631 rel_concat_folder_path.mkdir(parents=True, exist_ok=True) 

632 os.chdir(rel_concat_folder_path) 

633 log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)') 

634 

635 ok, aspect_map = too.load_target(target_code, facet_code) 

636 if not ok or not aspect_map: 

637 return 0 if ok else 1 

638 

639 may_render = aspect_map.get('render', True) 

640 if not may_render: 

641 topic = f'structure({pathlib.Path(doc_root) / structure_name}) for target: {target_key} and facet: {facet_key}' 

642 log.warning(f'- render is declared as false in {topic}') 

643 if not options['force']:  [643 ↛ 644: the condition on line 643 was never true] 

644 return 42 

645 else: 

646 log.warning(' + overwritten by force mode') 

647 

648 approvals = process_approvals(aspect_map) 

649 if isinstance(approvals, int): 

650 return 2 

651 binder = process_binder(aspect_map) 

652 if isinstance(binder, int):  [652 ↛ 653: the condition on line 652 was never true] 

653 return 3 

654 changes = process_changes(aspect_map) 

655 if isinstance(changes, int):  [655 ↛ 656: the condition on line 655 was never true] 

656 return 4 

657 metadata = met.load(aspect_map) 

658 if isinstance(metadata, int):  [658 ↛ 659: the condition on line 658 was never true] 

659 return 5 

660 

661 root = SLASH 

662 root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH 

663 tree = treelib.Tree() 

664 tree.create_node(root, root) 

665 documents = {} 

666 insert_regions = {} 

667 img_collector = [] 

668 log.info(LOG_SEPARATOR) 

669 log.info('processing binder ...') 

670 for entry in binder: 

671 ref_path = DOC_BASE / entry 

672 log.debug(f'- {entry} as {ref_path}') 

673 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

674 documents[entry] = [line.rstrip() for line in handle.readlines()] 

675 insert_regions[entry] = [] 

676 in_region = False 

677 begin, end = 0, 0 

678 include = '' 

679 tree.create_node(entry, entry, parent=root) 

680 for slot, line in enumerate(documents[entry]): 

681 if line.startswith(IMG_LINE_STARTSWITH): 

682 documents[entry][slot] = adapt_image(line, img_collector, entry, root_path) 

683 log.debug(f'{slot :02d}|{line.rstrip()}') 

684 if not in_region: 

685 if line.startswith(READ_SLOT_FENCE_BEGIN): 

686 in_region = True 

687 begin = slot 

688 continue 

689 if line.startswith(INCLUDE_SLOT): 

690 include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip() 

691 insert_regions[entry].append(((slot, slot), include)) 

692 tree.create_node(include, include, parent=entry) 

693 include = '' 

694 continue 

695 if in_region: 

696 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

697 include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

698 elif line.startswith(READ_SLOT_FENCE_END): 

699 end = slot 

700 insert_regions[entry].append(((begin, end), include)) 

701 tree.create_node(include, include, parent=entry) 

702 in_region = False 

703 begin, end = 0, 0 

704 include = '' 

705 

706 for coords, include in insert_regions[entry]: # include is anchored on DOC_BASE 

707 ref_path = DOC_BASE / include 

708 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

709 documents[include] = [line.rstrip() for line in handle.readlines()] 

710 insert_regions[include] = [] 

711 in_region = False 

712 begin, end = 0, 0 

713 sub_include = '' 

714 for slot, line in enumerate(documents[include]): 

715 if line.startswith(IMG_LINE_STARTSWITH):  [715 ↛ 716: the condition on line 715 was never true] 

716 documents[include][slot] = adapt_image(line, img_collector, include, root_path) 

717 log.debug(f'{slot :02d}|{line.rstrip()}') 

718 if not in_region: 

719 if line.startswith(READ_SLOT_FENCE_BEGIN): 

720 in_region = True 

721 begin = slot 

722 continue 

723 if line.startswith(INCLUDE_SLOT): 

724 harvest_include(line, slot, insert_regions, tree, include) 

725 continue 

726 if in_region: 

727 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

728 sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

729 sub_include = str(pathlib.Path(include).parent / sub_include) 

730 elif line.startswith(READ_SLOT_FENCE_END): 

731 end = slot 

732 insert_regions[include].append(((begin, end), sub_include)) 

733 tree.create_node(sub_include, sub_include, parent=include) 

734 in_region = False 

735 begin, end = 0, 0 

736 sub_include = '' 

737 

738 for coords, sub_include in insert_regions[include]: 

739 ref_path = DOC_BASE / sub_include 

740 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

741 documents[sub_include] = [line.rstrip() for line in handle.readlines()] 

742 insert_regions[sub_include] = [] 

743 in_region = False 

744 begin, end = 0, 0 

745 sub_sub_include = '' 

746 for slot, line in enumerate(documents[sub_include]): 

747 if line.startswith(IMG_LINE_STARTSWITH): 

748 documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path) 

749 log.debug(f'{slot :02d}|{line.rstrip()}') 

750 if not in_region: 

751 if line.startswith(READ_SLOT_FENCE_BEGIN): 

752 in_region = True 

753 begin = slot 

754 continue 

755 if line.startswith(INCLUDE_SLOT):  [755 ↛ 756: the condition on line 755 was never true] 

756 harvest_include(line, slot, insert_regions, tree, sub_include) 

757 continue 

758 if in_region: 

759 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

760 sub_sub_include = ( 

761 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

762 ) 

763 sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include) 

764 elif line.startswith(READ_SLOT_FENCE_END): 

765 end = slot 

766 insert_regions[sub_include].append(((begin, end), sub_sub_include)) 

767 tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include) 

768 in_region = False 

769 begin, end = 0, 0 

770 sub_sub_include = '' 

771 

772 for coords, sub_sub_include in insert_regions[sub_include]: 

773 ref_path = DOC_BASE / sub_sub_include 

774 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

775 documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()] 

776 insert_regions[sub_sub_include] = [] 

777 in_region = False 

778 begin, end = 0, 0 

779 sub_sub_sub_include = '' 

780 for slot, line in enumerate(documents[sub_sub_include]): 

781 if line.startswith(IMG_LINE_STARTSWITH): 

782 documents[sub_sub_include][slot] = adapt_image( 

783 line, img_collector, sub_sub_include, root_path 

784 ) 

785 log.debug(f'{slot :02d}|{line.rstrip()}') 

786 if not in_region:  [786 ↛ 794: the condition on line 786 was always true] 

787 if line.startswith(READ_SLOT_FENCE_BEGIN):  [787 ↛ 788: the condition on line 787 was never true] 

788 in_region = True 

789 begin = slot 

790 continue 

791 if line.startswith(INCLUDE_SLOT):  [791 ↛ 792: the condition on line 791 was never true] 

792 harvest_include(line, slot, insert_regions, tree, sub_sub_include) 

793 continue 

794 if in_region:  [794 ↛ 795: the condition on line 794 was never true] 

795 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

796 sub_sub_sub_include = ( 

797 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

798 ) 

799 sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include) 

800 elif line.startswith(READ_SLOT_FENCE_END): 

801 end = slot 

802 insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include)) 

803 tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include) 

804 in_region = False 

805 begin, end = 0, 0 

806 sub_sub_sub_include = '' 

807 

808 for coords, sub_sub_sub_include in insert_regions[sub_sub_include]: 

809 ref_path = DOC_BASE / sub_sub_sub_include 

810 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

811 documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()] 

812 insert_regions[sub_sub_sub_include] = [] 

813 in_region = False 

814 begin, end = 0, 0 

815 sub_sub_sub_sub_include = '' 

816 for slot, line in enumerate(documents[sub_sub_sub_include]): 

817 if line.startswith(IMG_LINE_STARTSWITH): 

818 documents[sub_sub_sub_include][slot] = adapt_image( 

819 line, img_collector, sub_sub_sub_include, root_path 

820 ) 

821 log.debug(f'{slot :02d}|{line.rstrip()}') 

822 if not in_region:  [822 ↛ 830: the condition on line 822 was always true] 

823 if line.startswith(READ_SLOT_FENCE_BEGIN):  [823 ↛ 824: the condition on line 823 was never true] 

824 in_region = True 

825 begin = slot 

826 continue 

827 if line.startswith(INCLUDE_SLOT):  [827 ↛ 828: the condition on line 827 was never true] 

828 harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include) 

829 continue 

830 if in_region:  [830 ↛ 831: the condition on line 830 was never true] 

831 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

832 sub_sub_sub_sub_include = ( 

833 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

834 ) 

835 sub_sub_sub_sub_include = str( 

836 pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include 

837 ) 

838 elif line.startswith(READ_SLOT_FENCE_END): 

839 end = slot 

840 insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include)) 

841 tree.create_node( 

842 sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include 

843 ) 

844 in_region = False 

845 begin, end = 0, 0 

846 sub_sub_sub_sub_include = '' 

847 

848 top_down_paths = tree.paths_to_leaves() 

849 bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths] 

850 log.info(LOG_SEPARATOR) 

851 log.info('resulting tree:') 

852 for edge in str(tree).split(NL): 

853 log.info(edge) 

854 

855 log.info(LOG_SEPARATOR) 

856 log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:') 

857 for num, leaf_path in enumerate(bottom_up_paths): 

858 the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}' 

859 log.info(f'{num :2d}: {the_way_up}') 

860 

861 concat = {} 

862 log.info(LOG_SEPARATOR) 

863 log.info(f'dependencies for the {len(insert_regions)} document parts:') 

864 for key, regions in insert_regions.items(): 

865 num_in = len(regions) 

866 dashes = '-' * num_in 

867 incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )' 

868 indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}' 

869 log.info(f'- part {key} {indicator}') 

870 for region in regions: 

871 between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}' 

872 insert = f'include fragment {region[1]}' 

873 log.info(f' + {between} {insert}') 

874 if not regions: # No includes 

875 concat[key] = '\n'.join(documents[key]) + '\n' 

876 log.info(f' * did concat {key} document for insertion') 

877 

878 chains = [leaf_path for leaf_path in bottom_up_paths] 

879 log.info(LOG_SEPARATOR) 

880 log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:') 

881 todo = [[job for job in chain if job not in concat] for chain in chains] 

882 while todo != [[]]: 

883 todo = rollup(todo, documents, insert_regions, concat) 

884 

885 log.info(LOG_SEPARATOR) 

886 log.info('writing final concat markdown to document.md') 

887 with open('document.md', 'wt', encoding=ENCODING) as handle: 

888 handle.write('\n'.join(concat[bind] for bind in binder) + '\n') 

889 

890 log.info(LOG_SEPARATOR) 

891 log.info('collecting assets (images and diagrams)') 

892 collect_assets(img_collector) 

893 log.info(LOG_SEPARATOR) 

894 log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)') 

895 log.info(LOG_SEPARATOR) 

896 log.info('processing complete - SUCCESS') 

897 log.info(LOG_SEPARATOR) 

898 return 0