Coverage for liitos/concat.py: 90.01%

515 statements  

coverage.py v7.6.10, created at 2025-01-05 17:22:35 +00:00

1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs.""" 

2 

3import json 

4import os 

5import pathlib 

6import re 

7import shutil 

8from typing import Union, no_type_check 

9 

10import treelib # type: ignore 

11import yaml 

12 

13import liitos.gather as gat 

14import liitos.meta as met 

15import liitos.placeholder as plh 

16import liitos.tools as too 

17from liitos import ENCODING, LOG_SEPARATOR, PathLike, log 

18 

19ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC' 

20CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT' 

21DOC_BASE = pathlib.Path('..', '..') 

22STRUCTURE_PATH = DOC_BASE / 'structure.yml' 

23SLASH = '/' 

24IMAGES_FOLDER = 'images/' 

25DIAGRAMS_FOLDER = 'diagrams/' 

26 

27""" 

28```{.python .cb.run} 

29with open('sub/as.md') as fp: 

30 print(fp.read()) 

31``` 

32""" 

33READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}' 

34READ_SLOT_CONTEXT_BEGIN = 'with open(' 

35READ_SLOT_FENCE_END = '```' 
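"""
Illustrative sketch of how a read slot context line reduces to the included path;
it mirrors the handling inside concatenate further below, and the path sub/as.md
is the example from the fence above:

>>> line = "with open('sub/as.md') as fp:"
>>> line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
'sub/as.md'
"""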

36 

37r""" 

38\include{markdown_file_path} 

39""" 

40INCLUDE_SLOT = '\\include{' 
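r"""
Illustrative sketch of how an include slot line reduces to the included path
(mirrors the handling in harvest_include and concatenate below; the path
part/sub.md is hypothetical):

>>> line = r'\include{part/sub.md}'
>>> line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
'part/sub.md'
"""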

41 

42""" 

43![Alt Text Red](images/red.png "Caption Text Red") 

44![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime") 

45![Alt Text Blue](images/blue.png "Caption Text Blue") 

46![Alt Text Sting Red](other/images/red.png "Caption Text Sting Red") 

47""" 

48IMG_LINE_STARTSWITH = '![' 

49MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 

50MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 
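"""
Illustrative sketch matching the first example line above against MD_IMG_PATTERN;
the named groups feed parse_markdown_image below:

>>> m = MD_IMG_PATTERN.match('![Alt Text Red](images/red.png "Caption Text Red")')
>>> m.group('cap'), m.group('src'), m.group('alt'), m.group('rest')
('Alt Text Red', 'images/red.png', 'Caption Text Red', '')
"""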

51 

52NL = '\n' 

53 

54 

55@no_type_check 

56def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]: 

57 """Best effort loading of approvals data. 

58 

59 Examples: 

60 

61 >>> aspects = {gat.KEY_APPROVALS: 'missing-file'} 

62 >>> process_approvals(aspects) 

63 1 

64 

65 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

66 >>> approvals_name = 'empty-as-approvals.yml' 

67 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)} 

68 >>> process_approvals(aspects) 

69 1 

70 

71 >>> DOC_BASE = pathlib.Path('.') 

72 >>> aspects = {gat.KEY_APPROVALS: __file__} 

73 >>> process_approvals(aspects) 

74 1 

75 

76 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

77 >>> approvals_name = 'space-as-approvals.yml' 

78 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)} 

79 >>> process_approvals(aspects) 

80 1 

81 """ 

82 approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS] 

83 if not approvals_path.is_file() or not approvals_path.stat().st_size: 

84 log.error(f'destructure failed to find non-empty approvals file at {approvals_path}') 

85 return 1 

86 if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'): 

87 log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported') 

88 return 1 

89 approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json' 

90 with open(approvals_path, 'rt', encoding=ENCODING) as handle: 

91 approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle) 

92 if not approvals:  [branch 92 ↛ 93 not taken: the condition on line 92 was never true]

93 log.error(f'empty approvals file? Please add approvals to ({approvals_path})') 

94 return 1 

95 if approvals_channel == 'yaml': 

96 with open('approvals.yml', 'wt', encoding=ENCODING) as handle: 

97 yaml.dump(approvals, handle, default_flow_style=False) 

98 else: 

99 with open('approvals.json', 'wt', encoding=ENCODING) as handle: 

100 json.dump(approvals, handle, indent=2) 

101 return approvals 

102 

103 

104@no_type_check 

105def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]: 

106 """Best effort loading of binder data. 

107 

108 Examples: 

109 

110 >>> aspects = {gat.KEY_BIND: 'missing-file'} 

111 >>> process_binder(aspects) 

112 1 

113 

114 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

115 >>> binder_name = 'empty-as-bind.txt' 

116 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)} 

117 >>> process_binder(aspects) 

118 1 

119 

120 >>> DOC_BASE = pathlib.Path('.') 

121 >>> aspects = {gat.KEY_BIND: __file__} 

122 >>> process_binder(aspects) 

123 1 

124 

125 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

126 >>> binder_name = 'space-as-bind.txt' 

127 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)} 

128 >>> process_binder(aspects) 

129 1 

130 """ 

131 bind_path = DOC_BASE / aspects[gat.KEY_BIND] 

132 if not bind_path.is_file() or not bind_path.stat().st_size: 

133 log.error(f'destructure failed to find non-empty bind file at {bind_path}') 

134 return 1 

135 if bind_path.suffix.lower() not in ('.txt',): 

136 log.error(f'bind file format per suffix ({bind_path.suffix}) not supported') 

137 return 1 

138 with open(bind_path, 'rt', encoding=ENCODING) as handle: 

139 binder = [line.strip() for line in handle.readlines() if line.strip()] 

140 if not binder:  [branch 140 ↛ 141 not taken: the condition on line 140 was never true]

141 log.error(f'empty bind file? Please add component paths to ({bind_path})') 

142 return 1 

143 with open('bind.txt', 'wt', encoding=ENCODING) as handle: 

144 handle.write('\n'.join(binder) + '\n') 

145 return binder 

146 

147 

148@no_type_check 

149def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]: 

150 """Best effort loading of changes data. 

151 

152 Examples: 

153 

154 >>> aspects = {gat.KEY_CHANGES: 'missing-file'} 

155 >>> process_changes(aspects) 

156 1 

157 

158 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

159 >>> changes_name = 'empty-as-changes.yml' 

160 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)} 

161 >>> process_changes(aspects) 

162 1 

163 

164 >>> DOC_BASE = pathlib.Path('.') 

165 >>> aspects = {gat.KEY_CHANGES: __file__} 

166 >>> process_changes(aspects) 

167 1 

168 

169 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

170 >>> changes_name = 'space-as-changes.yml' 

171 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)} 

172 >>> process_changes(aspects) 

173 1 

174 """ 

175 changes_path = DOC_BASE / aspects[gat.KEY_CHANGES] 

176 if not changes_path.is_file() or not changes_path.stat().st_size: 

177 log.error(f'destructure failed to find non-empty changes file at {changes_path}') 

178 return 1 

179 if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'): 

180 log.error(f'changes file format per suffix ({changes_path.suffix}) not supported') 

181 return 1 

182 changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json' 

183 with open(changes_path, 'rt', encoding=ENCODING) as handle: 

184 changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle) 

185 if not changes:  [branch 185 ↛ 186 not taken: the condition on line 185 was never true]

186 log.error(f'empty changes file? Please add changes data to ({changes_path})') 

187 return 1 

188 if changes_channel == 'yaml': 

189 with open('changes.yml', 'wt', encoding=ENCODING) as handle: 

190 yaml.dump(changes, handle, default_flow_style=False) 

191 else: 

192 with open('changes.json', 'wt', encoding=ENCODING) as handle: 

193 json.dump(changes, handle, indent=2) 

194 return changes 

195 

196 

197@no_type_check 

198def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]: 

199 """Parse a markdown image line within our conventions into caption, src, alt, and optional rest. 

200 

201 Examples: 

202 

203 >>> t = '' 

204 >>> parse_markdown_image(t) 

205 ('', '', '', '') 

206 

207 >>> t = '![]()' 

208 >>> parse_markdown_image(t) 

209 ('', '', '', '![]()') 

210 

211 >>> t = '![a](b "c")' 

212 >>> parse_markdown_image(t) 

213 ('a', 'b', 'c', '') 

214 

215 >>> t = '![a](liitos/placeholders/this-resource-is-missing.png "c")' 

216 >>> parse_markdown_image(t) 

217 ('a', 'liitos/placeholders/this-resource-is-missing.png', 'c', '') 

218 """ 

219 invalid_marker = ('', '', '', text_line) 

220 

221 exclam = '!' 

222 osb = '[' 

223 if not text_line or not text_line.startswith(f'{exclam}{osb}'): 

224 log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>') 

225 return invalid_marker 

226 

227 csb = ']' 

228 osb_cnt = text_line.count(osb) 

229 csb_cnt = text_line.count(csb) 

230 if osb_cnt + csb_cnt < 2: 

231 log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

232 return invalid_marker 

233 if osb_cnt != csb_cnt: 

234 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

235 

236 orb = '(' 

237 cap_src_boundary = f'{csb}{orb}' 

238 if cap_src_boundary not in text_line: 

239 log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>') 

240 return invalid_marker 

241 

242 crb = ')' 

243 orb_cnt = text_line.count(orb) 

244 crb_cnt = text_line.count(crb) 

245 if orb_cnt + crb_cnt < 2: 

246 log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

247 return invalid_marker 

248 if orb_cnt != crb_cnt: 

249 log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

250 

251 quo = '"' 

252 quo_cnt = text_line.count(quo) 

253 if quo_cnt < 2: 

254 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

255 if quo_cnt % 2: 

256 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

257 

258 sp = ' ' 

259 sp_cnt = text_line.count(sp) 

260 if not sp_cnt: 

261 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

262 

263 dot = '.' 

264 sla = '/' 

265 abs_path_indicator = f'{csb}{orb}{sla}' 

266 may_have_abs_path = abs_path_indicator in text_line 

267 if may_have_abs_path: 

268 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>') 

269 naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}' 

270 may_have_upwards_path = naive_upwards_path_indicator in text_line 

271 if may_have_upwards_path: 

272 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>') 

273 

274 log.info('- parsing the markdown image text line ...') 

275 if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt: 

276 # The regex is not safe for orb inside caption 

277 left, right = text_line.split(cap_src_boundary, 1) 

278 match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right) 

279 if not match_right: 

280 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>') 

281 return invalid_marker 

282 

283 parts = match_right.groupdict() 

284 cap = left[2:] 

285 if not cap: 

286 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>') 

287 cap = CAP_INJECTOR_HACK 

288 

289 src = parts['src'] 

290 alt = parts['alt'] 

291 rest = parts['rest'] 

292 if orb in alt or crb in alt: 

293 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>') 

294 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})') 

295 

296 return cap, src, alt, rest 

297 

298 match = MD_IMG_PATTERN.match(text_line) 

299 if not match: 

300 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>') 

301 return invalid_marker 

302 

303 parts = match.groupdict() 

304 cap = parts['cap'] 

305 if not cap: 

306 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>') 

307 cap = CAP_INJECTOR_HACK 

308 

309 src = parts['src'] 

310 alt = parts['alt'] 

311 rest = parts['rest'] 

312 if orb in alt or crb in alt:  [branch 312 ↛ 313 not taken: the condition on line 312 was never true]

313 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>') 

314 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})') 

315 

316 return cap, src, alt, rest 

317 
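"""
Illustrative sketch of the right-split strategy: a parenthesis inside the caption
defeats the full-line regex, so the line is split at the ]( boundary instead (the
path images/x.png is hypothetical):

>>> parse_markdown_image('![left (detail)](images/x.png "cap")')
('left (detail)', 'images/x.png', 'cap', '')
"""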

318 

319@no_type_check 

320def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str: 

321 """YES.""" 

322 cap, src, alt, rest = parse_markdown_image(text_line) 

323 if not src:  [branch 323 ↛ 324 not taken: the condition on line 323 was never true]

324 log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>') 

325 return text_line 

326 

327 img_path = str((pathlib.Path(upstream).parent / src).resolve()).replace(root, '') 

328 collector.append(img_path) 

329 img_hack = img_path 

330 if f'/{IMAGES_FOLDER}' in img_path: 

331 img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1] 

332 elif f'/{DIAGRAMS_FOLDER}' in img_path: 

333 img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1] 

334 

335 if img_hack != img_path: 

336 log.info(f'{img_hack} <--- OK? --- {img_path}') 

337 

338 alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"' 

339 belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}' 

340 log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-') 

341 return belte_og_seler 

342 
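"""
Illustrative sketch of adapt_image rewriting an upwards image reference onto the
shared images/ folder; the part path part/a.md is hypothetical and the source is
resolved relative to the current working directory passed as root:

>>> collected = []
>>> adapt_image(
...     '![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime")',
...     collected,
...     'part/a.md',
...     str(pathlib.Path.cwd()) + '/',
... )
'![Alt Text Dot Dot Lime](images/lime.png "Caption Text Dot Dot Lime")'
>>> collected
['images/lime.png']
"""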

343 

344@no_type_check 

345def harvest_include( 

346 text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str 

347) -> None: 

348 r"""TODO. 

349 

350 Examples: 

351 

352 >>> text = 'baz\n\\include{c}\nquux' 

353 >>> slot = 0 

354 >>> regions = {SLASH: [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]} 

355 >>> tr = treelib.Tree() 

356 >>> root = SLASH 

357 >>> tr.create_node(root, root) 

358 Node(tag=/, identifier=/, data=None) 

359 >>> harvest_include(text, slot, regions, tr, root) 

360 >>> print(tr) 

361 / 

362 └── /c} 

363 quux 

364 <BLANKLINE> 

365 """ 

366 include_local = text_line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip() 

367 include = str(pathlib.Path(parent).parent / include_local) 

368 regions[parent].append(((slot, slot), include)) 

369 tree.create_node(include, include, parent=parent) 

370 

371 

372@no_type_check 

373def rollup( 

374 jobs: list[list[str]], 

375 docs: dict[str, list[str]], 

376 regions: dict[str, list[tuple[tuple[int, int], str]]], 

377 flat: dict[str, str], 

378) -> list[list[str]]: 

379 r"""TODO. 

380 

381 Examples: 

382 

383 >>> jobs = [['a', 'b'], ['b', 'c']] 

384 >>> docs = {'a': ['a1', 'a2'], 'b': ['b1', 'b2'], 'c': ['c1', 'c2', 'c3']} 

385 >>> regions = {'a': [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]} 

386 >>> flat = {'a': 'a1\na2', 'b': 'b1\nb2', 'c': 'c1\nc2\nc3'} 

387 >>> rollup(jobs, docs, regions, flat) 

388 [[], []] 

389 >>> flat 

390 {'a': 'b1\nb2\n', 'b': 'c1\nc2\nc3\n', 'c': 'c1\nc2\nc3'} 

391 

392 >>> jobs = [['/', 'b'], ['/', 'c']] 

393 >>> docs, regions, flat = {}, {}, {'baz': 'quux'} 

394 >>> rollup(jobs, docs, regions, flat) 

395 [[]] 

396 >>> flat 

397 {'baz': 'quux'} 

398 """ 

399 tackle = [those[0] for those in jobs if those and those[0] != SLASH] 

400 if tackle: 

401 log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(tackle)))}) remaining') 

402 else: 

403 return [[]] 

404 for that in tackle: 

405 buf = [] 

406 for slot, line in enumerate(docs[that]): 

407 special = False 

408 the_first = False 

409 the_include = '' 

410 for pair, include in regions[that]: 

411 low, high = pair 

412 if low <= slot <= high: 

413 special = True 

414 if low == slot: 

415 the_first = True 

416 the_include = include 

417 if not special: 

418 buf.append(line) 

419 continue 

420 if the_first: 

421 buf.append(flat[the_include]) 

422 flat[that] = '\n'.join(buf) + '\n' 

423 

424 return [[job for job in chain if job not in flat] for chain in jobs] 

425 
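r"""
Illustrative sketch of the fixed point iteration that concatenate below drives:
repeated rollup calls flatten a bottom-up chain until only empty chains remain
(the part names a, b, and c and their contents are hypothetical):

>>> docs = {'a': ['before', 'INC B', 'after'], 'b': ['INC C'], 'c': ['c1', 'c2']}
>>> regions = {'a': [((1, 1), 'b')], 'b': [((0, 0), 'c')], 'c': []}
>>> flat = {}
>>> todo = [['c', 'b', 'a']]
>>> while todo != [[]]:
...     todo = rollup(todo, docs, regions, flat)
>>> flat['a']
'before\nc1\nc2\n\n\nafter\n'
"""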

426 

427@no_type_check 

428def collect_assets( 

429 collector: list[str], 

430 doc_base: Union[PathLike, None] = None, 

431 images_folder: Union[PathLike, None] = None, 

432 diagrams_folder: Union[PathLike, None] = None, 

433) -> None: 

434 """TODO 

435 

436 Examples: 

437 

438 >>> c = ['foo'] 

439 >>> collect_assets(c) 

440 

441 >>> import tempfile 

442 >>> with tempfile.TemporaryDirectory() as imaf: 

443 ... c = [imaf + 'foo'] 

444 ... collect_assets(c, doc_base='.', images_folder=imaf) 

445 

446 >>> import tempfile 

447 >>> with tempfile.TemporaryDirectory() as imaf: 

448 ... with tempfile.TemporaryDirectory() as diaf: 

449 ... c = [imaf + 'foo', diaf + 'bar'] 

450 ... collect_assets(c, doc_base='.', images_folder=imaf, diagrams_folder=diaf) 

451 

452 >>> import tempfile 

453 >>> with tempfile.TemporaryDirectory() as imaf: 

454 ... ima = pathlib.Path(imaf) / 'images' 

455 ... ima.touch() 

456 ... with tempfile.TemporaryDirectory() as diaf: 

457 ... dia = pathlib.Path(diaf) / 'diagrams' 

458 ... dia.touch() 

459 ... c = [str(ima / 'foo'), str(dia / 'bar')] 

460 ... collect_assets(c, doc_base='.', images_folder=ima, diagrams_folder=dia) 

461 """ 

462 doc_base = pathlib.Path(doc_base) if doc_base else DOC_BASE 

463 images_folder = str(images_folder) if images_folder else IMAGES_FOLDER 

464 diagrams_folder = str(diagrams_folder) if diagrams_folder else DIAGRAMS_FOLDER 

465 

466 images = pathlib.Path(images_folder) 

467 diagrams = pathlib.Path(diagrams_folder) 

468 for img_path in collector: 

469 if images_folder in img_path: 

470 if not images.is_dir(): 

471 try: 

472 images.mkdir(parents=True, exist_ok=True) 

473 except FileExistsError as err: 

474 log.error(f'failed to create {images} - detail: {err}') 

475 source_asset = doc_base / img_path 

476 target_asset = images / pathlib.Path(img_path).name 

477 try: 

478 shutil.copy(source_asset, target_asset) 

479 except FileNotFoundError as err: 

480 log.error(err) 

481 code, msg = plh.dump_placeholder(target_asset) 

482 log.warning(msg) if code else log.info(msg) 

483 except NotADirectoryError as err: 

484 log.error(err) 

485 code, msg = plh.dump_placeholder(target_asset) 

486 log.warning(msg) if code else log.info(msg) 

487 continue 

488 if diagrams_folder in img_path: 

489 if not diagrams.is_dir(): 

490 try: 

491 diagrams.mkdir(parents=True, exist_ok=True) 

492 except FileExistsError as err: 

493 log.error(f'failed to create {diagrams} - detail: {err}') 

494 source_asset = doc_base / img_path 

495 target_asset = diagrams / pathlib.Path(img_path).name 

496 try: 

497 shutil.copy(source_asset, target_asset) 

498 except FileNotFoundError as err: 

499 log.error(err) 

500 code, msg = plh.dump_placeholder(target_asset) 

501 log.warning(msg) if code else log.info(msg) 

502 except NotADirectoryError as err: 

503 log.error(err) 

504 code, msg = plh.dump_placeholder(target_asset) 

505 log.warning(msg) if code else log.info(msg) 

506 

507 

508@no_type_check 

509def concatenate( 

510 doc_root: Union[str, pathlib.Path], 

511 structure_name: str, 

512 target_key: str, 

513 facet_key: str, 

514 options: dict[str, Union[bool, str]], 

515) -> int: 

516 """Later alligator. 

517 

518 Examples: 

519 

520 >>> restore_cwd = os.getcwd() 

521 >>> dr = '.' 

522 >>> sn = 'foo' 

523 >>> tk = '' 

524 >>> fk = '' 

525 >>> op = {'bar': True} 

526 >>> concatenate(dr, sn, tk, fk, op, ) 

527 2 

528 >>> os.chdir(restore_cwd) 

529 

530 >>> restore_cwd = os.getcwd() 

531 >>> dr = 'example/tuna' 

532 >>> sn = 'structure.yml' 

533 >>> tk = 'prod_kind' 

534 >>> fk = 'non-existing-facet-key' 

535 >>> op = {'bar': True} 

536 >>> concatenate(dr, sn, tk, fk, op) 

537 1 

538 >>> os.chdir(restore_cwd) 

539 

540 >>> restore_cwd = os.getcwd() 

541 >>> dr = 'test/fixtures/basic/' 

542 >>> sn = 'structure.yml' 

543 >>> tk = 'abc' 

544 >>> fk = 'missing' 

545 >>> op = {'bar': True} 

546 >>> concatenate(dr, sn, tk, fk, op) 

547 2 

548 >>> os.chdir(restore_cwd) 

549 

550 >>> restore_cwd = os.getcwd() 

551 >>> dr = 'example/tuna' 

552 >>> sn = 'structure.yml' 

553 >>> tk = 'prod_kind' 

554 >>> fk = 'tuna' 

555 >>> op = {'bar': True} 

556 >>> concatenate(dr, sn, tk, fk, op) 

557 0 

558 >>> os.chdir(restore_cwd) 

559 

560 >>> restore_cwd = os.getcwd() 

561 >>> dr = 'example/tuna' 

562 >>> sn = 'structure.yml' 

563 >>> tk = 'prod_kind' 

564 >>> fk = 'tuna' 

565 >>> op = {'bar': True} 

566 >>> try: 

567 ... code = concatenate(dr, sn, tk, fk, op) 

568 ... except FileNotFoundError: 

569 ... code = -1 

570 >>> os.chdir(restore_cwd) 

571 >>> code 

572 0 

573 

574 >>> restore_cwd = os.getcwd() 

575 >>> dr = 'example/ejected-templates' 

576 >>> sn = 'structure.yml' 

577 >>> tk = 'prod_kind' 

578 >>> fk = 'ejected-templates' 

579 >>> op = {'bar': True} 

580 >>> try: 

581 ... code = concatenate(dr, sn, tk, fk, op) 

582 ... except FileNotFoundError: 

583 ... code = -1 

584 >>> os.chdir(restore_cwd) 

585 >>> code 

586 0 

587 

588 >>> restore_cwd = os.getcwd() 

589 >>> dr = 'example/ejected-templates' 

590 >>> sn = 'structure.yml' 

591 >>> tk = 'prod_kind' 

592 >>> fk = 'ejected-templates-borked' 

593 >>> op = {'bar': True} 

594 >>> try: 

595 ... code = concatenate(dr, sn, tk, fk, op) 

596 ... except FileNotFoundError: 

597 ... code = -1 

598 >>> os.chdir(restore_cwd) 

599 >>> code 

600 0 

601 

602 >>> restore_cwd = os.getcwd() 

603 >>> dr = 'example/tuna' 

604 >>> sn = 'structure.yml' 

605 >>> tk = 'prod_kind' 

606 >>> fk = 'tuna' 

607 >>> op = {'bar': True} 

608 >>> abs_here = pathlib.Path().resolve() 

609 >>> try: 

610 ... code = concatenate(dr, sn, tk, fk, op) 

611 ... except FileNotFoundError: 

612 ... code = -1 

613 >>> os.chdir(restore_cwd) 

614 >>> code 

615 0 

616 

617 """ 

618 log.info(LOG_SEPARATOR) 

619 log.info('entered concat function ...') 

620 target_code = target_key 

621 facet_code = facet_key 

622 if not facet_code.strip() or not target_code.strip(): 

623 log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes') 

624 return 2 

625 

626 log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request') 

627 

628 structure, asset_map = gat.prelude( 

629 doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat' 

630 ) 

631 log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)') 

632 rel_concat_folder_path = pathlib.Path('render/pdf/') 

633 rel_concat_folder_path.mkdir(parents=True, exist_ok=True) 

634 os.chdir(rel_concat_folder_path) 

635 log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)') 

636 

637 ok, aspect_map = too.load_target(target_code, facet_code) 

638 if not ok or not aspect_map: 

639 return 0 if ok else 1 

640 

641 approvals = process_approvals(aspect_map) 

642 if isinstance(approvals, int): 

643 return 2 

644 binder = process_binder(aspect_map) 

645 if isinstance(binder, int):  [branch 645 ↛ 646 not taken: the condition on line 645 was never true]

646 return 3 

647 changes = process_changes(aspect_map) 

648 if isinstance(changes, int):  [branch 648 ↛ 649 not taken: the condition on line 648 was never true]

649 return 4 

650 metadata = met.load(aspect_map) 

651 if isinstance(metadata, int):  [branch 651 ↛ 652 not taken: the condition on line 651 was never true]

652 return 5 

653 

654 root = SLASH 

655 root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH 

656 tree = treelib.Tree() 

657 tree.create_node(root, root) 

658 documents = {} 

659 insert_regions = {} 

660 img_collector = [] 

661 log.info(LOG_SEPARATOR) 

662 log.info('processing binder ...') 

663 for entry in binder: 

664 ref_path = DOC_BASE / entry 

665 log.debug(f'- {entry} as {ref_path}') 

666 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

667 documents[entry] = [line.rstrip() for line in handle.readlines()] 

668 insert_regions[entry] = [] 

669 in_region = False 

670 begin, end = 0, 0 

671 include = '' 

672 tree.create_node(entry, entry, parent=root) 

673 for slot, line in enumerate(documents[entry]): 

674 if line.startswith(IMG_LINE_STARTSWITH): 

675 documents[entry][slot] = adapt_image(line, img_collector, entry, root_path) 

676 log.debug(f'{slot :02d}|{line.rstrip()}') 

677 if not in_region: 

678 if line.startswith(READ_SLOT_FENCE_BEGIN): 

679 in_region = True 

680 begin = slot 

681 continue 

682 if line.startswith(INCLUDE_SLOT): 

683 include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip() 

684 insert_regions[entry].append(((slot, slot), include)) 

685 tree.create_node(include, include, parent=entry) 

686 include = '' 

687 continue 

688 if in_region: 

689 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

690 include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

691 elif line.startswith(READ_SLOT_FENCE_END): 

692 end = slot 

693 insert_regions[entry].append(((begin, end), include)) 

694 tree.create_node(include, include, parent=entry) 

695 in_region = False 

696 begin, end = 0, 0 

697 include = '' 

698 

699 for coords, include in insert_regions[entry]: # include is anchored on DOC_BASE 

700 ref_path = DOC_BASE / include 

701 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

702 documents[include] = [line.rstrip() for line in handle.readlines()] 

703 insert_regions[include] = [] 

704 in_region = False 

705 begin, end = 0, 0 

706 sub_include = '' 

707 for slot, line in enumerate(documents[include]): 

708 if line.startswith(IMG_LINE_STARTSWITH):  [branch 708 ↛ 709 not taken: the condition on line 708 was never true]

709 documents[include][slot] = adapt_image(line, img_collector, include, root_path) 

710 log.debug(f'{slot :02d}|{line.rstrip()}') 

711 if not in_region: 

712 if line.startswith(READ_SLOT_FENCE_BEGIN): 

713 in_region = True 

714 begin = slot 

715 continue 

716 if line.startswith(INCLUDE_SLOT): 

717 harvest_include(line, slot, insert_regions, tree, include) 

718 continue 

719 if in_region: 

720 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

721 sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

722 sub_include = str(pathlib.Path(include).parent / sub_include) 

723 elif line.startswith(READ_SLOT_FENCE_END): 

724 end = slot 

725 insert_regions[include].append(((begin, end), sub_include)) 

726 tree.create_node(sub_include, sub_include, parent=include) 

727 in_region = False 

728 begin, end = 0, 0 

729 sub_include = '' 

730 

731 for coords, sub_include in insert_regions[include]: 

732 ref_path = DOC_BASE / sub_include 

733 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

734 documents[sub_include] = [line.rstrip() for line in handle.readlines()] 

735 insert_regions[sub_include] = [] 

736 in_region = False 

737 begin, end = 0, 0 

738 sub_sub_include = '' 

739 for slot, line in enumerate(documents[sub_include]): 

740 if line.startswith(IMG_LINE_STARTSWITH): 

741 documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path) 

742 log.debug(f'{slot :02d}|{line.rstrip()}') 

743 if not in_region: 

744 if line.startswith(READ_SLOT_FENCE_BEGIN): 

745 in_region = True 

746 begin = slot 

747 continue 

748 if line.startswith(INCLUDE_SLOT):  [branch 748 ↛ 749 not taken: the condition on line 748 was never true]

749 harvest_include(line, slot, insert_regions, tree, sub_include) 

750 continue 

751 if in_region: 

752 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

753 sub_sub_include = ( 

754 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

755 ) 

756 sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include) 

757 elif line.startswith(READ_SLOT_FENCE_END): 

758 end = slot 

759 insert_regions[sub_include].append(((begin, end), sub_sub_include)) 

760 tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include) 

761 in_region = False 

762 begin, end = 0, 0 

763 sub_sub_include = '' 

764 

765 for coords, sub_sub_include in insert_regions[sub_include]: 

766 ref_path = DOC_BASE / sub_sub_include 

767 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

768 documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()] 

769 insert_regions[sub_sub_include] = [] 

770 in_region = False 

771 begin, end = 0, 0 

772 sub_sub_sub_include = '' 

773 for slot, line in enumerate(documents[sub_sub_include]): 

774 if line.startswith(IMG_LINE_STARTSWITH): 

775 documents[sub_sub_include][slot] = adapt_image( 

776 line, img_collector, sub_sub_include, root_path 

777 ) 

778 log.debug(f'{slot :02d}|{line.rstrip()}') 

779 if not in_region:  [branch 779 ↛ 787 not taken: the condition on line 779 was always true]

780 if line.startswith(READ_SLOT_FENCE_BEGIN):  [branch 780 ↛ 781 not taken: the condition on line 780 was never true]

781 in_region = True 

782 begin = slot 

783 continue 

784 if line.startswith(INCLUDE_SLOT):  [branch 784 ↛ 785 not taken: the condition on line 784 was never true]

785 harvest_include(line, slot, insert_regions, tree, sub_sub_include) 

786 continue 

787 if in_region:  [branch 787 ↛ 788 not taken: the condition on line 787 was never true]

788 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

789 sub_sub_sub_include = ( 

790 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

791 ) 

792 sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include) 

793 elif line.startswith(READ_SLOT_FENCE_END): 

794 end = slot 

795 insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include)) 

796 tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include) 

797 in_region = False 

798 begin, end = 0, 0 

799 sub_sub_sub_include = '' 

800 

801 for coords, sub_sub_sub_include in insert_regions[sub_sub_include]: 

802 ref_path = DOC_BASE / sub_sub_sub_include 

803 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

804 documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()] 

805 insert_regions[sub_sub_sub_include] = [] 

806 in_region = False 

807 begin, end = 0, 0 

808 sub_sub_sub_sub_include = '' 

809 for slot, line in enumerate(documents[sub_sub_sub_include]): 

810 if line.startswith(IMG_LINE_STARTSWITH): 

811 documents[sub_sub_sub_include][slot] = adapt_image( 

812 line, img_collector, sub_sub_sub_include, root_path 

813 ) 

814 log.debug(f'{slot :02d}|{line.rstrip()}') 

815 if not in_region:  [branch 815 ↛ 823 not taken: the condition on line 815 was always true]

816 if line.startswith(READ_SLOT_FENCE_BEGIN):  [branch 816 ↛ 817 not taken: the condition on line 816 was never true]

817 in_region = True 

818 begin = slot 

819 continue 

820 if line.startswith(INCLUDE_SLOT):  [branch 820 ↛ 821 not taken: the condition on line 820 was never true]

821 harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include) 

822 continue 

823 if in_region:  [branch 823 ↛ 824 not taken: the condition on line 823 was never true]

824 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

825 sub_sub_sub_sub_include = ( 

826 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

827 ) 

828 sub_sub_sub_sub_include = str( 

829 pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include 

830 ) 

831 elif line.startswith(READ_SLOT_FENCE_END): 

832 end = slot 

833 insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include)) 

834 tree.create_node( 

835 sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include 

836 ) 

837 in_region = False 

838 begin, end = 0, 0 

839 sub_sub_sub_sub_include = '' 

840 

841 top_down_paths = tree.paths_to_leaves() 

842 bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths] 

843 log.info(LOG_SEPARATOR) 

844 log.info('resulting tree:') 

845 for edge in str(tree).split(NL): 

846 log.info(edge) 

847 

848 log.info(LOG_SEPARATOR) 

849 log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:') 

850 for num, leaf_path in enumerate(bottom_up_paths): 

851 the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}' 

852 log.info(f'{num :2d}: {the_way_up}') 

853 

854 concat = {} 

855 log.info(LOG_SEPARATOR) 

856 log.info(f'dependencies for the {len(insert_regions)} document parts:') 

857 for key, regions in insert_regions.items(): 

858 num_in = len(regions) 

859 dashes = '-' * num_in 

860 incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )' 

861 indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}' 

862 log.info(f'- part {key} {indicator}') 

863 for region in regions: 

864 between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}' 

865 insert = f'include fragment {region[1]}' 

866 log.info(f' + {between} {insert}') 

867 if not regions: # No includes 

868 concat[key] = '\n'.join(documents[key]) + '\n' 

869 log.info(f' * did concat {key} document for insertion') 

870 

871 chains = [leaf_path for leaf_path in bottom_up_paths] 

872 log.info(LOG_SEPARATOR) 

873 log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:') 

874 todo = [[job for job in chain if job not in concat] for chain in chains] 

875 while todo != [[]]: 

876 todo = rollup(todo, documents, insert_regions, concat) 

877 

878 log.info(LOG_SEPARATOR) 

879 log.info('writing final concat markdown to document.md') 

880 with open('document.md', 'wt', encoding=ENCODING) as handle: 

881 handle.write('\n'.join(concat[bind] for bind in binder) + '\n') 

882 

883 log.info(LOG_SEPARATOR) 

884 log.info('collecting assets (images and diagrams)') 

885 collect_assets(img_collector) 

886 log.info(LOG_SEPARATOR) 

887 log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)') 

888 log.info(LOG_SEPARATOR) 

889 log.info('processing complete - SUCCESS') 

890 log.info(LOG_SEPARATOR) 

891 return 0