Coverage for liitos/concat.py: 90.01%

515 statements  

coverage.py v7.6.10, created at 2025-01-05 17:22:35 +00:00

1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs.""" 

2 

3import json 

4import os 

5import pathlib 

6import re 

7import shutil 

8from typing import Union, no_type_check 

9 

10import treelib # type: ignore 

11import yaml 

12 

13import liitos.gather as gat 

14import liitos.meta as met 

15import liitos.placeholder as plh 

16import liitos.tools as too 

17from liitos import ENCODING, LOG_SEPARATOR, PathLike, log 

18 

19ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC' 

20CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT' 

21DOC_BASE = pathlib.Path('..', '..') 

22STRUCTURE_PATH = DOC_BASE / 'structure.yml' 

23SLASH = '/' 

24IMAGES_FOLDER = 'images/' 

25DIAGRAMS_FOLDER = 'diagrams/' 

26 

27""" 

28```{.python .cb.run} 

29with open('sub/as.md') as fp: 

30 print(fp.read()) 

31``` 

32""" 

33READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}' 

34READ_SLOT_CONTEXT_BEGIN = 'with open(' 

35READ_SLOT_FENCE_END = '```' 
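"""
Illustrative sketch of how a read slot context line reduces to the included path;
it mirrors the handling inside concatenate further below, and the path sub/as.md
is the example from the fence above:

>>> line = "with open('sub/as.md') as fp:"
>>> line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
'sub/as.md'
"""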

36 

37r""" 

38\include{markdown_file_path} 

39""" 

40INCLUDE_SLOT = '\\include{' 
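r"""
Illustrative sketch of how an include slot line reduces to the included path
(mirrors the handling in harvest_include and concatenate below; the path
part/sub.md is hypothetical):

>>> line = r'\include{part/sub.md}'
>>> line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
'part/sub.md'
"""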

41 

42""" 

43![Alt Text Red](images/red.png "Caption Text Red") 

44![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime") 

45![Alt Text Blue](images/blue.png "Caption Text Blue") 

46![Alt Text Sting Red](other/images/red.png "Caption Text Sting Red") 

47""" 

48IMG_LINE_STARTSWITH = '![' 

49MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 

50MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$') 
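"""
Illustrative sketch matching the first example line above against MD_IMG_PATTERN;
the named groups feed parse_markdown_image below:

>>> m = MD_IMG_PATTERN.match('![Alt Text Red](images/red.png "Caption Text Red")')
>>> m.group('cap'), m.group('src'), m.group('alt'), m.group('rest')
('Alt Text Red', 'images/red.png', 'Caption Text Red', '')
"""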

51 

52NL = '\n' 

53 

54 

55@no_type_check 

56def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]: 

57 """Best effort loading of approvals data. 

58 

59 Examples: 

60 

61 >>> aspects = {gat.KEY_APPROVALS: 'missing-file'} 

62 >>> process_approvals(aspects) 

63 1 

64 

65 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

66 >>> approvals_name = 'empty-as-approvals.yml' 

67 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)} 

68 >>> process_approvals(aspects) 

69 1 

70 

71 >>> DOC_BASE = pathlib.Path('.') 

72 >>> aspects = {gat.KEY_APPROVALS: __file__} 

73 >>> process_approvals(aspects) 

74 1 

75 

76 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

77 >>> approvals_name = 'space-as-approvals.yml' 

78 >>> aspects = {gat.KEY_APPROVALS: str(DOC_BASE / approvals_name)} 

79 >>> process_approvals(aspects) 

80 1 

81 """ 

82 approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS] 

83 if not approvals_path.is_file() or not approvals_path.stat().st_size: 

84 log.error(f'destructure failed to find non-empty approvals file at {approvals_path}') 

85 return 1 

86 if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'): 

87 log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported') 

88 return 1 

89 approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json' 

90 with open(approvals_path, 'rt', encoding=ENCODING) as handle: 

91 approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle) 

92 if not approvals:  [branch 92 ↛ 93 not taken: the condition on line 92 was never true]

93 log.error(f'empty approvals file? Please add approvals to ({approvals_path})') 

94 return 1 

95 if approvals_channel == 'yaml': 

96 with open('approvals.yml', 'wt', encoding=ENCODING) as handle: 

97 yaml.dump(approvals, handle, default_flow_style=False) 

98 else: 

99 with open('approvals.json', 'wt', encoding=ENCODING) as handle: 

100 json.dump(approvals, handle, indent=2) 

101 return approvals 

102 

103 

104@no_type_check 

105def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]: 

106 """Best effort loading of binder data. 

107 

108 Examples: 

109 

110 >>> aspects = {gat.KEY_BIND: 'missing-file'} 

111 >>> process_binder(aspects) 

112 1 

113 

114 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

115 >>> binder_name = 'empty-as-bind.txt' 

116 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)} 

117 >>> process_binder(aspects) 

118 1 

119 

120 >>> DOC_BASE = pathlib.Path('.') 

121 >>> aspects = {gat.KEY_BIND: __file__} 

122 >>> process_binder(aspects) 

123 1 

124 

125 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

126 >>> binder_name = 'space-as-bind.txt' 

127 >>> aspects = {gat.KEY_BIND: str(DOC_BASE / binder_name)} 

128 >>> process_binder(aspects) 

129 1 

130 """ 

131 bind_path = DOC_BASE / aspects[gat.KEY_BIND] 

132 if not bind_path.is_file() or not bind_path.stat().st_size: 

133 log.error(f'destructure failed to find non-empty bind file at {bind_path}') 

134 return 1 

135 if bind_path.suffix.lower() not in ('.txt',): 

136 log.error(f'bind file format per suffix ({bind_path.suffix}) not supported') 

137 return 1 

138 with open(bind_path, 'rt', encoding=ENCODING) as handle: 

139 binder = [line.strip() for line in handle.readlines() if line.strip()] 

140 if not binder:  [branch 140 ↛ 141 not taken: the condition on line 140 was never true]

141 log.error(f'empty bind file? Please add component paths to ({bind_path})') 

142 return 1 

143 with open('bind.txt', 'wt', encoding=ENCODING) as handle: 

144 handle.write('\n'.join(binder) + '\n') 

145 return binder 

146 

147 

148@no_type_check 

149def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]: 

150 """Best effort loading of changes data. 

151 

152 Examples: 

153 

154 >>> aspects = {gat.KEY_CHANGES: 'missing-file'} 

155 >>> process_changes(aspects) 

156 1 

157 

158 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

159 >>> changes_name = 'empty-as-changes.yml' 

160 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)} 

161 >>> process_changes(aspects) 

162 1 

163 

164 >>> DOC_BASE = pathlib.Path('.') 

165 >>> aspects = {gat.KEY_CHANGES: __file__} 

166 >>> process_changes(aspects) 

167 1 

168 

169 >>> DOC_BASE = pathlib.Path('..') / 'test/fixtures/basic/' 

170 >>> changes_name = 'space-as-changes.yml' 

171 >>> aspects = {gat.KEY_CHANGES: str(DOC_BASE / changes_name)} 

172 >>> process_changes(aspects) 

173 1 

174 """ 

175 changes_path = DOC_BASE / aspects[gat.KEY_CHANGES] 

176 if not changes_path.is_file() or not changes_path.stat().st_size: 

177 log.error(f'destructure failed to find non-empty changes file at {changes_path}') 

178 return 1 

179 if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'): 

180 log.error(f'changes file format per suffix ({changes_path.suffix}) not supported') 

181 return 1 

182 changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json' 

183 with open(changes_path, 'rt', encoding=ENCODING) as handle: 

184 changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle) 

185 if not changes:  [branch 185 ↛ 186 not taken: the condition on line 185 was never true]

186 log.error(f'empty changes file? Please add changes data to ({changes_path})') 

187 return 1 

188 if changes_channel == 'yaml': 

189 with open('changes.yml', 'wt', encoding=ENCODING) as handle: 

190 yaml.dump(changes, handle, default_flow_style=False) 

191 else: 

192 with open('changes.json', 'wt', encoding=ENCODING) as handle: 

193 json.dump(changes, handle, indent=2) 

194 return changes 

195 

196 

197@no_type_check 

198def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]: 

199 """Parse a markdown image line within our conventions into caption, src, alt, and optional rest. 

200 

201 Examples: 

202 

203 >>> t = '' 

204 >>> parse_markdown_image(t) 

205 ('', '', '', '') 

206 

207 >>> t = '![]()' 

208 >>> parse_markdown_image(t) 

209 ('', '', '', '![]()') 

210 

211 >>> t = '![a](b "c")' 

212 >>> parse_markdown_image(t) 

213 ('a', 'b', 'c', '') 

214 

215 >>> t = '![a](liitos/placeholders/this-resource-is-missing.png "c")' 

216 >>> parse_markdown_image(t) 

217 ('a', 'liitos/placeholders/this-resource-is-missing.png', 'c', '') 

218 """ 

219 invalid_marker = ('', '', '', text_line) 

220 

221 exclam = '!' 

222 osb = '[' 

223 if not text_line or not text_line.startswith(f'{exclam}{osb}'): 

224 log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>') 

225 return invalid_marker 

226 

227 csb = ']' 

228 osb_cnt = text_line.count(osb) 

229 csb_cnt = text_line.count(csb) 

230 if osb_cnt + csb_cnt < 2: 

231 log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

232 return invalid_marker 

233 if osb_cnt != csb_cnt: 

234 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

235 

236 orb = '(' 

237 cap_src_boundary = f'{csb}{orb}' 

238 if cap_src_boundary not in text_line: 

239 log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>') 

240 return invalid_marker 

241 

242 crb = ')' 

243 orb_cnt = text_line.count(orb) 

244 crb_cnt = text_line.count(crb) 

245 if orb_cnt + crb_cnt < 2: 

246 log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

247 return invalid_marker 

248 if orb_cnt != crb_cnt: 

249 log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

250 

251 quo = '"' 

252 quo_cnt = text_line.count(quo) 

253 if quo_cnt < 2: 

254 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

255 if quo_cnt % 2: 

256 log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>') 

257 

258 sp = ' ' 

259 sp_cnt = text_line.count(sp) 

260 if not sp_cnt: 

261 log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>') 

262 

263 dot = '.' 

264 sla = '/' 

265 abs_path_indicator = f'{csb}{orb}{sla}' 

266 may_have_abs_path = abs_path_indicator in text_line 

267 if may_have_abs_path: 

268 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>') 

269 naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}' 

270 may_have_upwards_path = naive_upwards_path_indicator in text_line 

271 if may_have_upwards_path: 

272 log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>') 

273 

274 log.info('- parsing the markdown image text line ...') 

275 if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt: 

276 # The regex is not safe for orb inside caption 

277 left, right = text_line.split(cap_src_boundary, 1) 

278 match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right) 

279 if not match_right: 

280 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>') 

281 return invalid_marker 

282 

283 parts = match_right.groupdict() 

284 cap = left[2:] 

285 if not cap: 

286 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>') 

287 cap = CAP_INJECTOR_HACK 

288 

289 src = parts['src'] 

290 alt = parts['alt'] 

291 rest = parts['rest'] 

292 if orb in alt or crb in alt: 

293 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>') 

294 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})') 

295 

296 return cap, src, alt, rest 

297 

298 match = MD_IMG_PATTERN.match(text_line) 

299 if not match: 

300 log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>') 

301 return invalid_marker 

302 

303 parts = match.groupdict() 

304 cap = parts['cap'] 

305 if not cap: 

306 log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>') 

307 cap = CAP_INJECTOR_HACK 

308 

309 src = parts['src'] 

310 alt = parts['alt'] 

311 rest = parts['rest'] 

312 if orb in alt or crb in alt:  [branch 312 ↛ 313 not taken: the condition on line 312 was never true]

313 log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>') 

314 log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})') 

315 

316 return cap, src, alt, rest 

317 
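"""
Illustrative sketch of the right-split strategy: a parenthesis inside the caption
defeats the full-line regex, so the line is split at the ]( boundary instead (the
path images/x.png is hypothetical):

>>> parse_markdown_image('![left (detail)](images/x.png "cap")')
('left (detail)', 'images/x.png', 'cap', '')
"""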

318 

319@no_type_check 

320def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str: 

321 """YES.""" 

322 cap, src, alt, rest = parse_markdown_image(text_line) 

323 if not src:  [branch 323 ↛ 324 not taken: the condition on line 323 was never true]

324 log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>') 

325 return text_line 

326 

327 img_path = str((pathlib.Path(upstream).parent / src).resolve()).replace(root, '') 

328 collector.append(img_path) 

329 img_hack = img_path 

330 if f'/{IMAGES_FOLDER}' in img_path: 

331 img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1] 

332 elif f'/{DIAGRAMS_FOLDER}' in img_path: 

333 img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1] 

334 

335 if img_hack != img_path: 

336 log.info(f'{img_hack} <--- OK? --- {img_path}') 

337 

338 alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"' 

339 belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}' 

340 log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-') 

341 return belte_og_seler 

342 
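"""
Illustrative sketch of adapt_image rewriting an upwards image reference onto the
shared images/ folder; the part path part/a.md is hypothetical and the source is
resolved relative to the current working directory passed as root:

>>> collected = []
>>> adapt_image(
...     '![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime")',
...     collected,
...     'part/a.md',
...     str(pathlib.Path.cwd()) + '/',
... )
'![Alt Text Dot Dot Lime](images/lime.png "Caption Text Dot Dot Lime")'
>>> collected
['images/lime.png']
"""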

343 

344@no_type_check 

345def harvest_include( 

346 text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str 

347) -> None: 

348 r"""TODO. 

349 

350 Examples: 

351 

352 >>> text = 'baz\n\\include{c}\nquux' 

353 >>> slot = 0 

354 >>> regions = {SLASH: [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]} 

355 >>> tr = treelib.Tree() 

356 >>> root = SLASH 

357 >>> tr.create_node(root, root) 

358 Node(tag=/, identifier=/, data=None) 

359 >>> harvest_include(text, slot, regions, tr, root) 

360 >>> print(tr) 

361 / 

362 └── /c} 

363 quux 

364 <BLANKLINE> 

365 """ 

366 include_local = text_line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip() 

367 include = str(pathlib.Path(parent).parent / include_local) 

368 regions[parent].append(((slot, slot), include)) 

369 tree.create_node(include, include, parent=parent) 

370 

371 

372@no_type_check 

373def rollup( 

374 jobs: list[list[str]], 

375 docs: dict[str, list[str]], 

376 regions: dict[str, list[tuple[tuple[int, int], str]]], 

377 flat: dict[str, str], 

378) -> list[list[str]]: 

379 r"""TODO. 

380 

381 Examples: 

382 

383 >>> jobs = [['a', 'b'], ['b', 'c']] 

384 >>> docs = {'a': ['a1', 'a2'], 'b': ['b1', 'b2'], 'c': ['c1', 'c2', 'c3']} 

385 >>> regions = {'a': [((0, 1), 'b')], 'b': [((0, 1), 'c')], 'c': [((0, 1), 'cx')]} 

386 >>> flat = {'a': 'a1\na2', 'b': 'b1\nb2', 'c': 'c1\nc2\nc3'} 

387 >>> rollup(jobs, docs, regions, flat) 

388 [[], []] 

389 >>> flat 

390 {'a': 'b1\nb2\n', 'b': 'c1\nc2\nc3\n', 'c': 'c1\nc2\nc3'} 

391 

392 >>> jobs = [['/', 'b'], ['/', 'c']] 

393 >>> docs, regions, flat = {}, {}, {'baz': 'quux'} 

394 >>> rollup(jobs, docs, regions, flat) 

395 [[]] 

396 >>> flat 

397 {'baz': 'quux'} 

398 """ 

399 tackle = [those[0] for those in jobs if those and those[0] != SLASH] 

400 if tackle: 

401 log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(tackle)))}) remaining') 

402 else: 

403 return [[]] 

404 for that in tackle: 

405 buf = [] 

406 for slot, line in enumerate(docs[that]): 

407 special = False 

408 the_first = False 

409 the_include = '' 

410 for pair, include in regions[that]: 

411 low, high = pair 

412 if low <= slot <= high: 

413 special = True 

414 if low == slot: 

415 the_first = True 

416 the_include = include 

417 if not special: 

418 buf.append(line) 

419 continue 

420 if the_first: 

421 buf.append(flat[the_include]) 

422 flat[that] = '\n'.join(buf) + '\n' 

423 

424 return [[job for job in chain if job not in flat] for chain in jobs] 

425 
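r"""
Illustrative sketch of the fixed point iteration that concatenate below drives:
repeated rollup calls flatten a bottom-up chain until only empty chains remain
(the part names a, b, and c and their contents are hypothetical):

>>> docs = {'a': ['before', 'INC B', 'after'], 'b': ['INC C'], 'c': ['c1', 'c2']}
>>> regions = {'a': [((1, 1), 'b')], 'b': [((0, 0), 'c')], 'c': []}
>>> flat = {}
>>> todo = [['c', 'b', 'a']]
>>> while todo != [[]]:
...     todo = rollup(todo, docs, regions, flat)
>>> flat['a']
'before\nc1\nc2\n\n\nafter\n'
"""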

426 

427@no_type_check 

428def collect_assets( 

429 collector: list[str], 

430 doc_base: Union[PathLike, None] = None, 

431 images_folder: Union[PathLike, None] = None, 

432 diagrams_folder: Union[PathLike, None] = None, 

433) -> None: 

434 """TODO 

435 

436 Examples: 

437 

438 >>> c = ['foo'] 

439 >>> collect_assets(c) 

440 

441 >>> import tempfile 

442 >>> with tempfile.TemporaryDirectory() as imaf: 

443 ... c = [imaf + 'foo'] 

444 ... collect_assets(c, doc_base='.', images_folder=imaf) 

445 

446 >>> import tempfile 

447 >>> with tempfile.TemporaryDirectory() as imaf: 

448 ... with tempfile.TemporaryDirectory() as diaf: 

449 ... c = [imaf + 'foo', diaf + 'bar'] 

450 ... collect_assets(c, doc_base='.', images_folder=imaf, diagrams_folder=diaf) 

451 

452 >>> import tempfile 

453 >>> with tempfile.TemporaryDirectory() as imaf: 

454 ... ima = pathlib.Path(imaf) / 'images' 

455 ... ima.touch() 

456 ... with tempfile.TemporaryDirectory() as diaf: 

457 ... dia = pathlib.Path(diaf) / 'diagrams' 

458 ... dia.touch() 

459 ... c = [str(ima / 'foo'), str(dia / 'bar')] 

460 ... collect_assets(c, doc_base='.', images_folder=ima, diagrams_folder=dia) 

461 """ 

462 doc_base = pathlib.Path(doc_base) if doc_base else DOC_BASE 

463 images_folder = str(images_folder) if images_folder else IMAGES_FOLDER 

464 diagrams_folder = str(diagrams_folder) if diagrams_folder else DIAGRAMS_FOLDER 

465 

466 images = pathlib.Path(images_folder) 

467 diagrams = pathlib.Path(diagrams_folder) 

468 for img_path in collector: 

469 if images_folder in img_path: 

470 if not images.is_dir(): 

471 try: 

472 images.mkdir(parents=True, exist_ok=True) 

473 except FileExistsError as err: 

474 log.error(f'failed to create {images} - detail: {err}') 

475 source_asset = doc_base / img_path 

476 target_asset = images / pathlib.Path(img_path).name 

477 try: 

478 shutil.copy(source_asset, target_asset) 

479 except FileNotFoundError as err: 

480 log.error(err) 

481 code, msg = plh.dump_placeholder(target_asset) 

482 log.warning(msg) if code else log.info(msg) 

483 except NotADirectoryError as err: 

484 log.error(err) 

485 code, msg = plh.dump_placeholder(target_asset) 

486 log.warning(msg) if code else log.info(msg) 

487 continue 

488 if diagrams_folder in img_path: 

489 if not diagrams.is_dir(): 

490 try: 

491 diagrams.mkdir(parents=True, exist_ok=True) 

492 except FileExistsError as err: 

493 log.error(f'failed to create {diagrams} - detail: {err}') 

494 source_asset = doc_base / img_path 

495 target_asset = diagrams / pathlib.Path(img_path).name 

496 try: 

497 shutil.copy(source_asset, target_asset) 

498 except FileNotFoundError as err: 

499 log.error(err) 

500 code, msg = plh.dump_placeholder(target_asset) 

501 log.warning(msg) if code else log.info(msg) 

502 except NotADirectoryError as err: 

503 log.error(err) 

504 code, msg = plh.dump_placeholder(target_asset) 

505 log.warning(msg) if code else log.info(msg) 

506 

507 

508@no_type_check 

509def concatenate( 

510 doc_root: Union[str, pathlib.Path], 

511 structure_name: str, 

512 target_key: str, 

513 facet_key: str, 

514 options: dict[str, Union[bool, str]], 

515) -> int: 

516 """Later alligator. 

517 

518 Examples: 

519 

520 >>> restore_cwd = os.getcwd() 

521 >>> dr = '.' 

522 >>> sn = 'foo' 

523 >>> tk = '' 

524 >>> fk = '' 

525 >>> op = {'bar': True} 

526 >>> concatenate(dr, sn, tk, fk, op, ) 

527 2 

528 >>> os.chdir(restore_cwd) 

529 

530 >>> restore_cwd = os.getcwd() 

531 >>> dr = 'example/tuna' 

532 >>> sn = 'structure.yml' 

533 >>> tk = 'prod_kind' 

534 >>> fk = 'non-existing-facet-key' 

535 >>> op = {'bar': True} 

536 >>> concatenate(dr, sn, tk, fk, op) 

537 1 

538 >>> os.chdir(restore_cwd) 

539 

540 >>> restore_cwd = os.getcwd() 

541 >>> dr = 'test/fixtures/basic/' 

542 >>> sn = 'structure.yml' 

543 >>> tk = 'abc' 

544 >>> fk = 'missing' 

545 >>> op = {'bar': True} 

546 >>> concatenate(dr, sn, tk, fk, op) 

547 2 

548 >>> os.chdir(restore_cwd) 

549 

550 >>> restore_cwd = os.getcwd() 

551 >>> dr = 'example/tuna' 

552 >>> sn = 'structure.yml' 

553 >>> tk = 'prod_kind' 

554 >>> fk = 'tuna' 

555 >>> op = {'bar': True} 

556 >>> concatenate(dr, sn, tk, fk, op) 

557 0 

558 >>> os.chdir(restore_cwd) 

559 

560 >>> restore_cwd = os.getcwd() 

561 >>> dr = 'example/tuna' 

562 >>> sn = 'structure.yml' 

563 >>> tk = 'prod_kind' 

564 >>> fk = 'tuna' 

565 >>> op = {'bar': True} 

566 >>> try: 

567 ... code = concatenate(dr, sn, tk, fk, op) 

568 ... except FileNotFoundError: 

569 ... code = -1 

570 >>> os.chdir(restore_cwd) 

571 >>> code 

572 0 

573 

574 >>> restore_cwd = os.getcwd() 

575 >>> dr = 'example/ejected-templates' 

576 >>> sn = 'structure.yml' 

577 >>> tk = 'prod_kind' 

578 >>> fk = 'ejected-templates' 

579 >>> op = {'bar': True} 

580 >>> try: 

581 ... code = concatenate(dr, sn, tk, fk, op) 

582 ... except FileNotFoundError: 

583 ... code = -1 

584 >>> os.chdir(restore_cwd) 

585 >>> code 

586 0 

587 

588 >>> restore_cwd = os.getcwd() 

589 >>> dr = 'example/ejected-templates' 

590 >>> sn = 'structure.yml' 

591 >>> tk = 'prod_kind' 

592 >>> fk = 'ejected-templates-borked' 

593 >>> op = {'bar': True} 

594 >>> try: 

595 ... code = concatenate(dr, sn, tk, fk, op) 

596 ... except FileNotFoundError: 

597 ... code = -1 

598 >>> os.chdir(restore_cwd) 

599 >>> code 

600 0 

601 

602 >>> restore_cwd = os.getcwd() 

603 >>> dr = 'example/tuna' 

604 >>> sn = 'structure.yml' 

605 >>> tk = 'prod_kind' 

606 >>> fk = 'tuna' 

607 >>> op = {'bar': True} 

608 >>> abs_here = pathlib.Path().resolve() 

609 >>> try: 

610 ... code = concatenate(dr, sn, tk, fk, op) 

611 ... except FileNotFoundError: 

612 ... code = -1 

613 >>> os.chdir(restore_cwd) 

614 >>> code 

615 0 

616 

617 """ 

618 log.info(LOG_SEPARATOR) 

619 log.info('entered concat function ...') 

620 target_code = target_key 

621 facet_code = facet_key 

622 if not facet_code.strip() or not target_code.strip(): 

623 log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes') 

624 return 2 

625 

626 log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request') 

627 

628 structure, asset_map = gat.prelude( 

629 doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat' 

630 ) 

631 log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)') 

632 rel_concat_folder_path = pathlib.Path('render/pdf/') 

633 rel_concat_folder_path.mkdir(parents=True, exist_ok=True) 

634 os.chdir(rel_concat_folder_path) 

635 log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)') 

636 

637 ok, aspect_map = too.load_target(target_code, facet_code) 

638 if not ok or not aspect_map: 

639 return 0 if ok else 1 

640 

641 approvals = process_approvals(aspect_map) 

642 if isinstance(approvals, int): 

643 return 2 

644 binder = process_binder(aspect_map) 

645 if isinstance(binder, int):  [branch 645 ↛ 646 not taken: the condition on line 645 was never true]

646 return 3 

647 changes = process_changes(aspect_map) 

648 if isinstance(changes, int):  [branch 648 ↛ 649 not taken: the condition on line 648 was never true]

649 return 4 

650 metadata = met.load(aspect_map) 

651 if isinstance(metadata, int):  [branch 651 ↛ 652 not taken: the condition on line 651 was never true]

652 return 5 

653 

654 root = SLASH 

655 root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH 

656 tree = treelib.Tree() 

657 tree.create_node(root, root) 

658 documents = {} 

659 insert_regions = {} 

660 img_collector = [] 

661 log.info(LOG_SEPARATOR) 

662 log.info('processing binder ...') 

663 for entry in binder: 

664 ref_path = DOC_BASE / entry 

665 log.debug(f'- {entry} as {ref_path}') 

666 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

667 documents[entry] = [line.rstrip() for line in handle.readlines()] 

668 insert_regions[entry] = [] 

669 in_region = False 

670 begin, end = 0, 0 

671 include = '' 

672 tree.create_node(entry, entry, parent=root) 

673 for slot, line in enumerate(documents[entry]): 

674 if line.startswith(IMG_LINE_STARTSWITH): 

675 documents[entry][slot] = adapt_image(line, img_collector, entry, root_path) 

676 log.debug(f'{slot :02d}|{line.rstrip()}') 

677 if not in_region: 

678 if line.startswith(READ_SLOT_FENCE_BEGIN): 

679 in_region = True 

680 begin = slot 

681 continue 

682 if line.startswith(INCLUDE_SLOT): 

683 include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip() 

684 insert_regions[entry].append(((slot, slot), include)) 

685 tree.create_node(include, include, parent=entry) 

686 include = '' 

687 continue 

688 if in_region: 

689 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

690 include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

691 elif line.startswith(READ_SLOT_FENCE_END): 

692 end = slot 

693 insert_regions[entry].append(((begin, end), include)) 

694 tree.create_node(include, include, parent=entry) 

695 in_region = False 

696 begin, end = 0, 0 

697 include = '' 

698 

699 for coords, include in insert_regions[entry]: # include is anchored on DOC_BASE 

700 ref_path = DOC_BASE / include 

701 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

702 documents[include] = [line.rstrip() for line in handle.readlines()] 

703 insert_regions[include] = [] 

704 in_region = False 

705 begin, end = 0, 0 

706 sub_include = '' 

707 for slot, line in enumerate(documents[include]): 

708 if line.startswith(IMG_LINE_STARTSWITH):  [branch 708 ↛ 709 not taken: the condition on line 708 was never true]

709 documents[include][slot] = adapt_image(line, img_collector, include, root_path) 

710 log.debug(f'{slot :02d}|{line.rstrip()}') 

711 if not in_region: 

712 if line.startswith(READ_SLOT_FENCE_BEGIN): 

713 in_region = True 

714 begin = slot 

715 continue 

716 if line.startswith(INCLUDE_SLOT): 

717 harvest_include(line, slot, insert_regions, tree, include) 

718 continue 

719 if in_region: 

720 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

721 sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

722 sub_include = str(pathlib.Path(include).parent / sub_include) 

723 elif line.startswith(READ_SLOT_FENCE_END): 

724 end = slot 

725 insert_regions[include].append(((begin, end), sub_include)) 

726 tree.create_node(sub_include, sub_include, parent=include) 

727 in_region = False 

728 begin, end = 0, 0 

729 sub_include = '' 

730 

731 for coords, sub_include in insert_regions[include]: 

732 ref_path = DOC_BASE / sub_include 

733 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

734 documents[sub_include] = [line.rstrip() for line in handle.readlines()] 

735 insert_regions[sub_include] = [] 

736 in_region = False 

737 begin, end = 0, 0 

738 sub_sub_include = '' 

739 for slot, line in enumerate(documents[sub_include]): 

740 if line.startswith(IMG_LINE_STARTSWITH): 

741 documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path) 

742 log.debug(f'{slot :02d}|{line.rstrip()}') 

743 if not in_region: 

744 if line.startswith(READ_SLOT_FENCE_BEGIN): 

745 in_region = True 

746 begin = slot 

747 continue 

748 if line.startswith(INCLUDE_SLOT):  [branch 748 ↛ 749 not taken: the condition on line 748 was never true]

749 harvest_include(line, slot, insert_regions, tree, sub_include) 

750 continue 

751 if in_region: 

752 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

753 sub_sub_include = ( 

754 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

755 ) 

756 sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include) 

757 elif line.startswith(READ_SLOT_FENCE_END): 

758 end = slot 

759 insert_regions[sub_include].append(((begin, end), sub_sub_include)) 

760 tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include) 

761 in_region = False 

762 begin, end = 0, 0 

763 sub_sub_include = '' 

764 

765 for coords, sub_sub_include in insert_regions[sub_include]: 

766 ref_path = DOC_BASE / sub_sub_include 

767 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

768 documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()] 

769 insert_regions[sub_sub_include] = [] 

770 in_region = False 

771 begin, end = 0, 0 

772 sub_sub_sub_include = '' 

773 for slot, line in enumerate(documents[sub_sub_include]): 

774 if line.startswith(IMG_LINE_STARTSWITH): 

775 documents[sub_sub_include][slot] = adapt_image( 

776 line, img_collector, sub_sub_include, root_path 

777 ) 

778 log.debug(f'{slot :02d}|{line.rstrip()}') 

779 if not in_region:  [branch 779 ↛ 787 not taken: the condition on line 779 was always true]

780 if line.startswith(READ_SLOT_FENCE_BEGIN):  [branch 780 ↛ 781 not taken: the condition on line 780 was never true]

781 in_region = True 

782 begin = slot 

783 continue 

784 if line.startswith(INCLUDE_SLOT):  [branch 784 ↛ 785 not taken: the condition on line 784 was never true]

785 harvest_include(line, slot, insert_regions, tree, sub_sub_include) 

786 continue 

787 if in_region:  [branch 787 ↛ 788 not taken: the condition on line 787 was never true]

788 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

789 sub_sub_sub_include = ( 

790 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

791 ) 

792 sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include) 

793 elif line.startswith(READ_SLOT_FENCE_END): 

794 end = slot 

795 insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include)) 

796 tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include) 

797 in_region = False 

798 begin, end = 0, 0 

799 sub_sub_sub_include = '' 

800 

801 for coords, sub_sub_sub_include in insert_regions[sub_sub_include]: 

802 ref_path = DOC_BASE / sub_sub_sub_include 

803 with open(ref_path, 'rt', encoding=ENCODING) as handle: 

804 documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()] 

805 insert_regions[sub_sub_sub_include] = [] 

806 in_region = False 

807 begin, end = 0, 0 

808 sub_sub_sub_sub_include = '' 

809 for slot, line in enumerate(documents[sub_sub_sub_include]): 

810 if line.startswith(IMG_LINE_STARTSWITH): 

811 documents[sub_sub_sub_include][slot] = adapt_image( 

812 line, img_collector, sub_sub_sub_include, root_path 

813 ) 

814 log.debug(f'{slot :02d}|{line.rstrip()}') 

815 if not in_region:  [branch 815 ↛ 823 not taken: the condition on line 815 was always true]

816 if line.startswith(READ_SLOT_FENCE_BEGIN):  [branch 816 ↛ 817 not taken: the condition on line 816 was never true]

817 in_region = True 

818 begin = slot 

819 continue 

820 if line.startswith(INCLUDE_SLOT):  [branch 820 ↛ 821 not taken: the condition on line 820 was never true]

821 harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include) 

822 continue 

823 if in_region:  [branch 823 ↛ 824 not taken: the condition on line 823 was never true]

824 if line.startswith(READ_SLOT_CONTEXT_BEGIN): 

825 sub_sub_sub_sub_include = ( 

826 line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"') 

827 ) 

828 sub_sub_sub_sub_include = str( 

829 pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include 

830 ) 

831 elif line.startswith(READ_SLOT_FENCE_END): 

832 end = slot 

833 insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include)) 

834 tree.create_node( 

835 sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include 

836 ) 

837 in_region = False 

838 begin, end = 0, 0 

839 sub_sub_sub_sub_include = '' 

840 

841 top_down_paths = tree.paths_to_leaves() 

842 bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths] 

843 log.info(LOG_SEPARATOR) 

844 log.info('resulting tree:') 

845 for edge in str(tree).split(NL): 

846 log.info(edge) 

847 

848 log.info(LOG_SEPARATOR) 

849 log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:') 

850 for num, leaf_path in enumerate(bottom_up_paths): 

851 the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}' 

852 log.info(f'{num :2d}: {the_way_up}') 

853 

854 concat = {} 

855 log.info(LOG_SEPARATOR) 

856 log.info(f'dependencies for the {len(insert_regions)} document parts:') 

857 for key, regions in insert_regions.items(): 

858 num_in = len(regions) 

859 dashes = '-' * num_in 

860 incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )' 

861 indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}' 

862 log.info(f'- part {key} {indicator}') 

863 for region in regions: 

864 between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}' 

865 insert = f'include fragment {region[1]}' 

866 log.info(f' + {between} {insert}') 

867 if not regions: # No includes 

868 concat[key] = '\n'.join(documents[key]) + '\n' 

869 log.info(f' * did concat {key} document for insertion') 

870 

871 chains = [leaf_path for leaf_path in bottom_up_paths] 

872 log.info(LOG_SEPARATOR) 

873 log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:') 

874 todo = [[job for job in chain if job not in concat] for chain in chains] 

875 while todo != [[]]: 

876 todo = rollup(todo, documents, insert_regions, concat) 

877 

878 log.info(LOG_SEPARATOR) 

879 log.info('writing final concat markdown to document.md') 

880 with open('document.md', 'wt', encoding=ENCODING) as handle: 

881 handle.write('\n'.join(concat[bind] for bind in binder) + '\n') 

882 

883 log.info(LOG_SEPARATOR) 

884 log.info('collecting assets (images and diagrams)') 

885 collect_assets(img_collector) 

886 log.info(LOG_SEPARATOR) 

887 log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)') 

888 log.info(LOG_SEPARATOR) 

889 log.info('processing complete - SUCCESS') 

890 log.info(LOG_SEPARATOR) 

891 return 0