1"""Given a target and facet, concatenate a tree of markdown files to a single file rewriting all image refs."""
3import json
4import os
5import pathlib
6import re
7import shutil
8import sys
9from io import StringIO
10from typing import Union, no_type_check
12import treelib # type: ignore
13import yaml
15import liitos.gather as gat
16import liitos.placeholder as plh
17import liitos.tools as too
18from liitos import ENCODING, LOG_SEPARATOR, log

ALT_INJECTOR_HACK = 'INJECTED-ALT-TEXT-TO-TRIGGER-FIGURE-ENVIRONMENT-AROUND-IMAGE-IN-PANDOC'
CAP_INJECTOR_HACK = 'INJECTED-CAP-TEXT-TO-MARK-MISSING-CAPTION-IN-OUTPUT'
DOC_BASE = pathlib.Path('..', '..')
STRUCTURE_PATH = DOC_BASE / 'structure.yml'
SLASH = '/'
IMAGES_FOLDER = 'images/'
DIAGRAMS_FOLDER = 'diagrams/'

"""
```{.python .cb.run}
with open('sub/as.md') as fp:
    print(fp.read())
```
"""
READ_SLOT_FENCE_BEGIN = '```{.python .cb.run}'
READ_SLOT_CONTEXT_BEGIN = 'with open('
READ_SLOT_FENCE_END = '```'

r"""
\include{markdown_file_path}
"""
INCLUDE_SLOT = '\\include{'

"""
![Alt Text Red](images/red.png "Caption Text Red")
![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime")
![Alt Text Blue](images/blue.png "Caption Text Blue")
![Alt Text Sting Red](other/images/red.png "Caption Text Sting Red")
"""
IMG_LINE_STARTSWITH = '!['
MD_IMG_PATTERN = re.compile(r'^!\[(?P<cap>[^(]*)\]\((?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
MD_IMG_PATTERN_RIGHT_SPLIT = re.compile(r'^(?P<src>[^ ]+)\ *\"?(?P<alt>[^\"]*)\"?\)(?P<rest>.*)?$')
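
# Illustrative decomposition (matching the first example line above against MD_IMG_PATTERN):
#
#     ![Alt Text Red](images/red.png "Caption Text Red")
#     --> cap='Alt Text Red', src='images/red.png', alt='Caption Text Red', rest=''
#
# Note the naming convention used throughout this module: the group `cap` captures the text
# between the square brackets and `alt` captures the quoted title string after the source path.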

NL = '\n'


@no_type_check
class RedirectedStdout:
    @no_type_check
    def __init__(self):
        self._stdout = None
        self._string_io = None

    @no_type_check
    def __enter__(self):
        self._stdout = sys.stdout
        sys.stdout = self._string_io = StringIO()
        return self

    @no_type_check
    def __exit__(self, type, value, traceback):
        sys.stdout = self._stdout

    @no_type_check
    def __str__(self):
        return self._string_io.getvalue()
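
# Minimal usage sketch (illustrative only): capture anything printed inside the context
# and forward it to the module logger afterwards.
#
#     with RedirectedStdout() as captured:
#         print('hello from a noisy helper')
#     log.info(str(captured))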


@no_type_check
def process_approvals(aspects: dict[str, str]) -> Union[gat.Approvals, int]:
    """Read the approvals file declared in the aspects map and persist a copy in the working folder."""
    approvals_path = DOC_BASE / aspects[gat.KEY_APPROVALS]
    if not approvals_path.is_file() or not approvals_path.stat().st_size:
        log.error(f'destructure failed to find non-empty approvals file at {approvals_path}')
        return 1
    if approvals_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'approvals file format per suffix ({approvals_path.suffix}) not supported')
        return 1
    approvals_channel = 'yaml' if approvals_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(approvals_path, 'rt', encoding=ENCODING) as handle:
        approvals = yaml.safe_load(handle) if approvals_channel == 'yaml' else json.load(handle)
    if not approvals:
        log.error(f'empty approvals file? Please add approvals to ({approvals_path})')
        return 1
    if approvals_channel == 'yaml':
        with open('approvals.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(approvals, handle, default_flow_style=False)
    else:
        with open('approvals.json', 'wt', encoding=ENCODING) as handle:
            json.dump(approvals, handle, indent=2)
    return approvals


@no_type_check
def process_binder(aspects: dict[str, str]) -> Union[gat.Binder, int]:
    """Read the bind file declared in the aspects map and persist the component list as bind.txt."""
    bind_path = DOC_BASE / aspects[gat.KEY_BIND]
    if not bind_path.is_file() or not bind_path.stat().st_size:
        log.error(f'destructure failed to find non-empty bind file at {bind_path}')
        return 1
    if bind_path.suffix.lower() not in ('.txt',):
        log.error(f'bind file format per suffix ({bind_path.suffix}) not supported')
        return 1
    with open(bind_path, 'rt', encoding=ENCODING) as handle:
        binder = [line.strip() for line in handle.readlines() if line.strip()]
    if not binder:
        log.error(f'empty bind file? Please add component paths to ({bind_path})')
        return 1
    with open('bind.txt', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(binder) + '\n')
    return binder
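
# A bind file is expected to be plain text with one component path per line, relative to
# the document base, e.g. (hypothetical paths):
#
#     part/a.md
#     part/b.md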


@no_type_check
def process_changes(aspects: dict[str, str]) -> Union[gat.Changes, int]:
    """Read the changes file declared in the aspects map and persist a copy in the working folder."""
    changes_path = DOC_BASE / aspects[gat.KEY_CHANGES]
    if not changes_path.is_file() or not changes_path.stat().st_size:
        log.error(f'destructure failed to find non-empty changes file at {changes_path}')
        return 1
    if changes_path.suffix.lower() not in ('.json', '.yaml', '.yml'):
        log.error(f'changes file format per suffix ({changes_path.suffix}) not supported')
        return 1
    changes_channel = 'yaml' if changes_path.suffix.lower() in ('.yaml', '.yml') else 'json'
    with open(changes_path, 'rt', encoding=ENCODING) as handle:
        changes = yaml.safe_load(handle) if changes_channel == 'yaml' else json.load(handle)
    if not changes:
        log.error(f'empty changes file? Please add changes data to ({changes_path})')
        return 1
    if changes_channel == 'yaml':
        with open('changes.yml', 'wt', encoding=ENCODING) as handle:
            yaml.dump(changes, handle, default_flow_style=False)
    else:
        with open('changes.json', 'wt', encoding=ENCODING) as handle:
            json.dump(changes, handle, indent=2)
    return changes


@no_type_check
def process_meta(aspects: dict[str, str]) -> Union[gat.Meta, int]:
    """Read the metadata file declared in the aspects map, apply an optional base import with patches, and persist metadata.yml."""
    meta_path = DOC_BASE / aspects[gat.KEY_META]
    if not meta_path.is_file() or not meta_path.stat().st_size:
        log.error(f'destructure failed to find non-empty meta file at {meta_path}')
        return 1
    if meta_path.suffix.lower() not in ('.yaml', '.yml'):
        log.error(f'meta file format per suffix ({meta_path.suffix}) not supported')
        return 1
    with open(meta_path, 'rt', encoding=ENCODING) as handle:
        metadata = yaml.safe_load(handle)
    if not metadata:
        log.error(f'empty metadata file? Please add metadata to ({meta_path})')
        return 1
    if 'import' in metadata['document']:
        base_meta_path = DOC_BASE / metadata['document']['import']
        if not base_meta_path.is_file() or not base_meta_path.stat().st_size:
            log.error(
                f'metadata declares import of base data from ({base_meta_path.name})'
                f' but failed to find non-empty base file at {base_meta_path}'
            )
            return 1
        with open(base_meta_path, 'rt', encoding=ENCODING) as handle:
            base_data = yaml.safe_load(handle)
        for key, value in metadata['document']['patch'].items():
            base_data['document']['common'][key] = value
        metadata = base_data
    with open('metadata.yml', 'wt', encoding=ENCODING) as handle:
        yaml.dump(metadata, handle, default_flow_style=False)
    return metadata
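
# Illustrative metadata layering (hypothetical file names and keys): a facet metadata file
# may import a base file and patch selected keys of its document.common mapping.
#
#     # meta-facet.yml
#     document:
#       import: meta-base.yml
#       patch:
#         header_id: XYZ-42
#
#     # meta-base.yml
#     document:
#       common:
#         header_id: TBD
#         title: Example
#
# The patch above yields document.common.header_id == 'XYZ-42' in the persisted metadata.yml.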


@no_type_check
def parse_markdown_image(text_line: str) -> tuple[str, str, str, str]:
    """Parse a markdown image line within our conventions into caption, src, alt, and optional rest."""
    invalid_marker = ('', '', '', text_line)

    exclam = '!'
    osb = '['
    if not text_line or not text_line.startswith(f'{exclam}{osb}'):
        log.error(f'- INVALID-MD-IMG_LINE::START <<{text_line.rstrip()}>>')
        return invalid_marker

    csb = ']'
    osb_cnt = text_line.count(osb)
    csb_cnt = text_line.count(csb)
    if osb_cnt + csb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::SB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if osb_cnt != csb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    orb = '('
    cap_src_boundary = f'{csb}{orb}'
    if cap_src_boundary not in text_line:
        log.error(f'- INVALID-MD-IMG_LINE::CAP-SRC-BOUNDARY <<{text_line.rstrip()}>>')
        return invalid_marker

    crb = ')'
    orb_cnt = text_line.count(orb)
    crb_cnt = text_line.count(crb)
    if orb_cnt + crb_cnt < 2:
        log.error(f'- INVALID-MD-IMG_LINE::RB-TOK-CNT-LOW <<{text_line.rstrip()}>>')
        return invalid_marker
    if orb_cnt != crb_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::RB-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    quo = '"'
    quo_cnt = text_line.count(quo)
    if quo_cnt < 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-LOW <<{text_line.rstrip()}>>')
    if quo_cnt % 2:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::QU-TOK-CNT-UNBALANCED <<{text_line.rstrip()}>>')

    sp = ' '
    sp_cnt = text_line.count(sp)
    if not sp_cnt:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::SP-TOK-CNT-LOW <<{text_line.rstrip()}>>')

    dot = '.'
    sla = '/'
    abs_path_indicator = f'{csb}{orb}{sla}'
    may_have_abs_path = abs_path_indicator in text_line
    if may_have_abs_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-ABS-PATH <<{text_line.rstrip()}>>')
    naive_upwards_path_indicator = f'{csb}{orb}{dot}{dot}{sla}'
    may_have_upwards_path = naive_upwards_path_indicator in text_line
    if may_have_upwards_path:
        log.info(f'- SUSPICIOUS-MD-IMG_LINE::MAY-HAVE-UPWARDS-PATH <<{text_line.rstrip()}>>')

    log.info('- parsing the markdown image text line ...')
    if orb_cnt + crb_cnt > 2 or orb_cnt != crb_cnt:
        # The regex is not safe for orb inside caption
        left, right = text_line.split(cap_src_boundary, 1)
        match_right = MD_IMG_PATTERN_RIGHT_SPLIT.match(right)
        if not match_right:
            log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-RIGHT-SPLIT-FAILED <<{text_line.rstrip()}>>')
            return invalid_marker

        parts = match_right.groupdict()
        cap = left[2:]
        if not cap:
            log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
            cap = CAP_INJECTOR_HACK

        src = parts['src']
        alt = parts['alt']
        rest = parts['rest']
        if orb in alt or crb in alt:
            log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-PARTIAL-MATCH <<{text_line.rstrip()}>>')
        log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

        return cap, src, alt, rest

    match = MD_IMG_PATTERN.match(text_line)
    if not match:
        log.error(f'- INVALID-MD-IMG_LINE::RE-MATCH-FAILED <<{text_line.rstrip()}>>')
        return invalid_marker

    parts = match.groupdict()
    cap = parts['cap']
    if not cap:
        log.warning(f'- INCOMPLETE-MD-IMG_LINE::CAP-MISS-INJECTED <<{text_line.rstrip()}>>')
        cap = CAP_INJECTOR_HACK

    src = parts['src']
    alt = parts['alt']
    rest = parts['rest']
    if orb in alt or crb in alt:
        log.warning(f'- MAYBE-MD-IMG_LINE::ALT-TRUNCATED-FULL-MATCH <<{text_line.rstrip()}>>')
    log.warning(f' + parsed as ({cap=}, {src=}, {alt=}, {rest=})')

    return cap, src, alt, rest


@no_type_check
def adapt_image(text_line: str, collector: list[str], upstream: str, root: str) -> str:
    """Rewrite the src of a markdown image line to the local asset location and register the resolved original path."""
    cap, src, alt, rest = parse_markdown_image(text_line)
    if not src:
        log.error(f'parse of markdown image text line failed - empty src, and rest is <<{rest.rstrip()}>>')
        return text_line

    img_path = str((pathlib.Path(upstream).parent / src).resolve()).replace(root, '')
    collector.append(img_path)
    img_hack = img_path
    if f'/{IMAGES_FOLDER}' in img_path:
        img_hack = IMAGES_FOLDER + img_path.split(f'/{IMAGES_FOLDER}', 1)[1]
    elif f'/{DIAGRAMS_FOLDER}' in img_path:
        img_hack = DIAGRAMS_FOLDER + img_path.split(f'/{DIAGRAMS_FOLDER}', 1)[1]

    if img_hack != img_path:
        log.info(f'{img_hack} <--- OK? --- {img_path}')

    alt_text = f'"{alt}"' if alt else f'"{ALT_INJECTOR_HACK}"'
    belte_og_seler = f'![{cap}]({img_hack} {alt_text}){rest}'
    log.info(f'==> belte-og-seler: ->>{belte_og_seler}<<-')
    return belte_og_seler
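
# Illustrative rewrite (hypothetical upstream part path 'part/example.md' below the render root):
#
#     in : ![Alt Text Dot Dot Lime](../images/lime.png "Caption Text Dot Dot Lime")
#     out: ![Alt Text Dot Dot Lime](images/lime.png "Caption Text Dot Dot Lime")
#
# The resolved original path is appended to the collector so collect_assets can copy it later.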


@no_type_check
def harvest_include(
    text_line: str, slot: int, regions: dict[str, list[tuple[tuple[int, int], str]]], tree: treelib.Tree, parent: str
) -> None:
    """Record an \\include{...} directive at slot as a single-line insert region of parent and as a tree node."""
    include_local = text_line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
    include = str(pathlib.Path(parent).parent / include_local)
    regions[parent].append(((slot, slot), include))
    tree.create_node(include, include, parent=parent)


@no_type_check
def rollup(
    jobs: list[list[str]],
    docs: dict[str, list[str]],
    regions: dict[str, list[tuple[tuple[int, int], str]]],
    flat: dict[str, str],
) -> list[list[str]]:
    """Flatten the heads of the bottom-up inclusion chains by splicing already flat parts into their parents."""
    tackle = [those[0] for those in jobs if those and those[0] != SLASH]
    if tackle:
        log.info(f' Insertion ongoing with parts ({", ".join(tuple(sorted(tackle)))}) remaining')
    else:
        return [[]]
    for that in tackle:
        buf = []
        for slot, line in enumerate(docs[that]):
            special = False
            the_first = False
            the_include = ''
            for pair, include in regions[that]:
                low, high = pair
                if low <= slot <= high:
                    special = True
                    if low == slot:
                        the_first = True
                        the_include = include
            if not special:
                buf.append(line)
                continue
            if the_first:
                buf.append(flat[the_include])
        flat[that] = '\n'.join(buf) + '\n'

    return [[job for job in chain if job not in flat] for chain in jobs]
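
# Control-flow sketch for the caller in concatenate below: parts without includes are
# flattened first; each rollup pass then replaces the include regions of the current chain
# heads with the already flat text of the included parts and drops flattened jobs from the
# chains; once every remaining chain head is the root marker, rollup returns [[]] and the
# caller's while loop stops.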


@no_type_check
def collect_assets(collector: list[str]) -> None:
    """Copy the collected image and diagram assets into local images/ and diagrams/ folders (placeholder on miss)."""
    images = pathlib.Path(IMAGES_FOLDER)
    images.mkdir(parents=True, exist_ok=True)
    diagrams = pathlib.Path(DIAGRAMS_FOLDER)
    diagrams.mkdir(parents=True, exist_ok=True)
    for img_path in collector:
        if IMAGES_FOLDER in img_path:
            source_asset = DOC_BASE / img_path
            target_asset = images / pathlib.Path(img_path).name
            try:
                shutil.copy(source_asset, target_asset)
            except FileNotFoundError as err:
                log.error(err)
                code, msg = plh.dump_placeholder(target_asset)
                log.warning(msg) if code else log.info(msg)
            continue
        if DIAGRAMS_FOLDER in img_path:
            source_asset = DOC_BASE / img_path
            target_asset = diagrams / pathlib.Path(img_path).name
            try:
                shutil.copy(source_asset, target_asset)
            except FileNotFoundError as err:
                log.error(err)
                code, msg = plh.dump_placeholder(target_asset)
                log.warning(msg) if code else log.info(msg)


@no_type_check
def concatenate(
    doc_root: Union[str, pathlib.Path],
    structure_name: str,
    target_key: str,
    facet_key: str,
    options: dict[str, Union[bool, str]],
) -> int:
    """Concatenate the markdown tree of the target and facet into a single document.md and collect its assets."""
    log.info(LOG_SEPARATOR)
    log.info('entered concat function ...')
    target_code = target_key
    facet_code = facet_key
    if not facet_code.strip() or not target_code.strip():
        log.error(f'concatenate requires non-empty target ({target_code}) and facet ({facet_code}) codes')
        return 2

    log.info(f'parsed target ({target_code}) and facet ({facet_code}) from request')

    structure, asset_map = gat.prelude(
        doc_root=doc_root, structure_name=structure_name, target_key=target_key, facet_key=facet_key, command='concat'
    )
    log.info(f'prelude teleported processor into the document root at ({os.getcwd()}/)')
    rel_concat_folder_path = pathlib.Path('render/pdf/')
    rel_concat_folder_path.mkdir(parents=True, exist_ok=True)
    os.chdir(rel_concat_folder_path)
    log.info(f'concatenate (this processor) teleported into the render/pdf location ({os.getcwd()}/)')

    ok, aspect_map = too.load_target(target_code, facet_code)
    if not ok or not aspect_map:
        return 0 if ok else 1

    approvals = process_approvals(aspect_map)
    if isinstance(approvals, int):
        return 1
    binder = process_binder(aspect_map)
    if isinstance(binder, int):
        return 1
    changes = process_changes(aspect_map)
    if isinstance(changes, int):
        return 1
    metadata = process_meta(aspect_map)
    if isinstance(metadata, int):
        return 1

    root = SLASH
    root_path = str(pathlib.Path.cwd().resolve()).rstrip(SLASH) + SLASH
    tree = treelib.Tree()
    tree.create_node(root, root)
    documents = {}
    insert_regions = {}
    img_collector = []
    log.info(LOG_SEPARATOR)
    log.info('processing binder ...')
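    # The nested loops below scan the binder entries and then every discovered include,
    # up to four levels deep, for two kinds of insertion slots:
    #   - code-block read slots (READ_SLOT_FENCE_BEGIN ... with open('...') ... READ_SLOT_FENCE_END)
    #   - \include{...} directives
    # Every hit becomes an insert region plus a node in the inclusion tree; image lines are
    # rewritten on the fly via adapt_image.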
    for entry in binder:
        ref_path = DOC_BASE / entry
        log.debug(f'- {entry} as {ref_path}')
        with open(ref_path, 'rt', encoding=ENCODING) as handle:
            documents[entry] = [line.rstrip() for line in handle.readlines()]
        insert_regions[entry] = []
        in_region = False
        begin, end = 0, 0
        include = ''
        tree.create_node(entry, entry, parent=root)
        for slot, line in enumerate(documents[entry]):
            if line.startswith(IMG_LINE_STARTSWITH):
                documents[entry][slot] = adapt_image(line, img_collector, entry, root_path)
            log.debug(f'{slot :02d}|{line.rstrip()}')
            if not in_region:
                if line.startswith(READ_SLOT_FENCE_BEGIN):
                    in_region = True
                    begin = slot
                    continue
                if line.startswith(INCLUDE_SLOT):
                    include = line.split(INCLUDE_SLOT, 1)[1].rstrip('}').strip()
                    insert_regions[entry].append(((slot, slot), include))
                    tree.create_node(include, include, parent=entry)
                    include = ''
                    continue
            if in_region:
                if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                    include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                elif line.startswith(READ_SLOT_FENCE_END):
                    end = slot
                    insert_regions[entry].append(((begin, end), include))
                    tree.create_node(include, include, parent=entry)
                    in_region = False
                    begin, end = 0, 0
                    include = ''

        for coords, include in insert_regions[entry]:  # include is anchored on DOC_BASE
            ref_path = DOC_BASE / include
            with open(ref_path, 'rt', encoding=ENCODING) as handle:
                documents[include] = [line.rstrip() for line in handle.readlines()]
            insert_regions[include] = []
            in_region = False
            begin, end = 0, 0
            sub_include = ''
            for slot, line in enumerate(documents[include]):
                if line.startswith(IMG_LINE_STARTSWITH):
                    documents[include][slot] = adapt_image(line, img_collector, include, root_path)
                log.debug(f'{slot :02d}|{line.rstrip()}')
                if not in_region:
                    if line.startswith(READ_SLOT_FENCE_BEGIN):
                        in_region = True
                        begin = slot
                        continue
                    if line.startswith(INCLUDE_SLOT):
                        harvest_include(line, slot, insert_regions, tree, include)
                        continue
                if in_region:
                    if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                        sub_include = line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                        sub_include = str(pathlib.Path(include).parent / sub_include)
                    elif line.startswith(READ_SLOT_FENCE_END):
                        end = slot
                        insert_regions[include].append(((begin, end), sub_include))
                        tree.create_node(sub_include, sub_include, parent=include)
                        in_region = False
                        begin, end = 0, 0
                        sub_include = ''

            for coords, sub_include in insert_regions[include]:
                ref_path = DOC_BASE / sub_include
                with open(ref_path, 'rt', encoding=ENCODING) as handle:
                    documents[sub_include] = [line.rstrip() for line in handle.readlines()]
                insert_regions[sub_include] = []
                in_region = False
                begin, end = 0, 0
                sub_sub_include = ''
                for slot, line in enumerate(documents[sub_include]):
                    if line.startswith(IMG_LINE_STARTSWITH):
                        documents[sub_include][slot] = adapt_image(line, img_collector, sub_include, root_path)
                    log.debug(f'{slot :02d}|{line.rstrip()}')
                    if not in_region:
                        if line.startswith(READ_SLOT_FENCE_BEGIN):
                            in_region = True
                            begin = slot
                            continue
                        if line.startswith(INCLUDE_SLOT):
                            harvest_include(line, slot, insert_regions, tree, sub_include)
                            continue
                    if in_region:
                        if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                            sub_sub_include = (
                                line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                            )
                            sub_sub_include = str(pathlib.Path(sub_include).parent / sub_sub_include)
                        elif line.startswith(READ_SLOT_FENCE_END):
                            end = slot
                            insert_regions[sub_include].append(((begin, end), sub_sub_include))
                            tree.create_node(sub_sub_include, sub_sub_include, parent=sub_include)
                            in_region = False
                            begin, end = 0, 0
                            sub_sub_include = ''

                for coords, sub_sub_include in insert_regions[sub_include]:
                    ref_path = DOC_BASE / sub_sub_include
                    with open(ref_path, 'rt', encoding=ENCODING) as handle:
                        documents[sub_sub_include] = [line.rstrip() for line in handle.readlines()]
                    insert_regions[sub_sub_include] = []
                    in_region = False
                    begin, end = 0, 0
                    sub_sub_sub_include = ''
                    for slot, line in enumerate(documents[sub_sub_include]):
                        if line.startswith(IMG_LINE_STARTSWITH):
                            documents[sub_sub_include][slot] = adapt_image(
                                line, img_collector, sub_sub_include, root_path
                            )
                        log.debug(f'{slot :02d}|{line.rstrip()}')
                        if not in_region:
                            if line.startswith(READ_SLOT_FENCE_BEGIN):
                                in_region = True
                                begin = slot
                                continue
                            if line.startswith(INCLUDE_SLOT):
                                harvest_include(line, slot, insert_regions, tree, sub_sub_include)
                                continue
                        if in_region:
                            if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                                sub_sub_sub_include = (
                                    line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                                )
                                sub_sub_sub_include = str(pathlib.Path(sub_sub_include).parent / sub_sub_sub_include)
                            elif line.startswith(READ_SLOT_FENCE_END):
                                end = slot
                                insert_regions[sub_sub_include].append(((begin, end), sub_sub_sub_include))
                                tree.create_node(sub_sub_sub_include, sub_sub_sub_include, parent=sub_sub_include)
                                in_region = False
                                begin, end = 0, 0
                                sub_sub_sub_include = ''

                    for coords, sub_sub_sub_include in insert_regions[sub_sub_include]:
                        ref_path = DOC_BASE / sub_sub_sub_include
                        with open(ref_path, 'rt', encoding=ENCODING) as handle:
                            documents[sub_sub_sub_include] = [line.rstrip() for line in handle.readlines()]
                        insert_regions[sub_sub_sub_include] = []
                        in_region = False
                        begin, end = 0, 0
                        sub_sub_sub_sub_include = ''
                        for slot, line in enumerate(documents[sub_sub_sub_include]):
                            if line.startswith(IMG_LINE_STARTSWITH):
                                documents[sub_sub_sub_include][slot] = adapt_image(
                                    line, img_collector, sub_sub_sub_include, root_path
                                )
                            log.debug(f'{slot :02d}|{line.rstrip()}')
                            if not in_region:
                                if line.startswith(READ_SLOT_FENCE_BEGIN):
                                    in_region = True
                                    begin = slot
                                    continue
                                if line.startswith(INCLUDE_SLOT):
                                    harvest_include(line, slot, insert_regions, tree, sub_sub_sub_include)
                                    continue
                            if in_region:
                                if line.startswith(READ_SLOT_CONTEXT_BEGIN):
                                    sub_sub_sub_sub_include = (
                                        line.replace(READ_SLOT_CONTEXT_BEGIN, '').split(')', 1)[0].strip("'").strip('"')
                                    )
                                    sub_sub_sub_sub_include = str(
                                        pathlib.Path(sub_sub_sub_include).parent / sub_sub_sub_sub_include
                                    )
                                elif line.startswith(READ_SLOT_FENCE_END):
                                    end = slot
                                    insert_regions[sub_sub_sub_include].append(((begin, end), sub_sub_sub_sub_include))
                                    tree.create_node(
                                        sub_sub_sub_sub_include, sub_sub_sub_sub_include, parent=sub_sub_sub_include
                                    )
                                    in_region = False
                                    begin, end = 0, 0
                                    sub_sub_sub_sub_include = ''

    top_down_paths = tree.paths_to_leaves()
    bottom_up_paths = [list(reversed(td_p)) for td_p in top_down_paths]
    log.info(LOG_SEPARATOR)
    log.info('resulting tree:')
    for edge in str(tree).split(NL):
        log.info(edge)

    log.info(LOG_SEPARATOR)
    log.info(f'provisioning chains for the {len(bottom_up_paths)} bottom up leaf paths:')
    for num, leaf_path in enumerate(bottom_up_paths):
        the_way_up = f'|-> {leaf_path[0]}' if len(leaf_path) == 1 else f'{" -> ".join(leaf_path)}'
        log.info(f'{num :2d}: {the_way_up}')

    concat = {}
    log.info(LOG_SEPARATOR)
    log.info(f'dependencies for the {len(insert_regions)} document parts:')
    for key, regions in insert_regions.items():
        num_in = len(regions)
        dashes = '-' * num_in
        incl_disp = f'( {num_in} include{"" if num_in == 1 else "s"} )'
        indicator = '(no includes)' if not regions else f'<{dashes + incl_disp + dashes}'
        log.info(f'- part {key} {indicator}')
        for region in regions:
            between = f'between lines {region[0][0] :3d} and {region[0][1] :3d}'
            insert = f'include fragment {region[1]}'
            log.info(f' + {between} {insert}')
        if not regions:  # No includes
            concat[key] = '\n'.join(documents[key]) + '\n'
            log.info(f' * did concat {key} document for insertion')

    chains = [leaf_path for leaf_path in bottom_up_paths]
    log.info(LOG_SEPARATOR)
    log.info(f'starting insertions bottom up for the {len(chains)} inclusion chains:')
    todo = [[job for job in chain if job not in concat] for chain in chains]
    while todo != [[]]:
        todo = rollup(todo, documents, insert_regions, concat)

    log.info(LOG_SEPARATOR)
    log.info('writing final concat markdown to document.md')
    with open('document.md', 'wt', encoding=ENCODING) as handle:
        handle.write('\n'.join(concat[bind] for bind in binder) + '\n')

    log.info(LOG_SEPARATOR)
    log.info('collecting assets (images and diagrams)')
    collect_assets(img_collector)
    log.info(LOG_SEPARATOR)
    log.info(f'concat result document (document.md) and artifacts are within folder ({os.getcwd()}/)')
    log.info(LOG_SEPARATOR)
    log.info('processing complete - SUCCESS')
    log.info(LOG_SEPARATOR)
    return 0
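

# Minimal invocation sketch (illustrative only; the import path and argument values are
# hypothetical, and the options mapping is not referenced inside this function):
#
#     import liitos.concat as concat
#
#     rc = concat.concatenate(
#         doc_root='example/deep',
#         structure_name='structure.yml',
#         target_key='prod_kit',
#         facet_key='deep',
#         options={},
#     )
#     raise SystemExit(rc)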