Coverage for etiketti/discover.py: 82.18%

73 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-04 17:54:24 +00:00

1import hashlib 

2import io 

3import pathlib 

4import platform 

5import subprocess # nosec B404 

6from typing import Any, Callable, Union, no_type_check 

7 

8import yaml 

9 

10from etiketti import ( 

11 DEFAULT_AUTHOR, 

12 ENCODING, 

13 LOG_SEPARATOR, 

14 ContextType, 

15 ConventionsType, 

16 PathLike, 

17 log, 

18) 

19 

# Number of bytes requested per read call when hash_file streams a file (65536).
CHUNK_SIZE = 1 << 16

21 

22 

def get_producer() -> str:
    """Assume the producer is fixed and retrieve the terse version repr from a --version call.

    Runs ``lualatex --version`` and takes the first output line starting with
    'This is LuaHBTeX, Version ' as the banner. If no such line is seen, the
    placeholder 'Version unknown' is used, which yields the version part 'unknown'.

    Returns:
        A label of the form ``lltx-<version>-<platform>`` where the platform part
        is the lower-cased result of ``platform.platform()``.

    Raises:
        OSError: propagated from ``subprocess.Popen`` if the executable cannot be started.
    """
    version_text = 'Version unknown'
    command = ['lualatex', '--version']
    # The context manager closes the pipe and waits for the child, also when the loop is left early.
    with subprocess.Popen(command, stdout=subprocess.PIPE, encoding='utf-8') as proc:  # nosec B603, B607
        for line in proc.stdout:  # type: ignore
            if line.startswith('This is LuaHBTeX, Version '):
                version_text = line.rstrip()
                break
    log.info(f'producer version banner: ({version_text})')

    # Example: 'This is LuaHBTeX, Version 1.15.0 (TeX Live 2022)'
    engine = 'lltx'
    version = version_text.split('Version ', 1)[1].rstrip().replace(' (TeX Live ', '-txlv-').rstrip(')')
    where = platform.platform().lower()
    producer_version = f'{engine}-{version}-{where}'
    log.info(f'- noting as: {producer_version=}')
    log.info(LOG_SEPARATOR)
    return producer_version

41 

42 

def hash_file(path: pathlib.Path, hasher: Union[Callable[..., Any], None] = None) -> str:
    """Return the hex digest of the data from file (SHA512 unless another hasher is given).

    Args:
        path: File to read in binary mode.
        hasher: Callable invoked without arguments that returns an object offering
            ``update`` and ``hexdigest``; defaults to ``hashlib.sha512``.

    Returns:
        The hex digest, or the marker 'error:plain:file-to-hash-not-found'
        if the file does not exist.
    """
    if hasher is None:
        hasher = hashlib.sha512
    the_hash = hasher()
    try:
        with open(path, 'rb') as handle:
            # Feed the file in CHUNK_SIZE pieces instead of reading it in one go.
            while chunk := handle.read(CHUNK_SIZE):
                the_hash.update(chunk)
        return the_hash.hexdigest()
    except FileNotFoundError as err:
        # warning() instead of the deprecated warn() alias, matching the other functions here.
        log.warning(f'hash file failed with: ({err})')
        return 'error:plain:file-to-hash-not-found'

56 

57 

@no_type_check
def load_label_context(path: PathLike) -> ContextType:
    """Parse the YAML file at path and return the resulting label context.

    The context is meant to provide prefix, site-id, and action-id.
    """
    with open(path, 'rt', encoding=ENCODING) as source:
        context = yaml.safe_load(source)
    return context

63 

64 

@no_type_check
def extract_author(path: PathLike) -> str:
    """Extract the author from the approvals file if DEFAULT_AUTHOR is false-like.

    A truthy DEFAULT_AUTHOR is returned right away without reading the file.
    Otherwise the YAML file is parsed and the entries below its 'approvals' key
    are scanned for the first one whose 'role' equals 'author' (ignoring case).

    Returns:
        The 'name' of that entry; DEFAULT_AUTHOR if the name is empty, no entry
        has the author role, or the file does not exist.
    """
    if DEFAULT_AUTHOR:
        return DEFAULT_AUTHOR
    try:
        with open(path, 'rt', encoding=ENCODING) as handle:
            approvals = yaml.safe_load(handle)
        for entry in approvals['approvals']:
            # An entry with a missing or null role cannot be the author; skip it
            # instead of failing on None.lower() and aborting the whole search.
            role = entry.get('role') or ''
            if role.lower() == 'author':
                return entry.get('name', '') or DEFAULT_AUTHOR
    except FileNotFoundError as err:
        log.warning(f'extract author failed with: ({err})')
    return DEFAULT_AUTHOR

80 

81 

def extract_meta_parts(path: PathLike) -> tuple[str, str, str]:
    """Extract the title, subject, keywords in that order from the metadata file.

    The values are read from the mapping below document -> common of the YAML file:
    the title from 'title', the subject from 'header_id' (replaced by
    'header_issue_revision_combined_label' when it starts with 'Issue,'), and the
    keywords from 'keywords_csl'. Missing or null values become empty strings.

    Returns:
        The (title, subject, keywords) triple; three empty strings if the file does not exist.
    """
    try:
        with open(path, 'rt', encoding=ENCODING) as handle:
            metadata = yaml.safe_load(handle)
    except FileNotFoundError as err:
        log.warning(f'extract meta parts failed with: ({err})')
        return '', '', ''

    mapping = metadata['document']['common']
    # `or ''` also covers keys that are present with a null value, where a
    # get() default would still hand back None and break the str calls below.
    raw_title = mapping.get('title') or ''
    # Dropping the double backslash (presumably a LaTeX line break) can leave two
    # adjacent spaces; squeeze those to one before title-casing.
    # NOTE(review): pattern restored as a double space - confirm against the upstream file.
    title = raw_title.replace('\\\\', '').replace('  ', ' ').title()
    subject = mapping.get('header_id') or ''
    if subject.startswith('Issue,'):
        subject = mapping.get('header_issue_revision_combined_label') or ''
    keywords = mapping.get('keywords_csl') or ''
    return title, subject, keywords

97 

98 

def load_conventions(context: ContextType, path: PathLike) -> ConventionsType:
    """Build the map of conventional workspace paths next to the input pdf file.

    The parent folder of path is the workspace. Every other entry is that folder
    joined with a file name taken from context['label'], or with the built-in
    default name when the label mapping has no entry for it.
    """
    folder = pathlib.Path(path).parent
    overrides = context['label']
    # (result key, key looked up in the label mapping, default file name)
    slots = (
        ('approvals-yml-path', 'approvals-yml-name', 'approvals.yml'),
        ('metadata-yml-path', 'metadata-yml-name', 'metadata.yml'),
        ('bookmatter-tex-path', 'bookmatter-tex-name', 'bookmatter.tex'),
        ('document-tex-path', 'document-tex-name', 'document.tex'),
        ('driver-tex-path', 'driver-tex-name', 'driver.tex'),
        ('metadata-tex-path', 'metadata-tex-name', 'metadata.tex'),
        ('publisher-tex-path', 'publisher-tex-name', 'publisher.tex'),
        ('setup-tex-path', 'setup-tex-name', 'setup.tex'),
        ('this-tex-path', 'this-tex-name', 'this.tex'),
    )
    conventions = {'workspace-folder-path': folder}
    for target, name_key, fallback in slots:
        conventions[target] = folder / overrides.get(name_key, fallback)
    return conventions