Coverage for laskea/transform.py: 88.49%

93 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-10 22:19:18 +00:00

1"""Transformer API for laskea. 

2 

3Typical filter data comes in as JSON and maps column keys to filter tasks: 

4 

5{ 

6 "order": ["keep", "drop", "replace"], 

7 "keep": [ 

8 ["startswith", "ABC-"], 

9 ["contains", "Z"], 

10 ["icontains", "m"], 

11 ["equals", "DEF-42"], 

12 ["endswith", "-123"] 

13 ], 

14 "drop": [ 

15 ["matches", "[A-Z]+-\\d+"] 

16 ], 

17 "replace": [ 

18 ["DEF-", "definition-"] 

19 ] 

20} 

21 

22""" 

23import re 

24from laskea import FILTER_MAP_TYPE, FILTER_ORDER_TYPE, FILTER_PAYLOAD_TYPE, log 

25 

# operation keys
# Member names looked up in the filter data of a column; ORDER is the optional
# meta operation that lists the other three in the sequence they shall be applied.
DROP = 'drop'
KEEP = 'keep'
ORDER = 'order'
REPLACE = 'replace'

# action keys
# First item of every keep / drop payload pair; the variants with a leading "i"
# are the case insensitive counterparts.
CONTAINS = 'contains'
ENDSWITH = 'endswith'
EQUALS = 'equals'
ICONTAINS = 'icontains'
IENDSWITH = 'iendswith'
IEQUALS = 'iequals'
ISTARTSWITH = 'istartswith'
MATCHES = 'matches'
STARTSWITH = 'startswith'

# All known action keys - lower-cased payload keys are checked against this
# tuple before an action is looked up and executed.
ACTION_KEYS = (
    CONTAINS,
    ENDSWITH,
    EQUALS,
    ICONTAINS,
    IENDSWITH,
    IEQUALS,
    ISTARTSWITH,
    MATCHES,
    STARTSWITH,
)

54 

55 

def op_contains(entry: str, that: str) -> bool:
    """Tell whether `that` occurs anywhere inside `entry` (case sensitive)."""
    # The membership operator already yields a bool - no conversion required.
    return that in entry

59 

60 

def op_endswith(entry: str, that: str) -> bool:
    """Tell whether `entry` has `that` as suffix (case sensitive)."""
    has_suffix = entry.endswith(that)
    return has_suffix

64 

65 

def op_equals(entry: str, that: str) -> bool:
    """Tell whether `entry` and `that` are the same string (case sensitive)."""
    same = that == entry
    return same

69 

70 

def op_icontains(entry: str, that: str) -> bool:
    """Tell whether `that` occurs anywhere inside `entry` ignoring letter case."""
    needle = that.lower()
    haystack = entry.lower()
    return needle in haystack

74 

75 

def op_iendswith(entry: str, that: str) -> bool:
    """Tell whether `entry` has `that` as suffix ignoring letter case."""
    lowered_entry = entry.lower()
    lowered_suffix = that.lower()
    return lowered_entry.endswith(lowered_suffix)

79 

80 

def op_iequals(entry: str, that: str) -> bool:
    """Tell whether `entry` and `that` are the same string ignoring letter case."""
    lowered_that = that.lower()
    lowered_entry = entry.lower()
    return lowered_that == lowered_entry

84 

85 

def op_istartswith(entry: str, that: str) -> bool:
    """Tell whether `entry` has `that` as prefix ignoring letter case."""
    lowered_entry = entry.lower()
    lowered_prefix = that.lower()
    return lowered_entry.startswith(lowered_prefix)

89 

90 

def op_matches(entry: str, that: str) -> bool:
    """Tell whether the regular expression `that` matches at the start of `entry`."""
    # A successful match yields a (always truthy) match object, a miss yields None.
    return re.match(that, entry) is not None

94 

95 

def op_startswith(entry: str, that: str) -> bool:
    """Tell whether `entry` has `that` as prefix (case sensitive)."""
    has_prefix = entry.startswith(that)
    return has_prefix

99 

100 

# Dispatch table from action key to the predicate implementing it.
# Every predicate is called as `predicate(entry, that)` and returns a bool.
ACTION_MAP = {
    CONTAINS: op_contains,
    ENDSWITH: op_endswith,
    EQUALS: op_equals,
    ICONTAINS: op_icontains,
    IENDSWITH: op_iendswith,
    IEQUALS: op_iequals,
    ISTARTSWITH: op_istartswith,
    MATCHES: op_matches,
    STARTSWITH: op_startswith,
}

112 

113 

class FilterMap:
    """The class FilterMap maps the task data of a column onto known operations and applies them to cell entries.

    Known operations are:

    - drop
    - keep
    - replace

    A meta operation is:

    - order

    This "operation" is optional but if present must fully specify the order of application of the "real" operations.
    Order elements that are not known operations are ignored (with a warning in the log).

    The default order of application is

    1. keep
    2. drop
    3. replace

    Real operation JSON member values are a list of list of strings (the payloads).
    The payloads have length two with semantics depending on the operation.

    Operations keep and drop both iterate over all payloads in the order given
    by applying the action encoded in the first list item and using the second item as parameter
    on the cell content (list of strings) elements.

    The encoding of actions is as follows (for cell entry `entry` and payload parameter `that`):

    - contains - `that in entry`
    - endswith - `entry.endswith(that)`
    - equals - `that == entry`
    - icontains - `that.lower() in entry.lower()`
    - iendswith - `entry.lower().endswith(that.lower())`
    - iequals - `that.lower() == entry.lower()`
    - istartswith - `entry.lower().startswith(that.lower())`
    - matches - `re.compile(that).match(entry)`
    - startswith - `entry.startswith(that)`

    Action keys are compared in lower case; payloads with an unknown action key are skipped
    (with a warning in the log).

    The third operation (replace) is delegated to the string replace function as action.
    In this case a payload pair like ["this", "with that"] is applied as `entry.replace('this', 'with that')`.

    """

    ORDER: FILTER_ORDER_TYPE = [KEEP, DROP, REPLACE]

    def __init__(self, column: str, filter_data: FILTER_MAP_TYPE):
        """Collect the non-empty operations for `column` from `filter_data` in application order.

        A missing or empty order member in `filter_data` selects the default order (FilterMap.ORDER).
        """
        self.column = column
        self.filter_data: FILTER_MAP_TYPE = filter_data

        # A single lookup suffices: missing and empty order both fall back to the default.
        self.order = self.filter_data.get(ORDER) or FilterMap.ORDER
        log.debug(f'{self.order=}')

        self.keeps: FILTER_PAYLOAD_TYPE = self.filter_data.get(KEEP, [])  # type: ignore
        self.drops: FILTER_PAYLOAD_TYPE = self.filter_data.get(DROP, [])  # type: ignore
        self.replaces: FILTER_PAYLOAD_TYPE = self.filter_data.get(REPLACE, [])  # type: ignore

        # Pairs of (operation kind, payloads) - operations without payloads are left out.
        self.operations = []
        for kind in self.order:
            if kind == KEEP:
                if self.keeps:
                    self.operations.append((kind, self.keeps))
            elif kind == DROP:
                if self.drops:
                    self.operations.append((kind, self.drops))
            elif kind == REPLACE:
                if self.replaces:
                    self.operations.append((kind, self.replaces))
            else:
                log.warning(f'ignored order element ({kind}) - please verify your filter data')

    def apply(self, entry: str) -> str:
        """Initial naive application during stage 1 implementation of transformer.

        Returns the empty string for entries that are empty or only whitespace and for
        entries hit by a drop action. The first keep action that hits returns the entry
        as transformed up to that point (later operations are not applied). Replace
        payloads are applied either up front (when replace is the first operation)
        or at the first later replace position in the order, never both.
        Without any operations the entry is returned unchanged.
        """
        if not entry.strip():  # TODO(sthagen) - this may exclude use cases of manipulating space ;-)
            return ''
        if not self.operations:
            return entry

        transformed = entry

        # The operations are known to be non-empty from here on.
        first_kind, first_tasks = self.operations[0]
        pre_replace = bool(first_kind == REPLACE and first_tasks)
        if pre_replace:
            for this, with_that in first_tasks:
                log.debug(f'before replace("{this}", "{with_that}") call on content({transformed})')
                transformed = transformed.replace(this, with_that)
                log.debug(f' replace("{this}", "{with_that}") --> content({transformed})')

        for kind, tasks in self.operations:
            log.debug(f'+ applying ({kind}) operations to ({transformed})')
            if kind in (KEEP, DROP):
                for key, parameter in tasks:
                    log.debug(
                        f' - applying action ({key})({parameter}) for operation type ({kind}) on ({transformed})'
                    )
                    action_key = key.lower()
                    if action_key not in ACTION_KEYS:
                        log.warning(f'skipping action with unknown key ({key}) for operation type ({kind})')
                        continue
                    hit = ACTION_MAP[action_key](transformed, parameter)
                    log.debug(f' ==> {"hit" if hit else "miss"} for ({transformed})')
                    if hit:
                        # First hit decides: drop removes the entry, keep ends the processing.
                        return '' if kind == DROP else transformed
            elif not pre_replace:  # REPLACE
                for this, with_that in tasks:
                    transformed = transformed.replace(this, with_that)

        return transformed

225 

226 return transformed