Coverage for laskea/transform.py: 88.49%
93 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-10 22:19:18 +00:00
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-10 22:19:18 +00:00
"""Transformer API for laskea.

Typical filter data comes in as JSON and maps column keys to filter tasks:

{
  "order": ["keep", "drop", "replace"],
  "keep": [
    ["startswith", "ABC-"],
    ["contains", "Z"],
    ["icontains", "m"],
    ["equals", "DEF-42"],
    ["endswith", "-123"]
  ],
  "drop": [
    ["matches", "[A-Z]+-\\d+"]
  ],
  "replace": [
    ["DEF-", "definition-"]
  ]
}

"""
23import re
24from laskea import FILTER_MAP_TYPE, FILTER_ORDER_TYPE, FILTER_PAYLOAD_TYPE, log
# operation keys
DROP = 'drop'
KEEP = 'keep'
ORDER = 'order'
REPLACE = 'replace'

# action keys
CONTAINS = 'contains'
ENDSWITH = 'endswith'
EQUALS = 'equals'
ICONTAINS = 'icontains'
IENDSWITH = 'iendswith'
IEQUALS = 'iequals'
ISTARTSWITH = 'istartswith'
MATCHES = 'matches'
STARTSWITH = 'startswith'

# All action keys known to the keep and drop operations.
ACTION_KEYS = (
    CONTAINS,
    ENDSWITH,
    EQUALS,
    ICONTAINS,
    IENDSWITH,
    IEQUALS,
    ISTARTSWITH,
    MATCHES,
    STARTSWITH,
)
def op_contains(entry: str, that: str) -> bool:
    """Case sensitive contains.

    Return True if `that` occurs anywhere in `entry` (substring test).
    """
    return that in entry
def op_endswith(entry: str, that: str) -> bool:
    """Case sensitive ends with.

    Return True if `entry` ends with the suffix `that`.
    """
    return entry.endswith(that)
def op_equals(entry: str, that: str) -> bool:
    """Case sensitive equals.

    Return True if `entry` and `that` are the same string.
    """
    return that == entry
def op_icontains(entry: str, that: str) -> bool:
    """Case insensitive contains.

    Both strings are lower-cased before the substring test.
    """
    return that.lower() in entry.lower()
def op_iendswith(entry: str, that: str) -> bool:
    """Case insensitive ends with.

    Both strings are lower-cased before the suffix test.
    """
    return entry.lower().endswith(that.lower())
def op_iequals(entry: str, that: str) -> bool:
    """Case insensitive equals.

    Both strings are lower-cased before the comparison.
    """
    return that.lower() == entry.lower()
def op_istartswith(entry: str, that: str) -> bool:
    """Case insensitive starts with.

    Both strings are lower-cased before the prefix test.
    """
    return entry.lower().startswith(that.lower())
def op_matches(entry: str, that: str) -> bool:
    """Matches regular expression.

    The pattern `that` is matched at the beginning of `entry` only (the semantics of `re.match`),
    so a pattern that occurs later in the entry does not count as a hit.
    An invalid pattern raises `re.error`.
    """
    return re.match(that, entry) is not None
def op_startswith(entry: str, that: str) -> bool:
    """Case sensitive starts with.

    Return True if `entry` starts with the prefix `that`.
    """
    return entry.startswith(that)
# Dispatch table from action key to the predicate implementing it;
# every predicate is called as predicate(entry, that) and returns a bool.
ACTION_MAP = {
    CONTAINS: op_contains,
    ENDSWITH: op_endswith,
    EQUALS: op_equals,
    ICONTAINS: op_icontains,
    IENDSWITH: op_iendswith,
    IEQUALS: op_iequals,
    ISTARTSWITH: op_istartswith,
    MATCHES: op_matches,
    STARTSWITH: op_startswith,
}
class FilterMap:
    """The class FilterMap validates the task data against known operations and required arguments.

    Known operations are:

    - drop
    - keep
    - replace

    A meta operation is:

    - order

    This "operation" is optional but if present must fully specify the order of application of the "real" operations.

    The default order of application is

    1. keep
    2. drop
    3. replace

    Real operation JSON member values are a list of list of strings (the payloads).
    The payloads have length two with semantics depending on the operation.

    Operations keep and drop both iterate over all payloads in the order given
    by applying the action encoded in the first list item and using the second item as parameter
    on the cell content (list of strings) elements.

    The encoding of actions is as follows (for cell entry `entry` and payload parameter `that`):

    - contains - `that in entry`
    - endswith - `entry.endswith(that)`
    - equals - `that == entry`
    - icontains - `that.lower() in entry.lower()`
    - iendswith - `entry.lower().endswith(that.lower())`
    - iequals - `that.lower() == entry.lower()`
    - istartswith - `entry.lower().startswith(that.lower())`
    - matches - `re.compile(that).match(entry)`
    - startswith - `entry.startswith(that)`

    The third operation (replace) is delegated to the string replace function as action.
    In this case a payload pair like ["this", "with that"] is applied as `entry.replace('this', 'with that')`.

    """

    # Default order of application, used when the filter data has no (or an empty) order member.
    ORDER: FILTER_ORDER_TYPE = [KEEP, DROP, REPLACE]

    def __init__(self, column: str, filter_data: FILTER_MAP_TYPE):
        """Store the filter data of a column and derive the ordered list of operations to apply.

        Only operations with a non-empty payload list end up in `self.operations`;
        order elements that are not keep, drop, or replace are reported with a warning and ignored.
        """
        self.column = column
        self.filter_data: FILTER_MAP_TYPE = filter_data

        self.order = self.filter_data[ORDER] if self.filter_data.get(ORDER, []) else FilterMap.ORDER
        log.debug(f'{self.order=}')

        self.keeps: FILTER_PAYLOAD_TYPE = self.filter_data.get(KEEP, [])  # type: ignore
        self.drops: FILTER_PAYLOAD_TYPE = self.filter_data.get(DROP, [])  # type: ignore
        self.replaces: FILTER_PAYLOAD_TYPE = self.filter_data.get(REPLACE, [])  # type: ignore

        # Pairs of (operation kind, payloads) in the order of application.
        self.operations = []
        for kind in self.order:
            if kind == KEEP:
                if self.keeps:
                    self.operations.append((kind, self.keeps))
            elif kind == DROP:
                if self.drops:
                    self.operations.append((kind, self.drops))
            elif kind == REPLACE:
                if self.replaces:
                    self.operations.append((kind, self.replaces))
            else:
                log.warning(f'ignored order element ({kind}) - please verify your filter data')

    def apply(self, entry: str) -> str:
        """Initial naive application during stage 1 implementation of transformer.

        Returns the empty string for an entry that is empty or whitespace only,
        and the entry unchanged if no operations are configured.

        Otherwise the operations are applied in order:

        - the first hit of a drop action returns the empty string,
        - the first hit of a keep action returns the entry as transformed so far
          (later operations are not applied anymore),
        - replace payloads are applied to the entry via `str.replace`.

        Actions with unknown keys are skipped with a warning.
        """
        if not entry.strip():  # TODO(sthagen) - this may exclude use cases of manipulating space ;-)
            return ''
        if not self.operations:
            return entry

        transformed = entry

        # A leading replace operation is applied up front (with debug tracing of every step);
        # the flag keeps the loop below from applying replace payloads a second time.
        pre_replace = False
        kind, tasks = self.operations[0]
        if kind == REPLACE and tasks:
            pre_replace = True
            for this, with_that in tasks:
                log.debug(f'before replace("{this}", "{with_that}") call on content({transformed})')
                transformed = transformed.replace(this, with_that)
                log.debug(f' replace("{this}", "{with_that}") --> content({transformed})')

        for kind, tasks in self.operations:
            log.debug(f'+ applying ({kind}) operations to ({transformed})')
            if kind in (KEEP, DROP):
                for key, parameter in tasks:
                    log.debug(
                        f' - applying action ({key})({parameter}) for operation type ({kind}) on ({transformed})'
                    )
                    action = key.lower()  # action keys are matched case insensitively
                    if action not in ACTION_KEYS:
                        log.warning(f'skipping action with unknown key ({key}) for operation type ({kind})')
                        continue
                    hit = ACTION_MAP[action](transformed, parameter)
                    log.debug(f' ==> {"hit" if hit else "miss"} for ({transformed})')
                    if hit:
                        # The first hit decides: drop yields the empty string, keep the current content.
                        return '' if kind == DROP else transformed
            elif not pre_replace:  # REPLACE
                for this, with_that in tasks:
                    transformed = transformed.replace(this, with_that)

        return transformed