Coverage for taksonomia/anglify.py: 79.14%
181 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 23:02:14 +00:00
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 23:02:14 +00:00
1"""Transform taxonomy to XML."""
3import collections.abc
4import numbers
5import xml.etree.ElementTree as ET # nosec B405
6from random import randint
7from typing import no_type_check
8from xml.dom.minidom import parseString # nosec B408
10from taksonomia import ENCODING
12UNIQUE_IDS = [] # type: ignore
@no_type_check
def make_id(element, start=100000, end=999999):
    """Build an id string of the form ``<element>_<random integer>``.

    The integer part is drawn with ``randint(start, end)``, so both bounds
    are inclusive.  Nothing here guarantees uniqueness of the result.
    """
    suffix = randint(start, end)  # nosec B311
    return str(element) + '_' + str(suffix)
@no_type_check
def get_unique_id(element):
    """Return an id for ``element`` that is not yet recorded in UNIQUE_IDS.

    Candidates come from ``make_id``; a new one is drawn until it is absent
    from the module-level ``UNIQUE_IDS`` list.  The accepted id is appended
    to that list before being returned.
    """
    candidate = make_id(element)
    while candidate in UNIQUE_IDS:
        # collision with a previously issued id - draw again
        candidate = make_id(element)
    UNIQUE_IDS.append(candidate)
    return candidate
@no_type_check
def get_xml_type(val):
    """Return the label used for the XML ``type`` attribute of ``val``.

    The exact type name is compared first (``str``, ``int``, ``float``,
    ``bool``), so subclasses of those types are not matched by that step.
    Other numbers map to ``number``, ``None`` to ``null``, dicts to ``dict``
    and any remaining iterable to ``list``.  Anything else yields the name
    of its type.
    """
    type_name = type(val).__name__

    if type_name in ('str', 'int', 'float', 'bool'):
        return type_name
    if isinstance(val, numbers.Number):
        return 'number'
    if type_name == 'NoneType':
        return 'null'
    if isinstance(val, dict):
        return 'dict'
    if isinstance(val, collections.abc.Iterable):
        return 'list'
    return type_name
@no_type_check
def escape_xml(s):
    """Escape the five XML special characters in a string.

    Strings have ``&``, ``"``, ``'``, ``<`` and ``>`` replaced by their
    predefined XML entities.  Any non-string value is returned unchanged.
    """
    if isinstance(s, str):
        s = str(s)
        # '&' must be handled first, otherwise the ampersands introduced by
        # the other entities would be escaped a second time.
        s = s.replace('&', '&amp;')
        s = s.replace('"', '&quot;')
        s = s.replace("'", '&apos;')
        s = s.replace('<', '&lt;')
        s = s.replace('>', '&gt;')
    return s
@no_type_check
def make_attrstring(attr):
    """Render a mapping as XML attributes: `` key="val" key2="val2"``.

    The result starts with a single space when there is at least one
    attribute and is the empty string otherwise.  Values are inserted as
    they are; no escaping is applied here.
    """
    pairs = []
    for name, value in attr.items():
        pairs.append('%s="%s"' % (name, value))
    if not pairs:
        return ''
    return ' ' + ' '.join(pairs)
@no_type_check
def key_is_valid_xml(key):
    """Tell whether ``key`` can be used as an XML element name.

    A minimal document using ``key`` as its only tag is handed to minidom's
    ``parseString``; the key counts as valid when parsing raises nothing.
    """
    probe = '<?xml version="1.0" encoding="UTF-8" ?><%s>foo</%s>' % (key, key)
    try:
        parseString(probe)  # nosec B318
    except Exception:  # minidom does not implement exceptions well
        return False
    return True
@no_type_check
def make_valid_xml_name(key, attr):
    """Return a ``(name, attr)`` pair in which ``name`` is usable as a tag.

    ``key`` is XML-escaped first.  When the key cannot be used directly it
    is replaced by the generic tag ``key`` and the original text is stored
    in ``attr`` (under ``id`` or ``name``).  ``attr`` is modified in place
    and also returned.
    """
    key = escape_xml(key)
    attr = escape_xml(attr)

    # Special case: a key containing any digit is moved into the ``id``
    # attribute, unless it mentions one of the sha256/sha512 names.
    contains_digit = not set('0123456789').isdisjoint(key)
    if contains_digit and 'sha256' not in key and 'sha512' not in key:
        attr['id'] = key
        return 'key', attr

    # an already valid key is passed through untouched
    if key_is_valid_xml(key):
        return key, attr

    # purely numeric keys get a lowercase n prepended
    if key.isdigit():
        return 'n%s' % (key), attr

    # try whether turning spaces into underscores is enough
    underscored = key.replace(' ', '_')
    if key_is_valid_xml(underscored):
        return underscored, attr

    # still invalid - keep the text in a name attribute instead
    attr['name'] = key
    return 'key', attr
@no_type_check
def wrap_cdata(s):
    """Return ``str(s)`` enclosed in a CDATA section.

    Any ``]]>`` inside the text would end the section early, so each
    occurrence is split across two adjacent CDATA sections.
    """
    text = str(s)
    safe = text.replace(']]>', ']]]]><![CDATA[>')
    return ''.join(('<![CDATA[', safe, ']]>'))
@no_type_check
def default_item_func(parent):
    """Return the tag name for list items: always ``item``.

    ``parent`` is accepted so the function fits the ``item_func`` call
    signature, but it is not used.
    """
    return 'item'
@no_type_check
def convert(obj, ids, attr_type, item_func, cdata, parent='root'):
    """Route ``obj`` to the converter matching its data type.

    Scalars (bool, numbers, strings, objects offering ``isoformat`` and
    ``None``) become a single element named ``item_func(parent)``.  Dicts
    and other iterables are delegated to ``convert_dict`` and
    ``convert_list``.

    Raises:
        TypeError: if ``obj`` is of none of the supported types.
    """
    item_name = item_func(parent)

    # bool has to be tested before numbers.Number: bool is a subclass of
    # int, so the number check would otherwise swallow it.
    if isinstance(obj, bool):
        return convert_bool(item_name, obj, attr_type, {}, cdata)

    # A fresh attribute dict is passed explicitly in the scalar calls.
    # Passing cdata as the fourth positional argument would land in the
    # ``attr`` parameter of the scalar converters.
    if isinstance(obj, (numbers.Number, str)):
        return convert_kv(item_name, obj, attr_type, {}, cdata)

    if hasattr(obj, 'isoformat'):
        return convert_kv(item_name, obj.isoformat(), attr_type, {}, cdata)

    if obj is None:
        # pass None itself so that the type attribute is derived from None
        return convert_none(item_name, None, attr_type, {}, cdata)

    if isinstance(obj, dict):
        return convert_dict(obj, ids, parent, attr_type, item_func, cdata)

    if isinstance(obj, collections.abc.Iterable):
        return convert_list(obj, ids, parent, attr_type, item_func, cdata)

    raise TypeError('Unsupported data type: %s (%s)' % (obj, type(obj).__name__))
@no_type_check
def convert_dict(obj, ids, parent, attr_type, item_func, cdata):
    """Convert a dict into an XML string, one element per key.

    Each key is turned into a tag name by ``make_valid_xml_name``.  When
    ``ids`` is true every element starts with an ``id`` attribute obtained
    from ``get_unique_id(parent)``.  Nested dicts and iterables are
    converted recursively with the (possibly adjusted) key as their parent.

    Raises:
        TypeError: if a value is of none of the supported types.
    """
    lines = []
    for key, val in obj.items():
        attr = {'id': '%s' % (get_unique_id(parent))} if ids else {}

        key, attr = make_valid_xml_name(key, attr)

        # bool has to be tested before numbers.Number: bool is a subclass
        # of int, so the number branch would otherwise catch it and
        # convert_bool would never run.
        if isinstance(val, bool):
            lines.append(convert_bool(key, val, attr_type, attr, cdata))

        elif isinstance(val, (numbers.Number, str)):
            lines.append(convert_kv(key, val, attr_type, attr, cdata))

        elif hasattr(val, 'isoformat'):  # datetime
            lines.append(convert_kv(key, val.isoformat(), attr_type, attr, cdata))

        elif isinstance(val, dict):
            if attr_type:
                attr['type'] = get_xml_type(val)
            inner = convert_dict(val, ids, key, attr_type, item_func, cdata)
            lines.append('<%s%s>%s</%s>' % (key, make_attrstring(attr), inner, key))

        elif isinstance(val, collections.abc.Iterable):
            if attr_type:
                attr['type'] = get_xml_type(val)
            inner = convert_list(val, ids, key, attr_type, item_func, cdata)
            lines.append('<%s%s>%s</%s>' % (key, make_attrstring(attr), inner, key))

        elif val is None:
            lines.append(convert_none(key, val, attr_type, attr, cdata))

        else:
            raise TypeError('Unsupported data type: %s (%s)' % (val, type(val).__name__))

    return ''.join(lines)
@no_type_check
def convert_list(items, ids, parent, attr_type, item_func, cdata):
    """Convert an iterable into an XML string, one element per item.

    Every item is emitted under the tag ``item_func(parent)``.  When ``ids``
    is true, one id is obtained from ``get_unique_id(parent)`` and each
    item's id is that value followed by ``_<position>`` (counting from 1).
    Elements wrapping a dict item do not carry the id attribute.

    Raises:
        TypeError: if an item is of none of the supported types.
    """
    lines = []

    item_name = item_func(parent)

    # only needed (and only generated) when ids are requested
    this_id = get_unique_id(parent) if ids else None

    for i, item in enumerate(items):
        attr = {'id': '%s_%s' % (this_id, i + 1)} if ids else {}

        # bool has to be tested before numbers.Number: bool is a subclass
        # of int, so the number branch would otherwise catch it and
        # convert_bool would never run.
        if isinstance(item, bool):
            lines.append(convert_bool(item_name, item, attr_type, attr, cdata))

        elif isinstance(item, (numbers.Number, str)):
            lines.append(convert_kv(item_name, item, attr_type, attr, cdata))

        elif hasattr(item, 'isoformat'):  # datetime
            lines.append(convert_kv(item_name, item.isoformat(), attr_type, attr, cdata))

        elif isinstance(item, dict):
            type_attr = ' type="dict"' if attr_type else ''
            inner = convert_dict(item, ids, parent, attr_type, item_func, cdata)
            lines.append('<%s%s>%s</%s>' % (item_name, type_attr, inner, item_name))

        elif isinstance(item, collections.abc.Iterable):
            type_attr = ' type="list"' if attr_type else ''
            inner = convert_list(item, ids, item_name, attr_type, item_func, cdata)
            # make_attrstring already supplies its own leading space, so no
            # separator is placed between the tag name and the attributes.
            lines.append(
                '<%s%s%s>%s</%s>' % (item_name, type_attr, make_attrstring(attr), inner, item_name)
            )

        elif item is None:
            lines.append(convert_none(item_name, None, attr_type, attr, cdata))

        else:
            raise TypeError('Unsupported data type: %s (%s)' % (item, type(item).__name__))
    return ''.join(lines)
@no_type_check
def convert_kv(key, val, attr_type, attr=None, cdata=False):
    """Convert a number or string into a single XML element.

    ``key`` becomes the tag name after passing through
    ``make_valid_xml_name``.  With ``attr_type`` set, a ``type`` attribute
    is added.  The value is wrapped in CDATA when ``cdata`` is true and
    XML-escaped otherwise.

    ``attr`` defaults to a new empty dict on every call.  A dict supplied by
    the caller is modified in place.
    """
    # A shared ``{}`` default would be mutated below and would leak
    # attributes from one call into the next.
    if attr is None:
        attr = {}

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    content = wrap_cdata(val) if cdata else escape_xml(val)
    return '<%s%s>%s</%s>' % (key, attrstring, content, key)
@no_type_check
def convert_bool(key, val, attr_type, attr=None, cdata=False):
    """Convert a boolean into a single XML element.

    The element text is the lowercased string form of ``val`` (``true`` or
    ``false`` for real booleans).  With ``attr_type`` set, a ``type``
    attribute is added.  ``cdata`` is accepted for signature parity with the
    other converters but is not used.

    ``attr`` defaults to a new empty dict on every call.  A dict supplied by
    the caller is modified in place.
    """
    # A shared ``{}`` default would be mutated below and would leak
    # attributes from one call into the next.
    if attr is None:
        attr = {}

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return '<%s%s>%s</%s>' % (key, attrstring, str(val).lower(), key)
@no_type_check
def convert_none(key, val, attr_type, attr=None, cdata=False):
    """Convert a null value into an empty XML element.

    The element has no text content.  ``val`` is only used to derive the
    ``type`` attribute when ``attr_type`` is set.  ``cdata`` is accepted for
    signature parity with the other converters but is not used.

    ``attr`` defaults to a new empty dict on every call.  A dict supplied by
    the caller is modified in place.
    """
    # A shared ``{}`` default would be mutated below and would leak
    # attributes from one call into the next.
    if attr is None:
        attr = {}

    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return '<%s%s></%s>' % (key, attrstring, key)
@no_type_check
def as_xml(obj, ids=False, attr_type=False, item_func=default_item_func, cdata=False):
    """Convert a taxonomy tree into indented XML text.

    ``obj`` is serialised by ``convert`` (with an empty parent name), parsed
    with ElementTree, indented and returned as a string.  The returned text
    starts with an XML declaration line and ends with a newline.

    NOTE(review): the serialised markup has to parse as one document, so
    ``obj`` presumably needs to produce exactly one top-level element -
    confirm against callers.
    """
    declaration = '<?xml version="1.0" encoding="utf-8" ?>'
    body = convert(obj, ids, attr_type, item_func, cdata, parent='')

    # round-trip through ElementTree to obtain pretty-printed output
    raw = (declaration + body).encode(ENCODING)
    root = ET.XML(raw)
    ET.indent(root)
    pretty = ET.tostring(root, encoding='unicode')
    return declaration + '\n' + pretty + '\n'