Coverage for taksonomia/anglify.py: 79.14%

181 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-04 23:02:14 +00:00

1"""Transform taxonomy to XML.""" 

2 

3import collections.abc 

4import numbers 

5import xml.etree.ElementTree as ET # nosec B405 

6from random import randint 

7from typing import no_type_check 

8from xml.dom.minidom import parseString # nosec B408 

9 

10from taksonomia import ENCODING 

11 

# Module-level registry of every id handed out by get_unique_id(); it is only
# ever appended to, so ids stay unique for the lifetime of the process.
UNIQUE_IDS = [] # type: ignore

13 

14 

@no_type_check
def make_id(element, start=100000, end=999999):
    """Return an id string of the form ``<element>_<random integer>``.

    The integer is drawn uniformly from the inclusive range ``start..end``.
    The result is not guaranteed to be unique; see ``get_unique_id``.
    """
    suffix = randint(start, end)  # nosec B311
    return f'{element}_{suffix}'

19 

20 

@no_type_check
def get_unique_id(element):
    """Return an id for ``element`` that has not been handed out before.

    Candidates are generated with ``make_id`` until one is found that is not
    yet recorded in the module-level ``UNIQUE_IDS`` list; the winner is
    appended to that list and returned.
    """
    candidate = make_id(element)
    # Re-roll until the candidate does not collide with an id issued earlier.
    while candidate in UNIQUE_IDS:
        candidate = make_id(element)
    UNIQUE_IDS.append(candidate)
    return candidate

33 

34 

@no_type_check
def get_xml_type(val):
    """Return the label used for the XML ``type`` attribute of ``val``.

    Exact ``str``/``int``/``float``/``bool`` types map to their own name, any
    other number to ``'number'``, ``None`` to ``'null'``, dicts to ``'dict'``
    and remaining iterables to ``'list'``. Everything else falls back to the
    name of its type.
    """
    type_name = type(val).__name__
    # Matching on the exact type name keeps bool distinct from int.
    if type_name in ('str', 'int', 'float', 'bool'):
        return type_name
    if isinstance(val, numbers.Number):
        return 'number'
    if type_name == 'NoneType':
        return 'null'
    if isinstance(val, dict):
        return 'dict'
    if isinstance(val, collections.abc.Iterable):
        return 'list'
    return type_name

55 

56 

@no_type_check
def escape_xml(s):
    """Escape the five XML special characters in a string.

    ``&``, ``"``, ``'``, ``<`` and ``>`` are replaced by their predefined XML
    entities. Values that are not strings are returned unchanged.
    """
    if isinstance(s, str):
        # '&' must be handled first, otherwise the ampersands introduced by
        # the following replacements would be escaped a second time.
        s = s.replace('&', '&amp;')
        s = s.replace('"', '&quot;')
        s = s.replace("'", '&apos;')
        s = s.replace('<', '&lt;')
        s = s.replace('>', '&gt;')
    return s

67 

68 

@no_type_check
def make_attrstring(attr):
    """Render a mapping as XML attributes, each as `` key="val"``.

    Every pair is preceded by a single space, so the result can be appended
    directly to a tag name; an empty mapping yields an empty string.
    """
    return ''.join(' %s="%s"' % (name, value) for name, value in attr.items())

74 

75 

@no_type_check
def key_is_valid_xml(key):
    """Report whether ``key`` can be used as an XML element name.

    The key is placed into a minimal document as the tag name and handed to
    minidom; the key is considered valid exactly when parsing succeeds.
    """
    probe = '<?xml version="1.0" encoding="UTF-8" ?><%s>foo</%s>' % (key, key)
    try:
        parseString(probe)  # nosec B318
    except Exception:  # minidom does not implement exceptions well
        return False
    return True

85 

86 

@no_type_check
def make_valid_xml_name(key, attr):
    """Return a ``(name, attr)`` pair in which ``name`` is usable as an XML tag.

    The key is escaped first. When it cannot serve as a tag name it is
    replaced by a substitute and, where needed, the original key is stored in
    ``attr`` (which is modified in place and returned).
    """
    key = escape_xml(key)
    attr = escape_xml(attr)

    # HACK: keys containing an ASCII digit are moved into the id attribute,
    # except for keys mentioning the sha256 / sha512 hash names.
    contains_digit = not set(key).isdisjoint('0123456789')
    if contains_digit and not ('sha256' in key or 'sha512' in key):
        attr['id'] = key
        return 'key', attr

    # Nothing to do for a key that already is a valid name.
    if key_is_valid_xml(key):
        return key, attr

    # Purely numeric keys become valid with a leading lowercase n.
    if key.isdigit():
        return 'n%s' % (key), attr

    # Try underscores in place of spaces.
    underscored = key.replace(' ', '_')
    if key_is_valid_xml(underscored):
        return underscored, attr

    # Last resort: keep the key as a name attribute of a generic element.
    attr['name'] = key
    return 'key', attr

116 

117 

@no_type_check
def wrap_cdata(s):
    """Return ``s`` (converted to a string) enclosed in a CDATA section.

    Any ``]]>`` inside the text would terminate the section early, so each
    occurrence is split across two adjacent CDATA sections.
    """
    text = str(s)
    text = text.replace(']]>', ']]]]><![CDATA[>')
    return ''.join(('<![CDATA[', text, ']]>'))

123 

124 

@no_type_check
def default_item_func(parent):
    """Return the element name used for list items: always ``'item'``.

    The ``parent`` argument is accepted for interface compatibility with
    custom item functions and is not used.
    """
    return 'item'

128 

129 

@no_type_check
def convert(obj, ids, attr_type, item_func, cdata, parent='root'):
    """Dispatch ``obj`` to the converter matching its data type.

    Scalars (bool, number, string, objects offering ``isoformat`` and None)
    are rendered as a single element named by ``item_func(parent)``; dicts
    and other iterables are delegated to ``convert_dict`` / ``convert_list``.

    Raises:
        TypeError: if ``obj`` is of none of the supported kinds.
    """
    item_name = item_func(parent)

    # bool has to be tested before numbers.Number: bool is a subclass of int
    # and would otherwise never reach convert_bool.
    if isinstance(obj, bool):
        return convert_bool(item_name, obj, attr_type, {}, cdata)

    # A fresh attribute dict is passed explicitly so that cdata lands in the
    # cdata parameter instead of the attr slot of the scalar converters.
    if isinstance(obj, (numbers.Number, str)):
        return convert_kv(item_name, obj, attr_type, {}, cdata)

    if hasattr(obj, 'isoformat'):  # datetime-like
        return convert_kv(item_name, obj.isoformat(), attr_type, {}, cdata)

    if obj is None:
        # Pass None itself so a requested type attribute reads 'null'.
        return convert_none(item_name, None, attr_type, {}, cdata)

    if isinstance(obj, dict):
        return convert_dict(obj, ids, parent, attr_type, item_func, cdata)

    if isinstance(obj, collections.abc.Iterable):
        return convert_list(obj, ids, parent, attr_type, item_func, cdata)

    raise TypeError('Unsupported data type: %s (%s)' % (obj, type(obj).__name__))

156 

157 

@no_type_check
def convert_dict(obj, ids, parent, attr_type, item_func, cdata):
    """Convert a dict into an XML string, one element per key.

    Each key is turned into a valid element name via ``make_valid_xml_name``.
    When ``ids`` is true every element starts out with a unique ``id``
    attribute derived from ``parent``; when ``attr_type`` is true a ``type``
    attribute is added.

    Raises:
        TypeError: if a value is of an unsupported type.
    """
    lines = []
    for key, val in obj.items():
        attr = {} if not ids else {'id': '%s' % (get_unique_id(parent))}

        key, attr = make_valid_xml_name(key, attr)

        # bool has to be tested before numbers.Number: bool is a subclass of
        # int and would otherwise be rendered as 'True' / 'False' by
        # convert_kv instead of going through convert_bool.
        if isinstance(val, bool):
            lines.append(convert_bool(key, val, attr_type, attr, cdata))

        elif isinstance(val, (numbers.Number, str)):
            lines.append(convert_kv(key, val, attr_type, attr, cdata))

        elif hasattr(val, 'isoformat'):  # datetime
            lines.append(convert_kv(key, val.isoformat(), attr_type, attr, cdata))

        elif isinstance(val, dict):
            if attr_type:
                attr['type'] = get_xml_type(val)
            lines.append(
                '<%s%s>%s</%s>'
                % (key, make_attrstring(attr), convert_dict(val, ids, key, attr_type, item_func, cdata), key)
            )

        elif isinstance(val, collections.abc.Iterable):
            if attr_type:
                attr['type'] = get_xml_type(val)
            lines.append(
                '<%s%s>%s</%s>'
                % (key, make_attrstring(attr), convert_list(val, ids, key, attr_type, item_func, cdata), key)
            )

        elif val is None:
            lines.append(convert_none(key, val, attr_type, attr, cdata))

        else:
            raise TypeError('Unsupported data type: %s (%s)' % (val, type(val).__name__))

    return ''.join(lines)

199 

200 

@no_type_check
def convert_list(items, ids, parent, attr_type, item_func, cdata):
    """Convert an iterable into an XML string, one element per item.

    All elements are named ``item_func(parent)``. When ``ids`` is true, one
    unique id is drawn for the list and scalar / nested-list items get an
    ``id`` attribute of the form ``<list id>_<position>`` (1-based).

    Raises:
        TypeError: if an item is of an unsupported type.
    """
    lines = []

    item_name = item_func(parent)

    # One id per list; the items derive theirs from it below.
    this_id = get_unique_id(parent) if ids else None

    for i, item in enumerate(items):
        attr = {} if not ids else {'id': '%s_%s' % (this_id, i + 1)}

        # bool has to be tested before numbers.Number: bool is a subclass of
        # int and would otherwise never reach convert_bool.
        if isinstance(item, bool):
            lines.append(convert_bool(item_name, item, attr_type, attr, cdata))

        elif isinstance(item, (numbers.Number, str)):
            lines.append(convert_kv(item_name, item, attr_type, attr, cdata))

        elif hasattr(item, 'isoformat'):  # datetime
            lines.append(convert_kv(item_name, item.isoformat(), attr_type, attr, cdata))

        elif isinstance(item, dict):
            type_attr = ' type="dict"' if attr_type else ''
            lines.append(
                '<%s%s>%s</%s>'
                % (
                    item_name,
                    type_attr,
                    convert_dict(item, ids, parent, attr_type, item_func, cdata),
                    item_name,
                )
            )

        elif isinstance(item, collections.abc.Iterable):
            # make_attrstring already supplies the leading space, so none is
            # placed in the template (avoids '<item >' for empty attributes).
            type_attr = ' type="list"' if attr_type else ''
            lines.append(
                '<%s%s%s>%s</%s>'
                % (
                    item_name,
                    type_attr,
                    make_attrstring(attr),
                    convert_list(item, ids, item_name, attr_type, item_func, cdata),
                    item_name,
                )
            )

        elif item is None:
            lines.append(convert_none(item_name, None, attr_type, attr, cdata))

        else:
            raise TypeError('Unsupported data type: %s (%s)' % (item, type(item).__name__))
    return ''.join(lines)

270 

271 

@no_type_check
def convert_kv(key, val, attr_type, attr=None, cdata=False):
    """Convert a number or string into an XML element.

    ``key`` becomes the element name (fixed up by ``make_valid_xml_name``),
    ``val`` the content: wrapped in CDATA when ``cdata`` is true, XML-escaped
    otherwise. ``attr`` holds the element attributes and is modified in
    place; when ``attr_type`` is true a ``type`` attribute is added.
    """
    # A new dict per call: a shared default would accumulate the 'type',
    # 'id' and 'name' entries written below across unrelated calls.
    if attr is None:
        attr = {}
    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return '<%s%s>%s</%s>' % (key, attrstring, wrap_cdata(val) if cdata else escape_xml(val), key)

281 

282 

@no_type_check
def convert_bool(key, val, attr_type, attr=None, cdata=False):
    """Convert a boolean into an XML element with content ``true``/``false``.

    ``key`` becomes the element name (fixed up by ``make_valid_xml_name``).
    ``attr`` holds the element attributes and is modified in place; when
    ``attr_type`` is true a ``type`` attribute is added. ``cdata`` is
    accepted for signature parity with the other converters and not used.
    """
    # A new dict per call: a shared default would accumulate the 'type',
    # 'id' and 'name' entries written below across unrelated calls.
    if attr is None:
        attr = {}
    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return '<%s%s>%s</%s>' % (key, attrstring, str(val).lower(), key)

292 

293 

@no_type_check
def convert_none(key, val, attr_type, attr=None, cdata=False):
    """Convert a null value into an empty XML element.

    ``key`` becomes the element name (fixed up by ``make_valid_xml_name``).
    ``attr`` holds the element attributes and is modified in place; when
    ``attr_type`` is true a ``type`` attribute derived from ``val`` is added.
    ``cdata`` is accepted for signature parity with the other converters and
    not used.
    """
    # A new dict per call: a shared default would accumulate the 'type',
    # 'id' and 'name' entries written below across unrelated calls.
    if attr is None:
        attr = {}
    key, attr = make_valid_xml_name(key, attr)

    if attr_type:
        attr['type'] = get_xml_type(val)
    attrstring = make_attrstring(attr)
    return '<%s%s></%s>' % (key, attrstring, key)

303 

304 

@no_type_check
def as_xml(obj, ids=False, attr_type=False, item_func=default_item_func, cdata=False):
    """Return ``obj`` rendered as an indented XML document string.

    The object is converted with ``convert`` (using an empty parent name),
    parsed with ElementTree, indented, and serialised again below an XML
    declaration; the result ends with a newline.
    """
    declaration = '<?xml version="1.0" encoding="utf-8" ?>'
    body = convert(obj, ids, attr_type, item_func, cdata, parent='')
    raw_document = (declaration + body).encode(ENCODING)
    # Round-trip through ElementTree to obtain consistent indentation.
    root = ET.XML(raw_document)
    ET.indent(root)
    pretty = ET.tostring(root, encoding='unicode')
    return declaration + '\n' + pretty + '\n'