Coverage for tallipoika/speedup.py: 57.14%

43 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-04 23:32:02 +00:00

1"""JSON encoding dispatch that offers C implementations from standard Python install if available.""" 

2 

3import re 

4from typing import no_type_check 

5 

# Optional C accelerators from the ``_json`` extension module. Each name is
# bound to ``None`` when its import fails, so later code can test the name for
# truthiness before using it.
try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None  # type: ignore

try:
    from _json import encode_basestring as c_encode_basestring  # type: ignore
except ImportError:
    c_encode_basestring = None

try:
    from _json import make_encoder as accelerated_make_encoder
except ImportError:
    accelerated_make_encoder = None  # type: ignore

18 

# Code points at or above this value are written as a surrogate pair by the
# ASCII-only encoder; smaller ones get a single four-digit \u escape.
UC_SPLIT = 0x10000

# Characters that must be escaped in a JSON string when non-ASCII is allowed.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Backslash, double quote, and anything outside the printable ASCII range.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Bytes pattern matching any single byte in the range 0x80-0xff.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Replacement table: the short two-character escapes come first ...
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# ... then every remaining control character below 0x20 gets a \u00xx escape.
# ``setdefault`` leaves the short forms registered above untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), f'\\u{i:04x}')

34 

35 

@no_type_check
def py_encode_basestring(text: str) -> str:
    """Quote ``text`` as a JSON string.

    Each substring matched by ``ESCAPE`` is replaced with its entry in
    ``ESCAPE_DCT``; everything else is copied through unchanged, and the
    result is wrapped in double quotes.
    """
    escaped = ESCAPE.sub(lambda hit: ESCAPE_DCT[hit.group(0)], text)
    return '"' + escaped + '"'

44 

45 

# Use the C implementation when it was importable, else the pure-Python one.
encode_basestring = c_encode_basestring if c_encode_basestring else py_encode_basestring

47 

48 

def handle_surrogate_pair(char_num: int) -> str:
    """Format ``char_num`` as two consecutive four-digit backslash-u escapes.

    ``UC_SPLIT`` is subtracted first; the upper ten bits of the remainder are
    merged into 0xD800 and the lower ten bits into 0xDC00. Hex digits are
    lowercase.
    """
    offset = char_num - UC_SPLIT
    high = 0xD800 | ((offset >> 10) & 0x3FF)
    low = 0xDC00 | (offset & 0x3FF)
    return '\\u{0:04x}\\u{1:04x}'.format(high, low)

55 

56 

@no_type_check
def py_encode_basestring_ascii(text: str) -> str:
    """Quote ``text`` as a JSON string containing only ASCII characters.

    Every ``ESCAPE_ASCII`` match is replaced: characters listed in
    ``ESCAPE_DCT`` use their table entry, other code points below
    ``UC_SPLIT`` become one four-digit backslash-u escape, and the rest are
    delegated to ``handle_surrogate_pair``.
    """

    def escape_one(hit):
        symbol = hit.group(0)
        if symbol in ESCAPE_DCT:
            return ESCAPE_DCT[symbol]
        code_point = ord(symbol)
        if code_point >= UC_SPLIT:
            return handle_surrogate_pair(code_point)
        return f'\\u{code_point:04x}'

    return '"' + ESCAPE_ASCII.sub(escape_one, text) + '"'

70 

71 

# Use the C implementation when it was importable, else the pure-Python one.
encode_basestring_ascii = (
    c_encode_basestring_ascii if c_encode_basestring_ascii else py_encode_basestring_ascii
)  # type: ignore