Coverage for nineties/privacy.py: 98%

58 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-04 20:53:48 +00:00

1# -*- coding: utf-8 -*- 

2# pylint: disable=missing-docstring,unused-import,reimported,no-member 

3"""Compliance with data protection regulations / constraints. 

4 

5Replace personally identifiable information with surrogates. 

6Likewise non-personal sensitive information. 

7Do so reproducibly to maintain cross record references. 

8Accept synonyms / aliases mapping multiple identifiers to single entity. 

9 

10The ALIASES record is a two level dictionary mapping 

11aspect -> alias -> entity, where aspect is in 

12(NAME, EMAIL, TEXT) for this version.""" 

13import json 

14import os 

15from typing import no_type_check 

16 

17from faker import Faker 

18 

# The three aspects of a record that get replaced by surrogates.
NAME, EMAIL, TEXT = 'name', 'email', 'text'
ASPECTS = NAME, EMAIL, TEXT
# Template of an alias registry with no entries: aspect -> {} for every aspect.
EMPTY_ALIASES = {k: {} for k in ASPECTS}  # type: ignore
ALIASES_ENV = 'ALIASES_90S'

# The environment variable holds either the path of a JSON file or inline JSON.
_ALIASES_SOURCE = os.getenv(ALIASES_ENV)
if _ALIASES_SOURCE is None:
    # Build fresh per-aspect dicts instead of reusing EMPTY_ALIASES itself, so
    # that later in-place updates of ALIASES never leak into the template.
    ALIASES = {k: {} for k in ASPECTS}  # type: ignore
else:
    if os.path.isfile(_ALIASES_SOURCE):
        with open(_ALIASES_SOURCE, 'rt', encoding='utf-8') as json_file:
            ALIASES = json.load(json_file)
    else:
        ALIASES = json.loads(_ALIASES_SOURCE)
    # Guarantee that every aspect has a (possibly empty) alias mapping.
    for asp in ASPECTS:
        ALIASES.setdefault(asp, {})  # type: ignore

34 

# One placeholder entity per aspect, paired with ASPECTS by position.
NO_NAME, NO_EMAIL, NO_TEXT = 'no_name', 'no_email', 'no_text'
PLACE_HOLDERS = NO_NAME, NO_EMAIL, NO_TEXT
UNKNOWN_ENTITIES = dict(zip(ASPECTS, PLACE_HOLDERS))


# Shared generator of surrogate values, seeded with a fixed value.
FAKE = Faker()
Faker.seed(42)

42 

43 

@no_type_check
def sentence() -> str:
    """Return a fake sentence, requested from FAKE with nb_words=6."""
    requested_words = 6
    return FAKE.sentence(nb_words=requested_words)

47 

48 

# Per aspect, the zero-argument callable that produces a fresh surrogate value.
MAP = {NAME: FAKE.name, EMAIL: FAKE.email, TEXT: sentence}
# Two level mapping aspect -> entity -> surrogate.
SURROGATES = {k: {} for k in ASPECTS}  # type: ignore
for asp in ASPECTS:
    # dict.fromkeys drops repeated entities while keeping first-seen order, so
    # each entity gets exactly one surrogate and the generator is called in
    # the same sequence as the aliases were listed.
    for e in dict.fromkeys(ALIASES[asp].values()):  # type: ignore
        SURROGATES[asp][e] = MAP[asp]()

58 

59 

@no_type_check
def expose_aliases(aspect=None):
    """Return the whole ALIASES record, or only the mapping of one aspect."""
    if aspect is None:
        return ALIASES
    return ALIASES[aspect]

64 

65 

@no_type_check
def expose_surrogates(aspect=None):
    """Return the whole SURROGATES record, or only the mapping of one aspect."""
    if aspect is None:
        return SURROGATES
    return SURROGATES[aspect]

70 

71 

@no_type_check
def ensure_privacy(aspect, alias, entity=None):
    """Return safe identifier, update ALIASES and SURROGATES accordingly.

    Args:
        aspect: Key selecting the alias / surrogate mappings and the generator
            in MAP (one of the ASPECTS).
        alias: Identifier as found in the data.
        entity: Entity to register for an alias that is not yet known. When
            omitted, the placeholder entity of the given aspect is used.

    Returns:
        The surrogate stored for the entity the alias resolves to; it is
        generated and stored on first use.

    Raises:
        KeyError: If aspect is not a key of MAP or ALIASES.
    """
    # Resolve the generator first, so an unsupported aspect fails before any
    # registry is modified.
    safe = MAP[aspect]
    found = ALIASES[aspect].get(alias)
    if found is None:
        if entity is None:
            # Use the placeholder of the requested aspect, not whatever value
            # a module level loop variable happens to hold.
            entity = UNKNOWN_ENTITIES[aspect]
        ALIASES[aspect][alias] = entity
        found = entity
    if SURROGATES[aspect].get(found) is None:
        SURROGATES[aspect][found] = safe()

    return SURROGATES[aspect][found]

87 

88 

def safe_name(text_alias: str) -> str:
    """Return the safe identifier for a name alias via ensure_privacy."""
    surrogate: str = ensure_privacy(NAME, text_alias)  # type: ignore
    return surrogate

92 

93 

def safe_email(text_alias: str) -> str:
    """Provide specialized email parser / anonymity provider.

    Delegates to ensure_privacy with the EMAIL aspect and returns its result.
    """
    return ensure_privacy(EMAIL, text_alias)  # type: ignore