Coverage for puristaa/puristaa.py: 100.00%

21 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-04 22:03:31 +00:00

1# -*- coding: utf-8 -*- 

2"""Extract common prefix from sequence of strings and yield sequence of rest strings. 

3 

4Implementation uses min-max left matching, single character backtracking policy and a list. 

5""" 

6import typing 

7 

8 

@typing.no_type_check
def prefix_compression(texts, policy=None):
    """Split texts into their shared leading part and the per-text remainders.

    A falsy ``texts`` is handed back unchanged together with an empty prefix.
    Any value that is neither a list nor a tuple is treated as one single text.

    The shared prefix is determined by comparing only the smallest and the
    largest text (per ``min`` / ``max``) position by position. When ``policy``
    is truthy it is called with single characters of that prefix, scanning
    from right to left, and the prefix is cut so that it ends on the first
    character for which the call is truthy. If no character qualifies, the
    full shared prefix is kept.

    Returns a pair ``(prefix, rests)``. With an empty prefix, ``rests`` is the
    (possibly wrapped) input itself instead of a freshly built list.
    """
    if not texts:
        # Nothing to compress: empty prefix, input passed through as is.
        return '', texts

    if not isinstance(texts, (list, tuple)):
        texts = [texts]

    # Everything between the extremes shares whatever these two share.
    lowest, highest = min(texts), max(texts)

    cut = 0
    for index, symbol in enumerate(lowest):
        if symbol != highest[index]:
            break
        cut = index + 1

    if policy:
        # Walk back from the end of the shared prefix to an accepted character.
        for candidate in reversed(range(cut)):
            if policy(lowest[candidate]):
                cut = candidate + 1
                break

    if cut:
        return lowest[:cut], [text[cut:] for text in texts]

    # No shared prefix: avoid copying, hand the texts back unchanged.
    return '', texts