Coverage for puristaa/puristaa.py: 100.00%
21 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 22:03:31 +00:00
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-04 22:03:31 +00:00
# -*- coding: utf-8 -*-
"""Extract common prefix from sequence of strings and yield sequence of rest strings.

Implementation uses min-max left matching, single character backtracking policy and a list.
"""
6import typing
@typing.no_type_check
def prefix_compression(texts, policy=None):
    """Return common prefix string abiding policy and compressed texts string list.

    Args:
        texts: A list or tuple of strings, or a single value which is then
            treated as a one-element list. Falsy input (empty list, empty
            string, ``None``) is handed back unchanged.
        policy: Optional predicate called with a single character. When
            given, the common prefix is shortened so that it ends on the
            rightmost character for which the predicate is true. If no
            character of the common prefix satisfies the predicate, the
            full common prefix is kept.

    Returns:
        A pair ``(prefix, rests)``. When a non-empty prefix was found,
        ``rests`` is a new list holding every text with the prefix removed.
        When no prefix was found (or the input was falsy), ``prefix`` is
        ``''`` and ``rests`` is the input object itself, not a copy.
    """
    if not texts:  # Early out: empty prefix and the (empty) input as is
        return '', texts

    if not isinstance(texts, (list, tuple)):
        texts = [texts]

    # Min-max left matching: only the smallest and largest entries are
    # compared. Under lexicographic ordering every other entry sorts between
    # them and therefore shares whatever prefix these two have in common.
    first, last = min(texts), max(texts)
    prefix_guard = 0
    for low_char, high_char in zip(first, last):
        if low_char != high_char:
            break
        prefix_guard += 1

    if policy:
        # Backtrack one character at a time to the rightmost position
        # accepted by the policy; the prefix then ends on that character.
        for here in range(prefix_guard - 1, -1, -1):
            if policy(first[here]):
                prefix_guard = here + 1
                break

    if not prefix_guard:  # Reduce memory pressure for all different texts
        return '', texts

    return first[:prefix_guard], [text[prefix_guard:] for text in texts]