# Provenance: added to anjana/anonymity/utils/utils.py by commit 8500823
# ("feat: generate interval-based hierarchies", Judith Sáinz-Pardo Díaz,
# 2024-05-07). The same commit also exports ``generate_intervals`` from
# anjana/anonymity/utils/__init__.py (import list and ``__all__``).


@beartype()
def generate_intervals(
    quasi_ident: typing.Union[typing.List, np.ndarray],
    inf: typing.Union[int, float],
    sup: typing.Union[int, float],
    step: int,
) -> list:
    """Build an interval-based generalization (hierarchy) of numeric values.

    Every value of the quasi-identifier is replaced by the label of the
    half-open interval ``[a, b)`` of length ``step`` that contains it. The
    interval edges start at ``inf`` and are spaced ``step`` apart, and they
    extend far enough that every value up to and including ``sup`` falls inside
    an interval. A value lying exactly on an edge belongs to the interval that
    starts at that edge. Values below ``inf`` are assigned to the first
    interval.

    :param quasi_ident: values of the quasi-identifier on which the
        interval-based generalization is to be obtained
    :type quasi_ident: list or numpy array

    :param inf: lower bound of the set of intervals (first edge)
    :type inf: int or float

    :param sup: upper bound of the values covered by the set of intervals
    :type sup: int or float

    :param step: length of each interval
    :type step: int

    :return: list with the interval label associated with each given value,
        in the same order as ``quasi_ident``
    :rtype: list

    :raises ValueError: if ``step`` is not positive or ``sup`` is lower
        than ``inf``
    :raises IndexError: if a value is not below the last interval edge
    """
    if step <= 0:
        raise ValueError(f"step must be a positive integer, got {step}")
    if sup < inf:
        raise ValueError(f"sup ({sup}) must not be lower than inf ({inf})")

    # np.arange excludes its stop value, so going one full step past ``sup``
    # guarantees that the last edge is strictly greater than ``sup`` and that
    # ``sup`` itself lands inside a right-open interval.
    edges = np.arange(inf, sup + step + 1, step)
    last_edge = len(edges) - 1

    intervals = []
    for num in quasi_ident:
        # side="right" returns the index of the first edge strictly greater
        # than ``num``, i.e. the upper edge of the half-open interval holding
        # it; the default side="left" would put a value lying on an edge into
        # the interval that ends there.
        upper = int(np.searchsorted(edges, num, side="right"))
        if upper == 0:
            # Value below ``inf``: keep it in the first interval.
            upper = 1
        elif upper > last_edge:
            raise IndexError(
                f"value {num} is not below the last interval edge "
                f"{edges[-1]}; increase 'sup' to cover it"
            )
        intervals.append(f"[{edges[upper - 1]}, {edges[upper]})")

    return intervals