From abd315e4c37701dbbf540726496dfb45b3625992 Mon Sep 17 00:00:00 2001 From: Alessandro Berti Date: Tue, 28 Nov 2023 15:44:04 +0100 Subject: [PATCH] feat(pm4py): improvement to variants computation --- .../objects/log/util/pandas_numpy_variants.py | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/pm4py/objects/log/util/pandas_numpy_variants.py b/pm4py/objects/log/util/pandas_numpy_variants.py index 361efe021..fe7484ff8 100644 --- a/pm4py/objects/log/util/pandas_numpy_variants.py +++ b/pm4py/objects/log/util/pandas_numpy_variants.py @@ -4,6 +4,7 @@ import numpy as np from collections import Counter from typing import Tuple, Dict, Collection +import importlib.util class Parameters(Enum): @@ -59,19 +60,25 @@ def apply(dataframe: pd.DataFrame, parameters=None) -> Tuple[Dict[Collection[str dataframe = pandas_utils.insert_index(dataframe, index_key) dataframe.sort_values([case_id_key, timestamp_key, index_key]) - cases = dataframe[case_id_key].to_numpy() - activities = dataframe[activity_key].to_numpy() - - c_unq, c_ind, c_counts = np.unique(cases, return_index=True, return_counts=True) - variants_counter = Counter() case_variant = dict() - for i in range(len(c_ind)): - si = c_ind[i] - ei = si + c_counts[i] - acts = tuple(activities[si:ei]) - variants_counter[acts] += 1 - case_variant[c_unq[i]] = acts + if importlib.util.find_spec("cudf"): + case_variant = dataframe.groupby(case_id_key)[activity_key].agg(list).to_dict() + case_variant = {x: tuple(y) for x, y in case_variant.items()} + variants_counter = Counter(case_variant.values()) + else: + variants_counter = Counter() + cases = dataframe[case_id_key].to_numpy() + activities = dataframe[activity_key].to_numpy() + + c_unq, c_ind, c_counts = np.unique(cases, return_index=True, return_counts=True) + + for i in range(len(c_ind)): + si = c_ind[i] + ei = si + c_counts[i] + acts = tuple(activities[si:ei]) + variants_counter[acts] += 1 + case_variant[c_unq[i]] = acts # return as Python
dictionary variants_dict = {x: y for x, y in variants_counter.items()}