Skip to content

Commit

Permalink
feat(pm4py): improvement to variants computation
Browse files (browse the repository at this point in the history)
  • Loading branch information
fit-alessandro-berti committed Nov 28, 2023
1 parent 979cc90 commit abd315e
Showing 1 changed file with 18 additions and 11 deletions.
29 changes: 18 additions & 11 deletions pm4py/objects/log/util/pandas_numpy_variants.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
from collections import Counter
from typing import Tuple, Dict, Collection
import importlib.util


class Parameters(Enum):
Expand Down Expand Up @@ -59,19 +60,25 @@ def apply(dataframe: pd.DataFrame, parameters=None) -> Tuple[Dict[Collection[str
dataframe = pandas_utils.insert_index(dataframe, index_key)
dataframe.sort_values([case_id_key, timestamp_key, index_key])

cases = dataframe[case_id_key].to_numpy()
activities = dataframe[activity_key].to_numpy()

c_unq, c_ind, c_counts = np.unique(cases, return_index=True, return_counts=True)
variants_counter = Counter()
case_variant = dict()

for i in range(len(c_ind)):
si = c_ind[i]
ei = si + c_counts[i]
acts = tuple(activities[si:ei])
variants_counter[acts] += 1
case_variant[c_unq[i]] = acts
if importlib.util.find_spec("cudf"):
case_variant = dataframe.groupby(case_id_key)[activity_key].agg(list).to_dict()
case_variant = {x: tuple(y) for x, y in case_variant.items()}
variants_counter = Counter(case_variant.items())
else:
variants_counter = Counter()
cases = dataframe[case_id_key].to_numpy()
activities = dataframe[activity_key].to_numpy()

c_unq, c_ind, c_counts = np.unique(cases, return_index=True, return_counts=True)

for i in range(len(c_ind)):
si = c_ind[i]
ei = si + c_counts[i]
acts = tuple(activities[si:ei])
variants_counter[acts] += 1
case_variant[c_unq[i]] = acts

# return as Python dictionary
variants_dict = {x: y for x, y in variants_counter.items()}
Expand Down

0 comments on commit abd315e

Please sign in to comment.