Skip to content

Commit

Permalink
PMPY-1625 Bug fix / Efficiency change Format Dataframe
Browse files Browse the repository at this point in the history
  • Loading branch information
fit-alessandro-berti committed Mar 5, 2022
1 parent 65b4919 commit 1bcadff
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 4 deletions.
1 change: 1 addition & 0 deletions pm4py/util/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

DEFAULT_VARIANT_SEP = ","
DEFAULT_INDEX_KEY = "@@index"
DEFAULT_CASE_INDEX_KEY = "@@case_index"
DEFAULT_INDEX_IN_TRACE_KEY = "@@index_in_trace"
DEFAULT_EVENT_INDEX_KEY = "@@event_index"
DEFAULT_FLOW_TIME = "@@flow_time"
Expand Down
33 changes: 31 additions & 2 deletions pm4py/util/pandas_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def to_dict_index(df):
return df.to_dict('index')


def insert_index(df, column_name=constants.DEFAULT_INDEX_KEY):
def insert_index(df, column_name=constants.DEFAULT_INDEX_KEY, copy_dataframe=True):
"""
Inserts the dataframe index in the specified column
Expand All @@ -51,17 +51,46 @@ def insert_index(df, column_name=constants.DEFAULT_INDEX_KEY):
Dataframe
column_name
Name of the column that should host the index
copy_dataframe
Establishes if the original dataframe should be copied before inserting the column
Returns
--------------
df
Dataframe with index
"""
df = df.copy()
if copy_dataframe:
df = df.copy()
df[column_name] = df.index
return df


def insert_case_index(df, column_name=constants.DEFAULT_CASE_INDEX_KEY,
                      case_id=constants.CASE_CONCEPT_NAME, copy_dataframe=True):
    """
    Adds a column holding, for every row, the number of the case it belongs to

    The number is the one produced by pandas' ``groupby(case_id).ngroup()``, so
    rows that fall into the same group share the same number.

    Parameters
    ---------------
    df
        Dataframe
    column_name
        Name of the column in which the case number is stored
    case_id
        Case identifier (the key the dataframe is grouped by)
    copy_dataframe
        If true, the column is added to a copy of the dataframe and the one
        passed by the caller is left untouched; otherwise the column is written
        directly into the provided dataframe

    Returns
    ---------------
    df
        Dataframe with case index
    """
    # work either on a private copy or directly on the caller's object
    target = df.copy() if copy_dataframe else df
    target[column_name] = target.groupby(case_id).ngroup()
    return target


def insert_ev_in_tr_index(df: pd.DataFrame, case_id: str = constants.CASE_CONCEPT_NAME,
column_name: str = constants.DEFAULT_INDEX_IN_TRACE_KEY) -> pd.DataFrame:
"""
Expand Down
5 changes: 3 additions & 2 deletions pm4py/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pm4py.objects.ocel.obj import OCEL
from pm4py.util import constants, xes_constants, pandas_utils


INDEX_COLUMN = "@@index"


Expand Down Expand Up @@ -71,11 +72,11 @@ def format_dataframe(df: pd.DataFrame, case_id: str = constants.CASE_CONCEPT_NAM
# make sure the activity column is of string type
df[xes_constants.DEFAULT_NAME_KEY] = df[xes_constants.DEFAULT_NAME_KEY].astype("string")
# set an index column
df = pandas_utils.insert_index(df, INDEX_COLUMN)
df = pandas_utils.insert_index(df, INDEX_COLUMN, copy_dataframe=False)
# sorts the dataframe
df = df.sort_values([constants.CASE_CONCEPT_NAME, xes_constants.DEFAULT_TIMESTAMP_KEY, INDEX_COLUMN])
# re-set the index column
df = pandas_utils.insert_index(df, INDEX_COLUMN)
df = pandas_utils.insert_index(df, INDEX_COLUMN, copy_dataframe=False)
# sets the properties
if not hasattr(df, 'attrs'):
# legacy (Python 3.6) support
Expand Down

0 comments on commit 1bcadff

Please sign in to comment.