Skip to content

Commit

Permalink
Merge branch 'ft-1398-sampling-simplified-interface' into 'integration'
Browse files Browse the repository at this point in the history
FT 1398 Sampling in simplified interface

See merge request process-mining/pm4py/pm4py-core!556
fit-sebastiaan-van-zelst committed Dec 14, 2021

Verified

This commit was signed with the committer’s verified signature.
jrieken Johannes Rieken
2 parents fd5037e + 3fca34a commit ff62d66
Showing 4 changed files with 104 additions and 3 deletions.
3 changes: 2 additions & 1 deletion pm4py/__init__.py
Original file line number Diff line number Diff line change
@@ -34,7 +34,8 @@
get_minimum_self_distances, get_minimum_self_distance_witnesses, \
get_case_arrival_average, get_rework_cases_per_activity, get_case_overlap, get_cycle_time, \
get_all_case_durations, get_case_duration
from pm4py.utils import format_dataframe, parse_process_tree, serialize, deserialize, set_classifier, parse_event_log_string, project_on_event_attribute
from pm4py.utils import format_dataframe, parse_process_tree, serialize, deserialize, set_classifier, parse_event_log_string, project_on_event_attribute, \
sample_cases, sample_events
from pm4py.vis import view_petri_net, save_vis_petri_net, view_dfg, save_vis_dfg, view_process_tree, \
save_vis_process_tree, \
view_ocdfg, save_vis_ocdfg, view_heuristics_net, save_vis_heuristics_net, view_bpmn, save_vis_bpmn, view_sna, save_vis_sna,\
2 changes: 1 addition & 1 deletion pm4py/objects/ocel/util/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from pm4py.objects.ocel.util import attributes_names, extended_table, flattening, related_objects, related_events, filtering_utils, log_ocel
from pm4py.objects.ocel.util import attributes_names, extended_table, flattening, related_objects, related_events, filtering_utils, log_ocel, sampling
50 changes: 50 additions & 0 deletions pm4py/objects/ocel/util/sampling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from enum import Enum
from pm4py.util import exec_utils
from pm4py.objects.ocel import constants
import random
from pm4py.objects.ocel.util import filtering_utils
from copy import copy
from pm4py.objects.ocel.obj import OCEL
from typing import Optional, Dict, Any


class Parameters(Enum):
    """
    Keys recognised in the ``parameters`` dictionary of the OCEL sampling functions.
    """
    # Key taken from the OCEL constants; not read by sample_ocel_events in this module.
    OBJECT_ID = constants.PARAM_OBJECT_ID
    # Key for the name of the column holding the event identifier
    # (sample_ocel_events falls back to ocel.event_id_column when it is not given).
    EVENT_ID = constants.PARAM_EVENT_ID
    # Key for the maximum number of entities to keep in the sample
    # (sample_ocel_events falls back to 100 when it is not given).
    NUM_ENTITIES = "num_entities"


def sample_ocel_events(ocel: OCEL, parameters: Optional[Dict[Any, Any]] = None) -> OCEL:
    """
    Keeps a random sample of the events of an object-centric event log

    Parameters
    ------------------
    ocel
        Object-centric event log
    parameters
        Parameters of the algorithm, including:
        - Parameters.EVENT_ID => event identifier column (default: ocel.event_id_column)
        - Parameters.NUM_ENTITIES => maximum number of events to keep (default: 100);
          when the log contains fewer distinct events, all of them are kept

    Returns
    ------------------
    sampled_ocel
        Sampled object-centric event log (the input log object is not reassigned:
        the sampling is applied on a shallow copy)

    Raises
    ------------------
    ValueError
        If the requested number of events is negative
    """
    if parameters is None:
        parameters = {}

    event_id_column = exec_utils.get_param_value(Parameters.EVENT_ID, parameters, ocel.event_id_column)
    num_entities = exec_utils.get_param_value(Parameters.NUM_ENTITIES, parameters, 100)

    # A negative size would otherwise turn the slice below into "all but the
    # last k events", which is never what the caller asked for.
    if num_entities < 0:
        raise ValueError("the number of events to sample cannot be negative")

    events = list(ocel.events[event_id_column].unique())
    # Never request more events than the log actually contains.
    num_events = min(len(events), num_entities)

    # Shuffle the (local) list of identifiers and keep its head as the sample.
    random.shuffle(events)
    picked_events = events[:num_events]

    # Shallow copy: only the events table is rebound, so the caller's OCEL
    # object keeps pointing to its original events dataframe.
    ocel = copy(ocel)
    ocel.events = ocel.events[ocel.events[event_id_column].isin(picked_events)]

    # NOTE(review): presumably aligns the remaining OCEL tables (objects,
    # relations) with the reduced set of events - confirm in filtering_utils.
    return filtering_utils.propagate_event_filtering(ocel, parameters=parameters)
52 changes: 51 additions & 1 deletion pm4py/utils.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@

import pandas as pd

from pm4py.objects.log.obj import EventLog, Trace, Event
from pm4py.objects.log.obj import EventLog, EventStream, Trace, Event
from pm4py.objects.process_tree.obj import ProcessTree
from pm4py.objects.ocel.obj import OCEL
from pm4py.util import constants, xes_constants, pandas_utils
@@ -368,3 +368,53 @@ def general_checks_classical_event_log(log):
if type(log) is OCEL:
raise Exception("the method cannot be applied on object-centric event logs!")
return True


def sample_cases(log: Union[EventLog, pd.DataFrame], num_cases: int) -> Union[EventLog, pd.DataFrame]:
    """
    Draw a (random) sample of cases from the given event log.

    Parameters
    ---------------
    log
        Event log / Pandas dataframe
    num_cases
        Number of cases to sample

    Returns
    ---------------
    sampled_log
        Sampled event log, produced by the sampler matching the type of ``log``;
        ``None`` when ``log`` is neither an EventLog nor a Pandas dataframe
    """
    # The samplers are imported lazily, only for the branch actually taken.
    if isinstance(log, EventLog):
        from pm4py.objects.log.util import sampling as log_sampling
        sampled_log = log_sampling.sample(log, num_cases)
        return sampled_log

    if isinstance(log, pd.DataFrame):
        from pm4py.objects.log.util import dataframe_utils as df_utils
        sampler_parameters = {"max_no_cases": num_cases}
        return df_utils.sample_dataframe(log, parameters=sampler_parameters)

    # Unsupported input types fall through without sampling.
    return None


def sample_events(log: Union[EventStream, OCEL, pd.DataFrame], num_events: int) -> Union[EventStream, OCEL, pd.DataFrame]:
    """
    (Random) Sample a given number of events from the event log.

    Parameters
    ---------------
    log
        Event stream / OCEL / Pandas dataframe
    num_events
        Number of events to sample

    Returns
    ---------------
    sampled_log
        Sampled event stream / OCEL / Pandas dataframe. For Pandas dataframes,
        at most ``num_events`` rows are returned: when the dataframe has fewer
        rows, all of them are returned. ``None`` is returned when ``log`` is
        none of the supported types.
    """
    if isinstance(log, EventStream):
        from pm4py.objects.log.util import sampling
        return sampling.sample_stream(log, num_events)
    elif isinstance(log, OCEL):
        from pm4py.objects.ocel.util import sampling
        return sampling.sample_ocel_events(log, parameters={"num_entities": num_events})
    elif isinstance(log, pd.DataFrame):
        # DataFrame.sample raises when asked for more rows than available
        # (sampling without replacement); cap the request like the OCEL
        # sampler does, so that every supported type behaves the same way.
        return log.sample(n=min(num_events, len(log)))

0 comments on commit ff62d66

Please sign in to comment.