-
Notifications
You must be signed in to change notification settings - Fork 300
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'PMPY-2016' into 'integration'
PMPY-2016: Log to target vectors (for ML purposes). Closes PMPY-2016. See merge request process-mining/pm4py/pm4py-core!913.
- Loading branch information
Showing
10 changed files
with
273 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import pm4py | ||
from pm4py.algo.transformation.log_to_target import algorithm as log_to_target | ||
|
||
|
||
def execute_script():
    """
    Reads the running-example log and prints the target vector produced
    by each log_to_target variant (remaining time, next time, next activity).
    For the next-activity variant, the list of encoded activities is printed too.
    """
    event_log = pm4py.read_xes("../tests/input_data/running-example.xes")
    variants = log_to_target.Variants

    # The two time-based variants are only shown through their target vector;
    # the second element of the returned pair is not printed for them.
    for time_variant in (variants.REMAINING_TIME, variants.NEXT_TIME):
        vector, _ = log_to_target.apply(event_log, variant=time_variant)
        print(vector)

    encoded_activities, activity_names = log_to_target.apply(event_log, variant=variants.NEXT_ACTIVITY)
    print(encoded_activities)
    print(activity_names)


if __name__ == "__main__":
    execute_script()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from pm4py.algo.transformation.log_to_target import algorithm, variants |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
from enum import Enum | ||
from pm4py.algo.transformation.log_to_target.variants import next_activity, next_time, remaining_time | ||
from pm4py.objects.log.obj import EventLog, EventStream | ||
import pandas as pd | ||
from typing import Union, Dict, Optional, Any, Tuple, List | ||
from pm4py.util import exec_utils | ||
|
||
|
||
class Variants(Enum):
    """
    Available target vectors; the value of every member is the
    variant module implementing the extraction.
    """

    # Encodes the next activity
    NEXT_ACTIVITY = next_activity
    # Encodes the next timestamp
    NEXT_TIME = next_time
    # Encodes the remaining time
    REMAINING_TIME = remaining_time
|
||
|
||
def apply(log: Union[EventLog, EventStream, pd.DataFrame], variant=None, parameters: Optional[Dict[Any, Any]] = None) -> Tuple[Any, List[str]]:
    """
    Builds, for a specific ML use case, the target vector of an event log
    by delegating to the selected variant.

    Parameters
    ---------------
    log
        Event log / Event stream / Pandas dataframe
    variant
        Specification of the target vector (must be provided, despite the None default):
        - Variants.NEXT_ACTIVITY => encodes the next activity
        - Variants.NEXT_TIME => encodes the next timestamp
        - Variants.REMAINING_TIME => encodes the remaining time
    parameters
        Optional dictionary of parameters, passed unchanged to the variant

    Returns
    --------------
    vector
        Target vector for the specified ML use case
    classes
        Classes (for every column of the target vector)

    Raises
    --------------
    Exception
        If no variant is specified
    """
    if variant is not None:
        implementation = exec_utils.get_variant(variant)
        return implementation.apply(log, parameters=parameters)

    raise Exception("please provide the variant between: Variants.NEXT_ACTIVITY, Variants.NEXT_TIME, Variants.REMAINING_TIME")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from pm4py.algo.transformation.log_to_target.variants import next_activity, next_time, remaining_time |
57 changes: 57 additions & 0 deletions
57
pm4py/algo/transformation/log_to_target/variants/next_activity.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
from enum import Enum | ||
from pm4py.util import exec_utils, constants, xes_constants | ||
from pm4py.objects.log.obj import EventLog, EventStream | ||
import pandas as pd | ||
from typing import Union, Dict, Optional, Any, Tuple, List | ||
from pm4py.objects.conversion.log import converter as log_converter | ||
|
||
|
||
class Parameters(Enum):
    """
    Keys accepted in the parameters dictionary of the next-activity variant.
    """

    # List of activities to consider
    ACTIVITIES = "activities"
    # Attribute that should be used as activity
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
|
||
|
||
def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Any, Any]] = None) -> Tuple[List[List[List[int]]], List[str]]:
    """
    Returns a list of matrixes (one for every case).

    Every matrix contains as many rows as many events are contained in the case,
    and as many columns as many activities are considered.
    In the row of a given event, the column of the activity of the NEXT event
    of the case is assigned to the value 1; the remaining columns are assigned
    to the value 0. The row is entirely 0 for the last event of the case, and
    whenever the next activity is not among the considered activities.

    Parameters
    --------------
    log
        Event log / Event stream / Pandas dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITIES => list of activities to consider
          (default: the sorted distinct activities of the log)
        - Parameters.ACTIVITY_KEY => attribute that should be used as activity

    Returns
    -------------
    target
        The aforementioned list of matrixes.
    activities
        The considered list of activities
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    activities = exec_utils.get_param_value(Parameters.ACTIVITIES, parameters, None)
    if activities is None:
        # Scan the log only when the caller did not provide the activities
        activities = sorted({event[activity_key] for trace in log for event in trace})

    # Column lookup built once; setdefault keeps the first position of a
    # repeated activity, which is what list.index would return
    column_of = {}
    for column, activity in enumerate(activities):
        column_of.setdefault(activity, column)
    width = len(activities)

    target = []
    for trace in log:
        rows = [[0] * width for _ in range(len(trace))]
        # The last event has no successor, so its row is left at zero
        for position in range(len(trace) - 1):
            column = column_of.get(trace[position + 1][activity_key])
            if column is not None:
                rows[position][column] = 1
        target.append(rows)

    return target, activities
50 changes: 50 additions & 0 deletions
50
pm4py/algo/transformation/log_to_target/variants/next_time.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from enum import Enum | ||
from pm4py.util import exec_utils, constants, xes_constants | ||
from pm4py.objects.log.obj import EventLog, EventStream | ||
import pandas as pd | ||
from typing import Union, Dict, Optional, Any, Tuple, List | ||
from pm4py.objects.conversion.log import converter as log_converter | ||
|
||
|
||
class Parameters(Enum):
    """
    Keys accepted in the parameters dictionary of the next-time variant.
    """

    # Attribute of the log to be used as timestamp
    TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
|
||
|
||
def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Any, Any]] = None) -> Tuple[List[List[float]], List[str]]:
    """
    Returns a list of lists (one for every case of the log) containing the difference between the timestamp of the current event
    and the timestamp of the next event of the case.
    The difference is computed between the values returned by the .timestamp() method
    of the timestamp attribute (seconds, for datetime objects).
    For the last event of the case, the difference is defaulted to 0.

    Parameters
    ---------------
    log
        Event log / Event stream / Pandas dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.TIMESTAMP_KEY => the attribute of the log to be used as timestamp

    Returns
    ---------------
    target
        The aforementioned list
    classes
        Dummy list (of classes)
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)

    target = []
    for trace in log:
        # Every timestamp is converted exactly once
        times = [event[timestamp_key].timestamp() for event in trace]
        differences = [times[i + 1] - times[i] for i in range(len(times) - 1)]
        if times:
            # The last event of the case has no successor
            differences.append(0.0)
        target.append(differences)

    return target, ["@@next_time"]
49 changes: 49 additions & 0 deletions
49
pm4py/algo/transformation/log_to_target/variants/remaining_time.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from enum import Enum | ||
from pm4py.util import exec_utils, constants, xes_constants | ||
from pm4py.objects.log.obj import EventLog, EventStream | ||
import pandas as pd | ||
from typing import Union, Dict, Optional, Any, Tuple, List | ||
from pm4py.objects.conversion.log import converter as log_converter | ||
|
||
|
||
class Parameters(Enum):
    """
    Keys accepted in the parameters dictionary of the remaining-time variant.
    """

    # Attribute of the log to be used as timestamp
    TIMESTAMP_KEY = constants.PARAMETER_CONSTANT_TIMESTAMP_KEY
|
||
|
||
def apply(log: Union[EventLog, EventStream, pd.DataFrame], parameters: Optional[Dict[Any, Any]] = None) -> Tuple[List[List[float]], List[str]]:
    """
    Returns a list of lists (one for every case of the log) containing the remaining time in seconds
    from an event to the end of the case, that is, the timestamp of the last event of the case
    minus the timestamp of the event.

    Parameters
    ---------------
    log
        Event log / Event stream / Pandas dataframe
    parameters
        Parameters of the algorithm, including:
        - Parameters.TIMESTAMP_KEY => the attribute of the log to be used as timestamp

    Returns
    ---------------
    target
        The aforementioned list
    classes
        Dummy list (of classes)
    """
    if parameters is None:
        parameters = {}

    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    timestamp_key = exec_utils.get_param_value(Parameters.TIMESTAMP_KEY, parameters, xes_constants.DEFAULT_TIMESTAMP_KEY)

    target = []
    for trace in log:
        # Every timestamp (including the one of the last event) is converted exactly once
        times = [event[timestamp_key].timestamp() for event in trace]
        # For an empty case the comprehension never evaluates times[-1]
        target.append([times[-1] - current for current in times])

    return target, ["@@remaining_time"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters