Skip to content

Commit

Permalink
Merge branch 'ft-670-min-trace-length-extensive-playout' into 'integr…
Browse files Browse the repository at this point in the history
…ation'

(3) Introducing minimum trace length in process tree extensive playout

See merge request pm4py/pm4py-core!247
  • Loading branch information
fit-sebastiaan-van-zelst committed Jan 4, 2021
2 parents 50722bb + 69e1484 commit 0888ab2
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 23 deletions.
90 changes: 90 additions & 0 deletions pm4py/objects/process_tree/bottomup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,36 @@
import math

from pm4py.objects.process_tree.pt_operator import Operator


def get_max_trace_length(tree, parameters=None):
    """
    Computes the length of the longest trace that the process tree can produce.
    Loops are not unbounded here: get_max_length_dict accounts for them
    with a very large finite bound.

    Parameters
    ---------------
    tree
        Process tree
    parameters
        Possible parameters of the algorithm (forwarded to get_bottomup_nodes)

    Returns
    --------------
    max_trace_length
        The maximum length of a trace (value stored for the root of the tree)
    """
    parameters = {} if parameters is None else parameters

    nodes = get_bottomup_nodes(tree, parameters=parameters)
    num_nodes = len(nodes)
    max_length_dict = {}
    # the dictionary is filled in-place, one node at a time, following the
    # order returned by get_bottomup_nodes
    for node in nodes:
        get_max_length_dict(node, max_length_dict, num_nodes)

    return max_length_dict[tree]


def get_min_trace_length(tree, parameters=None):
"""
Get the minimum length of a trace allowed by the process tree
Expand Down Expand Up @@ -28,6 +58,38 @@ def get_min_trace_length(tree, parameters=None):
return min_length_dict[tree]


def get_max_rem_dict(tree, parameters=None):
    """
    Associates each node of the tree with the maximum number of activities
    that can be added around it to 'complete' a trace of the overall tree,
    i.e. the maximum trace length of the whole tree minus the maximum
    trace length of the node itself.

    Parameters
    ----------------
    tree
        Process tree
    parameters
        Parameters of the algorithm (forwarded to get_bottomup_nodes)

    Returns
    ---------------
    max_rem_dict
        Dictionary mapping every node to the difference described above
    """
    parameters = {} if parameters is None else parameters

    nodes = get_bottomup_nodes(tree, parameters=parameters)
    num_nodes = len(nodes)
    max_length_dict = {}
    for node in nodes:
        get_max_length_dict(node, max_length_dict, num_nodes)

    # the root value is looked up per node (as opposed to once up-front) so
    # that an empty list of nodes still yields an empty dictionary
    return {node: max_length_dict[tree] - max_length_dict[node] for node in nodes}


def get_min_rem_dict(tree, parameters=None):
"""
Gets for each node of the tree the minimum number of activities
Expand Down Expand Up @@ -60,6 +122,34 @@ def get_min_rem_dict(tree, parameters=None):
return min_rem_dict


def get_max_length_dict(node, max_length_dict, num_nodes):
    """
    Stores in max_length_dict the maximum length of a trace generated by the
    given node. The children of the node must already be in the dictionary,
    so the nodes have to be provided in a bottom-up order.

    Parameters
    ---------------
    node
        Node
    max_length_dict
        Dictionary that is populated in-place
    num_nodes
        Number of nodes in the process tree
    """
    children = node.children
    if len(children) == 0:
        # leaf: an invisible step adds no event, a labeled activity adds one
        max_length_dict[node] = 0 if node.label is None else 1
        return

    operator = node.operator
    if operator == Operator.XOR:
        # exactly one child is executed: the longest one gives the bound
        max_length_dict[node] = max(max_length_dict[child] for child in children)
        return

    if operator == Operator.PARALLEL or operator == Operator.SEQUENCE:
        # all the children are executed: their bounds add up
        max_length_dict[node] = sum(max_length_dict[child] for child in children)
        return

    if operator == Operator.LOOP:
        children_total = sum(max_length_dict[child] for child in children)
        # a loop can repeat, so a very large finite term is added; the exponent
        # shrinks as the tree grows (presumably so that sums over all the nodes
        # stay within 2 ** 48 -- TODO confirm)
        tree_size_bits = math.ceil(math.log(num_nodes) / math.log(2))
        max_length_dict[node] = children_total + 2 ** (48 - tree_size_bits)


def get_min_length_dict(node, min_length_dict):
"""
Populates, given the nodes of a tree in a bottom-up order, the minimum length dictionary
Expand Down
97 changes: 74 additions & 23 deletions pm4py/simulation/tree_playout/variants/extensive.py
Original file line number Diff line number Diff line change
@@ -1,38 +1,42 @@
from enum import Enum

from pm4py.algo.discovery.footprints import algorithm as fp_discovery
from pm4py.objects.process_tree import bottomup as bottomup_discovery
from pm4py.util import exec_utils
from pm4py.objects.log.log import EventLog, Trace, Event
from pm4py.objects.process_tree import bottomup as bottomup_discovery
from pm4py.objects.process_tree.pt_operator import Operator
from pm4py.util import constants, xes_constants
from enum import Enum
import sys
from pm4py.util import exec_utils

TRACES = "traces"
SKIPPABLE = "skippable"


class Parameters(Enum):
    """
    Keys accepted in the parameters dictionary of the extensive playout
    """
    # minimum length of a generated trace (apply() falls back to 1)
    MIN_TRACE_LENGTH = "min_trace_length"
    # maximum length of a generated trace (apply() falls back to the minimum
    # trace length allowed by the tree)
    MAX_TRACE_LENGTH = "max_trace_length"
    # maximum number of occurrences of a loop (apply() falls back to half of
    # the maximum trace length, rounded down)
    MAX_LOOP_OCC = "max_loop_occ"
    # attribute of the events that holds the activity
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # upper bound on the number of traces that are generated (apply() falls
    # back to 100000)
    MAX_LIMIT_NUM_TRACES = "max_limit_num_traces"
    # flag read by apply() (falls back to False)
    RETURN_SET_STRINGS = "return_set_strings"


def get_playout_leaf(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
                     max_limit_num_traces):
    """
    Performs the playout of a leaf (activity or invisible), returning the traces allowed by the tree

    Parameters
    --------------
    node
        Leaf of the process tree
    playout_dictio
        Dictionary of the playouts, populated in-place with the entry of the leaf
    min_trace_length
        Minimum length of a trace of the overall tree
    max_trace_length
        Maximum length of a trace of the overall tree
    max_loop_occ
        Not used for leaves (kept to share the signature of the other playout functions)
    min_rem_dict
        For each node, the minimum number of activities needed to complete a trace of the tree
    max_rem_dict
        For each node, the maximum number of activities that can complete a trace of the tree
    max_limit_num_traces
        Not used for leaves (kept to share the signature of the other playout functions)
    """
    mr = min_rem_dict[node]
    mar = max_rem_dict[node]
    traces = set()
    if node.label is None:
        # an invisible leaf always contributes the empty trace
        traces.add(())
    elif mar + 1 >= min_trace_length and max_trace_length - mr >= 1:
        # the activity is kept only if a trace through this leaf can still
        # reach the minimum length and fit within the maximum length
        traces.add((node.label,))
    playout_dictio[node] = {TRACES: traces}


def get_playout_xor(node, playout_dictio, max_trace_length, max_loop_occ, min_rem_dict, max_limit_num_traces):
def get_playout_xor(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
max_limit_num_traces):
"""
Performs the playout of a XOR node, returning the traces allowed by the tree
"""
Expand Down Expand Up @@ -71,6 +75,32 @@ def get_min_remaining_length(traces):
return min_len_traces, min_rem_length


def get_max_remaining_length(traces):
    """
    Maximum remaining length (for sequential, parallel cut detection)

    For every child, computes the length of its last trace and the maximum
    number of events that the children following it can still contribute.
    Works for any number of children, including none or a single one.

    Parameters
    --------------
    traces
        List with one entry per child; each entry is the list of the traces of
        that child. Only the last trace of each entry is inspected, so the
        entries are expected to be sorted by increasing length.

    Returns
    --------------
    max_len_traces
        For each child, the length of its last trace (0 if the child has no traces)
    max_rem_length
        For each child, the sum of max_len_traces over all the children after it
    """
    max_len_traces = [len(x[-1]) if len(x) > 0 else 0 for x in traces]
    # suffix sums filled from right to left; starting from zeros leaves the
    # last entry at 0 and needs no special handling when there are fewer
    # than two children
    max_rem_length = [0] * len(max_len_traces)
    for j in range(len(max_len_traces) - 2, -1, -1):
        max_rem_length[j] = max_rem_length[j + 1] + max_len_traces[j + 1]
    return max_len_traces, max_rem_length


def flatten(x):
"""
Flattens a list of tuples
Expand All @@ -82,17 +112,20 @@ def flatten(x):
return ret


def get_sequential_compositions_children(traces, max_trace_length, mr, max_limit_num_traces):
def get_sequential_compositions_children(traces, min_trace_length, max_trace_length, mr, mar, max_limit_num_traces):
"""
Returns alls the possible sequential combinations between
the children of a tree
"""
diff = max_trace_length - mr
diff2 = min_trace_length - mar
min_len_traces, min_rem_length = get_min_remaining_length(traces)
max_len_traces, max_rem_length = get_max_remaining_length(traces)
curr = list(traces[0])
i = 1
while i < len(traces):
mrl = min_rem_length[i]
marl = max_rem_length[i]
to_visit = []
j = 0
while j < len(curr):
Expand All @@ -104,7 +137,8 @@ def get_sequential_compositions_children(traces, max_trace_length, mr, max_limit
y = traces[i][z]
xy = list(x)
xy.append(y)
if sum(len(k) for k in xy) + mrl <= diff:
val = sum(len(k) for k in xy)
if val + mrl <= diff and val + marl >= diff2:
to_visit.append(xy)
z = z + 1
j = j + 1
Expand All @@ -113,13 +147,15 @@ def get_sequential_compositions_children(traces, max_trace_length, mr, max_limit
return curr


def get_playout_parallel(node, playout_dictio, max_trace_length, max_loop_occ, min_rem_dict, max_limit_num_traces):
def get_playout_parallel(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
max_rem_dict, max_limit_num_traces):
"""
Performs the playout of an AND node, returning the traces allowed by the tree
"""
mr = min_rem_dict[node]
mar = max_rem_dict[node]
traces = list(sorted(playout_dictio[x][TRACES], key=lambda x: len(x)) for x in node.children)
sequential_compositions = get_sequential_compositions_children(traces, max_trace_length, mr, max_limit_num_traces)
sequential_compositions = get_sequential_compositions_children(traces, min_trace_length, max_trace_length, mr, mar, max_limit_num_traces)
final_traces = list()
for x in sequential_compositions:
if len(final_traces) >= max_limit_num_traces:
Expand All @@ -141,26 +177,30 @@ def get_playout_parallel(node, playout_dictio, max_trace_length, max_loop_occ, m
playout_dictio[node] = {TRACES: set(final_traces)}


def get_playout_sequence(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
                         max_rem_dict, max_limit_num_traces):
    """
    Performs the playout of a sequence node, returning the traces allowed by the tree

    Parameters
    --------------
    node
        Sequence node of the process tree
    playout_dictio
        Dictionary of the playouts; it must contain the traces of all the children of the node.
        The entry of the node is added and the traces of the children are removed (in-place).
    min_trace_length
        Minimum length of a trace of the overall tree
    max_trace_length
        Maximum length of a trace of the overall tree
    max_loop_occ
        Not used for sequences (kept to share the signature of the other playout functions)
    min_rem_dict
        For each node, the minimum number of activities needed to complete a trace of the tree
    max_rem_dict
        For each node, the maximum number of activities that can complete a trace of the tree
    max_limit_num_traces
        Maximum number of traces (forwarded to get_sequential_compositions_children)
    """
    mr = min_rem_dict[node]
    mar = max_rem_dict[node]
    # one list of traces per child, each sorted by increasing length
    traces = [sorted(playout_dictio[child][TRACES], key=len) for child in node.children]
    sequential_compositions = get_sequential_compositions_children(traces, min_trace_length, max_trace_length, mr,
                                                                   mar, max_limit_num_traces)
    # each composition holds one trace per child: concatenate them in order
    final_traces = {tuple(flatten(x)) for x in sequential_compositions}
    # the traces of the children are not needed anymore
    for child in node.children:
        del playout_dictio[child][TRACES]
    playout_dictio[node] = {TRACES: final_traces}


def get_playout_loop(node, playout_dictio, max_trace_length, max_loop_occ, min_rem_dict, max_limit_num_traces):
def get_playout_loop(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
max_limit_num_traces):
"""
Performs the playout of a loop node, returning the traces allowed by the tree
"""
mr = min_rem_dict[node]
mar = max_rem_dict[node]
final_traces = set()
do_traces = sorted(list(playout_dictio[node.children[0]][TRACES]), key=lambda x: len(x))
redo_traces = sorted(list(playout_dictio[node.children[1]][TRACES]), key=lambda x: len(x))
Expand All @@ -169,6 +209,7 @@ def get_playout_loop(node, playout_dictio, max_trace_length, max_loop_occ, min_r
closed = set()
diff1 = max_trace_length - mr
diff2 = max_trace_length - min_do_trace - mr
diff3 = min_trace_length - mar
while to_visit:
curr = to_visit.pop(0)
curr_trace = curr[0]
Expand All @@ -179,7 +220,7 @@ def get_playout_loop(node, playout_dictio, max_trace_length, max_loop_occ, min_r
continue
closed.add(curr_trace)

if len(curr_trace) <= diff1:
if diff3 <= len(curr_trace) <= diff1:
final_traces.add(curr_trace)
if len(final_traces) > max_limit_num_traces:
break
Expand All @@ -201,20 +242,26 @@ def get_playout_loop(node, playout_dictio, max_trace_length, max_loop_occ, min_r
playout_dictio[node] = {TRACES: final_traces}


def get_playout(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
                max_limit_num_traces):
    """
    Performs a playout of a node of the process tree, given the type

    The node is dispatched to the playout function of its kind (leaf, XOR,
    parallel, sequence, loop); nodes with any other operator are ignored.

    Parameters
    --------------
    node
        Node of the process tree
    playout_dictio
        Dictionary of the playouts, populated in-place by the selected function
    min_trace_length
        Minimum length of a trace of the overall tree
    max_trace_length
        Maximum length of a trace of the overall tree
    max_loop_occ
        Maximum number of occurrences of a loop
    min_rem_dict
        For each node, the minimum number of activities needed to complete a trace of the tree
    max_rem_dict
        For each node, the maximum number of activities that can complete a trace of the tree
    max_limit_num_traces
        Maximum number of traces
    """
    if len(node.children) == 0:
        playout_function = get_playout_leaf
    elif node.operator == Operator.XOR:
        playout_function = get_playout_xor
    elif node.operator == Operator.PARALLEL:
        playout_function = get_playout_parallel
    elif node.operator == Operator.SEQUENCE:
        playout_function = get_playout_sequence
    elif node.operator == Operator.LOOP:
        playout_function = get_playout_loop
    else:
        return

    # all the playout functions share the same signature
    playout_function(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
                     max_rem_dict, max_limit_num_traces)


def apply(tree, parameters=None):
Expand All @@ -227,6 +274,7 @@ def apply(tree, parameters=None):
Process tree
parameters
Possible parameters, including:
- Parameters.MIN_TRACE_LENGTH => minimum length of a trace (default: 1)
- Parameters.MAX_TRACE_LENGTH => maximum length of a trace (default: min_allowed_trace_length)
- Parameters.MAX_LOOP_OCC => maximum number of occurrences for a loop (default: MAX_TRACE_LENGTH)
- Parameters.ACTIVITY_KEY => activity key
Expand All @@ -247,17 +295,20 @@ def apply(tree, parameters=None):
activities = {act: Event({activity_key: act}) for act in activities}

min_allowed_trace_length = bottomup_discovery.get_min_trace_length(tree, parameters=parameters)
min_trace_length = exec_utils.get_param_value(Parameters.MIN_TRACE_LENGTH, parameters, 1)
max_trace_length = exec_utils.get_param_value(Parameters.MAX_TRACE_LENGTH, parameters, min_allowed_trace_length)
max_loop_occ = exec_utils.get_param_value(Parameters.MAX_LOOP_OCC, parameters, int(max_trace_length/2))
max_loop_occ = exec_utils.get_param_value(Parameters.MAX_LOOP_OCC, parameters, int(max_trace_length / 2))
max_limit_num_traces = exec_utils.get_param_value(Parameters.MAX_LIMIT_NUM_TRACES, parameters, 100000)
return_set_strings = exec_utils.get_param_value(Parameters.RETURN_SET_STRINGS, parameters, False)

bottomup = bottomup_discovery.get_bottomup_nodes(tree, parameters=parameters)
min_rem_dict = bottomup_discovery.get_min_rem_dict(tree, parameters=parameters)
max_rem_dict = bottomup_discovery.get_max_rem_dict(tree, parameters=parameters)

playout_dictio = {}
for i in range(len(bottomup)):
get_playout(bottomup[i], playout_dictio, max_trace_length, max_loop_occ, min_rem_dict, max_limit_num_traces)
get_playout(bottomup[i], playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
max_rem_dict, max_limit_num_traces)
tree_playout_traces = playout_dictio[tree][TRACES]

if return_set_strings:
Expand Down
38 changes: 38 additions & 0 deletions tests/input_data/tree_ex_with_loops.ptml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?xml version='1.0' encoding='UTF-8'?>
<ptml>
<processTree name="ddb481a1-c17f-4f31-a912-c076ffddbd53" root="422baaf2-aeac-4c00-b9bd-de44aedaeac5" id="db6737a6-742d-4726-9cb2-50363ea6b7c7">
<sequence name="" id="422baaf2-aeac-4c00-b9bd-de44aedaeac5"/>
<xorLoop name="" id="82dcab6e-64ae-4b2a-9546-911789475ba2"/>
<and name="" id="76a90ef5-a937-4746-9f11-a77d92db1d91"/>
<and name="" id="5bca3af2-4d62-4021-8051-42776530930a"/>
<manualTask name="h" id="33d7366e-da8d-4ae6-8fb3-e1aeac30212b"/>
<manualTask name="i" id="73b7d79d-9c19-4de3-880e-7b36e3ce186e"/>
<automaticTask name="" id="defd52e0-3b40-49fa-b348-f52132047f4b"/>
<manualTask name="a" id="086379f6-e8ac-4cbe-8845-b88e5efd6890"/>
<manualTask name="b" id="0f9b33ac-7c11-406a-985f-a3cc170d99be"/>
<xor name="" id="7c28cdb9-394e-40de-a463-0297d9980c0f"/>
<manualTask name="d" id="0d0c9181-0ced-4c70-b40c-49fc8730883a"/>
<manualTask name="e" id="1be27e2c-7063-482d-ae2a-fdcfef7eb56f"/>
<and name="" id="79bfce80-33a7-4c89-870a-6b54c9074d9b"/>
<manualTask name="c" id="37262dfc-a470-4e5c-a88c-75357c96ded3"/>
<manualTask name="d" id="0d0c9181-0ced-4c70-b40c-49fc8730883a"/>
<manualTask name="f" id="d5269467-1506-424e-900e-8c199304c600"/>
<manualTask name="g" id="a3df91b0-0d93-4a4c-8a1d-0d236fec9a43"/>
<parentsNode id="a29342a3-331e-49f7-b229-a773e0783cdb" sourceId="422baaf2-aeac-4c00-b9bd-de44aedaeac5" targetId="82dcab6e-64ae-4b2a-9546-911789475ba2"/>
<parentsNode id="b4811bc4-3021-40d8-944d-63e0416dcbe2" sourceId="422baaf2-aeac-4c00-b9bd-de44aedaeac5" targetId="76a90ef5-a937-4746-9f11-a77d92db1d91"/>
<parentsNode id="7ce16e46-76f8-4008-96f2-696a5fdd9af6" sourceId="422baaf2-aeac-4c00-b9bd-de44aedaeac5" targetId="5bca3af2-4d62-4021-8051-42776530930a"/>
<parentsNode id="a9d7f0eb-38a1-4666-bc13-a84da3ff4192" sourceId="82dcab6e-64ae-4b2a-9546-911789475ba2" targetId="33d7366e-da8d-4ae6-8fb3-e1aeac30212b"/>
<parentsNode id="915109a8-d80b-4eeb-8326-e2d216e24c52" sourceId="82dcab6e-64ae-4b2a-9546-911789475ba2" targetId="73b7d79d-9c19-4de3-880e-7b36e3ce186e"/>
<parentsNode id="73fb72a2-c1c3-44a3-b58f-9a45934e22c3" sourceId="82dcab6e-64ae-4b2a-9546-911789475ba2" targetId="defd52e0-3b40-49fa-b348-f52132047f4b"/>
<parentsNode id="39e75f36-212c-406c-92a3-11ad0bd9cf00" sourceId="76a90ef5-a937-4746-9f11-a77d92db1d91" targetId="086379f6-e8ac-4cbe-8845-b88e5efd6890"/>
<parentsNode id="007c1220-7de4-42f2-ae19-13d3158631e2" sourceId="76a90ef5-a937-4746-9f11-a77d92db1d91" targetId="0f9b33ac-7c11-406a-985f-a3cc170d99be"/>
<parentsNode id="030206d2-fa35-40a2-88cb-70891652da40" sourceId="76a90ef5-a937-4746-9f11-a77d92db1d91" targetId="7c28cdb9-394e-40de-a463-0297d9980c0f"/>
<parentsNode id="d1cc56a5-3360-4b4b-acdb-e47c62c0ba03" sourceId="5bca3af2-4d62-4021-8051-42776530930a" targetId="0d0c9181-0ced-4c70-b40c-49fc8730883a"/>
<parentsNode id="30a195b3-a2d6-4401-8fde-53093f1323b3" sourceId="5bca3af2-4d62-4021-8051-42776530930a" targetId="1be27e2c-7063-482d-ae2a-fdcfef7eb56f"/>
<parentsNode id="2607d58a-a1f9-4b6c-8098-d968432c6517" sourceId="5bca3af2-4d62-4021-8051-42776530930a" targetId="79bfce80-33a7-4c89-870a-6b54c9074d9b"/>
<parentsNode id="a2d13fa1-d5e2-4d84-9cc4-13b60185a002" sourceId="7c28cdb9-394e-40de-a463-0297d9980c0f" targetId="37262dfc-a470-4e5c-a88c-75357c96ded3"/>
<parentsNode id="48e72773-e24f-4f58-859b-d732c8a2ba3e" sourceId="7c28cdb9-394e-40de-a463-0297d9980c0f" targetId="0d0c9181-0ced-4c70-b40c-49fc8730883a"/>
<parentsNode id="8874a104-df05-43f5-960d-a28ad57374bc" sourceId="79bfce80-33a7-4c89-870a-6b54c9074d9b" targetId="d5269467-1506-424e-900e-8c199304c600"/>
<parentsNode id="6fa932d1-6ae0-4905-bb53-ef0a73a124b5" sourceId="79bfce80-33a7-4c89-870a-6b54c9074d9b" targetId="a3df91b0-0d93-4a4c-8a1d-0d236fec9a43"/>
</processTree>
</ptml>
Loading

0 comments on commit 0888ab2

Please sign in to comment.