nirdizati_light.utils.log_metrics
"""Descriptive metrics computed over a pm4py ``EventLog``."""
import statistics
from collections import defaultdict, OrderedDict

from pm4py.objects.log.obj import EventLog

TIMESTAMP_CLASSIFIER = "time:timestamp"
NAME_CLASSIFIER = "concept:name"


def events_by_date(log: EventLog) -> OrderedDict:
    """Creates dict of events by date ordered by date

    Every event of every trace is counted under the date part of its
    ``TIMESTAMP_CLASSIFIER`` value.

    :return {'2010-12-30': 7, '2011-01-06': 8}
    :rtype: OrderedDict
    """
    per_day = defaultdict(int)
    for trace in log:
        for event in trace:
            per_day[str(event[TIMESTAMP_CLASSIFIER].date())] += 1
    return OrderedDict(sorted(per_day.items()))


def resources_by_date(log: EventLog) -> OrderedDict:
    """Creates dict of the number of unique resources used per date, ordered by date

    The resource of an event is read from ``"Resource"``, falling back to
    ``"org:resource"`` and finally to the empty string.

    :return {'2010-12-30': 7, '2011-01-06': 8}
    :rtype: OrderedDict
    """
    seen = defaultdict(set)
    for trace in log:
        for event in trace:
            resource = event.get("Resource", event.get("org:resource", ""))
            seen[str(event[TIMESTAMP_CLASSIFIER].date())].add(resource)
    # Day keys are unique, so sorting the pairs orders by date only.
    return OrderedDict(sorted((day, len(names)) for day, names in seen.items()))


def event_executions(log: EventLog) -> OrderedDict:
    """Creates dict of event execution count, ordered by event name

    :return {'Event A': 7, 'Event B': 8}
    :rtype: OrderedDict
    """
    executions = defaultdict(int)
    for trace in log:
        for event in trace:
            executions[event[NAME_CLASSIFIER]] += 1
    return OrderedDict(sorted(executions.items()))


def new_trace_start(log: EventLog) -> OrderedDict:
    """Creates dict of new traces by date

    A trace is counted on the date of its first event. Traces without
    events have no start date and are skipped.

    :return {'2010-12-30': 1, '2011-01-06': 2}
    :rtype: OrderedDict
    """
    starts = defaultdict(int)
    for trace in log:
        if not trace:
            continue
        starts[str(trace[0][TIMESTAMP_CLASSIFIER].date())] += 1
    return OrderedDict(sorted(starts.items()))


def trace_attributes(log: EventLog) -> list:
    """Creates an array of dicts that describe trace attributes.
    Only looks at first trace. Filters out `concept:name`.
    Returns an empty list for an empty log.

    :return [{'name': 'cost', 'type': 'number', 'example': '34'}]
    """
    if len(log) == 0:
        return []
    # TODO: this might be a bug if first trace has different attributes than others
    trace = log[0]
    values = []
    for attribute in trace.attributes:
        if attribute == "concept:name":
            continue
        value = trace.attributes[attribute]
        values.append({'name': attribute, 'type': _is_number(value), 'example': str(value)})
    return sorted(values, key=lambda k: k['name'])


def _is_number(s) -> str:
    """Classifies a value as 'number' (int, float or digit-only string) or 'string'.

    Booleans are reported as 'string' even though ``bool`` subclasses ``int``.
    """
    if isinstance(s, bool):
        return 'string'
    if isinstance(s, (float, int)):
        return 'number'
    if hasattr(s, 'isdigit') and s.isdigit():
        return 'number'
    return 'string'


def events_in_trace(log: EventLog) -> OrderedDict:
    """Creates dict of number of events in trace, keyed by trace name

    If several traces share a name, the last one wins.

    :return {'3': 8, '4': 11}
    :rtype: OrderedDict
    """
    sizes = {trace.attributes[NAME_CLASSIFIER]: len(trace) for trace in log}
    return OrderedDict(sorted(sizes.items()))


def max_events_in_log(log: EventLog) -> int:
    """Returns the maximum number of events in any trace (0 for an empty log)

    :return 3
    """
    return max((len(trace) for trace in log), default=0)


def avg_events_in_log(log: EventLog) -> float:
    """Returns the average number of events per trace (0.0 for an empty log)

    :return 3.5
    """
    lengths = [len(trace) for trace in log]
    if not lengths:
        return 0.0
    return statistics.mean(lengths)


def std_var_events_in_log(log: EventLog) -> float:
    """Returns the sample standard deviation of the number of events per trace

    The sample standard deviation needs at least two traces; 0.0 is
    returned for smaller logs.

    :return 1.41
    """
    lengths = [len(trace) for trace in log]
    if len(lengths) < 2:
        return 0.0
    return statistics.stdev(lengths)


def trace_ids_in_log(log: EventLog) -> list:
    """Returns the `concept:name` attribute of every trace, in log order"""
    return [trace.attributes[NAME_CLASSIFIER] for trace in log]


def traces_in_log(log: EventLog) -> list:
    """Returns one dict per trace holding its attributes and the list of its events"""
    return [{'attributes': trace.attributes, 'events': list(trace)} for trace in log]
TIMESTAMP_CLASSIFIER =
'time:timestamp'
NAME_CLASSIFIER =
'concept:name'
def
events_by_date(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def events_by_date(log: EventLog) -> OrderedDict:
    """Creates dict of events by date ordered by date

    Every event of every trace is counted under the date part of its
    ``TIMESTAMP_CLASSIFIER`` value (``str(timestamp.date())``).

    :param log: the traces to scan; each trace is iterated for its events
    :return {'2010-12-30': 7, '2011-01-06': 8}
    :rtype: OrderedDict
    """
    per_day = defaultdict(int)
    for trace in log:
        for event in trace:
            per_day[str(event[TIMESTAMP_CLASSIFIER].date())] += 1
    # Sorting the date strings gives the ordering the result promises.
    return OrderedDict(sorted(per_day.items()))
Creates dict of events by date ordered by date
Returns
{'2010-12-30': 7, '2011-01-06': 8}
def
resources_by_date(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def resources_by_date(log: EventLog) -> OrderedDict:
    """Creates dict of the number of unique resources used per date, ordered by date

    The resource of an event is read from ``"Resource"``, falling back to
    ``"org:resource"`` and finally to the empty string, so events without
    any resource count together as one (empty) resource.

    :param log: the traces to scan; each trace is iterated for its events
    :return {'2010-12-30': 7, '2011-01-06': 8}
    :rtype: OrderedDict
    """
    seen = defaultdict(set)
    for trace in log:
        for event in trace:
            resource = event.get("Resource", event.get("org:resource", ""))
            seen[str(event[TIMESTAMP_CLASSIFIER].date())].add(resource)
    # Day keys are unique, so sorting the pairs orders by date only.
    return OrderedDict(sorted((day, len(names)) for day, names in seen.items()))
Creates dict of used unique resources ordered by date
Resource and timestamp delimited by &&. If this is in resources name, bad stuff will happen. Returns a dict with a date and the number of unique resources used on that day.
Returns
{'2010-12-30': 7, '2011-01-06': 8}
def
event_executions(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def event_executions(log: EventLog) -> OrderedDict:
    """Creates dict of event execution count, ordered by event name

    Each event is counted under its ``NAME_CLASSIFIER`` value.

    :param log: the traces to scan; each trace is iterated for its events
    :return {'Event A': 7, 'Event B': 8}
    :rtype: OrderedDict
    """
    executions = defaultdict(int)
    for trace in log:
        for event in trace:
            executions[event[NAME_CLASSIFIER]] += 1
    return OrderedDict(sorted(executions.items()))
Creates dict of event execution count
Returns
{'Event A': 7, 'Event B': 8}
def
new_trace_start(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def new_trace_start(log: EventLog) -> OrderedDict:
    """Creates dict of new traces by date

    A trace is counted on the date of its first event's
    ``TIMESTAMP_CLASSIFIER`` value. Traces without events have no start
    date and are skipped instead of raising ``IndexError``.

    :param log: the traces to scan
    :return {'2010-12-30': 1, '2011-01-06': 2}
    :rtype: OrderedDict
    """
    starts = defaultdict(int)
    for trace in log:
        if not trace:
            # No first event to read a timestamp from.
            continue
        starts[str(trace[0][TIMESTAMP_CLASSIFIER].date())] += 1
    return OrderedDict(sorted(starts.items()))
Creates dict of new traces by date
Returns
{'2010-12-30': 1, '2011-01-06': 2}
def
trace_attributes(log: pm4py.objects.log.obj.EventLog) -> list:
def trace_attributes(log: EventLog) -> list:
    """Creates an array of dicts that describe trace attributes.
    Only looks at first trace. Filters out `concept:name`.
    Returns an empty list for an empty log.

    Each entry holds the attribute name, the type reported by
    ``_is_number`` and the value of the first trace rendered with ``str``.
    Entries are sorted by attribute name.

    :param log: the log whose first trace is inspected
    :return [{'name': 'cost', 'type': 'number', 'example': '34'}]
    """
    if len(log) == 0:
        return []
    # TODO: this might be a bug if first trace has different attributes than others
    trace = log[0]
    values = []
    for attribute in trace.attributes:
        if attribute == "concept:name":
            continue
        value = trace.attributes[attribute]
        values.append({'name': attribute, 'type': _is_number(value), 'example': str(value)})
    return sorted(values, key=lambda k: k['name'])
Creates an array of dicts that describe trace attributes.
Only looks at first trace. Filters out concept:name.
Returns
[{'name': 'cost', 'type': 'number', 'example': '34'}]
def
events_in_trace(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def events_in_trace(log: EventLog) -> OrderedDict:
    """Creates dict of number of events in trace, keyed by trace name

    The key is the trace's ``NAME_CLASSIFIER`` attribute. If several traces
    share a name, the last one wins. The result is ordered by trace name.

    :param log: the traces to measure
    :return {'3': 8, '4': 11}
    :rtype: OrderedDict
    """
    sizes = {trace.attributes[NAME_CLASSIFIER]: len(trace) for trace in log}
    return OrderedDict(sorted(sizes.items()))
Creates dict of number of events in trace
Returns
{'3': 8, '4': 11}
def
max_events_in_log(log: pm4py.objects.log.obj.EventLog) -> int:
def max_events_in_log(log: EventLog) -> int:
    """Returns the maximum number of events in any trace

    An empty log yields 0 instead of raising ``ValueError``.

    :param log: the traces to measure
    :return 3
    """
    return max((len(trace) for trace in log), default=0)
Returns the maximum number of events in any trace
:return 3
def
avg_events_in_log(log: pm4py.objects.log.obj.EventLog) -> int:
def avg_events_in_log(log: EventLog) -> float:
    """Returns the average number of events per trace

    An empty log yields 0.0 instead of raising ``statistics.StatisticsError``.

    :param log: the traces to measure
    :return 3.5
    """
    lengths = [len(trace) for trace in log]
    if not lengths:
        return 0.0
    return statistics.mean(lengths)
Returns the average number of events in any trace
:return 3
def
std_var_events_in_log(log: pm4py.objects.log.obj.EventLog) -> int:
def std_var_events_in_log(log: EventLog) -> float:
    """Returns the sample standard deviation of the number of events per trace

    ``statistics.stdev`` needs at least two data points, so 0.0 is returned
    for logs with fewer than two traces instead of raising
    ``statistics.StatisticsError``.

    :param log: the traces to measure
    :return 1.41
    """
    lengths = [len(trace) for trace in log]
    if len(lengths) < 2:
        return 0.0
    return statistics.stdev(lengths)
Returns the standard deviation of the number of events per trace
:return 3
def
trace_ids_in_log(log: pm4py.objects.log.obj.EventLog) -> list:
def
traces_in_log(log: pm4py.objects.log.obj.EventLog) -> list: