nirdizati_light.utils.log_metrics

  1import statistics
  2from collections import defaultdict, OrderedDict
  3
  4from pm4py.objects.log.obj import EventLog
  5
# Attribute keys used throughout this module: the first is looked up on events
# to read their timestamp, the second on events and on trace attributes to
# read their name.
TIMESTAMP_CLASSIFIER = "time:timestamp"
NAME_CLASSIFIER = "concept:name"
  8
  9
 10def events_by_date(log: EventLog) -> OrderedDict:
 11    """Creates dict of events by date ordered by date
 12
 13    :return {'2010-12-30': 7, '2011-01-06': 8}
 14    :rtype: OrderedDict
 15    """
 16
 17    stamp_dict = defaultdict(lambda: 0)
 18    for trace in log:
 19        for event in trace:
 20            timestamp = event[TIMESTAMP_CLASSIFIER]
 21            stamp_dict[str(timestamp.date())] += 1
 22    return OrderedDict(sorted(stamp_dict.items()))
 23
 24
 25def resources_by_date(log: EventLog) -> OrderedDict:
 26    """Creates dict of used unique resources ordered by date
 27
 28    Resource and timestamp delimited by &&. If this is in resources name, bad stuff will happen.
 29    Returns a dict with a date and the number of unique resources used on that day.
 30    :return {'2010-12-30': 7, '2011-01-06': 8}
 31    """
 32    stamp_dict = defaultdict(lambda: [])
 33    for trace in log:
 34        for event in trace:
 35            resource = event.get("Resource", event.get("org:resource", ""))
 36            timestamp = event[TIMESTAMP_CLASSIFIER]
 37            stamp_dict[str(timestamp.date())].append(resource)
 38
 39    for key, value in stamp_dict.items():
 40        stamp_dict[key] = len(set(value))
 41
 42    return OrderedDict(sorted(stamp_dict.items()))
 43
 44
 45def event_executions(log: EventLog) -> OrderedDict:
 46    """Creates dict of event execution count
 47
 48    :return {'Event A': 7, '2011-01-06': 8}
 49    """
 50    executions = defaultdict(lambda: 0)
 51    for trace in log:
 52        for event in trace:
 53            executions[event[NAME_CLASSIFIER]] += 1
 54    return OrderedDict(sorted(executions.items()))
 55
 56
 57def new_trace_start(log: EventLog) -> OrderedDict:
 58    """Creates dict of new traces by date
 59
 60    :return {'2010-12-30': 1, '2011-01-06': 2}
 61    """
 62    executions = defaultdict(lambda: 0)
 63    for trace in log:
 64        timestamp = trace[0][TIMESTAMP_CLASSIFIER]
 65        executions[str(timestamp.date())] += 1
 66    return OrderedDict(sorted(executions.items()))
 67
 68
 69def trace_attributes(log: EventLog) -> list:
 70    """Creates an array of dicts that describe trace attributes.
 71    Only looks at first trace. Filters out `concept:name`.
 72
 73    :return [{name: 'name', type: 'string', example: 34}]
 74    """
 75    values = []
 76    trace = log[0]  # TODO: this might be a bug if first trace has different events then others
 77    for attribute in trace.attributes:
 78        if attribute != "concept:name":
 79            atr_type = _is_number(trace.attributes[attribute])
 80            atr = {'name': attribute, 'type': atr_type, 'example': str(trace.attributes[attribute])}
 81            values.append(atr)
 82    values = sorted(values, key=lambda k: k['name'])
 83    return values
 84
 85
 86def _is_number(s) -> str:
 87    if (isinstance(s, (float, int)) or (s.isdigit() if hasattr(s, 'isdigit') else False)) and not isinstance(s, bool):
 88        return 'number'
 89    return 'string'
 90
 91
 92def events_in_trace(log: EventLog) -> OrderedDict:
 93    """Creates dict of number of events in trace
 94
 95    :return {'4': 11, '3': 8}
 96    """
 97    stamp_dict = defaultdict(lambda: 0)
 98    for trace in log:
 99        stamp_dict[trace.attributes[NAME_CLASSIFIER]] = len(trace)
100    return OrderedDict(sorted(stamp_dict.items()))
101
102
def max_events_in_log(log: EventLog) -> int:
    """Return the number of events of the longest trace in the log.

    An empty log yields ``0`` instead of raising ``ValueError``.

    :param log: the event log to inspect
    :return: the maximum trace length, e.g. 3
    :rtype: int
    """
    return max((len(trace) for trace in log), default=0)
109
110
def avg_events_in_log(log: EventLog) -> float:
    """Return the average number of events per trace.

    An empty log yields ``0.0`` instead of raising
    ``statistics.StatisticsError``.

    :param log: the event log to inspect
    :return: the mean trace length, e.g. 3.5
    :rtype: float
    """
    lengths = [len(trace) for trace in log]
    if not lengths:
        # statistics.mean() refuses an empty data set.
        return 0.0
    return statistics.mean(lengths)
117
118
def std_var_events_in_log(log: EventLog) -> float:
    """Return the sample standard deviation of the number of events per trace.

    The sample standard deviation needs at least two traces; with fewer
    there is no spread to measure and ``0.0`` is returned instead of
    raising ``statistics.StatisticsError``.

    :param log: the event log to inspect
    :return: the standard deviation of the trace lengths, e.g. 1.41
    :rtype: float
    """
    lengths = [len(trace) for trace in log]
    if len(lengths) < 2:
        # statistics.stdev() requires at least two data points.
        return 0.0
    return statistics.stdev(lengths)
125
126
def trace_ids_in_log(log: EventLog) -> list:
    """List the name of every trace, in log order.

    The name is read from ``trace.attributes[NAME_CLASSIFIER]``.

    :param log: the event log to inspect
    :return: one entry per trace
    :rtype: list
    """
    trace_ids = []
    for trace in log:
        trace_ids.append(trace.attributes[NAME_CLASSIFIER])
    return trace_ids
129
130
def traces_in_log(log: EventLog) -> list:
    """Convert every trace into a dict of its attributes and events.

    Each entry holds the trace's ``attributes`` object as is and a new list
    with the trace's events in order.

    :param log: the event log to convert
    :return: [{'attributes': ..., 'events': [...]}, ...] in log order
    :rtype: list
    """
    converted = []
    for trace in log:
        converted.append({'attributes': trace.attributes, 'events': list(trace)})
    return converted
# Attribute keys used throughout this module: the first is looked up on events
# to read their timestamp, the second on events and on trace attributes to
# read their name.
TIMESTAMP_CLASSIFIER = 'time:timestamp'
NAME_CLASSIFIER = 'concept:name'
def events_by_date(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def events_by_date(log: EventLog) -> OrderedDict:
    """Count the events of the whole log per calendar day.

    Every event of every trace adds one to the day obtained from
    ``str(event[TIMESTAMP_CLASSIFIER].date())``.

    :param log: the event log whose events are counted
    :return: day-to-count mapping sorted by day,
        e.g. {'2010-12-30': 7, '2011-01-06': 8}
    :rtype: OrderedDict
    """
    per_day = defaultdict(int)
    days = (
        str(event[TIMESTAMP_CLASSIFIER].date())
        for trace in log
        for event in trace
    )
    for day in days:
        per_day[day] += 1
    return OrderedDict(sorted(per_day.items()))

Creates dict of events by date ordered by date

Returns

{'2010-12-30': 7, '2011-01-06': 8}

def resources_by_date(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def resources_by_date(log: EventLog) -> OrderedDict:
    """Count the distinct resources seen on each calendar day.

    The resource of an event is read from ``"Resource"``, falling back to
    ``"org:resource"`` and finally to the empty string, so events without a
    resource all share the ``""`` resource.

    :param log: the event log to inspect
    :return: day-to-number-of-unique-resources mapping sorted by day,
        e.g. {'2010-12-30': 7, '2011-01-06': 8}
    :rtype: OrderedDict
    """
    seen_per_day = defaultdict(set)
    for trace in log:
        for event in trace:
            resource = event.get("Resource", event.get("org:resource", ""))
            day = str(event[TIMESTAMP_CLASSIFIER].date())
            seen_per_day[day].add(resource)

    unique_counts = {day: len(seen) for day, seen in seen_per_day.items()}
    return OrderedDict(sorted(unique_counts.items()))

Creates dict of used unique resources ordered by date

Resource and timestamp delimited by &&. If this is in resources name, bad stuff will happen. Returns a dict with a date and the number of unique resources used on that day.

Returns

{'2010-12-30': 7, '2011-01-06': 8}

def event_executions(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def event_executions(log: EventLog) -> OrderedDict:
    """Count how many times each event name occurs in the log.

    The name of an event is the value stored under ``NAME_CLASSIFIER``.

    :param log: the event log to inspect
    :return: name-to-count mapping sorted by name,
        e.g. {'Event A': 7, 'Event B': 8}
    :rtype: OrderedDict
    """
    occurrences = defaultdict(int)
    names = (event[NAME_CLASSIFIER] for trace in log for event in trace)
    for name in names:
        occurrences[name] += 1
    return OrderedDict(sorted(occurrences.items()))

Creates dict of event execution count

Returns

{'Event A': 7, '2011-01-06': 8}

def new_trace_start(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
def new_trace_start(log: EventLog) -> OrderedDict:
    """Count how many traces start on each calendar day.

    The start of a trace is the timestamp (``TIMESTAMP_CLASSIFIER``) of its
    first event. Traces without any event have no start date and are
    skipped instead of raising ``IndexError``.

    :param log: the event log to inspect
    :return: day-to-number-of-started-traces mapping sorted by day,
        e.g. {'2010-12-30': 1, '2011-01-06': 2}
    :rtype: OrderedDict
    """
    starts = defaultdict(int)
    for trace in log:
        if not trace:
            # An empty trace has no first event to take a date from.
            continue
        first_timestamp = trace[0][TIMESTAMP_CLASSIFIER]
        starts[str(first_timestamp.date())] += 1
    return OrderedDict(sorted(starts.items()))

Creates dict of new traces by date

Returns

{'2010-12-30': 1, '2011-01-06': 2}

def trace_attributes(log: pm4py.objects.log.obj.EventLog) -> list:
def trace_attributes(log: EventLog) -> list:
    """Describe the trace-level attributes found on the first trace.

    Only the first trace is inspected, so attributes that appear only on
    later traces are not reported. The trace name (``NAME_CLASSIFIER``) is
    left out. An empty log yields an empty list.

    :param log: the event log to inspect
    :return: descriptions sorted by attribute name,
        e.g. [{'name': 'name', 'type': 'string', 'example': '34'}]
    :rtype: list
    """
    if not log:
        # No trace to sample attributes from.
        return []

    first_trace = log[0]
    described = [
        {'name': name, 'type': _is_number(value), 'example': str(value)}
        for name, value in first_trace.attributes.items()
        if name != NAME_CLASSIFIER
    ]
    described.sort(key=lambda entry: entry['name'])
    return described

Creates an array of dicts that describe trace attributes. Only looks at first trace. Filters out concept:name.

Returns

[{name: 'name', type: 'string', example: 34}]

def events_in_trace(log: pm4py.objects.log.obj.EventLog) -> collections.OrderedDict:
 93def events_in_trace(log: EventLog) -> OrderedDict:
 94    """Creates dict of number of events in trace
 95
 96    :return {'4': 11, '3': 8}
 97    """
 98    stamp_dict = defaultdict(lambda: 0)
 99    for trace in log:
100        stamp_dict[trace.attributes[NAME_CLASSIFIER]] = len(trace)
101    return OrderedDict(sorted(stamp_dict.items()))

Creates dict of number of events in trace

Returns

{'4': 11, '3': 8}

def max_events_in_log(log: pm4py.objects.log.obj.EventLog) -> int:
def max_events_in_log(log: EventLog) -> int:
    """Return the number of events of the longest trace in the log.

    An empty log yields ``0`` instead of raising ``ValueError``.

    :param log: the event log to inspect
    :return: the maximum trace length, e.g. 3
    :rtype: int
    """
    return max((len(trace) for trace in log), default=0)

Returns the maximum number of events in any trace

Returns

3

def avg_events_in_log(log: pm4py.objects.log.obj.EventLog) -> int:
def avg_events_in_log(log: EventLog) -> float:
    """Return the average number of events per trace.

    An empty log yields ``0.0`` instead of raising
    ``statistics.StatisticsError``.

    :param log: the event log to inspect
    :return: the mean trace length, e.g. 3.5
    :rtype: float
    """
    lengths = [len(trace) for trace in log]
    if not lengths:
        # statistics.mean() refuses an empty data set.
        return 0.0
    return statistics.mean(lengths)

Returns the average number of events in any trace

Returns

3

def std_var_events_in_log(log: pm4py.objects.log.obj.EventLog) -> int:
def std_var_events_in_log(log: EventLog) -> float:
    """Return the sample standard deviation of the number of events per trace.

    The sample standard deviation needs at least two traces; with fewer
    there is no spread to measure and ``0.0`` is returned instead of
    raising ``statistics.StatisticsError``.

    :param log: the event log to inspect
    :return: the standard deviation of the trace lengths, e.g. 1.41
    :rtype: float
    """
    lengths = [len(trace) for trace in log]
    if len(lengths) < 2:
        # statistics.stdev() requires at least two data points.
        return 0.0
    return statistics.stdev(lengths)

Returns the standard deviation of the number of events per trace

Returns

3

def trace_ids_in_log(log: pm4py.objects.log.obj.EventLog) -> list:
def trace_ids_in_log(log: EventLog) -> list:
    """List the name of every trace, in log order.

    The name is read from ``trace.attributes[NAME_CLASSIFIER]``.

    :param log: the event log to inspect
    :return: one entry per trace
    :rtype: list
    """
    trace_ids = []
    for trace in log:
        trace_ids.append(trace.attributes[NAME_CLASSIFIER])
    return trace_ids
def traces_in_log(log: pm4py.objects.log.obj.EventLog) -> list:
def traces_in_log(log: EventLog) -> list:
    """Convert every trace into a dict of its attributes and events.

    Each entry holds the trace's ``attributes`` object as is and a new list
    with the trace's events in order.

    :param log: the event log to convert
    :return: [{'attributes': ..., 'events': [...]}, ...] in log order
    :rtype: list
    """
    converted = []
    for trace in log:
        converted.append({'attributes': trace.attributes, 'events': list(trace)})
    return converted