nirdizati_light.encoding.feature_encoder.simple_features

 1from pandas import DataFrame
 2from pm4py.objects.log.obj import EventLog, Trace
 3
 4from nirdizati_light.encoding.constants import TaskGenerationType, get_prefix_length, get_max_prefix_length, PrefixLengthStrategy
 5from nirdizati_light.labeling.common import add_label_column
 6
 7ATTRIBUTE_CLASSIFIER = None
 8PREFIX_ = 'prefix_'
 9
10
def simple_features(
    log: EventLog,
    prefix_length: int,
    padding: bool,
    prefix_length_strategy: str,
    labeling_type: str,
    generation_type: str,
    feature_list=None,
    target_event: str = None,
) -> DataFrame:
    """Encode an event log as a DataFrame of simple (event-name) features.

    The resulting frame has one ``trace_id`` column, one ``prefix_<i>`` column
    per prefix position and a trailing ``label`` column.

    Args:
        log: Event log whose traces are encoded.
        prefix_length: Requested prefix length, forwarded to the prefix-length
            helpers together with ``prefix_length_strategy``.
        padding: Whether rows shorter than the widest prefix are zero-padded;
            without padding, traces that are too short are skipped.
        prefix_length_strategy: Strategy name forwarded to the prefix-length
            helpers and to the padding logic.
        labeling_type: Labeling mode forwarded to ``add_label_column``.
        generation_type: Task generation mode; decides whether each trace
            yields one row or one row per prefix.
        feature_list: Accepted for interface compatibility; not used here.
        target_event: Optional target event forwarded to the prefix-length
            helpers.

    Returns:
        DataFrame holding the encoded rows under the generated column names.
    """
    # The widest prefix in the log determines how many prefix_<i> columns exist.
    widest_prefix = get_max_prefix_length(log, prefix_length, prefix_length_strategy, target_event)
    column_names = _generate_columns(widest_prefix)
    rows = _generate_encoded_data(
        log,
        prefix_length,
        padding,
        prefix_length_strategy,
        labeling_type,
        generation_type,
        target_event,
        column_names,
    )

    return DataFrame(columns=column_names, data=rows)
18
19
def _generate_encoded_data(log, prefix_length, padding, prefix_length_strategy, labeling_type, generation_type, target_event, columns):
    """Build the list of encoded rows for every usable trace in ``log``.

    Args:
        log: Iterable of traces.
        prefix_length: Requested prefix length (also used for the skip check).
        padding: Whether short traces are kept and padded instead of skipped.
        prefix_length_strategy: Strategy name forwarded to the helpers.
        labeling_type: Labeling mode forwarded to the row encoder.
        generation_type: Task generation mode forwarded to ``_encode_trace``.
        target_event: Optional target event forwarded to ``get_prefix_length``.
        columns: Column names of the final DataFrame; only their count is used
            downstream.

    Returns:
        A flat list of rows (each row is a list).
    """
    rows = []
    for trace in log:
        # Evaluation order is kept as in the original: the per-trace prefix
        # length is resolved before the skip decision is taken.
        trace_prefix_length = get_prefix_length(trace, prefix_length, prefix_length_strategy, target_event)
        if _should_skip_trace(trace, prefix_length, padding):
            continue
        rows.extend(
            _encode_trace(
                trace,
                trace_prefix_length,
                generation_type,
                columns,
                prefix_length_strategy,
                padding,
                labeling_type,
            )
        )
    return rows
29
30
31def _should_skip_trace(trace, prefix_length, padding):
32    """Determines if a trace should be skipped based on its length and padding."""
33    return len(trace) <= prefix_length - 1 and not padding
34
35
def _encode_trace(trace, trace_prefix_length, generation_type, columns, prefix_length_strategy, padding, labeling_type):
    """Encode one trace into one or several DataFrame rows.

    Args:
        trace: Trace to encode.
        trace_prefix_length: Prefix length resolved for this trace.
        generation_type: When equal to ``TaskGenerationType.ALL_IN_ONE.value``
            one row is produced for every prefix length from 1 up to
            ``min(trace_prefix_length, len(trace))``; otherwise a single row
            for ``trace_prefix_length`` is produced.
        columns: Column names of the final DataFrame; only their count is used.
        prefix_length_strategy: Strategy name forwarded to the row encoder.
        padding: Padding flag forwarded to the row encoder.
        labeling_type: Labeling mode forwarded to the row encoder.

    Returns:
        List of encoded rows.
    """
    columns_number = len(columns)

    if generation_type == TaskGenerationType.ALL_IN_ONE.value:
        # Never ask for a prefix longer than the trace itself.
        longest_prefix = min(trace_prefix_length, len(trace))
        return [
            _trace_to_row(trace, current_length, columns_number, prefix_length_strategy, padding, labeling_type)
            for current_length in range(1, longest_prefix + 1)
        ]

    return [_trace_to_row(trace, trace_prefix_length, columns_number, prefix_length_strategy, padding, labeling_type)]
45
46
def _trace_to_row(
    trace: Trace,
    prefix_length: int,
    columns_number: int,
    prefix_length_strategy: str,
    padding: bool = True,
    labeling_type: str = None,
) -> list:
    """Convert one trace prefix into a single DataFrame row.

    The row layout is: trace id, the event names of the prefix, optional
    zero padding, and finally the label.

    Args:
        trace: Trace to encode; its ``concept:name`` attribute is the row id.
        prefix_length: Number of leading events to include.
        columns_number: Total number of DataFrame columns (id, prefixes, label).
        prefix_length_strategy: Strategy name forwarded to the padding helper.
        padding: Padding flag forwarded to the padding helper.
        labeling_type: Labeling mode forwarded to ``add_label_column``.

    Returns:
        The encoded row as a list.
    """
    trace_id = trace.attributes['concept:name']
    row = [trace_id, *_trace_prefixes(trace, prefix_length)]
    # Padding is computed from the row built so far (id + event names).
    row.extend(_pad_trace_row(row, columns_number, padding, prefix_length_strategy))
    row.append(add_label_column(trace, labeling_type, prefix_length))
    return row
53
54
55def _trace_prefixes(trace: Trace, prefix_length: int) -> list:
56    """Extracts prefixes from a trace."""
57    return [event['concept:name'] for idx, event in enumerate(trace) if idx < prefix_length]
58
59
60def _pad_trace_row(trace_row, columns_number, padding, prefix_length_strategy):
61    """Pads a trace row to match the expected number of columns."""
62    if padding or prefix_length_strategy == PrefixLengthStrategy.PERCENTAGE.value:
63        return [0 for _ in range(len(trace_row), columns_number - 1)]
64    return []
65
66
def _generate_columns(prefix_length: int) -> list:
    """Build the DataFrame column names for a given prefix width.

    Args:
        prefix_length: Number of prefix columns to generate.

    Returns:
        ``["trace_id", "prefix_1", ..., "prefix_<prefix_length>", "label"]``.
    """
    prefix_columns = [f"{PREFIX_}{position}" for position in range(1, prefix_length + 1)]
    return ["trace_id", *prefix_columns, 'label']
# Second declaration of the module constants; the values match the
# assignments near the top of the file.
# ATTRIBUTE_CLASSIFIER is not referenced by any function visible in this module.
ATTRIBUTE_CLASSIFIER = None
# Stem of the per-position event columns ("prefix_1", "prefix_2", ...).
PREFIX_ = 'prefix_'
def simple_features(
    log: EventLog,
    prefix_length: int,
    padding: bool,
    prefix_length_strategy: str,
    labeling_type: str,
    generation_type: str,
    feature_list=None,
    target_event: str = None,
) -> DataFrame:
    """Generates a DataFrame with simple features from an event log.

    The resulting frame has one ``trace_id`` column, one ``prefix_<i>`` column
    per prefix position and a trailing ``label`` column.

    Args:
        log: Event log whose traces are encoded.
        prefix_length: Requested prefix length, forwarded to the prefix-length
            helpers together with ``prefix_length_strategy``.
        padding: Whether rows shorter than the widest prefix are zero-padded;
            without padding, traces that are too short are skipped.
        prefix_length_strategy: Strategy name forwarded to the prefix-length
            helpers and to the padding logic.
        labeling_type: Labeling mode forwarded to ``add_label_column``.
        generation_type: Task generation mode; decides whether each trace
            yields one row or one row per prefix.
        feature_list: Accepted for interface compatibility; not used here.
        target_event: Optional target event forwarded to the prefix-length
            helpers.

    Returns:
        DataFrame holding the encoded rows under the generated column names.
    """
    # The widest prefix in the log determines how many prefix_<i> columns exist.
    max_prefix_length = get_max_prefix_length(log, prefix_length, prefix_length_strategy, target_event)
    columns = _generate_columns(max_prefix_length)
    encoded_data = _generate_encoded_data(
        log,
        prefix_length,
        padding,
        prefix_length_strategy,
        labeling_type,
        generation_type,
        target_event,
        columns,
    )

    return DataFrame(columns=columns, data=encoded_data)

Generates a DataFrame with simple features from an event log.