nirdizati_light.encoding.feature_encoder.simple_features
from pandas import DataFrame
from pm4py.objects.log.obj import EventLog, Trace

from nirdizati_light.encoding.constants import TaskGenerationType, get_prefix_length, get_max_prefix_length, PrefixLengthStrategy
from nirdizati_light.labeling.common import add_label_column

ATTRIBUTE_CLASSIFIER = None
PREFIX_ = 'prefix_'


def simple_features(
    log: EventLog,
    prefix_length: int,
    padding: bool,
    prefix_length_strategy: str,
    labeling_type: str,
    generation_type: str,
    feature_list=None,
    target_event: str = None,
) -> DataFrame:
    """Encode an event log as a table of activity-name prefixes.

    The resulting frame has a ``trace_id`` column, one ``prefix_<k>`` column
    per prefix position (the count comes from ``get_max_prefix_length``) and
    a trailing ``label`` column.

    Args:
        log: Event log whose traces are encoded.
        prefix_length: Requested prefix length, forwarded to the prefix-length
            helpers together with ``prefix_length_strategy``.
        padding: When true, short rows are zero-filled and no trace is skipped.
        prefix_length_strategy: Strategy value forwarded to the prefix-length
            helpers; the percentage strategy also forces zero-filling.
        labeling_type: Forwarded to ``add_label_column`` for every row.
        generation_type: When equal to ``TaskGenerationType.ALL_IN_ONE.value``
            one row per prefix length is produced, otherwise one row per trace.
        feature_list: Accepted for interface compatibility; not used here.
        target_event: Forwarded to the prefix-length helpers.

    Returns:
        A DataFrame built from the encoded rows.
    """
    widest_prefix = get_max_prefix_length(log, prefix_length, prefix_length_strategy, target_event)
    header = _generate_columns(widest_prefix)
    rows = _generate_encoded_data(
        log,
        prefix_length,
        padding,
        prefix_length_strategy,
        labeling_type,
        generation_type,
        target_event,
        header,
    )
    return DataFrame(data=rows, columns=header)


def _generate_encoded_data(log, prefix_length, padding, prefix_length_strategy, labeling_type, generation_type, target_event, columns):
    """Collect the encoded rows of every trace in the log that is not skipped."""
    rows = []
    for current_trace in log:
        # The per-trace length is resolved before the skip test, as the
        # skip test itself relies on the requested length only.
        effective_length = get_prefix_length(current_trace, prefix_length, prefix_length_strategy, target_event)
        if not _should_skip_trace(current_trace, prefix_length, padding):
            rows.extend(
                _encode_trace(
                    current_trace,
                    effective_length,
                    generation_type,
                    columns,
                    prefix_length_strategy,
                    padding,
                    labeling_type,
                )
            )
    return rows


def _should_skip_trace(trace, prefix_length, padding):
    """Tell whether a trace is shorter than the prefix while padding is off."""
    too_short = len(trace) <= prefix_length - 1
    return too_short and not padding


def _encode_trace(trace, trace_prefix_length, generation_type, columns, prefix_length_strategy, padding, labeling_type):
    """Build the list of rows contributed by one trace."""
    if generation_type == TaskGenerationType.ALL_IN_ONE.value:
        # One row for each prefix length from 1 up to the shorter of the
        # resolved prefix length and the trace length.
        row_lengths = range(1, min(trace_prefix_length + 1, len(trace) + 1))
    else:
        row_lengths = [trace_prefix_length]
    return [
        _trace_to_row(trace, row_length, len(columns), prefix_length_strategy, padding, labeling_type)
        for row_length in row_lengths
    ]


def _trace_to_row(trace: Trace, prefix_length: int, columns_number: int, prefix_length_strategy: str, padding: bool = True, labeling_type: str = None) -> list:
    """Assemble one row: trace id, prefix activity names, optional zeros, label."""
    row = [trace.attributes['concept:name']]
    row.extend(_trace_prefixes(trace, prefix_length))
    row.extend(_pad_trace_row(row, columns_number, padding, prefix_length_strategy))
    row.append(add_label_column(trace, labeling_type, prefix_length))
    return row


def _trace_prefixes(trace: Trace, prefix_length: int) -> list:
    """Return the ``concept:name`` of the events located before ``prefix_length``."""
    names = []
    for position, event in enumerate(trace):
        if position < prefix_length:
            names.append(event['concept:name'])
    return names


def _pad_trace_row(trace_row, columns_number, padding, prefix_length_strategy):
    """Return the zeros needed to fill a row up to the slot reserved for the label."""
    fill_required = padding or prefix_length_strategy == PrefixLengthStrategy.PERCENTAGE.value
    if not fill_required:
        return []
    # The last column is kept free for the label; a non-positive gap gives [].
    gap = columns_number - 1 - len(trace_row)
    return [0] * gap


def _generate_columns(prefix_length: int) -> list:
    """Return the header: ``trace_id``, ``prefix_1`` .. ``prefix_<n>``, ``label``."""
    header = ["trace_id"]
    header.extend(f"{PREFIX_}{position}" for position in range(1, prefix_length + 1))
    header.append('label')
    return header
ATTRIBUTE_CLASSIFIER = None
PREFIX_ = 'prefix_'
def simple_features(log: pm4py.objects.log.obj.EventLog, prefix_length: int, padding: bool, prefix_length_strategy: str, labeling_type: str, generation_type: str, feature_list=None, target_event: str = None) -> pandas.core.frame.DataFrame:
def simple_features(
    log: EventLog,
    prefix_length: int,
    padding: bool,
    prefix_length_strategy: str,
    labeling_type: str,
    generation_type: str,
    feature_list=None,
    target_event: str = None,
) -> DataFrame:
    """Encode an event log as a DataFrame of simple prefix features.

    The column list is derived from the value returned by
    ``get_max_prefix_length`` and the rows come from
    ``_generate_encoded_data``; both receive the arguments given here.
    ``feature_list`` is accepted but not used by this function.

    Returns:
        A DataFrame built from the generated columns and encoded rows.
    """
    widest_prefix = get_max_prefix_length(log, prefix_length, prefix_length_strategy, target_event)
    header = _generate_columns(widest_prefix)
    rows = _generate_encoded_data(
        log,
        prefix_length,
        padding,
        prefix_length_strategy,
        labeling_type,
        generation_type,
        target_event,
        header,
    )
    return DataFrame(data=rows, columns=header)
Generates a DataFrame with simple features from an event log.