nirdizati_light.predictive_model.common

  1from enum import Enum
  2import torch
  3import numpy as np
  4from funcy import flatten
  5from pandas import DataFrame
  6
  7
  8class ClassificationMethods(Enum):
  9    """
 10    Available classification methods
 11    """
 12    RANDOM_FOREST = 'randomForestClassifier'
 13    KNN = 'knn'
 14    XGBOOST = 'xgboost'
 15    SGDCLASSIFIER = 'SGDClassifier'
 16    PERCEPTRON = 'perceptron'
 17    LSTM = 'lstm'
 18    CUSTOM_PYTORCH = 'customPytorch'
 19    MLP = 'mlp'
 20    SVM = 'svc'
 21    DT = 'DecisionTree'
 22
 23
 24class RegressionMethods(Enum):
 25    """
 26    Available regression methods
 27    """
 28    RANDOM_FOREST = 'randomForestRegressor'
 29
 30
 31def get_tensor(df: DataFrame, prefix_length):
 32    trace_attributes = [att for att in df.columns if 'prefix_' not in att]
 33    event_attributes = [att[:-2] for att in df.columns if att[-2:] == '_1']
 34
 35    reshaped_data = {
 36            trace_index: {
 37                prefix_index:
 38                    list(flatten(
 39                        feat_values if isinstance(feat_values, tuple) else [feat_values]
 40                        for feat_name, feat_values in trace.items()
 41                        if feat_name in trace_attributes + [event_attribute + '_' + str(prefix_index) for event_attribute in event_attributes]
 42                    ))
 43                for prefix_index in range(1, prefix_length + 1)
 44            }
 45            for trace_index, trace in df.iterrows()
 46    }
 47
 48    flattened_features = max(
 49        len(reshaped_data[trace][prefix])
 50        for trace in reshaped_data
 51        for prefix in reshaped_data[trace]
 52    )
 53
 54    tensor = np.zeros((
 55        len(df),                # sample
 56        prefix_length,          # time steps
 57        flattened_features      # features x single time step (trace and event attributes)
 58    ))
 59
 60    for i, trace_index in enumerate(reshaped_data):  # prefix
 61        for j, prefix_index in enumerate(reshaped_data[trace_index]):  # steps of the prefix
 62            for single_flattened_value in range(len(reshaped_data[trace_index][prefix_index])):
 63                tensor[i, j, single_flattened_value] = reshaped_data[trace_index][prefix_index][single_flattened_value]
 64
 65    return tensor
 66
 67def shape_label_df(df: DataFrame):
 68    labels_list = df['label'].tolist()
 69    labels = np.zeros((len(labels_list), int(max(df['label'].nunique(), int(max(df['label'].values))) + 1)))
 70    for label_idx, label_val in enumerate(labels_list):
 71        labels[int(label_idx), int(label_val)] = 1
 72
 73    return labels
 74
 75# General purpose class to wrap a lambda function as a torch module
 76class LambdaModule(torch.nn.Module):
 77    def __init__(self, lambd):
 78        super().__init__()
 79        import types
 80        assert type(lambd) is types.LambdaType
 81        self.lambd = lambd
 82
 83    def forward(self, x):
 84        return self.lambd(x)
 85    
 86# Class for early stopping
 87class EarlyStopper:
 88    def __init__(self, patience=1, min_delta=0):
 89        self.patience = patience
 90        self.min_delta = min_delta
 91        self.counter = 0
 92        self.min_validation_loss = float('inf')
 93
 94    def early_stop(self, validation_loss):
 95        if validation_loss < self.min_validation_loss:
 96            self.min_validation_loss = validation_loss
 97            self.counter = 0
 98        elif validation_loss > (self.min_validation_loss + self.min_delta):
 99            self.counter += 1
100            if self.counter >= self.patience:
101                return True
102        return False
class ClassificationMethods(enum.Enum):
 9class ClassificationMethods(Enum):
10    """
11    Available classification methods
12    """
13    RANDOM_FOREST = 'randomForestClassifier'
14    KNN = 'knn'
15    XGBOOST = 'xgboost'
16    SGDCLASSIFIER = 'SGDClassifier'
17    PERCEPTRON = 'perceptron'
18    LSTM = 'lstm'
19    CUSTOM_PYTORCH = 'customPytorch'
20    MLP = 'mlp'
21    SVM = 'svc'
22    DT = 'DecisionTree'

Available classification methods

RANDOM_FOREST = <ClassificationMethods.RANDOM_FOREST: 'randomForestClassifier'>
KNN = <ClassificationMethods.KNN: 'knn'>
XGBOOST = <ClassificationMethods.XGBOOST: 'xgboost'>
SGDCLASSIFIER = <ClassificationMethods.SGDCLASSIFIER: 'SGDClassifier'>
PERCEPTRON = <ClassificationMethods.PERCEPTRON: 'perceptron'>
LSTM = <ClassificationMethods.LSTM: 'lstm'>
CUSTOM_PYTORCH = <ClassificationMethods.CUSTOM_PYTORCH: 'customPytorch'>
MLP = <ClassificationMethods.MLP: 'mlp'>
SVM = <ClassificationMethods.SVM: 'svc'>
DT = <ClassificationMethods.DT: 'DecisionTree'>
Inherited Members
enum.Enum
name
value
class RegressionMethods(enum.Enum):
25class RegressionMethods(Enum):
26    """
27    Available regression methods
28    """
29    RANDOM_FOREST = 'randomForestRegressor'

Available regression methods

RANDOM_FOREST = <RegressionMethods.RANDOM_FOREST: 'randomForestRegressor'>
Inherited Members
enum.Enum
name
value
def get_tensor(df: pandas.core.frame.DataFrame, prefix_length):
32def get_tensor(df: DataFrame, prefix_length):
33    trace_attributes = [att for att in df.columns if 'prefix_' not in att]
34    event_attributes = [att[:-2] for att in df.columns if att[-2:] == '_1']
35
36    reshaped_data = {
37            trace_index: {
38                prefix_index:
39                    list(flatten(
40                        feat_values if isinstance(feat_values, tuple) else [feat_values]
41                        for feat_name, feat_values in trace.items()
42                        if feat_name in trace_attributes + [event_attribute + '_' + str(prefix_index) for event_attribute in event_attributes]
43                    ))
44                for prefix_index in range(1, prefix_length + 1)
45            }
46            for trace_index, trace in df.iterrows()
47    }
48
49    flattened_features = max(
50        len(reshaped_data[trace][prefix])
51        for trace in reshaped_data
52        for prefix in reshaped_data[trace]
53    )
54
55    tensor = np.zeros((
56        len(df),                # sample
57        prefix_length,          # time steps
58        flattened_features      # features x single time step (trace and event attributes)
59    ))
60
61    for i, trace_index in enumerate(reshaped_data):  # prefix
62        for j, prefix_index in enumerate(reshaped_data[trace_index]):  # steps of the prefix
63            for single_flattened_value in range(len(reshaped_data[trace_index][prefix_index])):
64                tensor[i, j, single_flattened_value] = reshaped_data[trace_index][prefix_index][single_flattened_value]
65
66    return tensor
def shape_label_df(df: pandas.core.frame.DataFrame):
68def shape_label_df(df: DataFrame):
69    labels_list = df['label'].tolist()
70    labels = np.zeros((len(labels_list), int(max(df['label'].nunique(), int(max(df['label'].values))) + 1)))
71    for label_idx, label_val in enumerate(labels_list):
72        labels[int(label_idx), int(label_val)] = 1
73
74    return labels
class LambdaModule(torch.nn.modules.module.Module):
77class LambdaModule(torch.nn.Module):
78    def __init__(self, lambd):
79        super().__init__()
80        import types
81        assert type(lambd) is types.LambdaType
82        self.lambd = lambd
83
84    def forward(self, x):
85        return self.lambd(x)

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, which allows them to be nested in a tree structure. You can assign the submodules as regular attributes::

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will have their parameters converted too when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

training (bool): whether this module is in training or evaluation mode.

LambdaModule(lambd)
78    def __init__(self, lambd):
79        super().__init__()
80        import types
81        assert type(lambd) is types.LambdaType
82        self.lambd = lambd

Initialize internal Module state, shared by both nn.Module and ScriptModule.

lambd
def forward(self, x):
84    def forward(self, x):
85        return self.lambd(x)

Define the computation performed at every call.

Should be overridden by all subclasses.

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

Inherited Members
torch.nn.modules.module.Module
dump_patches
training
call_super_init
register_buffer
register_parameter
add_module
register_module
get_submodule
get_parameter
get_buffer
get_extra_state
set_extra_state
apply
cuda
ipu
xpu
cpu
type
float
double
half
bfloat16
to_empty
to
register_full_backward_pre_hook
register_backward_hook
register_full_backward_hook
register_forward_pre_hook
register_forward_hook
register_state_dict_pre_hook
state_dict
register_load_state_dict_post_hook
load_state_dict
parameters
named_parameters
buffers
named_buffers
children
named_children
modules
named_modules
train
eval
requires_grad_
zero_grad
share_memory
extra_repr
compile
class EarlyStopper:
 88class EarlyStopper:
 89    def __init__(self, patience=1, min_delta=0):
 90        self.patience = patience
 91        self.min_delta = min_delta
 92        self.counter = 0
 93        self.min_validation_loss = float('inf')
 94
 95    def early_stop(self, validation_loss):
 96        if validation_loss < self.min_validation_loss:
 97            self.min_validation_loss = validation_loss
 98            self.counter = 0
 99        elif validation_loss > (self.min_validation_loss + self.min_delta):
100            self.counter += 1
101            if self.counter >= self.patience:
102                return True
103        return False
EarlyStopper(patience=1, min_delta=0)
89    def __init__(self, patience=1, min_delta=0):
90        self.patience = patience
91        self.min_delta = min_delta
92        self.counter = 0
93        self.min_validation_loss = float('inf')
patience
min_delta
counter
min_validation_loss
def early_stop(self, validation_loss):
 95    def early_stop(self, validation_loss):
 96        if validation_loss < self.min_validation_loss:
 97            self.min_validation_loss = validation_loss
 98            self.counter = 0
 99        elif validation_loss > (self.min_validation_loss + self.min_delta):
100            self.counter += 1
101            if self.counter >= self.patience:
102                return True
103        return False