nirdizati_light.predictive_model.common

View Source

  1from enum import Enum
  2import torch
  3import numpy as np
  4from funcy import flatten
  5from pandas import DataFrame
  6
  7
  8class ClassificationMethods(Enum):
  9    """
 10    Available classification methods
 11    """
 12    RANDOM_FOREST = 'randomForestClassifier'
 13    KNN = 'knn'
 14    XGBOOST = 'xgboost'
 15    SGDCLASSIFIER = 'SGDClassifier'
 16    PERCEPTRON = 'perceptron'
 17    LSTM = 'lstm'
 18    CUSTOM_PYTORCH = 'customPytorch'
 19    MLP = 'mlp'
 20    SVM = 'svc'
 21    DT = 'DecisionTree'
 22
 23
 24class RegressionMethods(Enum):
 25    """
 26    Available regression methods
 27    """
 28    RANDOM_FOREST = 'randomForestRegressor'
 29
 30
 31def get_tensor(df: DataFrame, prefix_length):
 32    trace_attributes = [att for att in df.columns if 'prefix_' not in att]
 33    event_attributes = [att[:-2] for att in df.columns if att[-2:] == '_1']
 34
 35    reshaped_data = {
 36            trace_index: {
 37                prefix_index:
 38                    list(flatten(
 39                        feat_values if isinstance(feat_values, tuple) else [feat_values]
 40                        for feat_name, feat_values in trace.items()
 41                        if feat_name in trace_attributes + [event_attribute + '_' + str(prefix_index) for event_attribute in event_attributes]
 42                    ))
 43                for prefix_index in range(1, prefix_length + 1)
 44            }
 45            for trace_index, trace in df.iterrows()
 46    }
 47
 48    flattened_features = max(
 49        len(reshaped_data[trace][prefix])
 50        for trace in reshaped_data
 51        for prefix in reshaped_data[trace]
 52    )
 53
 54    tensor = np.zeros((
 55        len(df),                # sample
 56        prefix_length,          # time steps
 57        flattened_features      # features x single time step (trace and event attributes)
 58    ))
 59
 60    for i, trace_index in enumerate(reshaped_data):  # prefix
 61        for j, prefix_index in enumerate(reshaped_data[trace_index]):  # steps of the prefix
 62            for single_flattened_value in range(len(reshaped_data[trace_index][prefix_index])):
 63                tensor[i, j, single_flattened_value] = reshaped_data[trace_index][prefix_index][single_flattened_value]
 64
 65    return tensor
 66
 67def shape_label_df(df: DataFrame):
 68    labels_list = df['label'].tolist()
 69    labels = np.zeros((len(labels_list), int(max(df['label'].nunique(), int(max(df['label'].values))) + 1)))
 70    for label_idx, label_val in enumerate(labels_list):
 71        labels[int(label_idx), int(label_val)] = 1
 72
 73    return labels
 74
 75# General purpose class to wrap a lambda function as a torch module
 76class LambdaModule(torch.nn.Module):
 77    def __init__(self, lambd):
 78        super().__init__()
 79        import types
 80        assert type(lambd) is types.LambdaType
 81        self.lambd = lambd
 82
 83    def forward(self, x):
 84        return self.lambd(x)
 85    
 86# Class for early stopping
 87class EarlyStopper:
 88    def __init__(self, patience=1, min_delta=0):
 89        self.patience = patience
 90        self.min_delta = min_delta
 91        self.counter = 0
 92        self.min_validation_loss = float('inf')
 93
 94    def early_stop(self, validation_loss):
 95        if validation_loss < self.min_validation_loss:
 96            self.min_validation_loss = validation_loss
 97            self.counter = 0
 98        elif validation_loss > (self.min_validation_loss + self.min_delta):
 99            self.counter += 1
100            if self.counter >= self.patience:
101                return True
102        return False

class ClassificationMethods(enum.Enum): View Source

 9class ClassificationMethods(Enum):
10    """
11    Available classification methods
12    """
13    RANDOM_FOREST = 'randomForestClassifier'
14    KNN = 'knn'
15    XGBOOST = 'xgboost'
16    SGDCLASSIFIER = 'SGDClassifier'
17    PERCEPTRON = 'perceptron'
18    LSTM = 'lstm'
19    CUSTOM_PYTORCH = 'customPytorch'
20    MLP = 'mlp'
21    SVM = 'svc'
22    DT = 'DecisionTree'

Available classification methods

RANDOM_FOREST = <ClassificationMethods.RANDOM_FOREST: 'randomForestClassifier'>

KNN = <ClassificationMethods.KNN: 'knn'>

XGBOOST = <ClassificationMethods.XGBOOST: 'xgboost'>

SGDCLASSIFIER = <ClassificationMethods.SGDCLASSIFIER: 'SGDClassifier'>

PERCEPTRON = <ClassificationMethods.PERCEPTRON: 'perceptron'>

LSTM = <ClassificationMethods.LSTM: 'lstm'>

CUSTOM_PYTORCH = <ClassificationMethods.CUSTOM_PYTORCH: 'customPytorch'>

MLP = <ClassificationMethods.MLP: 'mlp'>

SVM = <ClassificationMethods.SVM: 'svc'>

DT = <ClassificationMethods.DT: 'DecisionTree'>

Inherited Members

enum.Enum: name; value

class RegressionMethods(enum.Enum): View Source

class RegressionMethods(Enum):
    """
    Enumeration of the regression methods that can be selected.

    Every member maps a symbolic name to the string identifier used for
    that method.
    """
    RANDOM_FOREST = 'randomForestRegressor'

Available regression methods

RANDOM_FOREST = <RegressionMethods.RANDOM_FOREST: 'randomForestRegressor'>

Inherited Members

enum.Enum: name; value

def get_tensor(df: pandas.core.frame.DataFrame, prefix_length): View Source

def get_tensor(df: DataFrame, prefix_length):
    """
    Build a zero-padded 3-D array (sample, time step, feature) from a wide dataframe.

    Columns whose name does not contain ``'prefix_'`` are treated as trace
    attributes and are repeated at every time step. Columns whose name ends
    in ``'_1'`` define the event attribute base names; time step ``k`` picks
    up the columns named ``<base>_<k>``. Tuple-valued cells are expanded via
    ``flatten`` so that each element occupies its own feature slot.

    :param df: dataframe with string column names; each row yields one sample
    :param prefix_length: number of time steps to extract (steps ``1..prefix_length``)
    :return: ``numpy`` array of shape ``(len(df), prefix_length, n_features)``,
        where ``n_features`` is the length of the longest flattened step and
        shorter steps are right-padded with zeros. ``n_features`` is 0 when
        there is nothing to extract (empty dataframe or ``prefix_length == 0``).
    """
    trace_attributes = [att for att in df.columns if 'prefix_' not in att]
    event_attributes = [att[:-2] for att in df.columns if att[-2:] == '_1']

    # The admissible column names depend only on the time step, so build them
    # once per step (as a set) instead of once per cell.
    columns_per_step = [
        set(trace_attributes) | {
            event_attribute + '_' + str(prefix_index)
            for event_attribute in event_attributes
        }
        for prefix_index in range(1, prefix_length + 1)
    ]

    # Positional lists (not dicts keyed by index label): rows sharing an index
    # label must not overwrite each other.
    reshaped_data = [
        [
            list(flatten(
                feat_values if isinstance(feat_values, tuple) else [feat_values]
                for feat_name, feat_values in trace.items()
                if feat_name in step_columns
            ))
            for step_columns in columns_per_step
        ]
        for _, trace in df.iterrows()
    ]

    # ``default=0`` keeps an empty dataframe / zero steps from raising.
    flattened_features = max(
        (len(step_values) for steps in reshaped_data for step_values in steps),
        default=0,
    )

    tensor = np.zeros((
        len(df),                # sample
        prefix_length,          # time steps
        flattened_features      # features x single time step (trace and event attributes)
    ))

    for i, steps in enumerate(reshaped_data):  # sample
        for j, step_values in enumerate(steps):  # steps of the prefix
            for k, value in enumerate(step_values):
                tensor[i, j, k] = value

    return tensor

def shape_label_df(df: pandas.core.frame.DataFrame): View Source

def shape_label_df(df: DataFrame):
    """
    One-hot encode the ``label`` column of ``df``.

    The result has one row per dataframe row and
    ``max(number of distinct labels, largest label) + 1`` columns; each row
    has a 1 in the column given by ``int(label)`` and 0 elsewhere.

    :param df: dataframe containing a ``label`` column of integer-convertible values
    :return: ``numpy`` array of shape ``(len(df), n_columns)``
    """
    label_column = df['label']
    raw_labels = label_column.tolist()

    largest_label = int(max(label_column.values))
    n_columns = int(max(label_column.nunique(), largest_label) + 1)
    one_hot = np.zeros((len(raw_labels), n_columns))

    # Column index for every row, truncated to int exactly as a per-row int() would.
    column_indices = np.array([int(value) for value in raw_labels], dtype=int)
    one_hot[np.arange(len(raw_labels)), column_indices] = 1

    return one_hot

class LambdaModule(torch.nn.modules.module.Module): View Source

class LambdaModule(torch.nn.Module):
    """
    Wrap a callable so it can be used as a ``torch.nn.Module``.

    The callable is stored on the instance as ``lambd`` and is applied to
    the input of ``forward``.
    """

    def __init__(self, lambd):
        """
        :param lambd: callable taking the module input and returning the output
        :raises TypeError: if ``lambd`` is not callable
        """
        super().__init__()
        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would silently disable the validation.
        if not callable(lambd):
            raise TypeError(
                'lambd must be callable, got ' + type(lambd).__name__
            )
        self.lambd = lambd

    def forward(self, x):
        """Apply the wrapped callable to ``x`` and return its result."""
        return self.lambd(x)

Base class for all neural network modules.

Your models should also subclass this class.

Modules can also contain other Modules, allowing them to be nested in a tree structure. You can assign the submodules as regular attributes:

import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        return F.relu(self.conv2(x))

Submodules assigned in this way will be registered, and will have their parameters converted too when you call to(), etc.

As per the example above, an __init__() call to the parent class must be made before assignment on the child.

Attribute `training` (bool): indicates whether this module is in training or evaluation mode.

LambdaModule(lambd) View Source

78    def __init__(self, lambd):
79        super().__init__()
80        import types
81        assert type(lambd) is types.LambdaType
82        self.lambd = lambd

Initialize internal Module state, shared by both nn.Module and ScriptModule.

lambd

def forward(self, x): View Source

84    def forward(self, x):
85        return self.lambd(x)

Define the computation performed at every call.

Should be overridden by all subclasses.

Although the recipe for forward pass needs to be defined within this function, one should call the Module instance afterwards instead of this since the former takes care of running the registered hooks while the latter silently ignores them.

Inherited Members

torch.nn.modules.module.Module: dump_patches; training; call_super_init; register_buffer; register_parameter; add_module; register_module; get_submodule; get_parameter; get_buffer; get_extra_state; set_extra_state; apply; cuda; ipu; xpu; cpu; type; float; double; half; bfloat16; to_empty; to; register_full_backward_pre_hook; register_backward_hook; register_full_backward_hook; register_forward_pre_hook; register_forward_hook; register_state_dict_pre_hook; state_dict; register_load_state_dict_post_hook; load_state_dict; parameters; named_parameters; buffers; named_buffers; children; named_children; modules; named_modules; train; eval; requires_grad_; zero_grad; share_memory; extra_repr; compile

class EarlyStopper: View Source

 88class EarlyStopper:
 89    def __init__(self, patience=1, min_delta=0):
 90        self.patience = patience
 91        self.min_delta = min_delta
 92        self.counter = 0
 93        self.min_validation_loss = float('inf')
 94
 95    def early_stop(self, validation_loss):
 96        if validation_loss < self.min_validation_loss:
 97            self.min_validation_loss = validation_loss
 98            self.counter = 0
 99        elif validation_loss > (self.min_validation_loss + self.min_delta):
100            self.counter += 1
101            if self.counter >= self.patience:
102                return True
103        return False

EarlyStopper(patience=1, min_delta=0) View Source

89    def __init__(self, patience=1, min_delta=0):
90        self.patience = patience
91        self.min_delta = min_delta
92        self.counter = 0
93        self.min_validation_loss = float('inf')

patience

min_delta

counter

min_validation_loss

def early_stop(self, validation_loss): View Source

 95    def early_stop(self, validation_loss):
 96        if validation_loss < self.min_validation_loss:
 97            self.min_validation_loss = validation_loss
 98            self.counter = 0
 99        elif validation_loss > (self.min_validation_loss + self.min_delta):
100            self.counter += 1
101            if self.counter >= self.patience:
102                return True
103        return False