nirdizati_light.predictive_model.predictive_model

  1import os
  2import logging
  3import numpy as np
  4import torch
  5from typing import Union, Optional, Type
  6from torch.nn import Module
  7from torch.utils.data import DataLoader, TensorDataset
  8from hyperopt import STATUS_OK, STATUS_FAIL
  9from pandas import DataFrame
 10from sklearn.calibration import CalibratedClassifierCV
 11from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 12from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
 13from sklearn.linear_model import SGDClassifier, Perceptron
 14from sklearn.neighbors import KNeighborsClassifier
 15from sklearn.neural_network import MLPClassifier
 16from sklearn.svm import SVC
 17from xgboost import XGBClassifier,XGBRegressor
 18from nirdizati_light.evaluation.common import evaluate_classifier, evaluate_regressor
 19from nirdizati_light.predictive_model.common import ClassificationMethods, RegressionMethods, get_tensor, shape_label_df, LambdaModule, EarlyStopper
 20
 21logger = logging.getLogger(__name__)
 22
 23
 24def drop_columns(df: DataFrame) -> DataFrame:
 25    df = df.drop(['trace_id', 'label'],axis=1)
 26    return df
 27
 28class PredictiveModel:
 29    """
 30    A class representing a predictive model.
 31
 32    Args:
 33        model_type (Union[ClassificationMethods, RegressionMethods]): Type of predictive model.
 34        train_df (DataFrame): Training data to train model.
 35        validate_df (DataFrame): Validation data to evaluate model.
 36        test_df (DataFrame): Test data to evaluate model.
 37        prefix_length (int): Length of prefix to consider.
 38        hyperopt_space (Optional[dict]): Space to perform hyperparameter optimization on; if not provided, fallbacks to default values. Defaults to None.
 39        custom_model_class (Optional[Type[Module]]): Class of a custom PyTorch module. Defaults to None.
 40    """
 41
 42    def __init__(
 43        self,
 44        model_type: Union[ClassificationMethods, RegressionMethods],
 45        train_df: DataFrame,
 46        validate_df: DataFrame,
 47        test_df: DataFrame,
 48        prefix_length: int,
 49        hyperopt_space: Optional[dict]=None,
 50        custom_model_class: Optional[Type[Module]]=None
 51    ):
 52        self.model_type = model_type
 53        self.config = None
 54        self.model = None
 55        self.full_train_df = train_df
 56        self.train_df = drop_columns(train_df)
 57        self.train_df_shaped = None
 58        self.full_validate_df = validate_df
 59        self.validate_df = drop_columns(validate_df)
 60        self.validate_df_shaped = None
 61        self.full_test_df = test_df
 62        self.test_df = drop_columns(test_df)
 63        self.test_df_shaped = None
 64
 65        self.hyperopt_space = hyperopt_space
 66        self.custom_model_class = custom_model_class
 67
 68        if model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 69            self.train_tensor = get_tensor(self.train_df, prefix_length)
 70            self.validate_tensor = get_tensor(self.validate_df, prefix_length)
 71            self.test_tensor = get_tensor(self.test_df, prefix_length)
 72
 73            self.train_label = shape_label_df(self.full_train_df)
 74            self.validate_label = shape_label_df(self.full_validate_df)
 75            self.test_label = shape_label_df(self.full_test_df)
 76
 77        elif model_type is ClassificationMethods.MLP.value:
 78            self.train_label = self.full_train_df['label'].nunique()
 79            self.validate_label = self.full_validate_df['label'].nunique()
 80            self.test_label = self.full_test_df['label'].unique()
 81    
 82    def train_and_evaluate_configuration(self, config, target):
 83        try:
 84            self.model = self._instantiate_model(config)
 85            self._fit_model(self.model, config)
 86            actual = self.full_validate_df['label']
 87            
 88            if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 89                actual = np.array(actual.to_list())
 90
 91            if self.model_type in [item.value for item in ClassificationMethods]:
 92                predicted, scores = self.predict(test=False)
 93                result = evaluate_classifier(actual, predicted, scores, loss=target)
 94            elif self.model_type in [item.value for item in RegressionMethods]:
 95                predicted = self.model.predict(self.validate_df)
 96                result = evaluate_regressor(actual, predicted, loss=target)
 97            else:
 98                raise Exception('Unsupported model_type')
 99
100            return {
101                'status': STATUS_OK,
102                'loss': - result['loss'],  # we are using fmin for hyperopt
103                'exception': None,
104                'config': config,
105                'model': self.model,
106                'result': result,
107            }
108        except Exception as e:
109            return {
110                'status': STATUS_FAIL,
111                'loss': 0,
112                'exception': str(e)
113            }
114
115    def _instantiate_model(self, config):
116        if self.model_type is ClassificationMethods.RANDOM_FOREST.value:
117            model = RandomForestClassifier(**config)
118        elif self.model_type is ClassificationMethods.DT.value:
119            model = DecisionTreeClassifier(**config)
120        elif self.model_type == ClassificationMethods.KNN.value:
121            model = KNeighborsClassifier(**config)
122        elif self.model_type == ClassificationMethods.XGBOOST.value:
123            model = XGBClassifier(**config)
124        elif self.model_type == ClassificationMethods.SGDCLASSIFIER.value:
125            model = SGDClassifier(**config)
126        elif self.model_type == ClassificationMethods.PERCEPTRON.value:
127            # added CalibratedClassifier to get predict_proba from perceptron model
128            model = Perceptron(**config)
129            model = CalibratedClassifierCV(model, cv=10, method='isotonic')
130        elif self.model_type is ClassificationMethods.MLP.value:
131            model = MLPClassifier(**config)
132            #model = CalibratedClassifierCV(model, cv=10, method='isotonic')
133        elif self.model_type == RegressionMethods.RANDOM_FOREST.value:
134            model = RandomForestRegressor(**config)
135        elif self.model_type == ClassificationMethods.SVM.value:
136            model = SVC(**config,probability=True)
137        elif self.model_type is ClassificationMethods.LSTM.value:
138            model = torch.nn.Sequential(
139                torch.nn.LSTM(
140                    input_size=self.train_tensor.shape[2],
141                    hidden_size=int(config['lstm_hidden_size']),
142                    num_layers=int(config['lstm_num_layers']),
143                    batch_first=True
144                ),
145                LambdaModule(lambda x: x[0][:,-1,:]),
146                torch.nn.Linear(int(config['lstm_hidden_size']), self.train_label.shape[1]),
147                torch.nn.Softmax(dim=1),
148            ).to(torch.float32)
149        elif self.model_type is ClassificationMethods.CUSTOM_PYTORCH.value:
150            model = self.custom_model_class(
151                input_dim=self.train_tensor.shape[2],
152                output_dim=self.train_label.shape[1],
153                config=config,
154            ).to(torch.float32)
155        else:
156            raise Exception('unsupported model_type')
157        
158        return model
159
160    def _fit_model(self, model, config=None):
161        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
162            MAX_NUM_EPOCHS = config['max_num_epochs']
163
164            train_dataset = TensorDataset(torch.tensor(self.train_tensor, dtype=torch.float32), torch.tensor(self.train_label, dtype=torch.float32))
165            validate_dataset = TensorDataset(torch.tensor(self.validate_tensor, dtype=torch.float32), torch.tensor(self.validate_label, dtype=torch.float32))
166
167            train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
168            validate_loader = DataLoader(validate_dataset, batch_size=config['batch_size'], shuffle=False)
169
170            optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
171            criterion = torch.nn.CrossEntropyLoss()
172            
173            early_stopper = EarlyStopper(patience=config['early_stop_patience'], min_delta=config['early_stop_min_delta'])
174
175            for _ in range(MAX_NUM_EPOCHS):
176                # training
177                model.train()
178
179                for inputs, labels in train_loader:
180                    output = model(inputs)
181                    loss = criterion(output, labels)
182                    
183                    optimizer.zero_grad()
184                    loss.backward()
185                    optimizer.step()
186                
187                # validation
188                model.eval()
189                validate_loss = 0
190                
191                with torch.no_grad():
192                    for inputs, labels in validate_loader:
193                        output = model(inputs)
194                        validate_loss += criterion(output, labels).item()
195                
196                validate_loss /= len(validate_loader)
197
198                if early_stopper.early_stop(validate_loss):             
199                    break
200
201        else:
202            model.fit(self.train_df, self.full_train_df['label'])
203
204    def predict(self, test: bool=True) -> str:
205        """
206        Performs predictions with the model and returns them.
207
208        Args:
209            test (bool): Whether to perform predictions on test set (`True`) or on validation set (`False`).
210
211        Returns:
212            tuple: A tuple with predicted values and scores for predictions.
213        """
214
215        data = self.test_df if test else self.validate_df
216
217        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
218            data_tensor = torch.tensor(self.test_tensor if test else self.validate_tensor, dtype=torch.float32)
219
220            probabilities = self.model(data_tensor).detach().numpy()
221            predicted = np.argmax(probabilities, axis=1)
222            scores = np.amax(probabilities, axis=1)
223        else:
224            predicted = self.model.predict(data)
225            
226            if hasattr(self.model, 'predict_proba'):
227                scores = self.model.predict_proba(data)[:, 1]
228            else:
229                # Handle the case where predict_proba is not available
230                # For example, this may be the case for SGDClassifier trained with certain losses
231                scores = None
232
233        return predicted, scores
234    
235
236    def save(self, path: str, name: str):
237        """
238        Save the model to the given path.
239
240        Args:
241            path (str): Path to save the model.
242            name (str): Name of the model.
243
244        Returns:
245            str: Path to the saved model.
246        """
247
248        if not os.path.exists(path):
249            os.makedirs(path)
250
251        path_with_name = os.path.join(path, name)
252        
253        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
254            # save pytorch model
255            path_with_name += '.pt'
256            torch.save(self.model.state_dict(), path_with_name)
257        else:
258            # save scikit-learn model
259            path_with_name += '.joblib'
260            import joblib
261            joblib.dump(self.model, path_with_name)
262
263        return path_with_name
def drop_columns(df: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame:
def drop_columns(df: DataFrame) -> DataFrame:
    """
    Strip the ``trace_id`` and ``label`` columns from a frame.

    Args:
        df (DataFrame): Frame holding at least the ``trace_id`` and ``label`` columns.

    Returns:
        DataFrame: A new frame with the two columns removed; ``df`` itself is left untouched.
    """
    return df.drop(labels=['trace_id', 'label'], axis='columns')
class PredictiveModel:
 29class PredictiveModel:
 30    """
 31    A class representing a predictive model.
 32
 33    Args:
 34        model_type (Union[ClassificationMethods, RegressionMethods]): Type of predictive model.
 35        train_df (DataFrame): Training data to train model.
 36        validate_df (DataFrame): Validation data to evaluate model.
 37        test_df (DataFrame): Test data to evaluate model.
 38        prefix_length (int): Length of prefix to consider.
 39        hyperopt_space (Optional[dict]): Space to perform hyperparameter optimization on; if not provided, fallbacks to default values. Defaults to None.
 40        custom_model_class (Optional[Type[Module]]): Class of a custom PyTorch module. Defaults to None.
 41    """
 42
 43    def __init__(
 44        self,
 45        model_type: Union[ClassificationMethods, RegressionMethods],
 46        train_df: DataFrame,
 47        validate_df: DataFrame,
 48        test_df: DataFrame,
 49        prefix_length: int,
 50        hyperopt_space: Optional[dict]=None,
 51        custom_model_class: Optional[Type[Module]]=None
 52    ):
 53        self.model_type = model_type
 54        self.config = None
 55        self.model = None
 56        self.full_train_df = train_df
 57        self.train_df = drop_columns(train_df)
 58        self.train_df_shaped = None
 59        self.full_validate_df = validate_df
 60        self.validate_df = drop_columns(validate_df)
 61        self.validate_df_shaped = None
 62        self.full_test_df = test_df
 63        self.test_df = drop_columns(test_df)
 64        self.test_df_shaped = None
 65
 66        self.hyperopt_space = hyperopt_space
 67        self.custom_model_class = custom_model_class
 68
 69        if model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 70            self.train_tensor = get_tensor(self.train_df, prefix_length)
 71            self.validate_tensor = get_tensor(self.validate_df, prefix_length)
 72            self.test_tensor = get_tensor(self.test_df, prefix_length)
 73
 74            self.train_label = shape_label_df(self.full_train_df)
 75            self.validate_label = shape_label_df(self.full_validate_df)
 76            self.test_label = shape_label_df(self.full_test_df)
 77
 78        elif model_type is ClassificationMethods.MLP.value:
 79            self.train_label = self.full_train_df['label'].nunique()
 80            self.validate_label = self.full_validate_df['label'].nunique()
 81            self.test_label = self.full_test_df['label'].unique()
 82    
 83    def train_and_evaluate_configuration(self, config, target):
 84        try:
 85            self.model = self._instantiate_model(config)
 86            self._fit_model(self.model, config)
 87            actual = self.full_validate_df['label']
 88            
 89            if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 90                actual = np.array(actual.to_list())
 91
 92            if self.model_type in [item.value for item in ClassificationMethods]:
 93                predicted, scores = self.predict(test=False)
 94                result = evaluate_classifier(actual, predicted, scores, loss=target)
 95            elif self.model_type in [item.value for item in RegressionMethods]:
 96                predicted = self.model.predict(self.validate_df)
 97                result = evaluate_regressor(actual, predicted, loss=target)
 98            else:
 99                raise Exception('Unsupported model_type')
100
101            return {
102                'status': STATUS_OK,
103                'loss': - result['loss'],  # we are using fmin for hyperopt
104                'exception': None,
105                'config': config,
106                'model': self.model,
107                'result': result,
108            }
109        except Exception as e:
110            return {
111                'status': STATUS_FAIL,
112                'loss': 0,
113                'exception': str(e)
114            }
115
116    def _instantiate_model(self, config):
117        if self.model_type is ClassificationMethods.RANDOM_FOREST.value:
118            model = RandomForestClassifier(**config)
119        elif self.model_type is ClassificationMethods.DT.value:
120            model = DecisionTreeClassifier(**config)
121        elif self.model_type == ClassificationMethods.KNN.value:
122            model = KNeighborsClassifier(**config)
123        elif self.model_type == ClassificationMethods.XGBOOST.value:
124            model = XGBClassifier(**config)
125        elif self.model_type == ClassificationMethods.SGDCLASSIFIER.value:
126            model = SGDClassifier(**config)
127        elif self.model_type == ClassificationMethods.PERCEPTRON.value:
128            # added CalibratedClassifier to get predict_proba from perceptron model
129            model = Perceptron(**config)
130            model = CalibratedClassifierCV(model, cv=10, method='isotonic')
131        elif self.model_type is ClassificationMethods.MLP.value:
132            model = MLPClassifier(**config)
133            #model = CalibratedClassifierCV(model, cv=10, method='isotonic')
134        elif self.model_type == RegressionMethods.RANDOM_FOREST.value:
135            model = RandomForestRegressor(**config)
136        elif self.model_type == ClassificationMethods.SVM.value:
137            model = SVC(**config,probability=True)
138        elif self.model_type is ClassificationMethods.LSTM.value:
139            model = torch.nn.Sequential(
140                torch.nn.LSTM(
141                    input_size=self.train_tensor.shape[2],
142                    hidden_size=int(config['lstm_hidden_size']),
143                    num_layers=int(config['lstm_num_layers']),
144                    batch_first=True
145                ),
146                LambdaModule(lambda x: x[0][:,-1,:]),
147                torch.nn.Linear(int(config['lstm_hidden_size']), self.train_label.shape[1]),
148                torch.nn.Softmax(dim=1),
149            ).to(torch.float32)
150        elif self.model_type is ClassificationMethods.CUSTOM_PYTORCH.value:
151            model = self.custom_model_class(
152                input_dim=self.train_tensor.shape[2],
153                output_dim=self.train_label.shape[1],
154                config=config,
155            ).to(torch.float32)
156        else:
157            raise Exception('unsupported model_type')
158        
159        return model
160
161    def _fit_model(self, model, config=None):
162        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
163            MAX_NUM_EPOCHS = config['max_num_epochs']
164
165            train_dataset = TensorDataset(torch.tensor(self.train_tensor, dtype=torch.float32), torch.tensor(self.train_label, dtype=torch.float32))
166            validate_dataset = TensorDataset(torch.tensor(self.validate_tensor, dtype=torch.float32), torch.tensor(self.validate_label, dtype=torch.float32))
167
168            train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
169            validate_loader = DataLoader(validate_dataset, batch_size=config['batch_size'], shuffle=False)
170
171            optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
172            criterion = torch.nn.CrossEntropyLoss()
173            
174            early_stopper = EarlyStopper(patience=config['early_stop_patience'], min_delta=config['early_stop_min_delta'])
175
176            for _ in range(MAX_NUM_EPOCHS):
177                # training
178                model.train()
179
180                for inputs, labels in train_loader:
181                    output = model(inputs)
182                    loss = criterion(output, labels)
183                    
184                    optimizer.zero_grad()
185                    loss.backward()
186                    optimizer.step()
187                
188                # validation
189                model.eval()
190                validate_loss = 0
191                
192                with torch.no_grad():
193                    for inputs, labels in validate_loader:
194                        output = model(inputs)
195                        validate_loss += criterion(output, labels).item()
196                
197                validate_loss /= len(validate_loader)
198
199                if early_stopper.early_stop(validate_loss):             
200                    break
201
202        else:
203            model.fit(self.train_df, self.full_train_df['label'])
204
205    def predict(self, test: bool=True) -> str:
206        """
207        Performs predictions with the model and returns them.
208
209        Args:
210            test (bool): Whether to perform predictions on test set (`True`) or on validation set (`False`).
211
212        Returns:
213            tuple: A tuple with predicted values and scores for predictions.
214        """
215
216        data = self.test_df if test else self.validate_df
217
218        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
219            data_tensor = torch.tensor(self.test_tensor if test else self.validate_tensor, dtype=torch.float32)
220
221            probabilities = self.model(data_tensor).detach().numpy()
222            predicted = np.argmax(probabilities, axis=1)
223            scores = np.amax(probabilities, axis=1)
224        else:
225            predicted = self.model.predict(data)
226            
227            if hasattr(self.model, 'predict_proba'):
228                scores = self.model.predict_proba(data)[:, 1]
229            else:
230                # Handle the case where predict_proba is not available
231                # For example, this may be the case for SGDClassifier trained with certain losses
232                scores = None
233
234        return predicted, scores
235    
236
237    def save(self, path: str, name: str):
238        """
239        Save the model to the given path.
240
241        Args:
242            path (str): Path to save the model.
243            name (str): Name of the model.
244
245        Returns:
246            str: Path to the saved model.
247        """
248
249        if not os.path.exists(path):
250            os.makedirs(path)
251
252        path_with_name = os.path.join(path, name)
253        
254        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
255            # save pytorch model
256            path_with_name += '.pt'
257            torch.save(self.model.state_dict(), path_with_name)
258        else:
259            # save scikit-learn model
260            path_with_name += '.joblib'
261            import joblib
262            joblib.dump(self.model, path_with_name)
263
264        return path_with_name

A class representing a predictive model.

Arguments:
  • model_type (Union[ClassificationMethods, RegressionMethods]): Type of predictive model.
  • train_df (DataFrame): Training data to train model.
  • validate_df (DataFrame): Validation data to evaluate model.
  • test_df (DataFrame): Test data to evaluate model.
  • prefix_length (int): Length of prefix to consider.
  • hyperopt_space (Optional[dict]): Space to perform hyperparameter optimization on; if not provided, falls back to default values. Defaults to None.
  • custom_model_class (Optional[Type[Module]]): Class of a custom PyTorch module. Defaults to None.
PredictiveModel( model_type: Union[nirdizati_light.predictive_model.common.ClassificationMethods, nirdizati_light.predictive_model.common.RegressionMethods], train_df: pandas.core.frame.DataFrame, validate_df: pandas.core.frame.DataFrame, test_df: pandas.core.frame.DataFrame, prefix_length: int, hyperopt_space: Optional[dict] = None, custom_model_class: Optional[Type[torch.nn.modules.module.Module]] = None)
43    def __init__(
44        self,
45        model_type: Union[ClassificationMethods, RegressionMethods],
46        train_df: DataFrame,
47        validate_df: DataFrame,
48        test_df: DataFrame,
49        prefix_length: int,
50        hyperopt_space: Optional[dict]=None,
51        custom_model_class: Optional[Type[Module]]=None
52    ):
53        self.model_type = model_type
54        self.config = None
55        self.model = None
56        self.full_train_df = train_df
57        self.train_df = drop_columns(train_df)
58        self.train_df_shaped = None
59        self.full_validate_df = validate_df
60        self.validate_df = drop_columns(validate_df)
61        self.validate_df_shaped = None
62        self.full_test_df = test_df
63        self.test_df = drop_columns(test_df)
64        self.test_df_shaped = None
65
66        self.hyperopt_space = hyperopt_space
67        self.custom_model_class = custom_model_class
68
69        if model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
70            self.train_tensor = get_tensor(self.train_df, prefix_length)
71            self.validate_tensor = get_tensor(self.validate_df, prefix_length)
72            self.test_tensor = get_tensor(self.test_df, prefix_length)
73
74            self.train_label = shape_label_df(self.full_train_df)
75            self.validate_label = shape_label_df(self.full_validate_df)
76            self.test_label = shape_label_df(self.full_test_df)
77
78        elif model_type is ClassificationMethods.MLP.value:
79            self.train_label = self.full_train_df['label'].nunique()
80            self.validate_label = self.full_validate_df['label'].nunique()
81            self.test_label = self.full_test_df['label'].unique()
model_type
config
model
full_train_df
train_df
train_df_shaped
full_validate_df
validate_df
validate_df_shaped
full_test_df
test_df
test_df_shaped
hyperopt_space
custom_model_class
def train_and_evaluate_configuration(self, config, target):
 83    def train_and_evaluate_configuration(self, config, target):
 84        try:
 85            self.model = self._instantiate_model(config)
 86            self._fit_model(self.model, config)
 87            actual = self.full_validate_df['label']
 88            
 89            if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 90                actual = np.array(actual.to_list())
 91
 92            if self.model_type in [item.value for item in ClassificationMethods]:
 93                predicted, scores = self.predict(test=False)
 94                result = evaluate_classifier(actual, predicted, scores, loss=target)
 95            elif self.model_type in [item.value for item in RegressionMethods]:
 96                predicted = self.model.predict(self.validate_df)
 97                result = evaluate_regressor(actual, predicted, loss=target)
 98            else:
 99                raise Exception('Unsupported model_type')
100
101            return {
102                'status': STATUS_OK,
103                'loss': - result['loss'],  # we are using fmin for hyperopt
104                'exception': None,
105                'config': config,
106                'model': self.model,
107                'result': result,
108            }
109        except Exception as e:
110            return {
111                'status': STATUS_FAIL,
112                'loss': 0,
113                'exception': str(e)
114            }
def predict(self, test: bool = True) -> str:
205    def predict(self, test: bool=True) -> str:
206        """
207        Performs predictions with the model and returns them.
208
209        Args:
210            test (bool): Whether to perform predictions on test set (`True`) or on validation set (`False`).
211
212        Returns:
213            tuple: A tuple with predicted values and scores for predictions.
214        """
215
216        data = self.test_df if test else self.validate_df
217
218        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
219            data_tensor = torch.tensor(self.test_tensor if test else self.validate_tensor, dtype=torch.float32)
220
221            probabilities = self.model(data_tensor).detach().numpy()
222            predicted = np.argmax(probabilities, axis=1)
223            scores = np.amax(probabilities, axis=1)
224        else:
225            predicted = self.model.predict(data)
226            
227            if hasattr(self.model, 'predict_proba'):
228                scores = self.model.predict_proba(data)[:, 1]
229            else:
230                # Handle the case where predict_proba is not available
231                # For example, this may be the case for SGDClassifier trained with certain losses
232                scores = None
233
234        return predicted, scores

Performs predictions with the model and returns them.

Arguments:
  • test (bool): Whether to perform predictions on test set (True) or on validation set (False).
Returns:

tuple: A tuple with predicted values and scores for predictions.

def save(self, path: str, name: str):
237    def save(self, path: str, name: str):
238        """
239        Save the model to the given path.
240
241        Args:
242            path (str): Path to save the model.
243            name (str): Name of the model.
244
245        Returns:
246            str: Path to the saved model.
247        """
248
249        if not os.path.exists(path):
250            os.makedirs(path)
251
252        path_with_name = os.path.join(path, name)
253        
254        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
255            # save pytorch model
256            path_with_name += '.pt'
257            torch.save(self.model.state_dict(), path_with_name)
258        else:
259            # save scikit-learn model
260            path_with_name += '.joblib'
261            import joblib
262            joblib.dump(self.model, path_with_name)
263
264        return path_with_name

Save the model to the given path.

Arguments:
  • path (str): Path to save the model.
  • name (str): Name of the model.
Returns:

str: Path to the saved model.