nirdizati_light.predictive_model.predictive_model

View Source

  1import os
  2import logging
  3import numpy as np
  4import torch
  5from typing import Union, Optional, Type
  6from torch.nn import Module
  7from torch.utils.data import DataLoader, TensorDataset
  8from hyperopt import STATUS_OK, STATUS_FAIL
  9from pandas import DataFrame
 10from sklearn.calibration import CalibratedClassifierCV
 11from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
 12from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
 13from sklearn.linear_model import SGDClassifier, Perceptron
 14from sklearn.neighbors import KNeighborsClassifier
 15from sklearn.neural_network import MLPClassifier
 16from sklearn.svm import SVC
 17from xgboost import XGBClassifier,XGBRegressor
 18from nirdizati_light.evaluation.common import evaluate_classifier, evaluate_regressor
 19from nirdizati_light.predictive_model.common import ClassificationMethods, RegressionMethods, get_tensor, shape_label_df, LambdaModule, EarlyStopper
 20
 21logger = logging.getLogger(__name__)
 22
 23
 24def drop_columns(df: DataFrame) -> DataFrame:
 25    df = df.drop(['trace_id', 'label'],axis=1)
 26    return df
 27
 28class PredictiveModel:
 29    """
 30    A class representing a predictive model.
 31
 32    Args:
 33        model_type (Union[ClassificationMethods, RegressionMethods]): Type of predictive model.
 34        train_df (DataFrame): Training data to train model.
 35        validate_df (DataFrame): Validation data to evaluate model.
 36        test_df (DataFrame): Test data to evaluate model.
 37        prefix_length (int): Length of prefix to consider.
 38        hyperopt_space (Optional[dict]): Space to perform hyperparameter optimization on; if not provided, fallbacks to default values. Defaults to None.
 39        custom_model_class (Optional[Type[Module]]): Class of a custom PyTorch module. Defaults to None.
 40    """
 41
 42    def __init__(
 43        self,
 44        model_type: Union[ClassificationMethods, RegressionMethods],
 45        train_df: DataFrame,
 46        validate_df: DataFrame,
 47        test_df: DataFrame,
 48        prefix_length: int,
 49        hyperopt_space: Optional[dict]=None,
 50        custom_model_class: Optional[Type[Module]]=None
 51    ):
 52        self.model_type = model_type
 53        self.config = None
 54        self.model = None
 55        self.full_train_df = train_df
 56        self.train_df = drop_columns(train_df)
 57        self.train_df_shaped = None
 58        self.full_validate_df = validate_df
 59        self.validate_df = drop_columns(validate_df)
 60        self.validate_df_shaped = None
 61        self.full_test_df = test_df
 62        self.test_df = drop_columns(test_df)
 63        self.test_df_shaped = None
 64
 65        self.hyperopt_space = hyperopt_space
 66        self.custom_model_class = custom_model_class
 67
 68        if model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 69            self.train_tensor = get_tensor(self.train_df, prefix_length)
 70            self.validate_tensor = get_tensor(self.validate_df, prefix_length)
 71            self.test_tensor = get_tensor(self.test_df, prefix_length)
 72
 73            self.train_label = shape_label_df(self.full_train_df)
 74            self.validate_label = shape_label_df(self.full_validate_df)
 75            self.test_label = shape_label_df(self.full_test_df)
 76
 77        elif model_type is ClassificationMethods.MLP.value:
 78            self.train_label = self.full_train_df['label'].nunique()
 79            self.validate_label = self.full_validate_df['label'].nunique()
 80            self.test_label = self.full_test_df['label'].unique()
 81    
 82    def train_and_evaluate_configuration(self, config, target):
 83        try:
 84            self.model = self._instantiate_model(config)
 85            self._fit_model(self.model, config)
 86            actual = self.full_validate_df['label']
 87            
 88            if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 89                actual = np.array(actual.to_list())
 90
 91            if self.model_type in [item.value for item in ClassificationMethods]:
 92                predicted, scores = self.predict(test=False)
 93                result = evaluate_classifier(actual, predicted, scores, loss=target)
 94            elif self.model_type in [item.value for item in RegressionMethods]:
 95                predicted = self.model.predict(self.validate_df)
 96                result = evaluate_regressor(actual, predicted, loss=target)
 97            else:
 98                raise Exception('Unsupported model_type')
 99
100            return {
101                'status': STATUS_OK,
102                'loss': - result['loss'],  # we are using fmin for hyperopt
103                'exception': None,
104                'config': config,
105                'model': self.model,
106                'result': result,
107            }
108        except Exception as e:
109            return {
110                'status': STATUS_FAIL,
111                'loss': 0,
112                'exception': str(e)
113            }
114
115    def _instantiate_model(self, config):
116        if self.model_type is ClassificationMethods.RANDOM_FOREST.value:
117            model = RandomForestClassifier(**config)
118        elif self.model_type is ClassificationMethods.DT.value:
119            model = DecisionTreeClassifier(**config)
120        elif self.model_type == ClassificationMethods.KNN.value:
121            model = KNeighborsClassifier(**config)
122        elif self.model_type == ClassificationMethods.XGBOOST.value:
123            model = XGBClassifier(**config)
124        elif self.model_type == ClassificationMethods.SGDCLASSIFIER.value:
125            model = SGDClassifier(**config)
126        elif self.model_type == ClassificationMethods.PERCEPTRON.value:
127            # added CalibratedClassifier to get predict_proba from perceptron model
128            model = Perceptron(**config)
129            model = CalibratedClassifierCV(model, cv=10, method='isotonic')
130        elif self.model_type is ClassificationMethods.MLP.value:
131            model = MLPClassifier(**config)
132            #model = CalibratedClassifierCV(model, cv=10, method='isotonic')
133        elif self.model_type == RegressionMethods.RANDOM_FOREST.value:
134            model = RandomForestRegressor(**config)
135        elif self.model_type == ClassificationMethods.SVM.value:
136            model = SVC(**config,probability=True)
137        elif self.model_type is ClassificationMethods.LSTM.value:
138            model = torch.nn.Sequential(
139                torch.nn.LSTM(
140                    input_size=self.train_tensor.shape[2],
141                    hidden_size=int(config['lstm_hidden_size']),
142                    num_layers=int(config['lstm_num_layers']),
143                    batch_first=True
144                ),
145                LambdaModule(lambda x: x[0][:,-1,:]),
146                torch.nn.Linear(int(config['lstm_hidden_size']), self.train_label.shape[1]),
147                torch.nn.Softmax(dim=1),
148            ).to(torch.float32)
149        elif self.model_type is ClassificationMethods.CUSTOM_PYTORCH.value:
150            model = self.custom_model_class(
151                input_dim=self.train_tensor.shape[2],
152                output_dim=self.train_label.shape[1],
153                config=config,
154            ).to(torch.float32)
155        else:
156            raise Exception('unsupported model_type')
157        
158        return model
159
160    def _fit_model(self, model, config=None):
161        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
162            MAX_NUM_EPOCHS = config['max_num_epochs']
163
164            train_dataset = TensorDataset(torch.tensor(self.train_tensor, dtype=torch.float32), torch.tensor(self.train_label, dtype=torch.float32))
165            validate_dataset = TensorDataset(torch.tensor(self.validate_tensor, dtype=torch.float32), torch.tensor(self.validate_label, dtype=torch.float32))
166
167            train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
168            validate_loader = DataLoader(validate_dataset, batch_size=config['batch_size'], shuffle=False)
169
170            optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
171            criterion = torch.nn.CrossEntropyLoss()
172            
173            early_stopper = EarlyStopper(patience=config['early_stop_patience'], min_delta=config['early_stop_min_delta'])
174
175            for _ in range(MAX_NUM_EPOCHS):
176                # training
177                model.train()
178
179                for inputs, labels in train_loader:
180                    output = model(inputs)
181                    loss = criterion(output, labels)
182                    
183                    optimizer.zero_grad()
184                    loss.backward()
185                    optimizer.step()
186                
187                # validation
188                model.eval()
189                validate_loss = 0
190                
191                with torch.no_grad():
192                    for inputs, labels in validate_loader:
193                        output = model(inputs)
194                        validate_loss += criterion(output, labels).item()
195                
196                validate_loss /= len(validate_loader)
197
198                if early_stopper.early_stop(validate_loss):             
199                    break
200
201        else:
202            model.fit(self.train_df, self.full_train_df['label'])
203
204    def predict(self, test: bool=True) -> str:
205        """
206        Performs predictions with the model and returns them.
207
208        Args:
209            test (bool): Whether to perform predictions on test set (`True`) or on validation set (`False`).
210
211        Returns:
212            tuple: A tuple with predicted values and scores for predictions.
213        """
214
215        data = self.test_df if test else self.validate_df
216
217        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
218            data_tensor = torch.tensor(self.test_tensor if test else self.validate_tensor, dtype=torch.float32)
219
220            probabilities = self.model(data_tensor).detach().numpy()
221            predicted = np.argmax(probabilities, axis=1)
222            scores = np.amax(probabilities, axis=1)
223        else:
224            predicted = self.model.predict(data)
225            
226            if hasattr(self.model, 'predict_proba'):
227                scores = self.model.predict_proba(data)[:, 1]
228            else:
229                # Handle the case where predict_proba is not available
230                # For example, this may be the case for SGDClassifier trained with certain losses
231                scores = None
232
233        return predicted, scores
234    
235
236    def save(self, path: str, name: str):
237        """
238        Save the model to the given path.
239
240        Args:
241            path (str): Path to save the model.
242            name (str): Name of the model.
243
244        Returns:
245            str: Path to the saved model.
246        """
247
248        if not os.path.exists(path):
249            os.makedirs(path)
250
251        path_with_name = os.path.join(path, name)
252        
253        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
254            # save pytorch model
255            path_with_name += '.pt'
256            torch.save(self.model.state_dict(), path_with_name)
257        else:
258            # save scikit-learn model
259            path_with_name += '.joblib'
260            import joblib
261            joblib.dump(self.model, path_with_name)
262
263        return path_with_name

logger = <Logger nirdizati_light.predictive_model.predictive_model (WARNING)>

def drop_columns(df: pandas.core.frame.DataFrame) -> pandas.core.frame.DataFrame: View Source

def drop_columns(df: DataFrame) -> DataFrame:
    """
    Return a copy of `df` without the 'trace_id' and 'label' columns.

    Args:
        df (DataFrame): Frame containing both a 'trace_id' and a 'label' column.

    Returns:
        DataFrame: New frame holding the remaining columns; `df` is not modified.
    """
    non_feature_columns = ['trace_id', 'label']
    return df.drop(columns=non_feature_columns)

class PredictiveModel: View Source

 29class PredictiveModel:
 30    """
 31    A class representing a predictive model.
 32
 33    Args:
 34        model_type (Union[ClassificationMethods, RegressionMethods]): Type of predictive model.
 35        train_df (DataFrame): Training data to train model.
 36        validate_df (DataFrame): Validation data to evaluate model.
 37        test_df (DataFrame): Test data to evaluate model.
 38        prefix_length (int): Length of prefix to consider.
 39        hyperopt_space (Optional[dict]): Space to perform hyperparameter optimization on; if not provided, fallbacks to default values. Defaults to None.
 40        custom_model_class (Optional[Type[Module]]): Class of a custom PyTorch module. Defaults to None.
 41    """
 42
 43    def __init__(
 44        self,
 45        model_type: Union[ClassificationMethods, RegressionMethods],
 46        train_df: DataFrame,
 47        validate_df: DataFrame,
 48        test_df: DataFrame,
 49        prefix_length: int,
 50        hyperopt_space: Optional[dict]=None,
 51        custom_model_class: Optional[Type[Module]]=None
 52    ):
 53        self.model_type = model_type
 54        self.config = None
 55        self.model = None
 56        self.full_train_df = train_df
 57        self.train_df = drop_columns(train_df)
 58        self.train_df_shaped = None
 59        self.full_validate_df = validate_df
 60        self.validate_df = drop_columns(validate_df)
 61        self.validate_df_shaped = None
 62        self.full_test_df = test_df
 63        self.test_df = drop_columns(test_df)
 64        self.test_df_shaped = None
 65
 66        self.hyperopt_space = hyperopt_space
 67        self.custom_model_class = custom_model_class
 68
 69        if model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 70            self.train_tensor = get_tensor(self.train_df, prefix_length)
 71            self.validate_tensor = get_tensor(self.validate_df, prefix_length)
 72            self.test_tensor = get_tensor(self.test_df, prefix_length)
 73
 74            self.train_label = shape_label_df(self.full_train_df)
 75            self.validate_label = shape_label_df(self.full_validate_df)
 76            self.test_label = shape_label_df(self.full_test_df)
 77
 78        elif model_type is ClassificationMethods.MLP.value:
 79            self.train_label = self.full_train_df['label'].nunique()
 80            self.validate_label = self.full_validate_df['label'].nunique()
 81            self.test_label = self.full_test_df['label'].unique()
 82    
 83    def train_and_evaluate_configuration(self, config, target):
 84        try:
 85            self.model = self._instantiate_model(config)
 86            self._fit_model(self.model, config)
 87            actual = self.full_validate_df['label']
 88            
 89            if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 90                actual = np.array(actual.to_list())
 91
 92            if self.model_type in [item.value for item in ClassificationMethods]:
 93                predicted, scores = self.predict(test=False)
 94                result = evaluate_classifier(actual, predicted, scores, loss=target)
 95            elif self.model_type in [item.value for item in RegressionMethods]:
 96                predicted = self.model.predict(self.validate_df)
 97                result = evaluate_regressor(actual, predicted, loss=target)
 98            else:
 99                raise Exception('Unsupported model_type')
100
101            return {
102                'status': STATUS_OK,
103                'loss': - result['loss'],  # we are using fmin for hyperopt
104                'exception': None,
105                'config': config,
106                'model': self.model,
107                'result': result,
108            }
109        except Exception as e:
110            return {
111                'status': STATUS_FAIL,
112                'loss': 0,
113                'exception': str(e)
114            }
115
116    def _instantiate_model(self, config):
117        if self.model_type is ClassificationMethods.RANDOM_FOREST.value:
118            model = RandomForestClassifier(**config)
119        elif self.model_type is ClassificationMethods.DT.value:
120            model = DecisionTreeClassifier(**config)
121        elif self.model_type == ClassificationMethods.KNN.value:
122            model = KNeighborsClassifier(**config)
123        elif self.model_type == ClassificationMethods.XGBOOST.value:
124            model = XGBClassifier(**config)
125        elif self.model_type == ClassificationMethods.SGDCLASSIFIER.value:
126            model = SGDClassifier(**config)
127        elif self.model_type == ClassificationMethods.PERCEPTRON.value:
128            # added CalibratedClassifier to get predict_proba from perceptron model
129            model = Perceptron(**config)
130            model = CalibratedClassifierCV(model, cv=10, method='isotonic')
131        elif self.model_type is ClassificationMethods.MLP.value:
132            model = MLPClassifier(**config)
133            #model = CalibratedClassifierCV(model, cv=10, method='isotonic')
134        elif self.model_type == RegressionMethods.RANDOM_FOREST.value:
135            model = RandomForestRegressor(**config)
136        elif self.model_type == ClassificationMethods.SVM.value:
137            model = SVC(**config,probability=True)
138        elif self.model_type is ClassificationMethods.LSTM.value:
139            model = torch.nn.Sequential(
140                torch.nn.LSTM(
141                    input_size=self.train_tensor.shape[2],
142                    hidden_size=int(config['lstm_hidden_size']),
143                    num_layers=int(config['lstm_num_layers']),
144                    batch_first=True
145                ),
146                LambdaModule(lambda x: x[0][:,-1,:]),
147                torch.nn.Linear(int(config['lstm_hidden_size']), self.train_label.shape[1]),
148                torch.nn.Softmax(dim=1),
149            ).to(torch.float32)
150        elif self.model_type is ClassificationMethods.CUSTOM_PYTORCH.value:
151            model = self.custom_model_class(
152                input_dim=self.train_tensor.shape[2],
153                output_dim=self.train_label.shape[1],
154                config=config,
155            ).to(torch.float32)
156        else:
157            raise Exception('unsupported model_type')
158        
159        return model
160
161    def _fit_model(self, model, config=None):
162        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
163            MAX_NUM_EPOCHS = config['max_num_epochs']
164
165            train_dataset = TensorDataset(torch.tensor(self.train_tensor, dtype=torch.float32), torch.tensor(self.train_label, dtype=torch.float32))
166            validate_dataset = TensorDataset(torch.tensor(self.validate_tensor, dtype=torch.float32), torch.tensor(self.validate_label, dtype=torch.float32))
167
168            train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
169            validate_loader = DataLoader(validate_dataset, batch_size=config['batch_size'], shuffle=False)
170
171            optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
172            criterion = torch.nn.CrossEntropyLoss()
173            
174            early_stopper = EarlyStopper(patience=config['early_stop_patience'], min_delta=config['early_stop_min_delta'])
175
176            for _ in range(MAX_NUM_EPOCHS):
177                # training
178                model.train()
179
180                for inputs, labels in train_loader:
181                    output = model(inputs)
182                    loss = criterion(output, labels)
183                    
184                    optimizer.zero_grad()
185                    loss.backward()
186                    optimizer.step()
187                
188                # validation
189                model.eval()
190                validate_loss = 0
191                
192                with torch.no_grad():
193                    for inputs, labels in validate_loader:
194                        output = model(inputs)
195                        validate_loss += criterion(output, labels).item()
196                
197                validate_loss /= len(validate_loader)
198
199                if early_stopper.early_stop(validate_loss):             
200                    break
201
202        else:
203            model.fit(self.train_df, self.full_train_df['label'])
204
205    def predict(self, test: bool=True) -> str:
206        """
207        Performs predictions with the model and returns them.
208
209        Args:
210            test (bool): Whether to perform predictions on test set (`True`) or on validation set (`False`).
211
212        Returns:
213            tuple: A tuple with predicted values and scores for predictions.
214        """
215
216        data = self.test_df if test else self.validate_df
217
218        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
219            data_tensor = torch.tensor(self.test_tensor if test else self.validate_tensor, dtype=torch.float32)
220
221            probabilities = self.model(data_tensor).detach().numpy()
222            predicted = np.argmax(probabilities, axis=1)
223            scores = np.amax(probabilities, axis=1)
224        else:
225            predicted = self.model.predict(data)
226            
227            if hasattr(self.model, 'predict_proba'):
228                scores = self.model.predict_proba(data)[:, 1]
229            else:
230                # Handle the case where predict_proba is not available
231                # For example, this may be the case for SGDClassifier trained with certain losses
232                scores = None
233
234        return predicted, scores
235    
236
237    def save(self, path: str, name: str):
238        """
239        Save the model to the given path.
240
241        Args:
242            path (str): Path to save the model.
243            name (str): Name of the model.
244
245        Returns:
246            str: Path to the saved model.
247        """
248
249        if not os.path.exists(path):
250            os.makedirs(path)
251
252        path_with_name = os.path.join(path, name)
253        
254        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
255            # save pytorch model
256            path_with_name += '.pt'
257            torch.save(self.model.state_dict(), path_with_name)
258        else:
259            # save scikit-learn model
260            path_with_name += '.joblib'
261            import joblib
262            joblib.dump(self.model, path_with_name)
263
264        return path_with_name

A class representing a predictive model.

Arguments:

model_type (Union[ClassificationMethods, RegressionMethods]): Type of predictive model.
train_df (DataFrame): Training data to train model.
validate_df (DataFrame): Validation data to evaluate model.
test_df (DataFrame): Test data to evaluate model.
prefix_length (int): Length of prefix to consider.
hyperopt_space (Optional[dict]): Space to perform hyperparameter optimization on; if not provided, falls back to default values. Defaults to None.
custom_model_class (Optional[Type[Module]]): Class of a custom PyTorch module. Defaults to None.

PredictiveModel( model_type: Union[nirdizati_light.predictive_model.common.ClassificationMethods, nirdizati_light.predictive_model.common.RegressionMethods], train_df: pandas.core.frame.DataFrame, validate_df: pandas.core.frame.DataFrame, test_df: pandas.core.frame.DataFrame, prefix_length: int, hyperopt_space: Optional[dict] = None, custom_model_class: Optional[Type[torch.nn.modules.module.Module]] = None) View Source

43    def __init__(
44        self,
45        model_type: Union[ClassificationMethods, RegressionMethods],
46        train_df: DataFrame,
47        validate_df: DataFrame,
48        test_df: DataFrame,
49        prefix_length: int,
50        hyperopt_space: Optional[dict]=None,
51        custom_model_class: Optional[Type[Module]]=None
52    ):
53        self.model_type = model_type
54        self.config = None
55        self.model = None
56        self.full_train_df = train_df
57        self.train_df = drop_columns(train_df)
58        self.train_df_shaped = None
59        self.full_validate_df = validate_df
60        self.validate_df = drop_columns(validate_df)
61        self.validate_df_shaped = None
62        self.full_test_df = test_df
63        self.test_df = drop_columns(test_df)
64        self.test_df_shaped = None
65
66        self.hyperopt_space = hyperopt_space
67        self.custom_model_class = custom_model_class
68
69        if model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
70            self.train_tensor = get_tensor(self.train_df, prefix_length)
71            self.validate_tensor = get_tensor(self.validate_df, prefix_length)
72            self.test_tensor = get_tensor(self.test_df, prefix_length)
73
74            self.train_label = shape_label_df(self.full_train_df)
75            self.validate_label = shape_label_df(self.full_validate_df)
76            self.test_label = shape_label_df(self.full_test_df)
77
78        elif model_type is ClassificationMethods.MLP.value:
79            self.train_label = self.full_train_df['label'].nunique()
80            self.validate_label = self.full_validate_df['label'].nunique()
81            self.test_label = self.full_test_df['label'].unique()

model_type

config

model

full_train_df

train_df

train_df_shaped

full_validate_df

validate_df

validate_df_shaped

full_test_df

test_df

test_df_shaped

hyperopt_space

custom_model_class

def train_and_evaluate_configuration(self, config, target): View Source

 83    def train_and_evaluate_configuration(self, config, target):
 84        try:
 85            self.model = self._instantiate_model(config)
 86            self._fit_model(self.model, config)
 87            actual = self.full_validate_df['label']
 88            
 89            if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
 90                actual = np.array(actual.to_list())
 91
 92            if self.model_type in [item.value for item in ClassificationMethods]:
 93                predicted, scores = self.predict(test=False)
 94                result = evaluate_classifier(actual, predicted, scores, loss=target)
 95            elif self.model_type in [item.value for item in RegressionMethods]:
 96                predicted = self.model.predict(self.validate_df)
 97                result = evaluate_regressor(actual, predicted, loss=target)
 98            else:
 99                raise Exception('Unsupported model_type')
100
101            return {
102                'status': STATUS_OK,
103                'loss': - result['loss'],  # we are using fmin for hyperopt
104                'exception': None,
105                'config': config,
106                'model': self.model,
107                'result': result,
108            }
109        except Exception as e:
110            return {
111                'status': STATUS_FAIL,
112                'loss': 0,
113                'exception': str(e)
114            }

def predict(self, test: bool = True) -> str: View Source

205    def predict(self, test: bool=True) -> str:
206        """
207        Performs predictions with the model and returns them.
208
209        Args:
210            test (bool): Whether to perform predictions on test set (`True`) or on validation set (`False`).
211
212        Returns:
213            tuple: A tuple with predicted values and scores for predictions.
214        """
215
216        data = self.test_df if test else self.validate_df
217
218        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
219            data_tensor = torch.tensor(self.test_tensor if test else self.validate_tensor, dtype=torch.float32)
220
221            probabilities = self.model(data_tensor).detach().numpy()
222            predicted = np.argmax(probabilities, axis=1)
223            scores = np.amax(probabilities, axis=1)
224        else:
225            predicted = self.model.predict(data)
226            
227            if hasattr(self.model, 'predict_proba'):
228                scores = self.model.predict_proba(data)[:, 1]
229            else:
230                # Handle the case where predict_proba is not available
231                # For example, this may be the case for SGDClassifier trained with certain losses
232                scores = None
233
234        return predicted, scores

Performs predictions with the model and returns them.

Arguments:

test (bool): Whether to perform predictions on test set (True) or on validation set (False).

Returns:

tuple: A tuple with predicted values and scores for predictions.

def save(self, path: str, name: str): View Source

237    def save(self, path: str, name: str):
238        """
239        Save the model to the given path.
240
241        Args:
242            path (str): Path to save the model.
243            name (str): Name of the model.
244
245        Returns:
246            str: Path to the saved model.
247        """
248
249        if not os.path.exists(path):
250            os.makedirs(path)
251
252        path_with_name = os.path.join(path, name)
253        
254        if self.model_type in [ClassificationMethods.LSTM.value, ClassificationMethods.CUSTOM_PYTORCH.value]:
255            # save pytorch model
256            path_with_name += '.pt'
257            torch.save(self.model.state_dict(), path_with_name)
258        else:
259            # save scikit-learn model
260            path_with_name += '.joblib'
261            import joblib
262            joblib.dump(self.model, path_with_name)
263
264        return path_with_name

Save the model to the given path.

Arguments:

path (str): Path to save the model.
name (str): Name of the model.

Returns:

str: Path to the saved model.