nirdizati_light.evaluation.common
from math import sqrt
from typing import Optional

import numpy as np
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score, accuracy_score, mean_absolute_error, \
    mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import DataFrame


def drop_columns(df: DataFrame) -> DataFrame:
    # Remove the identifier and label columns so that only feature columns remain.
    df = df.drop(['trace_id', 'label'], axis=1)
    return df


def evaluate_classifier(y_true, y_pred, scores, loss: Optional[str] = None) -> dict:
    """
    Evaluate the performance of a classifier using various metrics.

    This function calculates the AUC, F1 score, accuracy, precision, and recall of the
    classifier's predictions. If a metric cannot be computed, its value is set to None.
    Optionally, if a loss metric name is provided, its value is also stored in the
    evaluation dictionary under the key 'loss'.

    Args:
        y_true (1d array-like): The true labels of the data.
        y_pred (1d array-like): The predicted labels by the classifier.
        scores (1d array-like): The score/probability of the positive class.
        loss (Optional[str]): The name of the metric to be treated as a loss (the higher, the better). Defaults to None.

    Returns:
        dict: A dictionary containing the evaluation metrics: 'auc', 'f1_score', 'accuracy', 'precision', 'recall', and optionally 'loss' if specified.
    """
    evaluation = {}

    y_true = [str(el) for el in y_true]
    y_pred = [str(el) for el in y_pred]

    try:
        evaluation.update({'auc': roc_auc_score(y_true, scores)})
    except Exception:
        evaluation.update({'auc': None})
    try:
        evaluation.update({'f1_score': f1_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'f1_score': None})
    try:
        evaluation.update({'accuracy': accuracy_score(y_true, y_pred)})
    except Exception:
        evaluation.update({'accuracy': None})
    try:
        evaluation.update({'precision': precision_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'precision': None})
    try:
        evaluation.update({'recall': recall_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'recall': None})

    if loss is not None:  # the higher the better
        evaluation.update({'loss': evaluation[loss]})
    return evaluation


def evaluate_regressor(y_true, y_pred, loss: Optional[str] = None):
    """
    Evaluate the performance of a regression model.

    This function calculates the Root Mean Square Error (RMSE), Mean Absolute Error (MAE),
    R-squared score (R2), and Mean Absolute Percentage Error (MAPE) of the predictions made
    by a regression model. If a metric cannot be computed, its value is set to None.
    Optionally, if a loss metric name is provided, its negated value is stored in the
    evaluation dictionary under the key 'loss'.

    Args:
        y_true (1d array-like): True labels or actual values.
        y_pred (1d array-like): Predicted labels or values by the regression model.
        loss (Optional[str]): The name of the metric whose negated value is stored under
            the key 'loss'. Defaults to None.

    Returns:
        dict: A dictionary containing the calculated metrics ('rmse', 'mae', 'rscore',
            'mape') with their corresponding values. If a metric cannot be calculated due
            to an exception, its value is set to None. If the 'loss' argument is provided,
            the negated value of that metric is stored under the key 'loss'.
    """
    evaluation = {}

    try:
        evaluation.update({'rmse': sqrt(mean_squared_error(y_true, y_pred))})
    except Exception:
        evaluation.update({'rmse': None})
    try:
        evaluation.update({'mae': mean_absolute_error(y_true, y_pred)})
    except Exception:
        evaluation.update({'mae': None})
    try:
        evaluation.update({'rscore': r2_score(y_true, y_pred)})
    except Exception:
        evaluation.update({'rscore': None})
    try:
        evaluation.update({'mape': _mean_absolute_percentage_error(y_true, y_pred)})
    except Exception:
        evaluation.update({'mape': None})

    if loss is not None:  # the lower the better
        evaluation.update({'loss': -evaluation[loss]})
    return evaluation


def _mean_absolute_percentage_error(y_true, y_pred):
    """Calculate and return the mean absolute percentage error (MAPE).

    :param y_true: true values
    :param y_pred: predicted values
    :return: the MAPE as a percentage, or -1 if any true value is 0 (to avoid division by zero)
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    if 0 in y_true:
        return -1
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


def evaluate_recommender(y_true, y_pred):
    # Compare recommended items with the actual ones using macro-averaged classification
    # metrics; metrics that cannot be computed are set to None.
    evaluation = {}

    y_true = [str(el) for el in y_true]
    y_pred = [str(el) for el in y_pred]

    try:
        evaluation.update({'f1_score': f1_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'f1_score': None})
    try:
        evaluation.update({'accuracy': accuracy_score(y_true, y_pred)})
    except Exception:
        evaluation.update({'accuracy': None})
    try:
        evaluation.update({'precision': precision_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'precision': None})
    try:
        evaluation.update({'recall': recall_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'recall': None})

    return evaluation


def evaluate_classifiers(candidates, actual):
    # Evaluate every candidate model on the test set and index the results by model type.
    results = {}
    for candidate in candidates:
        predicted, scores = candidate.predict(test=True)
        result = evaluate_classifier(actual, predicted, scores)
        results[str(candidate.model_type)] = result
    return results


def plot_model_comparison_classification(models_data: dict):
    """
    Plots a comparison of different classification models based on their performance metrics.

    This function takes a dictionary where each key is a model name and its value is another
    dictionary containing performance metrics such as F1 score, accuracy, precision, and recall.
    It shows them as a bubble plot: accuracy on the x-axis, precision on the y-axis, bubble size
    proportional to the F1 score, and colour encoding recall.

    Args:
        models_data (dict): A dictionary where the key is the model name (str) and the value is another
                            dictionary containing metrics. The metrics dictionary should have keys
                            'f1_score', 'accuracy', 'precision', and 'recall', each mapping to a float
                            representing the model's performance on that metric.

    Returns:
        None. This function only displays the comparison plot and does not return anything.
    """
    # Create lists to store data
    model_names = []
    f1_scores = []
    accuracies = []
    precisions = []
    recalls = []

    # Extract data from the input dictionary
    for model, metrics in models_data.items():
        model_names.append(str(model).split('(')[0])
        f1_scores.append(metrics['f1_score'])
        accuracies.append(metrics['accuracy'])
        precisions.append(metrics['precision'])
        recalls.append(metrics['recall'])

    # Create the bubble plot: accuracy vs. precision, bubble size ~ F1 score, colour ~ recall
    plt.figure(figsize=(12, 8))
    plt.scatter(accuracies, precisions, s=[f1 * 3000 for f1 in f1_scores], c=recalls, cmap='viridis', alpha=0.7)

    # Add labels and title
    plt.title('Model Comparison')
    plt.xlabel('Accuracy')
    plt.ylabel('Precision')

    # Add color bar
    cbar = plt.colorbar()
    cbar.set_label('Recall')

    # Add text annotations for model names
    for i, txt in enumerate(model_names):
        plt.annotate(txt, (accuracies[i], precisions[i]), textcoords="offset points", xytext=(0, 10), ha='center')

    # Show plot
    plt.grid(True)
    plt.tight_layout()
    plt.show()
def evaluate_classifier(y_true, y_pred, scores, loss: Optional[str] = None) -> dict:
Evaluate the performance of a classifier using various metrics.
This function calculates the AUC, F1 score, accuracy, precision, and recall of the classifier's predictions. If a metric cannot be computed, its value is set to None. Optionally, if a loss metric name is provided, its value is also stored in the evaluation dictionary under the key 'loss'.
Arguments:
- y_true (1d array-like): The true labels of the data.
- y_pred (1d array-like): The predicted labels by the classifier.
- scores (1d array-like): The score/probability of the positive class.
- loss (Optional[str]): The name of the metric to be treated as a loss (the higher, the better). Defaults to None.
Returns:
dict: A dictionary containing the evaluation metrics: 'auc', 'f1_score', 'accuracy', 'precision', 'recall', and optionally 'loss' if specified.
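A minimal usage sketch with toy labels and scores (labels are converted to strings internally, and any metric that fails to compute is returned as None):

from nirdizati_light.evaluation.common import evaluate_classifier

y_true = ['regular', 'deviant', 'regular', 'regular']
y_pred = ['regular', 'deviant', 'deviant', 'regular']
scores = [0.9, 0.1, 0.4, 0.8]  # score/probability of the positive class

metrics = evaluate_classifier(y_true, y_pred, scores, loss='f1_score')
print(metrics)  # {'auc': ..., 'f1_score': ..., 'accuracy': 0.75, 'precision': ..., 'recall': ..., 'loss': ...}
# metrics['loss'] is a copy of metrics['f1_score'] (higher is better)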
def evaluate_regressor(y_true, y_pred, loss: Optional[str] = None):
Evaluate the performance of a regression model.
This function calculates the Root Mean Square Error (RMSE), Mean Absolute Error (MAE), R-squared score (R2), and Mean Absolute Percentage Error (MAPE) of the predictions made by a regression model. If a loss metric name is provided, its negated value is stored in the evaluation dictionary under the key 'loss'.
Arguments:
- y_true (1d array-like): True labels or actual values.
- y_pred (1d array-like): Predicted labels or values by the regression model.
- loss (Optional[str]): The name of the metric whose negated value is stored under the key 'loss'. Defaults to None.
Returns:
dict: A dictionary containing the calculated metrics ('rmse', 'mae', 'rscore', 'mape') with their corresponding values. If a metric cannot be calculated due to an exception, its value is set to None. If the 'loss' argument is provided, the negated value of that metric is stored under the key 'loss'.
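A minimal usage sketch with toy numeric targets; passing loss='rmse' stores the negated RMSE under 'loss':

from nirdizati_light.evaluation.common import evaluate_regressor

y_true = [3.0, 5.0, 2.5, 7.0]
y_pred = [2.5, 5.0, 3.0, 8.0]

metrics = evaluate_regressor(y_true, y_pred, loss='rmse')
print(metrics['rmse'], metrics['mae'], metrics['rscore'], metrics['mape'])
print(metrics['loss'])  # == -metrics['rmse']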
def evaluate_recommender(y_true, y_pred):
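evaluate_recommender has no docstring; as the module source above shows, it compares recommended items against the actual ones with macro-averaged F1, accuracy, precision, and recall, setting any metric that cannot be computed to None. A minimal sketch with made-up activity names:

from nirdizati_light.evaluation.common import evaluate_recommender

actual = ['submit_claim', 'review', 'approve', 'review']
recommended = ['submit_claim', 'review', 'reject', 'review']

print(evaluate_recommender(actual, recommended))
# accuracy will be 0.75; the macro-averaged scores are computed over all activity labels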
def plot_model_comparison_classification(models_data: dict):
Plots a comparison of different classification models based on their performance metrics.
This function takes a dictionary where each key is a model name and its value is another dictionary containing performance metrics such as F1 score, accuracy, precision, and recall. It shows them as a bubble plot: accuracy on the x-axis, precision on the y-axis, bubble size proportional to the F1 score, and colour encoding recall.
Arguments:
- models_data (dict): A dictionary where the key is the model name (str) and the value is another dictionary containing metrics. The metrics dictionary should have keys 'f1_score', 'accuracy', 'precision', and 'recall', each mapping to a float representing the model's performance on that metric.
Returns:
None. This function only displays the comparison plot and does not return anything.
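The per-model dictionaries returned by evaluate_classifier (or evaluate_classifiers) can be passed in directly; the sketch below uses made-up metric values, and the substring before '(' in each key becomes the label drawn next to the bubble:

from nirdizati_light.evaluation.common import plot_model_comparison_classification

models_data = {
    'RandomForestClassifier(n_estimators=100)': {
        'f1_score': 0.81, 'accuracy': 0.84, 'precision': 0.80, 'recall': 0.83,
    },
    'LogisticRegression()': {
        'f1_score': 0.74, 'accuracy': 0.78, 'precision': 0.73, 'recall': 0.75,
    },
}

plot_model_comparison_classification(models_data)  # opens a matplotlib window with the bubble plot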