nirdizati_light.evaluation.common
from math import sqrt
from typing import Optional

import numpy as np
from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score, accuracy_score, mean_absolute_error, \
    mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import DataFrame


def drop_columns(df: DataFrame) -> DataFrame:
    # Remove the identifier and label columns so that only feature columns remain.
    df = df.drop(['trace_id', 'label'], axis=1)
    return df


def evaluate_classifier(y_true, y_pred, scores, loss: Optional[str] = None) -> dict:
    """
    Evaluate the performance of a classifier using various metrics.

    This function calculates the AUC, F1 score, accuracy, precision, and recall of the
    classifier's predictions. If a metric cannot be computed, its value is set to None.
    Optionally, if a loss metric name is provided, its value is also stored in the
    evaluation dictionary under the key 'loss'.

    Args:
        y_true (1d array-like): The true labels of the data.
        y_pred (1d array-like): The predicted labels by the classifier.
        scores (1d array-like): The score/probability of the positive class.
        loss (Optional[str]): The name of the metric to be treated as a loss (the higher, the better). Defaults to None.

    Returns:
        dict: A dictionary containing the evaluation metrics: 'auc', 'f1_score', 'accuracy', 'precision', 'recall', and optionally 'loss' if specified.
    """
    evaluation = {}

    y_true = [str(el) for el in y_true]
    y_pred = [str(el) for el in y_pred]

    try:
        evaluation.update({'auc': roc_auc_score(y_true, scores)})
    except Exception:
        evaluation.update({'auc': None})
    try:
        evaluation.update({'f1_score': f1_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'f1_score': None})
    try:
        evaluation.update({'accuracy': accuracy_score(y_true, y_pred)})
    except Exception:
        evaluation.update({'accuracy': None})
    try:
        evaluation.update({'precision': precision_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'precision': None})
    try:
        evaluation.update({'recall': recall_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'recall': None})

    if loss is not None:  # the higher the better
        evaluation.update({'loss': evaluation[loss]})
    return evaluation


def evaluate_regressor(y_true, y_pred, loss: Optional[str] = None):
    """
    Evaluate the performance of a regression model.

    This function calculates the Root Mean Square Error (RMSE), Mean Absolute Error (MAE),
    R-squared score (R2), and Mean Absolute Percentage Error (MAPE) of the predictions made
    by a regression model. If a metric cannot be computed, its value is set to None.
    Optionally, if a loss metric name is provided, its negated value is stored in the
    evaluation dictionary under the key 'loss'.

    Args:
        y_true (1d array-like): True labels or actual values.
        y_pred (1d array-like): Predicted labels or values by the regression model.
        loss (Optional[str]): The name of the metric whose negated value is stored under
            the key 'loss'. Defaults to None.

    Returns:
        dict: A dictionary containing the calculated metrics ('rmse', 'mae', 'rscore',
            'mape') with their corresponding values. If a metric cannot be calculated due
            to an exception, its value is set to None. If the 'loss' argument is provided,
            the negated value of that metric is stored under the key 'loss'.
    """
    evaluation = {}

    try:
        evaluation.update({'rmse': sqrt(mean_squared_error(y_true, y_pred))})
    except Exception:
        evaluation.update({'rmse': None})
    try:
        evaluation.update({'mae': mean_absolute_error(y_true, y_pred)})
    except Exception:
        evaluation.update({'mae': None})
    try:
        evaluation.update({'rscore': r2_score(y_true, y_pred)})
    except Exception:
        evaluation.update({'rscore': None})
    try:
        evaluation.update({'mape': _mean_absolute_percentage_error(y_true, y_pred)})
    except Exception:
        evaluation.update({'mape': None})

    if loss is not None:  # the lower the better
        evaluation.update({'loss': -evaluation[loss]})
    return evaluation


def _mean_absolute_percentage_error(y_true, y_pred):
    """Calculate and return the mean absolute percentage error (MAPE).

    :param y_true: true values
    :param y_pred: predicted values
    :return: the MAPE as a percentage, or -1 if any true value is 0 (to avoid division by zero)
    """
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    if 0 in y_true:
        return -1
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


def evaluate_recommender(y_true, y_pred):
    # Compare recommended items with the actual ones using macro-averaged classification
    # metrics; metrics that cannot be computed are set to None.
    evaluation = {}

    y_true = [str(el) for el in y_true]
    y_pred = [str(el) for el in y_pred]

    try:
        evaluation.update({'f1_score': f1_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'f1_score': None})
    try:
        evaluation.update({'accuracy': accuracy_score(y_true, y_pred)})
    except Exception:
        evaluation.update({'accuracy': None})
    try:
        evaluation.update({'precision': precision_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'precision': None})
    try:
        evaluation.update({'recall': recall_score(y_true, y_pred, average='macro')})
    except Exception:
        evaluation.update({'recall': None})

    return evaluation


def evaluate_classifiers(candidates, actual):
    # Evaluate every candidate model on the test set and index the results by model type.
    results = {}
    for candidate in candidates:
        predicted, scores = candidate.predict(test=True)
        result = evaluate_classifier(actual, predicted, scores)
        results[str(candidate.model_type)] = result
    return results


def plot_model_comparison_classification(models_data: dict):
    """
    Plots a comparison of different classification models based on their performance metrics.

    This function takes a dictionary where each key is a model name and its value is another
    dictionary containing performance metrics such as F1 score, accuracy, precision, and recall.
    It shows them as a bubble plot: accuracy on the x-axis, precision on the y-axis, bubble size
    proportional to the F1 score, and colour encoding recall.

    Args:
        models_data (dict): A dictionary where the key is the model name (str) and the value is another
                            dictionary containing metrics. The metrics dictionary should have keys
                            'f1_score', 'accuracy', 'precision', and 'recall', each mapping to a float
                            representing the model's performance on that metric.

    Returns:
        None. This function only displays the comparison plot and does not return anything.
    """
    # Create lists to store data
    model_names = []
    f1_scores = []
    accuracies = []
    precisions = []
    recalls = []

    # Extract data from the input dictionary
    for model, metrics in models_data.items():
        model_names.append(str(model).split('(')[0])
        f1_scores.append(metrics['f1_score'])
        accuracies.append(metrics['accuracy'])
        precisions.append(metrics['precision'])
        recalls.append(metrics['recall'])

    # Create the bubble plot: accuracy vs. precision, bubble size ~ F1 score, colour ~ recall
    plt.figure(figsize=(12, 8))
    plt.scatter(accuracies, precisions, s=[f1 * 3000 for f1 in f1_scores], c=recalls, cmap='viridis', alpha=0.7)

    # Add labels and title
    plt.title('Model Comparison')
    plt.xlabel('Accuracy')
    plt.ylabel('Precision')

    # Add color bar
    cbar = plt.colorbar()
    cbar.set_label('Recall')

    # Add text annotations for model names
    for i, txt in enumerate(model_names):
        plt.annotate(txt, (accuracies[i], precisions[i]), textcoords="offset points", xytext=(0, 10), ha='center')

    # Show plot
    plt.grid(True)
    plt.tight_layout()
    plt.show()
def evaluate_classifier(y_true, y_pred, scores, loss: Optional[str] = None) -> dict:
Evaluate the performance of a classifier using various metrics.
This function calculates the AUC, F1 score, accuracy, precision, and recall of the classifier's predictions. If a metric cannot be computed, its value is set to None. Optionally, if a loss metric name is provided, its value is also stored in the evaluation dictionary under the key 'loss'.
Arguments:
- y_true (1d array-like): The true labels of the data.
- y_pred (1d array-like): The predicted labels by the classifier.
- scores (1d array-like): The score/probability of the positive class.
- loss (Optional[str]): The name of the metric to be treated as a loss (the higher, the better). Defaults to None.
Returns:
dict: A dictionary containing the evaluation metrics: 'auc', 'f1_score', 'accuracy', 'precision', 'recall', and optionally 'loss' if specified.
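A minimal usage sketch with toy labels and scores (labels are converted to strings internally, and any metric that fails to compute is returned as None):

from nirdizati_light.evaluation.common import evaluate_classifier

y_true = ['regular', 'deviant', 'regular', 'regular']
y_pred = ['regular', 'deviant', 'deviant', 'regular']
scores = [0.9, 0.1, 0.4, 0.8]  # score/probability of the positive class

metrics = evaluate_classifier(y_true, y_pred, scores, loss='f1_score')
print(metrics)  # {'auc': ..., 'f1_score': ..., 'accuracy': 0.75, 'precision': ..., 'recall': ..., 'loss': ...}
# metrics['loss'] is a copy of metrics['f1_score'] (higher is better)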
def evaluate_regressor(y_true, y_pred, loss: Optional[str] = None):
Evaluate the performance of a regression model.
This function calculates the Root Mean Square Error (RMSE), Mean Absolute Error (MAE), R-squared score (R2), and Mean Absolute Percentage Error (MAPE) of the predictions made by a regression model. If a loss metric name is provided, its negated value is stored in the evaluation dictionary under the key 'loss'.
Arguments:
- y_true (1d array-like): True labels or actual values.
- y_pred (1d array-like): Predicted labels or values by the regression model.
- loss (Optional[str]): The name of the metric whose negated value is stored under the key 'loss'. Defaults to None.
Returns:
dict: A dictionary containing the calculated metrics ('rmse', 'mae', 'rscore', 'mape') with their corresponding values. If a metric cannot be calculated due to an exception, its value is set to None. If the 'loss' argument is provided, the negated value of that metric is stored under the key 'loss'.
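A minimal usage sketch with toy numeric targets; passing loss='rmse' stores the negated RMSE under 'loss':

from nirdizati_light.evaluation.common import evaluate_regressor

y_true = [3.0, 5.0, 2.5, 7.0]
y_pred = [2.5, 5.0, 3.0, 8.0]

metrics = evaluate_regressor(y_true, y_pred, loss='rmse')
print(metrics['rmse'], metrics['mae'], metrics['rscore'], metrics['mape'])
print(metrics['loss'])  # == -metrics['rmse']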
def evaluate_recommender(y_true, y_pred):
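evaluate_recommender has no docstring; as the module source above shows, it compares recommended items against the actual ones with macro-averaged F1, accuracy, precision, and recall, setting any metric that cannot be computed to None. A minimal sketch with made-up activity names:

from nirdizati_light.evaluation.common import evaluate_recommender

actual = ['submit_claim', 'review', 'approve', 'review']
recommended = ['submit_claim', 'review', 'reject', 'review']

print(evaluate_recommender(actual, recommended))
# accuracy will be 0.75; the macro-averaged scores are computed over all activity labels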
def plot_model_comparison_classification(models_data: dict):
Plots a comparison of different classification models based on their performance metrics.
This function takes a dictionary where each key is a model name and its value is another dictionary containing performance metrics such as F1 score, accuracy, precision, and recall. It shows them as a bubble plot: accuracy on the x-axis, precision on the y-axis, bubble size proportional to the F1 score, and colour encoding recall.
Arguments:
- models_data (dict): A dictionary where the key is the model name (str) and the value is another dictionary containing metrics. The metrics dictionary should have keys 'f1_score', 'accuracy', 'precision', and 'recall', each mapping to a float representing the model's performance on that metric.
Returns:
None. This function only displays the comparison plot and does not return anything.
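The per-model dictionaries returned by evaluate_classifier (or evaluate_classifiers) can be passed in directly; the sketch below uses made-up metric values, and the substring before '(' in each key becomes the label drawn next to the bubble:

from nirdizati_light.evaluation.common import plot_model_comparison_classification

models_data = {
    'RandomForestClassifier(n_estimators=100)': {
        'f1_score': 0.81, 'accuracy': 0.84, 'precision': 0.80, 'recall': 0.83,
    },
    'LogisticRegression()': {
        'f1_score': 0.74, 'accuracy': 0.78, 'precision': 0.73, 'recall': 0.75,
    },
}

plot_model_comparison_classification(models_data)  # opens a matplotlib window with the bubble plot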