Source code for pimkl.evaluation

import numpy as np
from sklearn.metrics import roc_curve, auc
from scipy import interp
from collections import defaultdict
from scipy.stats import rankdata


[docs]def roc_two_classes(y_test, y_score): fpr = dict() tpr = dict() threshold = dict() roc_auc = dict() fpr["class"], tpr["class"], threshold["class"] = roc_curve( y_test[:, 1], y_score[:, 1] ) roc_auc["class"] = auc(fpr["class"], tpr["class"]) return fpr, tpr, threshold, roc_auc
[docs]def roc_multiclass(y_test, y_score, n_classes=None): if n_classes is None: n_classes = y_score.shape[1] fpr = dict() tpr = dict() threshold = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], threshold[i] = roc_curve(y_test[:, i], y_score[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr['micro'], tpr['micro'], threshold['micro'] = roc_curve( y_test.ravel(), y_score.ravel() ) roc_auc['micro'] = auc(fpr['micro'], tpr['micro']) # Compute macro-average ROC curve and ROC area # First aggregate all false positive rates all_fpr = np.unique(np.concatenate([fpr[i] for i in range(n_classes)])) # Then interpolate all ROC curves at this points mean_tpr = np.zeros_like(all_fpr) mean_threshold = np.zeros_like(all_fpr) for i in range(n_classes): mean_tpr += interp(all_fpr, fpr[i], tpr[i]) mean_threshold += interp(all_fpr, fpr[i], threshold[i]) # Finally average it and compute AUC mean_tpr /= n_classes mean_threshold /= n_classes fpr["macro"] = all_fpr tpr["macro"] = mean_tpr threshold["macro"] = mean_threshold roc_auc["macro"] = auc(fpr["macro"], tpr["macro"]) return fpr, tpr, threshold, roc_auc
[docs]def roc_analysis(y_test, y_score): n_classes = y_score.shape[1] if n_classes > 2: return roc_multiclass(y_test, y_score, n_classes) else: return roc_two_classes(y_test, y_score)
[docs]def performances(y_true, y_score): n_classes = y_score.shape[1] y_predicted = np.array( np.apply_along_axis(rankdata, 1, y_score) >= n_classes, dtype=np.int ) tp = defaultdict(int) tn = defaultdict(int) fp = defaultdict(int) fn = defaultdict(int) for a_class in range(n_classes): for sample in range(y_true.shape[0]): if ( y_predicted[sample, a_class] == 1 and y_true[sample, a_class] == 1 ): tp[a_class] += 1. elif ( y_predicted[sample, a_class] == 1 and y_true[sample, a_class] == 0 ): fp[a_class] += 1. elif ( y_predicted[sample, a_class] == 0 and y_true[sample, a_class] == 0 ): tn[a_class] += 1. elif ( y_predicted[sample, a_class] == 0 and y_true[sample, a_class] == 1 ): fn[a_class] += 1. return tp, tn, fp, fn
[docs]def sensitivity(tp, fn): return tp / float(tp + fn)
[docs]def specificity(tn, fp): return tn / float(tn + fp)