-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevaluation.py
77 lines (61 loc) · 3.29 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Import required modules
import json
import pandas as pd
import os
import numpy as np
def _build_confusion_matrix(labels, true_coverage, n_labels):
    """Build an (n_labels + 1, n_labels + 1) confusion matrix for segments
    that may carry multiple labels simultaneously.

    The extra last row/column holds totals: column -1 is the ground-truth
    coverage (number of segments) per true class, row -1 is the total number
    of predictions per predicted class.

    labels        : 2-D array of predictions, one column per class.
    true_coverage : 2-D binary array of ground truth, same column order.
    n_labels      : number of classes (noise assumed to be the last one).
    """
    confusion_matrix = np.zeros((n_labels + 1, n_labels + 1), dtype=int)
    for true_label in range(n_labels):
        for predicted_label in range(n_labels):
            # Off-diagonal cell: predictions of `predicted_label` on segments
            # that truly contain `true_label` but NOT `predicted_label`.
            mask = (true_coverage[:, true_label] == 1) & (true_coverage[:, predicted_label] != 1)
            confusion_matrix[true_label, predicted_label] = labels[mask, predicted_label].sum()
        # Diagonal: correct detections of the class on its own segments.
        confusion_matrix[true_label, true_label] = labels[true_coverage[:, true_label] == 1, true_label].sum()
        # Totals column/row (see docstring).
        confusion_matrix[true_label, -1] = true_coverage[:, true_label].sum()
        confusion_matrix[-1, true_label] = labels[:, true_label].sum()
    return confusion_matrix


def _compute_metrics(confusion_matrix, n_labels):
    """Compute (TCR, NMR, CMR, F) from the confusion matrix.

    Noise is assumed to be the last of the n_labels classes, so only the
    first n_labels - 1 (non-noise) classes contribute to TCR and CMR.
    Returns numpy scalars.
    """
    dia = np.diag(confusion_matrix)
    # Drop the noise class and the totals row/column, then keep only the
    # off-diagonal (cross-class confusion) entries, one row per true class.
    core = confusion_matrix[0:-2, 0:-2]
    non_dia = core[~np.eye(core.shape[0], dtype=bool)]
    non_dia = non_dia.reshape([(n_labels - 1), int(len(non_dia) / (n_labels - 1))])
    # Per-class true-coverage rates, noise-misclassification counts, and
    # cross-misclassification rates.
    TCRs = []
    NMRs = []
    CMRs = []
    for classes in range(n_labels - 1):
        TCRs.append(dia[classes] / confusion_matrix[classes, -1])
        NMRs.append(confusion_matrix[-2, classes])
        for cols in range(np.shape(non_dia)[1]):
            CMRs.append(non_dia[classes, cols] / confusion_matrix[classes, -1])
    TCR = np.average(TCRs)
    NMR = sum(NMRs) / confusion_matrix[-2, -1]
    CMR = np.average(CMRs)
    # F averages TCR with the complements of NMR (weighted twice) and CMR.
    F = np.average([TCR, (1 - NMR), (1 - NMR), (1 - CMR)])
    return TCR, NMR, CMR, F


def main():
    """Interactively evaluate multi-label predictions against ground truth.

    Asks for four paths (predictions CSV, ground-truth CSV, label-list JSON,
    output folder), builds a confusion matrix, derives TCR/NMR/CMR/F metrics,
    and writes the matrix to a CSV whose file name encodes the metrics.
    """
    # Output file from test with the noise class listed in the last column
    raw_detections_path = input('Path to predictions (csv file)')
    # Ground truth file with the noise class listed in the last column
    test_annots_path = input('Path to ground truth (csv)')
    # Label list file where the order of labels is according to the columns in
    # the predictions and ground truth, with noise being the last label.
    label_list_path = input('Path to list of classes (json)')
    # Output path for results file
    output_path = input('Where should we store the results (folder)?')

    # Read list of labels (fix: `with` closes the handle; the original leaked
    # an open file), then predictions and true coverage as numpy arrays.
    with open(label_list_path) as f:
        label_list = json.load(f)
    labels = pd.read_csv(raw_detections_path, header=0, index_col=0).to_numpy()
    true_coverage = pd.read_csv(test_annots_path, header=0, index_col=0).to_numpy()

    confusion_matrix = _build_confusion_matrix(labels, true_coverage, len(label_list))
    TCR, NMR, CMR, F = _compute_metrics(confusion_matrix, len(label_list))

    # Save confusion matrix to CSV, with the evaluation metrics rounded to
    # two decimals and embedded in the file name. The double splitext strips
    # a possible double extension (e.g. "model.predictions.csv").
    model_predictions = os.path.splitext(os.path.splitext(os.path.basename(raw_detections_path))[0])[0]
    DF = pd.DataFrame(confusion_matrix)
    DF.to_csv(
        os.path.join(output_path, '.'.join([''.join([model_predictions, '_confusion', '_NMR=', NMR.round(2).astype(str),
                                                     '_CMR=', CMR.round(2).astype(str), '_TCR=',
                                                     TCR.round(2).astype(str), '_F=', F.round(2).astype(str)]),
                                            'csv'])))
# Run the interactive evaluation only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()