From 606af9dabf52f68500a48620764efd9d14dadb7a Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 20 May 2021 12:26:46 -0400 Subject: [PATCH 1/4] set device as cuda only if gpu available --- prediction/tumor_pred/pred.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prediction/tumor_pred/pred.py b/prediction/tumor_pred/pred.py index f014b7d..174b17b 100644 --- a/prediction/tumor_pred/pred.py +++ b/prediction/tumor_pred/pred.py @@ -38,7 +38,7 @@ sigma = [0.1120, 0.1459, 0.1089] -device = torch.device("cuda") +device = "cuda" if torch.cuda.is_available() else "cpu" data_aug = transforms.Compose([ transforms.Scale(PS), transforms.ToTensor(), @@ -200,7 +200,7 @@ def auc_roc(Pr, Tr): print("| Load pretrained at %s..." % old_model) -checkpoint = torch.load(old_model) +checkpoint = torch.load(old_model, map_location=device) model = PreActResNet34(1) model.load_state_dict(checkpoint['net']) model.to(device) From 959c44de7a839fbc77b14564f347953c2b1ba31a Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 20 May 2021 12:30:27 -0400 Subject: [PATCH 2/4] remove unused code --- prediction/tumor_pred/pred.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/prediction/tumor_pred/pred.py b/prediction/tumor_pred/pred.py index 174b17b..9b39148 100644 --- a/prediction/tumor_pred/pred.py +++ b/prediction/tumor_pred/pred.py @@ -1,22 +1,12 @@ import os import torch -from torchvision import models, transforms +from torchvision import transforms from PIL import Image import numpy as np -import torch.nn as nn from torch.autograd import Variable -from torch.utils.data import DataLoader, Dataset import time -import argparse -from torch.optim import lr_scheduler -import copy import torch.nn.parallel -import torch.optim as optim -#import data_aug as DA -from sklearn.metrics import mean_squared_error, accuracy_score, hamming_loss, roc_curve, auc, f1_score import sys -import torch.backends.cudnn as cudnn -import time import torch.nn.functional as F from model_paad import PreActResNet34 @@ -130,12 +120,6 @@ def load_data(todo_list, rind): return todo_list[lind:], X, inds, coor, rind; -def from_output_to_pred(output): - pred = np.copy(output); - pred = (pred >= 0.5).astype(np.int32); - return pred; - - def val_fn_epoch_on_disk(classn, val_fn): all_or = np.zeros(shape=(500000, classn), dtype=np.float32); all_inds = np.zeros(shape=(500000,), dtype=np.int32); @@ -183,16 +167,6 @@ def val_fn_epoch_on_disk(classn, val_fn): all_coor = all_coor[:n3]; return all_or, all_inds, all_coor; -def confusion_matrix(Or, Tr, thres): - tpos = np.sum((Or>=thres) * (Tr==1)); - tneg = np.sum((Or< thres) * (Tr==0)); - fpos = np.sum((Or>=thres) * (Tr==0)); - fneg = np.sum((Or< thres) * (Tr==1)); - return tpos, tneg, fpos, fneg; - -def auc_roc(Pr, Tr): - fpr, tpr, _ = roc_curve(Tr, Pr, pos_label=1.0); - return auc(fpr, tpr); # load model print('start predicting...') From 35ca4774e296caf34d94bef6c4c93eb7d3aaaffc Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 20 May 2021 12:32:19 -0400 Subject: [PATCH 3/4] format code with black formatter --- prediction/tumor_pred/pred.py | 196 ++++++++++++++++++---------------- 1 file changed, 106 insertions(+), 90 deletions(-) diff --git a/prediction/tumor_pred/pred.py b/prediction/tumor_pred/pred.py index 9b39148..3b0a2ad 100644 --- a/prediction/tumor_pred/pred.py +++ b/prediction/tumor_pred/pred.py @@ -12,31 +12,36 @@ from model_paad import PreActResNet34 from PIL import ImageFile + ImageFile.LOAD_TRUNCATED_IMAGES = True -APS = 350; +APS = 350 PS = 224 -TileFolder = sys.argv[1] + '/'; +TileFolder = sys.argv[1] + "/" -BatchSize = 96; +BatchSize = 96 -heat_map_out = sys.argv[3]; -old_model = sys.argv[4]; +heat_map_out = sys.argv[3] +old_model = sys.argv[4] mu = [0.7238, 0.5716, 0.6779] sigma = [0.1120, 0.1459, 0.1089] device = "cuda" if torch.cuda.is_available() else "cpu" -data_aug = transforms.Compose([ - transforms.Scale(PS), - transforms.ToTensor(), - transforms.Normalize(mu, sigma)]) +data_aug = transforms.Compose( + [transforms.Scale(PS), transforms.ToTensor(), transforms.Normalize(mu, sigma)] +) + def whiteness(png): - wh = (np.std(png[:,:,0].flatten()) + np.std(png[:,:,1].flatten()) + np.std(png[:,:,2].flatten())) / 3.0; - return wh; + wh = ( + np.std(png[:, :, 0].flatten()) + + np.std(png[:, :, 1].flatten()) + + np.std(png[:, :, 2].flatten()) + ) / 3.0 + return wh def softmax_np(x): @@ -44,159 +49,170 @@ def softmax_np(x): x = np.exp(x) / (np.sum(np.exp(x), 1, keepdims=True)) return x + def iterate_minibatches(inputs, augs, targets): if inputs.shape[0] <= BatchSize: - yield inputs, augs, targets; - return; + yield inputs, augs, targets + return - start_idx = 0; + start_idx = 0 for start_idx in range(0, len(inputs) - BatchSize + 1, BatchSize): - excerpt = slice(start_idx, start_idx + BatchSize); - yield inputs[excerpt], augs[excerpt], targets[excerpt]; + excerpt = slice(start_idx, start_idx + BatchSize) + yield inputs[excerpt], augs[excerpt], targets[excerpt] if start_idx < len(inputs) - BatchSize: - excerpt = slice(start_idx + BatchSize, len(inputs)); - yield inputs[excerpt], augs[excerpt], targets[excerpt]; + excerpt = slice(start_idx + BatchSize, len(inputs)) + yield inputs[excerpt], augs[excerpt], targets[excerpt] def load_data(todo_list, rind): - X = torch.zeros(size=(BatchSize*40, 3, PS, PS)); - inds = np.zeros(shape=(BatchSize*40,), dtype=np.int32); - coor = np.zeros(shape=(200000, 2), dtype=np.int32); + X = torch.zeros(size=(BatchSize * 40, 3, PS, PS)) + inds = np.zeros(shape=(BatchSize * 40,), dtype=np.int32) + coor = np.zeros(shape=(200000, 2), dtype=np.int32) - normalized = False # change this to true if dont have images normalized and normalize on the fly + # TODO: the comment below doesn't seem to be correct... + # change this to true if dont have images normalized and normalize on the fly + normalized = False parts = 4 if normalized: parts = 4 - xind = 0; - lind = 0; - cind = 0; + xind = 0 + lind = 0 + cind = 0 for fn in todo_list: - lind += 1; - full_fn = TileFolder + '/' + fn; + lind += 1 + full_fn = TileFolder + "/" + fn if not os.path.isfile(full_fn): - continue; - if (len(fn.split('_')) != parts) or ('.png' not in fn): - continue; + continue + if (len(fn.split("_")) != parts) or (".png" not in fn): + continue try: - x_off = float(fn.split('_')[0]); - y_off = float(fn.split('_')[1]); - svs_pw = float(fn.split('_')[2]); - png_pw = float(fn.split('_')[3].split('.png')[0]); + x_off = float(fn.split("_")[0]) + y_off = float(fn.split("_")[1]) + svs_pw = float(fn.split("_")[2]) + png_pw = float(fn.split("_")[3].split(".png")[0]) except: - print('error reading image') + print("error reading image") continue - png = np.array(Image.open(full_fn).convert('RGB')); + png = np.array(Image.open(full_fn).convert("RGB")) for x in range(0, png.shape[1], APS): if x + APS > png.shape[1]: - continue; + continue for y in range(0, png.shape[0], APS): if y + APS > png.shape[0]: - continue; + continue - if (whiteness(png[y:y+APS, x:x+APS, :]) >= 12): - a = png[y:y + APS, x:x + APS, :] - a = Image.fromarray(a.astype('uint8'), 'RGB') + if whiteness(png[y : y + APS, x : x + APS, :]) >= 12: + a = png[y : y + APS, x : x + APS, :] + a = Image.fromarray(a.astype("uint8"), "RGB") a = data_aug(a) X[xind, :, :, :] = a inds[xind] = rind xind += 1 - coor[cind, 0] = np.int32(x_off + (x + APS/2) * svs_pw / png_pw); - coor[cind, 1] = np.int32(y_off + (y + APS/2) * svs_pw / png_pw); + coor[cind, 0] = np.int32(x_off + (x + APS / 2) * svs_pw / png_pw) + coor[cind, 1] = np.int32(y_off + (y + APS / 2) * svs_pw / png_pw) - cind += 1; - rind += 1; - if rind % 100 == 0: print('Processed: ', rind) + cind += 1 + rind += 1 + if rind % 100 == 0: + print("Processed: ", rind) if xind >= BatchSize: - break; + break - X = X[0:xind]; - inds = inds[0:xind]; - coor = coor[0:cind]; + X = X[0:xind] + inds = inds[0:xind] + coor = coor[0:cind] - return todo_list[lind:], X, inds, coor, rind; + return todo_list[lind:], X, inds, coor, rind def val_fn_epoch_on_disk(classn, val_fn): - all_or = np.zeros(shape=(500000, classn), dtype=np.float32); - all_inds = np.zeros(shape=(500000,), dtype=np.int32); - all_coor = np.zeros(shape=(500000, 2), dtype=np.int32); - rind = 0; - n1 = 0; - n2 = 0; - n3 = 0; - todo_list = os.listdir(TileFolder); + all_or = np.zeros(shape=(500000, classn), dtype=np.float32) + all_inds = np.zeros(shape=(500000,), dtype=np.int32) + all_coor = np.zeros(shape=(500000, 2), dtype=np.int32) + rind = 0 + n1 = 0 + n2 = 0 + n3 = 0 + todo_list = os.listdir(TileFolder) processed = 0 total = len(todo_list) start = time.time() coor_c = 0 while len(todo_list) > 0: - todo_list, inputs, inds, coor, rind = load_data(todo_list, rind); + todo_list, inputs, inds, coor, rind = load_data(todo_list, rind) coor_c += len(coor) - #if len(inputs) == 0: + # if len(inputs) == 0: # print('len of inputs is 0"') # break; if inputs.size(0) < 2: - print('len of inputs if less than 2') + print("len of inputs if less than 2") else: processed = total - len(todo_list) - print('Processed: {}/{} \t Time Remaining: {}mins'.format(processed, total, (time.time() - start)/60*(total/processed - 1))) + print( + "Processed: {}/{} \t Time Remaining: {}mins".format( + processed, + total, + (time.time() - start) / 60 * (total / processed - 1), + ) + ) with torch.no_grad(): inputs = Variable(inputs.to(device)) output = val_fn(inputs) output = F.sigmoid(output) - output = output.data.cpu().numpy() - print('size of output: ', output.shape) + output = output.data.cpu().numpy() # TODO: the comment below doesn't seem to be correct... + + print("size of output: ", output.shape) # output = softmax_np(output)[:, 1] - all_or[n1:n1+len(output)] = output.reshape(-1,1) + all_or[n1 : n1 + len(output)] = output.reshape(-1, 1) n1 += len(output) - all_inds[n2:n2+len(inds)] = inds; - n2 += len(inds); + all_inds[n2 : n2 + len(inds)] = inds + n2 += len(inds) - all_coor[n3:n3+len(coor)] = coor; - n3 += len(coor); + all_coor[n3 : n3 + len(coor)] = coor + n3 += len(coor) - all_or = all_or[:n1]; - all_inds = all_inds[:n2]; - all_coor = all_coor[:n3]; - return all_or, all_inds, all_coor; + all_or = all_or[:n1] + all_inds = all_inds[:n2] + all_coor = all_coor[:n3] + return all_or, all_inds, all_coor # load model -print('start predicting...') +print("start predicting...") start = time.time() print("| Load pretrained at %s..." % old_model) checkpoint = torch.load(old_model, map_location=device) model = PreActResNet34(1) -model.load_state_dict(checkpoint['net']) +model.load_state_dict(checkpoint["net"]) model.to(device) model.eval() -best_auc = checkpoint['acc'] -print('previous best AUC: {:.4f} at epoch: {}'.format(best_auc, checkpoint['epoch'])) -print('=============================================') +best_auc = checkpoint["acc"] +print("previous best AUC: {:.4f} at epoch: {}".format(best_auc, checkpoint["epoch"])) +print("=============================================") -Or, inds, coor = val_fn_epoch_on_disk(1, model); -Or_all = np.zeros(shape=(coor.shape[0],), dtype=np.float32); -Or_all[inds] = Or[:, 0]; +Or, inds, coor = val_fn_epoch_on_disk(1, model) +Or_all = np.zeros(shape=(coor.shape[0],), dtype=np.float32) +Or_all[inds] = Or[:, 0] -print('len of all coor: ', coor.shape) -print('shape of Or: ', Or.shape) -print('shape of inds: ', inds.shape) +print("len of all coor: ", coor.shape) +print("shape of Or: ", Or.shape) +print("shape of inds: ", inds.shape) -fid = open(TileFolder + '/' + heat_map_out, 'w'); +fid = open(TileFolder + "/" + heat_map_out, "w") for idx in range(0, Or_all.shape[0]): - fid.write('{} {} {}\n'.format(coor[idx][0], coor[idx][1], Or_all[idx])) + fid.write("{} {} {}\n".format(coor[idx][0], coor[idx][1], Or_all[idx])) -fid.close(); +fid.close() -print('Elapsed Time: ', (time.time() - start)/60.0) -print('DONE!'); +print("Elapsed Time: ", (time.time() - start) / 60.0) +print("DONE!") From ae47752fdfcce749deb02824af26486727d61294 Mon Sep 17 00:00:00 2001 From: Jakub Kaczmarzyk Date: Thu, 20 May 2021 12:37:19 -0400 Subject: [PATCH 4/4] rm bare exception + refactor whiteness fn --- prediction/tumor_pred/pred.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/prediction/tumor_pred/pred.py b/prediction/tumor_pred/pred.py index 3b0a2ad..34502ca 100644 --- a/prediction/tumor_pred/pred.py +++ b/prediction/tumor_pred/pred.py @@ -36,12 +36,8 @@ def whiteness(png): - wh = ( - np.std(png[:, :, 0].flatten()) - + np.std(png[:, :, 1].flatten()) - + np.std(png[:, :, 2].flatten()) - ) / 3.0 - return wh + """Return the mean standard deviation across channels.""" + return np.std(png, axis=(0, 1)).mean() def softmax_np(x): @@ -92,7 +88,7 @@ def load_data(todo_list, rind): y_off = float(fn.split("_")[1]) svs_pw = float(fn.split("_")[2]) png_pw = float(fn.split("_")[3].split(".png")[0]) - except: + except Exception: print("error reading image") continue @@ -165,7 +161,7 @@ def val_fn_epoch_on_disk(classn, val_fn): output = val_fn(inputs) output = F.sigmoid(output) - output = output.data.cpu().numpy() # TODO: the comment below doesn't seem to be correct... + output = output.data.cpu().numpy() print("size of output: ", output.shape)