Skip to content

Commit

Permalink
training
Browse files Browse the repository at this point in the history
  • Loading branch information
chezhia committed Aug 18, 2019
1 parent 4660bda commit 1a8c9d5
Show file tree
Hide file tree
Showing 147 changed files with 8,094 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Ignore data directory
preprocessed/

# Ignore output directory
output/

# Ignore .txt files
*.txt
14 changes: 14 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Build on the Jupyter minimal-notebook image (unpinned: always the latest tag).
FROM jupyter/minimal-notebook:latest

# Copy the project sources into the notebook user's home, owned by UID 1000 / GID 100.
# NOTE(review): $NB_USER is not set in this file; presumably the base image defines it.
COPY --chown=1000:100 src /home/$NB_USER/src

# Put the dependency list next to the sources (no --chown here, unlike the src copy).
COPY requirements.txt /home/$NB_USER/src/requirements.txt

# Copy the preproc.args file into the user's home directory.
COPY preproc.args /home/$NB_USER/preproc.args

# Install the Python dependencies; pip's network timeout is set to 60 seconds.
RUN pip install --default-timeout=60 -r /home/$NB_USER/src/requirements.txt

# Make the user's home directory the working directory.
WORKDIR /home/$NB_USER/

# Port exposure is currently disabled.
#EXPOSE 80

Empty file added T2_Masks/__init__.py
Empty file.
20 changes: 20 additions & 0 deletions T2_Masks/create_hdf5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os
import glob

from T2_masks.train_2 import config, fetch_training_data_files
from unet3d.data import write_data_to_file, open_data_file

# Override entries of the config dict imported from train_2 for this script.
# HDF5 file that main() writes (relative to the current working directory).
config["data_file"] = 'T2_25pts_resize.h5'
# Only a single modality, "T2", is listed.
config["all_modalities"] = ["T2"]
config["training_modalities"] = config["all_modalities"] # change this if you want to only use some of the modalities
# One channel per training modality.
config["nb_channels"] = len(config["training_modalities"])
def main(overwrite=True):
    """Convert the input images into the HDF5 file named in ``config``.

    The file at ``config["data_file"]`` is written when ``overwrite`` is
    truthy or when no file exists at that path yet; otherwise the call does
    nothing.
    """
    if not overwrite and os.path.exists(config["data_file"]):
        # An existing file is left untouched when overwriting is disabled.
        return

    files_and_ids = fetch_training_data_files(return_subject_ids=True)
    training_files, subject_ids = files_and_ids
    write_data_to_file(
        training_files,
        config["data_file"],
        image_shape=config["image_shape"],
        subject_ids=subject_ids,
    )


if __name__ == "__main__":
    main()
25 changes: 25 additions & 0 deletions T2_Masks/create_pickle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

import os
import glob

from T2_masks.train_2 import config, fetch_training_data_files
from unet3d.data import write_data_to_file, open_data_file
from unet3d.generator import get_validation_split

# Paths handed to get_validation_split as training_file / validation_file in main().
config["training_file"] = os.path.abspath("T2_debug_Train_ids.pkl")
# NOTE(review): this name starts with "T1" while the other files here use "T2" - confirm it is intended.
config["validation_file"] = os.path.abspath("T1_debug_Test_ids.pkl")
# Passed to get_validation_split as data_split in main().
config["validation_split"] = 0.80
config["data_file"] = os.path.abspath("T2_25pts_resize.h5")
# Opened at import time, before main() runs.
# NOTE(review): nothing in this file closes this handle - confirm that is acceptable.
data_file_opened = open_data_file(config["data_file"])
def main(overwrite=True):
    """Split the opened data file into training and validation lists.

    Calls ``get_validation_split`` on the module-level ``data_file_opened``
    handle with the split ratio and pickle paths from ``config``, forwarding
    ``overwrite`` unchanged, then prints both resulting lists.
    """
    split_options = dict(
        data_split=config["validation_split"],
        overwrite=overwrite,
        training_file=config["training_file"],
        validation_file=config["validation_file"],
    )
    split = get_validation_split(data_file_opened, **split_options)
    training_list, validation_list = split
    print('validation list is ', validation_list)
    print('Training list is ', training_list)


if __name__ == "__main__":
    main()
60 changes: 60 additions & 0 deletions T2_Masks/evaluate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import numpy as np
import nibabel as nib
import os
import glob
import pandas as pd
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt


def get_liver_mask(data):
    """Return the element-wise result of ``data > 0``."""
    mask = data > 0
    return mask

def dice_coefficient(truth, prediction):
    """Return twice the overlap of the two masks divided by their summed sizes.

    The overlap is the sum of the element-wise product of ``truth`` and
    ``prediction``; the denominator is ``sum(truth) + sum(prediction)``.
    """
    overlap = np.sum(truth * prediction)
    combined_size = np.sum(truth) + np.sum(prediction)
    return 2 * overlap / combined_size

def _load_volume(path):
    """Return the voxel array stored in the NIfTI file at ``path``."""
    # ``np.asanyarray(img.dataobj)`` is nibabel's documented replacement for
    # ``get_data()``, which is deprecated and raises in recent releases.
    return np.asanyarray(nib.load(path).dataobj)


def main():
    """Score every predicted case with the Dice coefficient and plot results.

    For each sub-directory of ``prediction_NewTest_3DUnet_isense_overlap16``
    the ``truth.nii.gz`` and ``prediction.nii.gz`` volumes are loaded, masked
    with ``get_liver_mask`` and compared with ``dice_coefficient``. The
    per-subject scores are written to ``Dice_scores.csv`` inside that
    directory and drawn as a box plot. When ``./training_1GPU.log`` exists,
    its ``loss`` and ``val_loss`` columns are plotted as well.
    """
    header = ("DSC",)
    masking_functions = (get_liver_mask,)
    rows = []
    subject_ids = []
    for case_folder in glob.glob("prediction_NewTest_3DUnet_isense_overlap16/*"):
        if not os.path.isdir(case_folder):
            continue
        subject_ids.append(os.path.basename(case_folder))
        truth = _load_volume(os.path.join(case_folder, "truth.nii.gz"))
        prediction = _load_volume(os.path.join(case_folder, "prediction.nii.gz"))
        rows.append([dice_coefficient(func(truth), func(prediction))
                     for func in masking_functions])

    df = pd.DataFrame.from_records(rows, columns=header, index=subject_ids)
    df.to_csv("./prediction_NewTest_3DUnet_isense_overlap16/Dice_scores.csv")

    # Drop NaN scores before plotting; dice_coefficient yields NaN when both
    # masks are empty.
    scores = {}
    for index, score in enumerate(df.columns):
        values = df.values.T[index]
        scores[score] = values[~np.isnan(values)]

    plt.boxplot(list(scores.values()), labels=list(scores.keys()))
    plt.ylabel("Dice Coefficient")
    plt.savefig("validation_scores_boxplot_NewTest_581model.png")
    plt.close()

    if os.path.exists("./training_1GPU.log"):
        training_df = pd.read_csv("./training_1GPU.log").set_index('epoch')

        plt.plot(training_df['loss'].values, label='training loss')
        plt.plot(training_df['val_loss'].values, label='validation loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.xlim((0, len(training_df.index)))
        plt.legend(loc='upper right')
        plt.savefig('prediction_T1_581model.png')
        # Release the figure, as is done for the box plot.
        plt.close()


if __name__ == "__main__":
    main()
107 changes: 107 additions & 0 deletions T2_Masks/evaluate_2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import numpy as np
import nibabel as nib
import os
import glob
import pandas as pd
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt


def get_liver_mask(data):
    """Return a mask that is true wherever ``data`` is strictly positive."""
    return 0 < data

def dice_coefficient(truth, prediction):
    """Compute ``2 * sum(truth * prediction) / (sum(truth) + sum(prediction))``."""
    numerator = 2 * np.sum(truth * prediction)
    denominator = np.sum(truth) + np.sum(prediction)
    return numerator / denominator


def read_excel(filename, sheet):
    """Load one sheet of an Excel workbook into a DataFrame.

    Args:
        filename: Passed to ``pandas.read_excel`` as ``io``.
        sheet: Passed to ``pandas.read_excel`` as ``sheet_name``.

    Returns:
        The loaded ``pandas.DataFrame``; its first five rows are also printed.
    """
    # Read from the ``filename`` parameter; referring to any other name here
    # would raise NameError on every call.
    df = pd.read_excel(io=filename, sheet_name=sheet)
    print(df.head(5))  # print first 5 rows of the dataframe
    return df

def _read_keyed_csv(name):
    """Read the CSV file ``name`` and index it by its ``Key`` column."""
    return pd.read_csv(os.path.abspath(name)).set_index('Key')


def _load_volume(path):
    """Return the voxel array stored in the NIfTI file at ``path``."""
    # ``np.asanyarray(img.dataobj)`` is nibabel's documented replacement for
    # ``get_data()``, which is deprecated and raises in recent releases.
    return np.asanyarray(nib.load(path).dataobj)


def main():
    """Score the predicted cases and join the scores with the metadata CSVs.

    Dice scores are computed for every sub-directory of
    ``prediction_test2_LS200`` and then inner-joined, on the index, with
    ``mri_reports.csv``, ``diagnoses.csv`` and ``imagedetails.csv`` (each
    indexed by its ``Key`` column). The joined table is written to
    ``Dice_scores_test2.csv`` in the prediction directory and the scores are
    drawn as a box plot. When ``./training.log`` exists, its ``loss`` and
    ``val_loss`` columns are plotted as well.
    """
    header = ("DSC",)
    masking_functions = (get_liver_mask,)
    rows = []
    subject_ids = []

    df_disease = _read_keyed_csv("diagnoses.csv")
    df_mri = _read_keyed_csv("mri_reports.csv")
    df_details = _read_keyed_csv("imagedetails.csv")

    for case_folder in glob.glob("prediction_test2_LS200/*"):
        if not os.path.isdir(case_folder):
            continue
        subject_ids.append(os.path.basename(case_folder))
        truth = _load_volume(os.path.join(case_folder, "truth.nii.gz"))
        prediction = _load_volume(os.path.join(case_folder, "prediction.nii.gz"))
        rows.append([dice_coefficient(func(truth), func(prediction))
                     for func in masking_functions])

    df = pd.DataFrame.from_records(rows, columns=header, index=subject_ids)
    print('Index of df:', df.index,'\n \n')

    print('Index of mri: ',df_mri.index,'\n \n')

    # NOTE(review): the inner joins match folder names (strings) against the
    # CSV 'Key' values; if pandas parses 'Key' as integers the joins may come
    # out empty - confirm against the data.
    # Join MRI report
    df_i = pd.concat([df, df_mri], axis=1, join='inner')

    # Join Diagnoses
    df_id = pd.concat([df_i, df_disease], axis=1, join='inner')

    # Join Image Details
    df_idd = pd.concat([df_id, df_details], axis=1, join='inner')

    print(df_idd.index,'\n \n')
    print(df_idd.head())

    df_idd.to_csv("./prediction_test2_LS200/Dice_scores_test2.csv")

    # Drop NaN scores before plotting; dice_coefficient yields NaN when both
    # masks are empty.
    scores = {}
    for index, score in enumerate(df.columns):
        values = df.values.T[index]
        scores[score] = values[~np.isnan(values)]

    plt.boxplot(list(scores.values()), labels=list(scores.keys()))
    plt.ylabel("Dice Coefficient")
    plt.savefig("validation_scores_boxplot_test2.png")
    plt.close()

    if os.path.exists("./training.log"):
        training_df = pd.read_csv("./training.log").set_index('epoch')

        plt.plot(training_df['loss'].values, label='training loss')
        plt.plot(training_df['val_loss'].values, label='validation loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.xlim((0, len(training_df.index)))
        plt.legend(loc='upper right')
        plt.savefig('loss_graph_test1_train2.png')
        # Release the figure, as is done for the box plot.
        plt.close()


if __name__ == "__main__":
    main()
61 changes: 61 additions & 0 deletions T2_Masks/evaluate_3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import numpy as np
import nibabel as nib
import os
import glob
import pandas as pd
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt


def get_liver_mask(data):
    """Compare ``data`` against zero and return the ``data > 0`` result."""
    is_positive = data > 0
    return is_positive

def dice_coefficient(truth, prediction):
    """Return the Dice ratio of two masks.

    That is ``2 * sum(truth * prediction)`` divided by
    ``sum(truth) + sum(prediction)``.
    """
    shared = np.sum(truth * prediction)
    truth_size = np.sum(truth)
    prediction_size = np.sum(prediction)
    return 2 * shared / (truth_size + prediction_size)

def main():
    """Score every predicted case with the Dice coefficient and plot results.

    For each sub-directory of ``prediction_NewTest_3DUnet_isense_40slices``
    the ``truth.nii.gz`` and ``prediction.nii.gz`` volumes are loaded, masked
    with ``get_liver_mask`` and compared with ``dice_coefficient``. The
    per-subject scores are written to ``Dice_scores.csv`` inside that
    directory and drawn as a box plot. When ``./training_1GPU.log`` exists,
    its ``loss`` and ``val_loss`` columns are plotted as well.
    """
    header = ("DSC",)
    masking_functions = (get_liver_mask,)
    rows = []
    subject_ids = []
    for case_folder in glob.glob("prediction_NewTest_3DUnet_isense_40slices/*"):
        if not os.path.isdir(case_folder):
            continue
        subject_ids.append(os.path.basename(case_folder))
        # ``np.asanyarray(img.dataobj)`` is nibabel's documented replacement
        # for ``get_data()``, which is deprecated and raises in recent
        # releases.
        truth_image = nib.load(os.path.join(case_folder, "truth.nii.gz"))
        truth = np.asanyarray(truth_image.dataobj)
        #truth = truth[:,:,4:36]
        prediction_image = nib.load(os.path.join(case_folder, "prediction.nii.gz"))
        prediction = np.asanyarray(prediction_image.dataobj)
        rows.append([dice_coefficient(func(truth), func(prediction))
                     for func in masking_functions])

    df = pd.DataFrame.from_records(rows, columns=header, index=subject_ids)
    df.to_csv("./prediction_NewTest_3DUnet_isense_40slices/Dice_scores.csv")

    # Drop NaN scores before plotting; dice_coefficient yields NaN when both
    # masks are empty.
    scores = {}
    for index, score in enumerate(df.columns):
        values = df.values.T[index]
        scores[score] = values[~np.isnan(values)]

    plt.boxplot(list(scores.values()), labels=list(scores.keys()))
    plt.ylabel("Dice Coefficient")
    plt.savefig("validation_scores_NewTest_3DUnet_isense_40slices.png")
    plt.close()

    if os.path.exists("./training_1GPU.log"):
        training_df = pd.read_csv("./training_1GPU.log").set_index('epoch')

        plt.plot(training_df['loss'].values, label='training loss')
        plt.plot(training_df['val_loss'].values, label='validation loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.xlim((0, len(training_df.index)))
        plt.legend(loc='upper right')
        plt.savefig('prediction__NewTest_3DUnet_isense_40slices.png')
        # Release the figure, as is done for the box plot.
        plt.close()


if __name__ == "__main__":
    main()
67 changes: 67 additions & 0 deletions T2_Masks/join_hdf5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import os
import glob

from T2_masks.train_2 import config, fetch_training_data_files
from unet3d.data import write_data_to_file, open_data_file
import tables as tb

# Join 2 hdf5 files
# Input files
config["data_file1"] = 'T2_data_200_resize.h5'
config["data_file2"] = 'Test_data_381pts_resize.h5'
# Output file
config["out_file"] = 'T2_581pts_resize.h5'

# All three files are opened at import time, before main() runs.
data_file1 = open_data_file(config["data_file1"])
data_file2 = open_data_file(config["data_file2"])
# Opened with mode 'w' (write), so merely importing this module creates or replaces the output file.
out_file = tb.open_file(config["out_file"], 'w')

def _print_nodes(h5_file):
    """Print every node yielded by iterating over ``h5_file``."""
    for node in h5_file:
        print(node)


def main(overwrite=True):
    """Merge the two input HDF5 files into the output file.

    The ``subject_ids`` arrays of both inputs are concatenated into a new
    array in the output file. The ``data``, ``truth`` and ``affine`` nodes of
    the first input are copied over and the matching nodes of the second
    input are appended to them.

    All three module-level file handles are closed when this function
    returns, whether it succeeds or raises, so it can only be run once per
    process.

    Args:
        overwrite: Accepted for interface compatibility; currently unused.
    """
    try:
        _print_nodes(data_file1)
        _print_nodes(data_file2)

        # Copy Subject_ids array first
        x = data_file1.root.subject_ids
        y = data_file2.root.subject_ids
        z = out_file.create_array('/', 'subject_ids', atom=x.atom,
                                  shape=(x.nrows + y.nrows,))
        z[:x.nrows] = x[:]
        z[x.nrows:] = y[:]
        print('After Copying Arrays in data_file 1')
        _print_nodes(out_file)

        # Copy source 1 to the new file
        z_data = data_file1.copy_node('/', name='data', newparent=out_file.root, newname='data')
        z_truth = data_file1.copy_node('/', name='truth', newparent=out_file.root, newname='truth')
        z_affine = data_file1.copy_node('/', name='affine', newparent=out_file.root, newname='affine')
        print('After Copying Earrays in data_file 1')
        _print_nodes(out_file)

        # Append source 2 to the new file
        z_data.append(data_file2.root.data[:])
        z_truth.append(data_file2.root.truth[:])
        z_affine.append(data_file2.root.affine[:])
        print('After Copying Earrays in data_file 2')
        _print_nodes(out_file)
        print('HDF5 files merged')
    finally:
        # Close the output first so merged data is flushed, then release the
        # input handles, which would otherwise stay open.
        for handle in (out_file, data_file1, data_file2):
            handle.close()


if __name__ == "__main__":
    main()
21 changes: 21 additions & 0 deletions T2_Masks/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import os

from T2_masks.train_2 import config
from unet3d.prediction import run_validation_cases

# Override entries of the config dict imported from train_2 with absolute
# paths resolved against the current working directory; main() passes all
# three to run_validation_cases.
config["validation_file"] = os.path.abspath("NewTest_ids.pkl")
config["data_file"] = os.path.abspath("NewTest_data_151pts.h5")
config["model_file"] = os.path.abspath("liver_segmentation_model_581_resize_1GPU.h5")
def main():
    """Call ``run_validation_cases`` with the settings held in ``config``.

    The validation keys file, model file, training modalities, labels and
    HDF5 data file are taken from ``config``. ``output_label_map`` is enabled
    and ``output_dir`` is the absolute path of ``prediction_NewTest_581Model``.
    """
    output_dir = os.path.abspath("prediction_NewTest_581Model")
    call_arguments = {
        "validation_keys_file": config["validation_file"],
        "model_file": config["model_file"],
        "training_modalities": config["training_modalities"],
        "labels": config["labels"],
        "hdf5_file": config["data_file"],
        "output_label_map": True,
        "output_dir": output_dir,
    }
    run_validation_cases(**call_arguments)


if __name__ == "__main__":
    main()
Loading

0 comments on commit 1a8c9d5

Please sign in to comment.