This repository has been archived by the owner on Feb 22, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 147
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add data generation, training and prediction code for nodule segmentation
- Loading branch information
Showing
559 changed files
with
536 additions
and
93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[dicom] | ||
path = /images_full | ||
warn = True |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
flake8==3.3.0 | ||
pytest==3.1.3 | ||
pylidc==0.1.8 | ||
pylidc==0.1.9 # Fixes UnicodeDecodeError in to_volume() (https://github.com/pylidc/pylidc/issues/9) |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
prediction/src/algorithms/segment/assets/segmented_lung_patient_LIDC-IDRI-0001.npy
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
prediction/src/algorithms/segment/assets/segmented_lung_patient_LIDC-IDRI-0002.npy
Git LFS file not shown
3 changes: 3 additions & 0 deletions
3
prediction/src/algorithms/segment/assets/segmented_lung_patient_LIDC-IDRI-0003.npy
Git LFS file not shown
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import glob | ||
import os | ||
|
||
import numpy as np | ||
import pylidc as pl | ||
from config import Config | ||
|
||
|
||
def prepare_training_data(in_docker=True):
    """Save a boolean nodule mask for every LIDC scan matched by the DICOM wildcard.

    For each path matched by ``Config.DICOM_PATHS_DOCKER_WILDCARD`` the LIDC patient ID is taken from the path, the
    corresponding pylidc scan is queried, and a boolean volume with the same shape as the scan volume is saved to
    ``<Config.SEGMENT_ASSETS_DIR>/segmented_lung_patient_{LIDC-ID}.npy``. A voxel is True if it lies inside at least
    one annotation that an expert rated with at least intermediate malignancy (rating >= 3).

    Patients whose mask file already exists are skipped; paths for which no scan is found are reported and skipped.

    Args:
        in_docker: whether this method is invoked from within docker or from the prediction directory. It only
            selects which path component is read as the LIDC ID (index 2 inside docker, index 5 otherwise).
    """
    INTERMEDIATE_MALIGNANCY = 3
    assets_dir = Config.SEGMENT_ASSETS_DIR
    # NOTE(review): the docker wildcard is used regardless of `in_docker` -- confirm this is intended.
    dicom_wildcard = Config.DICOM_PATHS_DOCKER_WILDCARD
    lidc_id_path_index = 2 if in_docker else 5

    for path in sorted(glob.glob(dicom_wildcard)):
        lidc_id = path.split(os.path.sep)[lidc_id_path_index]

        # np.save() appends ".npy" when it is missing, so the extension must be part of the name that is checked;
        # otherwise the existence test never matches the file that was written.
        lung_patient_file = os.path.join(assets_dir, "segmented_lung_patient_{}.npy".format(lidc_id))
        if os.path.isfile(lung_patient_file):
            continue

        # Compute and save binary mask with information whether pixel is cancerous
        scan = pl.query(pl.Scan).filter(pl.Scan.patient_id == lidc_id).first()
        if scan is None:
            print("Scan for path '{}' was not found".format(path))
            continue
        vol = scan.to_volume(verbose=False)  # Leading zeros have to be removed from the DICOM file names

        # mask_vol accumulates the union of all qualifying annotations of the scan.
        # The builtin `bool` replaces the `np.bool` alias, which was removed in NumPy 1.24.
        mask_vol = np.zeros(vol.shape, dtype=bool)

        # Load DICOM files and obtain z-coords for each slice, so we can index into them.
        dicoms = scan.load_all_dicom_images(verbose=False)
        zs = [float(img.ImagePositionPatient[2]) for img in dicoms]

        cancerous_annotations = pl.query(pl.Annotation).filter(pl.Annotation.malignancy >= INTERMEDIATE_MALIGNANCY,
                                                               pl.Annotation.scan_id == scan.id).all()

        for annotation in cancerous_annotations:
            mask, bbox = annotation.get_boolean_mask(return_bbox=True)

            # Obtain indexes of `mask` into `mask_vol` (builtin `int` replaces the removed `np.int` alias)
            i1, i2 = bbox[0].astype(int)
            j1, j2 = bbox[1].astype(int)

            # The z-extent of the bounding box is looked up in the slice positions; list.index() raises
            # ValueError if a bound does not exactly match a slice position.
            k1 = zs.index(bbox[2, 0])
            k2 = zs.index(bbox[2, 1])

            # In case the area already was segmented, don't overwrite it but add the annotated segmentation
            annotation_area = np.index_exp[i1:i2 + 1, j1:j2 + 1, k1:k2 + 1]
            mask_vol[annotation_area] = np.logical_or(mask, mask_vol[annotation_area])

        np.save(lung_patient_file, mask_vol)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
import numpy as np | ||
from keras import backend as K | ||
from keras.engine import Input, Model | ||
from keras.layers import Conv3D, MaxPooling3D, UpSampling3D, Activation | ||
from keras.layers.merge import concatenate | ||
from keras.optimizers import Adam | ||
|
||
|
||
def simple_model_3d(input_shape, downsize_filters_factor=32, pool_size=(2, 2, 2), n_labels=1,
                    initial_learning_rate=0.01):
    """
    Builds a small 3D convolutional model with one pooling and one upsampling stage.
    The network is: 3x3x3 convolution -> max pooling -> 3x3x3 convolution -> upsampling -> 1x1x1 convolution ->
    sigmoid. It is compiled with the Adam optimizer, the negative Dice coefficient as loss and the Dice coefficient
    as metric.
    :param input_shape: Shape of the input data (x_size, y_size, z_size, n_channels).
    :param downsize_filters_factor: Divisor applied to the base filter counts (32 and 64). Making this value larger
    reduces the number of filters of the two 3x3x3 convolutions.
    :param pool_size: Pool size for the max pooling operation; the same size is used for the upsampling.
    :param n_labels: Number of filters of the final 1x1x1 convolution, i.e. number of output channels.
    :param initial_learning_rate: Learning rate handed to the Adam optimizer.
    :return: Compiled, untrained simple 3D Model
    """
    kernel = (3, 3, 3)
    conv_options = dict(activation='relu', padding='same')

    inputs = Input(input_shape)

    # Layers are created in the same order as they are applied.
    features = Conv3D(int(32 / downsize_filters_factor), kernel, **conv_options)(inputs)
    features = MaxPooling3D(pool_size=pool_size)(features)
    features = Conv3D(int(64 / downsize_filters_factor), kernel, **conv_options)(features)
    features = UpSampling3D(size=pool_size)(features)
    logits = Conv3D(n_labels, (1, 1, 1))(features)
    outputs = Activation('sigmoid')(logits)

    network = Model(inputs=inputs, outputs=outputs)
    network.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coef_loss, metrics=[dice_coef])
    return network
|
||
|
||
def unet_model_3d(input_shape, downsize_filters_factor=1, pool_size=(2, 2, 2), n_labels=1,
                  initial_learning_rate=0.01, deconvolution=False):
    """
    Builds the 3D U-Net Keras model.
    The [U-Net](https://arxiv.org/abs/1505.04597) uses a fully-convolutional architecture consisting of an encoder and
    a decoder. The encoder is able to capture contextual information while the decoder enables precise localization.
    Due to the large amount of parameters, the input shape has to be small since for e.g. images of shape 144x144x144
    the model already consumes 32 GB of memory.
    The model has three pooling stages and three up-convolution stages with skip connections that are concatenated
    along axis 4. It is compiled with the Adam optimizer, the negative Dice coefficient as loss and the Dice
    coefficient as metric.
    :param input_shape: Shape of the input data (x_size, y_size, z_size, n_channels).
    :param downsize_filters_factor: Factor to which to reduce the number of filters. Making this value larger will
    reduce the amount of memory the model will need during training.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Learning rate handed to the Adam optimizer.
    :param deconvolution: If set to True, will use transpose convolution (deconvolution) instead of upsampling. This
    increases the amount of memory required during training.
    :return: Compiled, untrained 3D UNet Model
    """
    def scaled(base_filters):
        # Number of filters after applying the memory-saving divisor.
        return int(base_filters / downsize_filters_factor)

    def conv(base_filters, tensor):
        # 3x3x3 ReLU convolution that keeps the spatial size.
        return Conv3D(scaled(base_filters), (3, 3, 3), activation='relu', padding='same')(tensor)

    def upconv(depth, base_filters, tensor):
        # NOTE(review): skip connections are concatenated on axis 4 (channels last), while `input_shape[-3:]` and
        # compute_level_output_shape() look channels-first -- verify before using deconvolution=True.
        return get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=depth,
                          nb_filters=scaled(base_filters), image_shape=input_shape[-3:])(tensor)

    inputs = Input(input_shape)

    # Encoder: two convolutions per level, followed by max pooling.
    conv1 = conv(64, conv(32, inputs))
    pool1 = MaxPooling3D(pool_size=pool_size)(conv1)

    conv2 = conv(128, conv(64, pool1))
    pool2 = MaxPooling3D(pool_size=pool_size)(conv2)

    conv3 = conv(256, conv(128, pool2))
    pool3 = MaxPooling3D(pool_size=pool_size)(conv3)

    # Bottleneck
    conv4 = conv(512, conv(256, pool3))

    # Decoder: upsample, concatenate with the encoder output of the same level, then two convolutions.
    up5 = concatenate([upconv(2, 512, conv4), conv3], axis=4)
    conv5 = conv(256, conv(256, up5))

    up6 = concatenate([upconv(1, 256, conv5), conv2], axis=4)
    conv6 = conv(128, conv(128, up6))

    up7 = concatenate([upconv(0, 128, conv6), conv1], axis=4)
    conv7 = conv(64, conv(64, up7))

    # One sigmoid output channel per label.
    conv8 = Conv3D(n_labels, (1, 1, 1))(conv7)
    act = Activation('sigmoid')(conv8)
    model = Model(inputs=inputs, outputs=act)

    model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coef_loss, metrics=[dice_coef])

    return model
|
||
|
||
def dice_coef(y_true, y_pred, smooth=1.):
    """
    Computes the smoothed Dice coefficient over all elements of both tensors.
    :param y_true: Tensor with the ground truth values.
    :param y_pred: Tensor with the predicted values.
    :param smooth: Constant added to numerator and denominator.
    :return: (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)
    """
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(prediction) + smooth
    return numerator / denominator
|
||
|
||
def dice_coef_loss(y_true, y_pred):
    """
    Loss derived from the Dice coefficient: the negated value of dice_coef().
    :param y_true: Tensor with the ground truth values.
    :param y_pred: Tensor with the predicted values.
    :return: Negative Dice coefficient of both tensors.
    """
    coefficient = dice_coef(y_true, y_pred)
    return -coefficient
|
||
|
||
def compute_level_output_shape(filters, depth, pool_size, image_shape):
    """
    Each level has a particular output shape based on the number of filters used in that level and the depth or number
    of max pooling operations that have been done on the data at that point.
    Every pooling operation divides each image dimension by the matching pool size, so after `depth` poolings the
    dimension is divided by `pool_size ** depth`.
    :param image_shape: shape of the 3d image.
    :param pool_size: the pool_size parameter used in the max pooling operation.
    :param filters: Number of filters used by the last node in a given level.
    :param depth: The number of levels down in the U-shaped model a given node is.
    :return: 5D vector of the shape of the output node: (None, filters, dim_1, dim_2, dim_3)
    """
    # The reduction compounds per level, hence a power and not a product; for depth 0 the factor is 1.
    reduction = np.power(pool_size, depth)
    output_image_shape = np.divide(image_shape, reduction).tolist()
    return tuple([None, filters] + [int(x) for x in output_image_shape])
|
||
|
||
def get_upconv(depth, nb_filters, pool_size, image_shape, kernel_size=(2, 2, 2), strides=(2, 2, 2),
               deconvolution=False):
    """
    Creates the layer that enlarges the feature maps in the decoder.
    :param depth: Level of the node the layer outputs to; forwarded to compute_level_output_shape().
    :param nb_filters: Number of filters of the transpose convolution (only used if deconvolution is True).
    :param pool_size: Size of the upsampling; also forwarded to compute_level_output_shape().
    :param image_shape: Shape of the 3d image, forwarded to compute_level_output_shape().
    :param kernel_size: Kernel size of the transpose convolution (only used if deconvolution is True).
    :param strides: Strides of the transpose convolution (only used if deconvolution is True).
    :param deconvolution: If True a keras_contrib Deconvolution3D layer is returned, otherwise an UpSampling3D layer.
    :return: Layer instance that has not been applied to a tensor yet.
    :raises ImportError: If deconvolution is requested but keras_contrib is not installed.
    """
    if not deconvolution:
        return UpSampling3D(size=pool_size)

    # keras_contrib is optional and therefore only imported when it is really needed.
    try:
        from keras_contrib.layers import Deconvolution3D
    except ImportError:
        raise ImportError("Install keras_contrib in order to use deconvolution. Otherwise set deconvolution=False.")

    shape_options = dict(filters=nb_filters, pool_size=pool_size, image_shape=image_shape)
    # The layer maps from one level deeper (depth + 1) up to the requested level (depth).
    target_shape = compute_level_output_shape(depth=depth, **shape_options)
    source_shape = compute_level_output_shape(depth=depth + 1, **shape_options)

    return Deconvolution3D(filters=nb_filters, kernel_size=kernel_size, output_shape=target_shape,
                           strides=strides, input_shape=source_shape)
Oops, something went wrong.