[WIP] Updating Paths and Path Assignment in Select Files #177

Open · wants to merge 9 commits into base: master
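A minimal sketch of the pattern this PR applies throughout: replacing hand-built, slash-concatenated paths with os.path.join. The root value below is illustrative, and the directory names simply mirror the existing inputs/outputs layout.

import os

root = "/project"  # illustrative project root

# Old style: string concatenation with hard-coded "/" separators
locations_old = root + "/inputs/locations/"

# New style: os.path.join builds the path with the platform separator
locations_new = os.path.join(root, "inputs", "locations")

print(locations_old)   # /project/inputs/locations/
print(locations_new)   # /project/inputs/locations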
60 changes: 32 additions & 28 deletions deepprofiler/__main__.py
@@ -4,7 +4,6 @@
import click

import deepprofiler.dataset.compression
import deepprofiler.dataset.image_dataset
import deepprofiler.dataset.indexing
import deepprofiler.dataset.illumination_statistics
import deepprofiler.dataset.metadata
@@ -31,41 +30,46 @@
def cli(context, root, config, cores):
dirs = {
"root": root,
"locations": root+"/inputs/locations/", # TODO: use os.path.join()
"config": root+"/inputs/config/",
"images": root+"/inputs/images/",
"metadata": root+"/inputs/metadata/",
"preprocessed": root+"/inputs/preprocessed/",
"pretrained": root+"/inputs/pretrained/",
"intensities": root+"/outputs/intensities/",
"compressed_images": root+"/outputs/compressed/images/",
"compressed_metadata": root+"/outputs/compressed/metadata/",
"training": root+"/outputs/training/",
"checkpoints": root+"/outputs/training/checkpoint/",
"logs": root+"/outputs/training/logs/",
"summaries": root+"/outputs/training/summaries/",
"features": root+"/outputs/features/"
"locations": os.path.join(root, "inputs", "locations"),
"config": os.path.join(root, "inputs", "config"),
"images": os.path.join(root, "inputs", "images"),
"metadata": os.path.join(root, "inputs", "metadata"),
"preprocessed": os.path.join(root, "inputs", "preprocessed"),
"pretrained": os.path.join(root, "inputs", "pretrained"),
"intensities": os.path.join(root, "outputs", "intensities"),
"compressed_images": os.path.join(root, "outputs", "compressed", "images"),
"compressed_metadata": os.path.join(root, "outputs", "compressed", "metadata"),
"training": os.path.join(root, "outputs", "training"),
"checkpoints": os.path.join(root, "outputs", "training", "checkpoint"),
"logs": os.path.join(root, "outputs", "training", "logs"),
"summaries": os.path.join(root, "outputs", "training", "summaries"),
"features": os.path.join(root, "outputs", "features")
}
if config is not None:

context.obj["config"] = {}
context.obj["config"]["paths"] = {}
context.obj["config"]["paths"]["config"] = config
dirs["config"] = os.path.dirname(os.path.abspath(config))
else:
config = dirs["config"] + "/config.json"
config = os.path.join(dirs["config"], "config.json")

context.obj["cores"] = cores

if os.path.isfile(config):
with open(config, "r") as f:
params = json.load(f)
if "paths" in params.keys():
for key, value in dirs.items():
if key not in params["paths"].keys():
params["paths"][key] = dirs[key]
params["paths"][key] = os.path.join(root, dirs[key])
else:
dirs[key] = params["paths"][key]
dirs[key] = os.path.join(root, params["paths"][key])

else:
params["paths"] = dirs
params["paths"]["index"] = params["paths"]["metadata"] + "/index.csv"

params["paths"]["index"] = os.path.join(root, params["paths"]["metadata"], "index.csv")
context.obj["config"] = params
process = deepprofiler.dataset.utils.Parallel(context.obj["config"], numProcs=context.obj["cores"])
context.obj["process"] = process
@@ -106,7 +110,7 @@ def prepare(context):
metadata = deepprofiler.dataset.metadata.read_plates(context.obj["config"]["paths"]["index"]) # reinitialize generator
process.compute(deepprofiler.dataset.compression.compress_plate, metadata)
deepprofiler.dataset.indexing.write_compression_index(context.obj["config"])
context.parent.obj["config"]["paths"]["index"] = context.obj["config"]["paths"]["compressed_metadata"]+"/compressed.csv"
context.parent.obj["config"]["paths"]["index"] = os.path.join(context.obj["config"]["paths"]["compressed_metadata"], "compressed.csv")
print("Compression complete!")


@@ -117,7 +121,7 @@ def prepare(context):
@click.pass_context
def optimize(context, epoch, seed):
if context.parent.obj["config"]["prepare"]["compression"]["implement"]:
context.parent.obj["config"]["paths"]["index"] = context.obj["config"]["paths"]["compressed_metadata"]+"/compressed.csv"
context.parent.obj["config"]["paths"]["index"] = os.path.join(context.obj["config"]["paths"]["compressed_metadata"], "compressed.csv")
context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
metadata = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"])
optim = deepprofiler.learning.optimization.Optimize(context.obj["config"], metadata, epoch, seed)
@@ -131,7 +135,7 @@ def optimize(context, epoch, seed):
@click.pass_context
def train(context, epoch, seed):
if context.parent.obj["config"]["prepare"]["compression"]["implement"]:
context.parent.obj["config"]["paths"]["index"] = context.obj["config"]["paths"]["compressed_metadata"]+"/compressed.csv"
context.parent.obj["config"]["paths"]["index"] = os.path.join(context.obj["config"]["paths"]["compressed_metadata"], "compressed.csv")
context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
metadata = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"])
deepprofiler.learning.training.learn_model(context.obj["config"], metadata, epoch, seed)
@@ -141,30 +145,30 @@ def train(context, epoch, seed):
@cli.command()
@click.pass_context
@click.option("--part",
help="Part of index to process",
default=-1,
help="Part of index to process",
default=-1,
type=click.INT)
def profile(context, part):
if context.parent.obj["config"]["prepare"]["compression"]["implement"]:
context.parent.obj["config"]["paths"]["index"] = context.obj["config"]["paths"]["compressed_metadata"]+"/compressed.csv"
context.parent.obj["config"]["paths"]["index"] = os.path.join(context.obj["config"]["paths"]["compressed_metadata"], "compressed.csv")
context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
config = context.obj["config"]
if part >= 0:
partfile = "index-{0:03d}.csv".format(part)
config["paths"]["index"] = context.obj["config"]["paths"]["index"].replace("index.csv", partfile)
metadata = deepprofiler.dataset.image_dataset.read_dataset(context.obj["config"])
deepprofiler.learning.profiling.profile(context.obj["config"], metadata)


# Auxiliary tool: Split index in multiple parts
@cli.command()
@click.pass_context
@click.option("--parts",
@click.option("--parts",
help="Number of parts to split the index",
type=click.INT)
def split(context, parts):
if context.parent.obj["config"]["prepare"]["compression"]["implement"]:
context.parent.obj["config"]["paths"]["index"] = context.obj["config"]["paths"]["compressed_metadata"]+"/compressed.csv"
context.parent.obj["config"]["paths"]["index"] = os.path.join(context.obj["config"]["paths"]["compressed_metadata"], "compressed.csv")
context.parent.obj["config"]["paths"]["images"] = context.obj["config"]["paths"]["compressed_images"]
deepprofiler.dataset.indexing.split_index(context.obj["config"], parts)

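One behavioral detail of the merge above, shown as a small, self-contained sketch with made-up paths: os.path.join drops every component that precedes an absolute component, so absolute entries supplied in config.json pass through unchanged, while relative entries are anchored at root.

import os

root = "/project"

# Relative entry from config.json: anchored at the project root
print(os.path.join(root, "outputs/training/logs"))
# -> /project/outputs/training/logs

# Absolute entry from config.json: the root component is discarded
print(os.path.join(root, "/data/shared/images"))
# -> /data/shared/images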
22 changes: 11 additions & 11 deletions deepprofiler/dataset/image_dataset.py
@@ -1,3 +1,4 @@
import os
import numpy as np
import pandas as pd

@@ -21,7 +22,7 @@ def __init__(self, metadata, sampling_field, channels, dataRoot, keyGen):

def getImagePaths(self, r):
key = self.keyGen(r)
image = [self.root + "/" + r[ch] for ch in self.channels]
image = [os.path.join(self.root, r[ch]) for ch in self.channels]
outlines = self.outlines
if outlines is not None:
outlines = self.outlines + r["Outlines"]
@@ -115,9 +116,9 @@ def read_dataset(config):
# Add outlines if specified
outlines = None
if "outlines" in config["prepare"].keys() and config["prepare"]["outlines"] != "":
df = pd.read_csv(config["paths"]["metadata"] + "/outlines.csv")
df = pd.read_csv(os.path.join(config["paths"]["metadata"], "outlines.csv"))
metadata.mergeOutlines(df)
outlines = config["paths"]["root"] + "inputs/outlines/"
outlines = os.path.join(config["paths"]["root"], "inputs", "outlines")

print(metadata.data.info())

@@ -128,13 +129,14 @@ def read_dataset(config):
metadata.splitMetadata(trainingFilter, validationFilter)

# Create a dataset
keyGen = lambda r: "{}/{}-{}".format(r["Metadata_Plate"], r["Metadata_Well"], r["Metadata_Site"])
keyGen = lambda r: os.path.join(r["Metadata_Plate"], "{}-{}".format(r["Metadata_Well"], r["Metadata_Site"]))

dset = ImageDataset(
metadata,
config["train"]["sampling"]["field"],
config["dataset"]["images"]["channels"],
config["paths"]["images"],
keyGen
metadata=metadata,
sampling_field=config["train"]["sampling"]["field"],
channels=config["dataset"]["images"]["channels"],
dataRoot=config["paths"]["images"],
keyGen=keyGen
)

# Add training targets
@@ -147,5 +149,3 @@ def read_dataset(config):
dset.outlines = outlines

return dset


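For reference, a tiny sketch of what the revised keyGen above produces for a metadata row, assuming a POSIX separator and made-up plate/well/site values:

import os

def key_gen(r):
    # Mirrors the keyGen lambda in read_dataset: plate directory plus "well-site" file stem
    return os.path.join(r["Metadata_Plate"], "{}-{}".format(r["Metadata_Well"], r["Metadata_Site"]))

row = {"Metadata_Plate": "plate01", "Metadata_Well": "A01", "Metadata_Site": "3"}
print(key_gen(row))  # plate01/A01-3 on POSIX systems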
14 changes: 8 additions & 6 deletions deepprofiler/imaging/boxes.py
@@ -10,12 +10,13 @@

def get_locations(image_key, config, randomize=True, seed=None):
keys = image_key.split("/")
locations_file = "{}/{}-{}.csv".format(
keys[0],
locations_file = os.path.join(keys[0], "{}-{}.csv".format(
keys[1],
config["train"]["sampling"]["locations_field"]
)
locations_path = os.path.join(config["paths"]["locations"], locations_file)
))
locations_path = os.path.join(config["paths"]["root"],
config["paths"]["locations"],
locations_file)
if os.path.exists(locations_path):
locations = pd.read_csv(locations_path)
random_sample = config["train"]["sampling"]["locations"]
@@ -28,11 +29,13 @@ def get_locations(image_key, config, randomize=True, seed=None):
x_key = config["train"]["sampling"]["locations_field"] + "_Location_Center_X"
return pd.DataFrame(columns=[x_key, y_key])


def load_batch(dataset, config):
batch = dataset.getTrainBatch(config["train"]["sampling"]["images"])
batch["locations"] = [ get_locations(x, config) for x in batch["keys"] ]
return batch


def prepare_boxes(batch, config):
locationsBatch = batch["locations"]
image_targets = batch["targets"]
@@ -78,6 +81,5 @@ def prepare_boxes(batch, config):
result = (np.concatenate(all_boxes),
np.concatenate(all_indices),
[np.concatenate(t) for t in all_targets],
np.concatenate(all_masks)
)
np.concatenate(all_masks))
return result
27 changes: 18 additions & 9 deletions deepprofiler/learning/model.py
@@ -76,8 +76,8 @@ def train(self, epoch=1, metrics=["accuracy"], verbose=1):
verbose=verbose,
initial_epoch=epoch - 1,
validation_data=(x_validation, y_validation)
)
)

# Stop threads and close sessions
close(self, crop_session)
# Return the feature model and validation data
@@ -127,10 +127,11 @@ def start_val_session(dpmodel, configuration):
keras.backend.set_session(val_session)
dpmodel.val_crop_generator.start(val_session)
x_validation, y_validation = deepprofiler.learning.validation.validate(
dpmodel.config,
dpmodel.dset,
dpmodel.val_crop_generator,
val_session)
config=dpmodel.config,
dset=dpmodel.dset,
crop_generator=dpmodel.val_crop_generator,
session=val_session
)
gc.collect()
return val_session, x_validation, y_validation

@@ -142,7 +143,9 @@ def start_main_session(configuration):


def load_weights(dpmodel, epoch):
output_file = dpmodel.config["paths"]["checkpoints"] + "/checkpoint_{epoch:04d}.hdf5"
output_file = os.path.join(dpmodel.config["paths"]["root"],
dpmodel.config["paths"]["checkpoints"],
"checkpoint_{epoch:04d}.hdf5")
previous_model = output_file.format(epoch=epoch - 1)
if epoch >= 1 and os.path.isfile(previous_model):
dpmodel.feature_model.load_weights(previous_model)
@@ -153,13 +156,19 @@ def setup_callbacks(dpmodel):


def setup_callbacks(dpmodel):
output_file = dpmodel.config["paths"]["checkpoints"] + "/checkpoint_{epoch:04d}.hdf5"
output_file = os.path.join(dpmodel.config["paths"]["root"],
dpmodel.config["paths"]["checkpoints"],
"checkpoint_{epoch:04d}.hdf5")

callback_model_checkpoint = keras.callbacks.ModelCheckpoint(
filepath=output_file,
save_weights_only=True,
save_best_only=False
)
csv_output = dpmodel.config["paths"]["logs"] + "/log.csv"

csv_output = os.path.join(dpmodel.config["paths"]["root"],
dpmodel.config["paths"]["logs"],
"log.csv")
callback_csv = keras.callbacks.CSVLogger(filename=csv_output)
callbacks = [callback_model_checkpoint, callback_csv]
return callbacks
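A short sketch of the checkpoint-path pattern used in load_weights and setup_callbacks above (the directory name is illustrative): the {epoch:04d} placeholder is plain text as far as os.path.join is concerned, and is only filled in later with str.format.

import os

checkpoint_dir = "/project/outputs/training/checkpoint"  # illustrative

# Join first; the placeholder survives as literal text in the joined path
output_file = os.path.join(checkpoint_dir, "checkpoint_{epoch:04d}.hdf5")

# Resolve a concrete file name when needed
previous_model = output_file.format(epoch=4)
print(previous_model)
# -> /project/outputs/training/checkpoint/checkpoint_0004.hdf5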